perf record: Implement -z,--compression_level[=<n>] option
Implemented -z,--compression_level[=<n>] option that enables compression of mmaped kernel data buffers content in runtime during perf record mode collection. Default option value is 1 (fastest compression). Compression overhead has been measured for serial and AIO streaming when profiling matrix multiplication workload: ------------------------------------------------------------- | SERIAL | AIO-1 | ----------------------------------------------------------------| |-z | OVH(x) | ratio(x) size(MiB) | OVH(x) | ratio(x) size(MiB) | |---------------------------------------------------------------| | 0 | 1,00 | 1,000 179,424 | 1,00 | 1,000 187,527 | | 1 | 1,04 | 8,427 181,148 | 1,01 | 8,474 188,562 | | 2 | 1,07 | 8,055 186,953 | 1,03 | 7,912 191,773 | | 3 | 1,04 | 8,283 181,908 | 1,03 | 8,220 191,078 | | 5 | 1,09 | 8,101 187,705 | 1,05 | 7,780 190,065 | | 8 | 1,05 | 9,217 179,191 | 1,12 | 6,111 193,024 | ----------------------------------------------------------------- OVH = (Execution time with -z N) / (Execution time with -z 0) ratio - compression ratio size - number of bytes that was compressed size ~= trace size x ratio Committer notes: Testing it I noticed that it failed to disable build id processing when compression is enabled, and as we'd have to uncompress everything to look for the PERF_RECORD_{MMAP,SAMPLE,etc} to figure out which build ids to read from DSOs, we better disable build id processing when compression is enabled, logging with pr_debug() when doing so: Original patch: # perf record -z2 ^C[ perf record: Woken up 1 times to write data ] 0x1746e0 [0x76]: failed to process type: 81 [Invalid argument] [ perf record: Captured and wrote 1.568 MB perf.data, compressed (original 0.452 MB, ratio is 3.995) ] # After auto-disabling build id processing when compression is enabled: $ perf record -z2 sleep 1 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.001 MB perf.data, compressed (original 0.001 MB, ratio is 2.292) ] $ perf record -v -z2 sleep 1 Compression enabled, disabling build id collection at the end of the session. <SNIP extra -v pr_debug() messages> [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.001 MB perf.data, compressed (original 0.001 MB, ratio is 2.305) ] $ Also, with parts of the patch originally after this one moved to just before this one we get: $ perf record -z2 sleep 1 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.001 MB perf.data, compressed (original 0.001 MB, ratio is 2.371) ] $ perf report -D | grep COMPRESS 0 0x1b8 [0x155]: PERF_RECORD_COMPRESSED: unhandled! 0 0x30d [0x80]: PERF_RECORD_COMPRESSED: unhandled! COMPRESSED events: 2 COMPRESSED events: 0 $ I.e. when faced with PERF_RECORD_COMPRESSED that we still have no code to process, we just show it as not being handled, skip them and continue, while before we had: $ perf report -D | grep COMPRESS 0x1b8 [0x169]: failed to process type: 81 [Invalid argument] Error: failed to process sample 0 0x1b8 [0x169]: PERF_RECORD_COMPRESSED $ Signed-off-by: Alexey Budankov <alexey.budankov@linux.intel.com> Reviewed-by: Jiri Olsa <jolsa@kernel.org> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> Cc: Andi Kleen <ak@linux.intel.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Link: http://lkml.kernel.org/r/9ff06518-ae63-a908-e44d-5d9e56dd66d9@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
This commit is contained in:
Родитель
61a7773ca8
Коммит
504c1ad116
|
@ -478,6 +478,11 @@ Also at some cases executing less output write syscalls with bigger data size
|
|||
can take less time than executing more output write syscalls with smaller data
|
||||
size thus lowering runtime profiling overhead.
|
||||
|
||||
-z::
|
||||
--compression-level[=n]::
|
||||
Produce compressed trace using specified level n (default: 1 - fastest compression,
|
||||
22 - smallest trace)
|
||||
|
||||
--all-kernel::
|
||||
Configure all used events to run in kernel space.
|
||||
|
||||
|
|
|
@ -443,6 +443,25 @@ static int record__mmap_flush_parse(const struct option *opt,
|
|||
return 0;
|
||||
}
|
||||
|
||||
#ifdef HAVE_ZSTD_SUPPORT
|
||||
static unsigned int comp_level_default = 1;
|
||||
|
||||
static int record__parse_comp_level(const struct option *opt, const char *str, int unset)
|
||||
{
|
||||
struct record_opts *opts = opt->value;
|
||||
|
||||
if (unset) {
|
||||
opts->comp_level = 0;
|
||||
} else {
|
||||
if (str)
|
||||
opts->comp_level = strtol(str, NULL, 0);
|
||||
if (!opts->comp_level)
|
||||
opts->comp_level = comp_level_default;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
static unsigned int comp_level_max = 22;
|
||||
|
||||
static int record__comp_enabled(struct record *rec)
|
||||
|
@ -2200,6 +2219,11 @@ static struct option __record_options[] = {
|
|||
OPT_CALLBACK(0, "affinity", &record.opts, "node|cpu",
|
||||
"Set affinity mask of trace reading thread to NUMA node cpu mask or cpu of processed mmap buffer",
|
||||
record__parse_affinity),
|
||||
#ifdef HAVE_ZSTD_SUPPORT
|
||||
OPT_CALLBACK_OPTARG('z', "compression-level", &record.opts, &comp_level_default,
|
||||
"n", "Compressed records using specified level (default: 1 - fastest compression, 22 - greatest compression)",
|
||||
record__parse_comp_level),
|
||||
#endif
|
||||
OPT_END()
|
||||
};
|
||||
|
||||
|
@ -2259,6 +2283,12 @@ int cmd_record(int argc, const char **argv)
|
|||
"cgroup monitoring only available in system-wide mode");
|
||||
|
||||
}
|
||||
|
||||
if (rec->opts.comp_level != 0) {
|
||||
pr_debug("Compression enabled, disabling build id collection at the end of the session.\n");
|
||||
rec->no_buildid = true;
|
||||
}
|
||||
|
||||
if (rec->opts.record_switch_events &&
|
||||
!perf_can_record_switch_events()) {
|
||||
ui__error("kernel does not support recording context switch events\n");
|
||||
|
|
Загрузка…
Ссылка в новой задаче