perf kwork: Implement perf kwork latency
Implements framework of perf kwork latency, which is used to report time properties such as delay time and frequency. Test cases: # perf kwork lat -h Usage: perf kwork latency [<options>] -C, --cpu <cpu> list of cpus to profile -i, --input <file> input file name -n, --name <name> event name to profile -s, --sort <key[,key2...]> sort by key(s): avg, max, count --time <str> Time span for analysis (start,stop) # perf kwork lat -C 199 Requested CPU 199 too large. Consider raising MAX_NR_CPUS Invalid cpu bitmap # perf kwork lat -i perf_no_exist.data failed to open perf_no_exist.data: No such file or directory # perf kwork lat -s avg1 Error: Unknown --sort key: `avg1' Usage: perf kwork latency [<options>] -C, --cpu <cpu> list of cpus to profile -i, --input <file> input file name -n, --name <name> event name to profile -s, --sort <key[,key2...]> sort by key(s): avg, max, count --time <str> Time span for analysis (start,stop) # perf kwork lat --time FFFF, Invalid time span # perf kwork lat Kwork Name | Cpu | Avg delay | Count | Max delay | Max delay start | Max delay end | -------------------------------------------------------------------------------------------------------------------------------- -------------------------------------------------------------------------------------------------------------------------------- INFO: 36.570% skipped events (31537 including 0 raise, 31537 entry, 0 exit) Since there are no latency-enabled events, the output is empty. Signed-off-by: Yang Jihong <yangjihong1@huawei.com> Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> Cc: Ingo Molnar <mingo@redhat.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Mark Rutland <mark.rutland@arm.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Paul Clarke <pc@us.ibm.com> Cc: Peter Zijlstra <peterz@infradead.org> Link: https://lore.kernel.org/r/20220709015033.38326-11-yangjihong1@huawei.com [ Add {} for multiline if blocks ] Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
This commit is contained in:
Родитель
8dbc3c8689
Коммит
ad3d9f7a92
|
@ -19,9 +19,12 @@ There are several variants of 'perf kwork':
|
|||
|
||||
'perf kwork report' to report the per kwork runtime.
|
||||
|
||||
'perf kwork latency' to report the per kwork latencies.
|
||||
|
||||
Example usage:
|
||||
perf kwork record -- sleep 1
|
||||
perf kwork report
|
||||
perf kwork latency
|
||||
|
||||
OPTIONS
|
||||
-------
|
||||
|
@ -71,6 +74,32 @@ OPTIONS for 'perf kwork report'
|
|||
stop time is not given (i.e, time string is 'x.y,') then analysis goes
|
||||
to end of file.
|
||||
|
||||
OPTIONS for 'perf kwork latency'
|
||||
----------------------------
|
||||
|
||||
-C::
|
||||
--cpu::
|
||||
Only show events for the given CPU(s) (comma separated list).
|
||||
|
||||
-i::
|
||||
--input::
|
||||
Input file name. (default: perf.data unless stdin is a fifo)
|
||||
|
||||
-n::
|
||||
--name::
|
||||
Only show events for the given name.
|
||||
|
||||
-s::
|
||||
--sort::
|
||||
Sort by key(s): avg, max, count
|
||||
|
||||
--time::
|
||||
Only analyze samples within given time window: <start>,<stop>. Times
|
||||
have the format seconds.microseconds. If start is not given (i.e., time
|
||||
string is ',x.y') then analysis starts at the beginning of the file. If
|
||||
stop time is not given (i.e, time string is 'x.y,') then analysis goes
|
||||
to end of file.
|
||||
|
||||
SEE ALSO
|
||||
--------
|
||||
linkperf:perf-record[1]
|
||||
|
|
|
@ -31,12 +31,14 @@
|
|||
#define PRINT_CPU_WIDTH 4
|
||||
#define PRINT_COUNT_WIDTH 9
|
||||
#define PRINT_RUNTIME_WIDTH 10
|
||||
#define PRINT_LATENCY_WIDTH 10
|
||||
#define PRINT_TIMESTAMP_WIDTH 17
|
||||
#define PRINT_KWORK_NAME_WIDTH 30
|
||||
#define RPINT_DECIMAL_WIDTH 3
|
||||
#define PRINT_TIME_UNIT_SEC_WIDTH 2
|
||||
#define PRINT_TIME_UNIT_MESC_WIDTH 3
|
||||
#define PRINT_RUNTIME_HEADER_WIDTH (PRINT_RUNTIME_WIDTH + PRINT_TIME_UNIT_MESC_WIDTH)
|
||||
#define PRINT_LATENCY_HEADER_WIDTH (PRINT_LATENCY_WIDTH + PRINT_TIME_UNIT_MESC_WIDTH)
|
||||
#define PRINT_TIMESTAMP_HEADER_WIDTH (PRINT_TIMESTAMP_WIDTH + PRINT_TIME_UNIT_SEC_WIDTH)
|
||||
|
||||
struct sort_dimension {
|
||||
|
@ -90,6 +92,36 @@ static int max_runtime_cmp(struct kwork_work *l, struct kwork_work *r)
|
|||
return 0;
|
||||
}
|
||||
|
||||
static int avg_latency_cmp(struct kwork_work *l, struct kwork_work *r)
|
||||
{
|
||||
u64 avgl, avgr;
|
||||
|
||||
if (!r->nr_atoms)
|
||||
return 1;
|
||||
if (!l->nr_atoms)
|
||||
return -1;
|
||||
|
||||
avgl = l->total_latency / l->nr_atoms;
|
||||
avgr = r->total_latency / r->nr_atoms;
|
||||
|
||||
if (avgl > avgr)
|
||||
return 1;
|
||||
if (avgl < avgr)
|
||||
return -1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int max_latency_cmp(struct kwork_work *l, struct kwork_work *r)
|
||||
{
|
||||
if (l->max_latency > r->max_latency)
|
||||
return 1;
|
||||
if (l->max_latency < r->max_latency)
|
||||
return -1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int sort_dimension__add(struct perf_kwork *kwork __maybe_unused,
|
||||
const char *tok, struct list_head *list)
|
||||
{
|
||||
|
@ -110,13 +142,21 @@ static int sort_dimension__add(struct perf_kwork *kwork __maybe_unused,
|
|||
.name = "count",
|
||||
.cmp = count_cmp,
|
||||
};
|
||||
static struct sort_dimension avg_sort_dimension = {
|
||||
.name = "avg",
|
||||
.cmp = avg_latency_cmp,
|
||||
};
|
||||
struct sort_dimension *available_sorts[] = {
|
||||
&id_sort_dimension,
|
||||
&max_sort_dimension,
|
||||
&count_sort_dimension,
|
||||
&runtime_sort_dimension,
|
||||
&avg_sort_dimension,
|
||||
};
|
||||
|
||||
if (kwork->report == KWORK_REPORT_LATENCY)
|
||||
max_sort_dimension.cmp = max_latency_cmp;
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(available_sorts); i++) {
|
||||
if (!strcmp(available_sorts[i]->name, tok)) {
|
||||
list_add_tail(&available_sorts[i]->list, list);
|
||||
|
@ -479,6 +519,61 @@ static int report_exit_event(struct perf_kwork *kwork,
|
|||
return 0;
|
||||
}
|
||||
|
||||
static void latency_update_entry_event(struct kwork_work *work,
|
||||
struct kwork_atom *atom,
|
||||
struct perf_sample *sample)
|
||||
{
|
||||
u64 delta;
|
||||
u64 entry_time = sample->time;
|
||||
u64 raise_time = atom->time;
|
||||
|
||||
if ((raise_time != 0) && (entry_time >= raise_time)) {
|
||||
delta = entry_time - raise_time;
|
||||
if ((delta > work->max_latency) ||
|
||||
(work->max_latency == 0)) {
|
||||
work->max_latency = delta;
|
||||
work->max_latency_start = raise_time;
|
||||
work->max_latency_end = entry_time;
|
||||
}
|
||||
work->total_latency += delta;
|
||||
work->nr_atoms++;
|
||||
}
|
||||
}
|
||||
|
||||
static int latency_raise_event(struct perf_kwork *kwork,
|
||||
struct kwork_class *class,
|
||||
struct evsel *evsel,
|
||||
struct perf_sample *sample,
|
||||
struct machine *machine)
|
||||
{
|
||||
return work_push_atom(kwork, class, KWORK_TRACE_RAISE,
|
||||
KWORK_TRACE_MAX, evsel, sample,
|
||||
machine, NULL);
|
||||
}
|
||||
|
||||
static int latency_entry_event(struct perf_kwork *kwork,
|
||||
struct kwork_class *class,
|
||||
struct evsel *evsel,
|
||||
struct perf_sample *sample,
|
||||
struct machine *machine)
|
||||
{
|
||||
struct kwork_atom *atom = NULL;
|
||||
struct kwork_work *work = NULL;
|
||||
|
||||
atom = work_pop_atom(kwork, class, KWORK_TRACE_ENTRY,
|
||||
KWORK_TRACE_RAISE, evsel, sample,
|
||||
machine, &work);
|
||||
if (work == NULL)
|
||||
return -1;
|
||||
|
||||
if (atom != NULL) {
|
||||
latency_update_entry_event(work, atom, sample);
|
||||
atom_del(atom);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct kwork_class kwork_irq;
|
||||
static int process_irq_handler_entry_event(struct perf_tool *tool,
|
||||
struct evsel *evsel,
|
||||
|
@ -756,6 +851,7 @@ static int report_print_work(struct perf_kwork *kwork, struct kwork_work *work)
|
|||
int ret = 0;
|
||||
char kwork_name[PRINT_KWORK_NAME_WIDTH];
|
||||
char max_runtime_start[32], max_runtime_end[32];
|
||||
char max_latency_start[32], max_latency_end[32];
|
||||
|
||||
printf(" ");
|
||||
|
||||
|
@ -782,6 +878,11 @@ static int report_print_work(struct perf_kwork *kwork, struct kwork_work *work)
|
|||
ret += printf(" %*.*f ms |",
|
||||
PRINT_RUNTIME_WIDTH, RPINT_DECIMAL_WIDTH,
|
||||
(double)work->total_runtime / NSEC_PER_MSEC);
|
||||
} else if (kwork->report == KWORK_REPORT_LATENCY) { // avg delay
|
||||
ret += printf(" %*.*f ms |",
|
||||
PRINT_LATENCY_WIDTH, RPINT_DECIMAL_WIDTH,
|
||||
(double)work->total_latency /
|
||||
work->nr_atoms / NSEC_PER_MSEC);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -805,6 +906,22 @@ static int report_print_work(struct perf_kwork *kwork, struct kwork_work *work)
|
|||
PRINT_TIMESTAMP_WIDTH, max_runtime_start,
|
||||
PRINT_TIMESTAMP_WIDTH, max_runtime_end);
|
||||
}
|
||||
/*
|
||||
* max delay, max delay start, max delay end
|
||||
*/
|
||||
else if (kwork->report == KWORK_REPORT_LATENCY) {
|
||||
timestamp__scnprintf_usec(work->max_latency_start,
|
||||
max_latency_start,
|
||||
sizeof(max_latency_start));
|
||||
timestamp__scnprintf_usec(work->max_latency_end,
|
||||
max_latency_end,
|
||||
sizeof(max_latency_end));
|
||||
ret += printf(" %*.*f ms | %*s s | %*s s |",
|
||||
PRINT_LATENCY_WIDTH, RPINT_DECIMAL_WIDTH,
|
||||
(double)work->max_latency / NSEC_PER_MSEC,
|
||||
PRINT_TIMESTAMP_WIDTH, max_latency_start,
|
||||
PRINT_TIMESTAMP_WIDTH, max_latency_end);
|
||||
}
|
||||
|
||||
printf("\n");
|
||||
return ret;
|
||||
|
@ -822,6 +939,9 @@ static int report_print_header(struct perf_kwork *kwork)
|
|||
if (kwork->report == KWORK_REPORT_RUNTIME) {
|
||||
ret += printf(" %-*s |",
|
||||
PRINT_RUNTIME_HEADER_WIDTH, "Total Runtime");
|
||||
} else if (kwork->report == KWORK_REPORT_LATENCY) {
|
||||
ret += printf(" %-*s |",
|
||||
PRINT_LATENCY_HEADER_WIDTH, "Avg delay");
|
||||
}
|
||||
|
||||
ret += printf(" %-*s |", PRINT_COUNT_WIDTH, "Count");
|
||||
|
@ -831,6 +951,11 @@ static int report_print_header(struct perf_kwork *kwork)
|
|||
PRINT_RUNTIME_HEADER_WIDTH, "Max runtime",
|
||||
PRINT_TIMESTAMP_HEADER_WIDTH, "Max runtime start",
|
||||
PRINT_TIMESTAMP_HEADER_WIDTH, "Max runtime end");
|
||||
} else if (kwork->report == KWORK_REPORT_LATENCY) {
|
||||
ret += printf(" %-*s | %-*s | %-*s |",
|
||||
PRINT_LATENCY_HEADER_WIDTH, "Max delay",
|
||||
PRINT_TIMESTAMP_HEADER_WIDTH, "Max delay start",
|
||||
PRINT_TIMESTAMP_HEADER_WIDTH, "Max delay end");
|
||||
}
|
||||
|
||||
printf("\n");
|
||||
|
@ -865,6 +990,7 @@ static void print_skipped_events(struct perf_kwork *kwork)
|
|||
{
|
||||
int i;
|
||||
const char *const kwork_event_str[] = {
|
||||
[KWORK_TRACE_RAISE] = "raise",
|
||||
[KWORK_TRACE_ENTRY] = "entry",
|
||||
[KWORK_TRACE_EXIT] = "exit",
|
||||
};
|
||||
|
@ -937,11 +1063,18 @@ static int perf_kwork__check_config(struct perf_kwork *kwork,
|
|||
.entry_event = report_entry_event,
|
||||
.exit_event = report_exit_event,
|
||||
};
|
||||
static struct trace_kwork_handler latency_ops = {
|
||||
.raise_event = latency_raise_event,
|
||||
.entry_event = latency_entry_event,
|
||||
};
|
||||
|
||||
switch (kwork->report) {
|
||||
case KWORK_REPORT_RUNTIME:
|
||||
kwork->tp_handler = &report_ops;
|
||||
break;
|
||||
case KWORK_REPORT_LATENCY:
|
||||
kwork->tp_handler = &latency_ops;
|
||||
break;
|
||||
default:
|
||||
pr_debug("Invalid report type %d\n", kwork->report);
|
||||
return -1;
|
||||
|
@ -1219,6 +1352,7 @@ int cmd_kwork(int argc, const char **argv)
|
|||
.nr_skipped_events = { 0 },
|
||||
};
|
||||
static const char default_report_sort_order[] = "runtime, max, count";
|
||||
static const char default_latency_sort_order[] = "avg, max, count";
|
||||
const struct option kwork_options[] = {
|
||||
OPT_INCR('v', "verbose", &verbose,
|
||||
"be more verbose (show symbol address, etc)"),
|
||||
|
@ -1244,6 +1378,19 @@ int cmd_kwork(int argc, const char **argv)
|
|||
"Show summary with statistics"),
|
||||
OPT_PARENT(kwork_options)
|
||||
};
|
||||
const struct option latency_options[] = {
|
||||
OPT_STRING('s', "sort", &kwork.sort_order, "key[,key2...]",
|
||||
"sort by key(s): avg, max, count"),
|
||||
OPT_STRING('C', "cpu", &kwork.cpu_list, "cpu",
|
||||
"list of cpus to profile"),
|
||||
OPT_STRING('n', "name", &kwork.profile_name, "name",
|
||||
"event name to profile"),
|
||||
OPT_STRING(0, "time", &kwork.time_str, "str",
|
||||
"Time span for analysis (start,stop)"),
|
||||
OPT_STRING('i', "input", &input_name, "file",
|
||||
"input file name"),
|
||||
OPT_PARENT(kwork_options)
|
||||
};
|
||||
const char *kwork_usage[] = {
|
||||
NULL,
|
||||
NULL
|
||||
|
@ -1252,8 +1399,12 @@ int cmd_kwork(int argc, const char **argv)
|
|||
"perf kwork report [<options>]",
|
||||
NULL
|
||||
};
|
||||
const char * const latency_usage[] = {
|
||||
"perf kwork latency [<options>]",
|
||||
NULL
|
||||
};
|
||||
const char *const kwork_subcommands[] = {
|
||||
"record", "report", NULL
|
||||
"record", "report", "latency", NULL
|
||||
};
|
||||
|
||||
argc = parse_options_subcommand(argc, argv, kwork_options,
|
||||
|
@ -1277,6 +1428,16 @@ int cmd_kwork(int argc, const char **argv)
|
|||
kwork.report = KWORK_REPORT_RUNTIME;
|
||||
setup_sorting(&kwork, report_options, report_usage);
|
||||
return perf_kwork__report(&kwork);
|
||||
} else if (strlen(argv[0]) > 2 && strstarts("latency", argv[0])) {
|
||||
kwork.sort_order = default_latency_sort_order;
|
||||
if (argc > 1) {
|
||||
argc = parse_options(argc, argv, latency_options, latency_usage, 0);
|
||||
if (argc)
|
||||
usage_with_options(latency_usage, latency_options);
|
||||
}
|
||||
kwork.report = KWORK_REPORT_LATENCY;
|
||||
setup_sorting(&kwork, latency_options, latency_usage);
|
||||
return perf_kwork__report(&kwork);
|
||||
} else
|
||||
usage_with_options(kwork_usage, kwork_options);
|
||||
|
||||
|
|
|
@ -21,9 +21,11 @@ enum kwork_class_type {
|
|||
|
||||
enum kwork_report_type {
|
||||
KWORK_REPORT_RUNTIME,
|
||||
KWORK_REPORT_LATENCY,
|
||||
};
|
||||
|
||||
enum kwork_trace_type {
|
||||
KWORK_TRACE_RAISE,
|
||||
KWORK_TRACE_ENTRY,
|
||||
KWORK_TRACE_EXIT,
|
||||
KWORK_TRACE_MAX,
|
||||
|
@ -116,6 +118,14 @@ struct kwork_work {
|
|||
u64 max_runtime_start;
|
||||
u64 max_runtime_end;
|
||||
u64 total_runtime;
|
||||
|
||||
/*
|
||||
* latency report
|
||||
*/
|
||||
u64 max_latency;
|
||||
u64 max_latency_start;
|
||||
u64 max_latency_end;
|
||||
u64 total_latency;
|
||||
};
|
||||
|
||||
struct kwork_class {
|
||||
|
@ -143,6 +153,10 @@ struct kwork_class {
|
|||
|
||||
struct perf_kwork;
|
||||
struct trace_kwork_handler {
|
||||
int (*raise_event)(struct perf_kwork *kwork,
|
||||
struct kwork_class *class, struct evsel *evsel,
|
||||
struct perf_sample *sample, struct machine *machine);
|
||||
|
||||
int (*entry_event)(struct perf_kwork *kwork,
|
||||
struct kwork_class *class, struct evsel *evsel,
|
||||
struct perf_sample *sample, struct machine *machine);
|
||||
|
|
Загрузка…
Ссылка в новой задаче