perf stat: Abstract stat metrics printing

Abstract the printing of shadow metrics. Instead of every metric calling
fprintf directly and taking care of indentation, use two callbacks: one
to print metrics and another to start a new line.

This will allow adding metrics to CSV mode and also using them for other
purposes.

The computation of padding is now done in the central callback, instead
of every metric doing it manually.  This makes it easier to add new
metrics.

v2: Refactor functions; printout now does more. Move
shadow printing. Improve fallback callbacks. Don't
use void * callback data.
v3: Remove unnecessary hunk. Add typedef for new_line.
v4: Remove unnecessary hunk. Don't print metrics for CSV/interval
mode yet. Move printout change to separate patch.
v5: Fix bisect bugs. Avoid bogus frontend cycles printing.
Fix indentation in different aggregation modes.
v6: Delay newline handling.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Acked-by: Jiri Olsa <jolsa@redhat.com>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1454173616-17710-2-git-send-email-andi@firstfloor.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
This commit is contained in:
Andi Kleen 2016-01-30 09:06:49 -08:00 коммит произвёл Arnaldo Carvalho de Melo
Родитель 720e98b5fa
Коммит 140aeadc1f
3 изменённых файлов: 194 добавлений и 103 удалений

Просмотреть файл

@ -735,6 +735,58 @@ static void aggr_printout(struct perf_evsel *evsel, int id, int nr)
}
}
/*
 * State shared by the standard-output metric callbacks; it is passed to
 * them through their opaque ctx pointer.
 */
struct outstate {
FILE *fh; /* stream the metrics are written to */
bool newline; /* set by new_line_std(); read and cleared by print_metric_std() */
};
/*
 * Width of the metric column: print_metric_std() uses it both as the width
 * of the blank filler and as the budget from which the unit padding is
 * computed.
 */
#define METRIC_LEN 35
/*
 * new_line callback for standard output.
 *
 * Writes nothing itself; it only raises the newline flag in the outstate
 * behind @ctx. print_metric_std() reads that flag and, when it has a metric
 * to print, emits the line break first.
 */
static void new_line_std(void *ctx)
{
	((struct outstate *)ctx)->newline = true;
}
/*
 * Start a new output line on @os->fh and write the leading padding for it.
 *
 * A mode-specific pad is written for the AGGR_NONE, AGGR_CORE and
 * AGGR_SOCKET aggregation modes, followed by a pad that is written in every
 * mode.
 *
 * NOTE(review): the pad literals below are reproduced exactly as they appear
 * here; their whitespace looks collapsed, so confirm the intended widths
 * against the upstream tree.
 */
static void do_new_line_std(struct outstate *os)
{
	FILE *fp = os->fh;

	fputc('\n', fp);

	switch (stat_config.aggr_mode) {
	case AGGR_NONE:
		fprintf(fp, " ");
		break;
	case AGGR_CORE:
		fprintf(fp, " ");
		break;
	case AGGR_SOCKET:
		fprintf(fp, " ");
		break;
	default:
		/* no mode-specific pad for the remaining modes */
		break;
	}

	fprintf(fp, " ");
}
/*
 * print_metric callback for standard output.
 *
 * @ctx:   struct outstate carrying the output stream and the pending
 *         newline flag
 * @color: colour handed to color_fprintf() for the value, or NULL to print
 *         the value with plain fprintf()
 * @fmt:   printf-style format that consumes @val
 * @unit:  text printed after the value
 * @val:   metric value
 *
 * The pending-newline flag is always cleared. When either @unit or @fmt is
 * NULL only a METRIC_LEN-wide blank field is written and no line break is
 * emitted. Otherwise a pending line break is emitted first, then the value,
 * then @unit left-justified in whatever remains of the METRIC_LEN budget.
 */
static void print_metric_std(void *ctx, const char *color, const char *fmt,
			     const char *unit, double val)
{
	struct outstate *state = ctx;
	FILE *fp = state->fh;
	bool pending_nl = state->newline;
	int used;

	state->newline = false;

	if (!unit || !fmt) {
		fprintf(fp, "%-*s", METRIC_LEN, "");
		return;
	}

	if (pending_nl)
		do_new_line_std(state);

	/* Track how many characters the prefix and value consumed. */
	used = fprintf(fp, " # ");
	used += color ? color_fprintf(fp, color, fmt, val)
		      : fprintf(fp, fmt, val);

	fprintf(fp, " %-*s", METRIC_LEN - used - 1, unit);
}
static void nsec_printout(int id, int nr, struct perf_evsel *evsel, double avg)
{
FILE *output = stat_config.output;
@ -795,20 +847,27 @@ static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg)
static void printout(int id, int nr, struct perf_evsel *counter, double uval)
{
int cpu = cpu_map__id_to_cpu(id);
struct outstate os = { .fh = stat_config.output };
struct perf_stat_output_ctx out;
print_metric_t pm = print_metric_std;
void (*nl)(void *);
if (stat_config.aggr_mode == AGGR_GLOBAL)
cpu = 0;
nl = new_line_std;
if (nsec_counter(counter))
nsec_printout(id, nr, counter, uval);
else
abs_printout(id, nr, counter, uval);
out.print_metric = pm;
out.new_line = nl;
out.ctx = &os;
if (!csv_output && !stat_config.interval)
perf_stat__print_shadow_stats(stat_config.output, counter,
uval, cpu,
stat_config.aggr_mode);
perf_stat__print_shadow_stats(counter, uval,
stat_config.aggr_mode == AGGR_GLOBAL ? 0 :
cpu_map__id_to_cpu(id),
&out);
}
static void print_aggr(char *prefix)

Просмотреть файл

@ -137,9 +137,10 @@ static const char *get_ratio_color(enum grc_type type, double ratio)
return color;
}
static void print_stalled_cycles_frontend(FILE *out, int cpu,
static void print_stalled_cycles_frontend(int cpu,
struct perf_evsel *evsel
__maybe_unused, double avg)
__maybe_unused, double avg,
struct perf_stat_output_ctx *out)
{
double total, ratio = 0.0;
const char *color;
@ -152,14 +153,17 @@ static void print_stalled_cycles_frontend(FILE *out, int cpu,
color = get_ratio_color(GRC_STALLED_CYCLES_FE, ratio);
fprintf(out, " # ");
color_fprintf(out, color, "%6.2f%%", ratio);
fprintf(out, " frontend cycles idle ");
if (ratio)
out->print_metric(out->ctx, color, "%7.2f%%", "frontend cycles idle",
ratio);
else
out->print_metric(out->ctx, NULL, NULL, "frontend cycles idle", 0);
}
static void print_stalled_cycles_backend(FILE *out, int cpu,
static void print_stalled_cycles_backend(int cpu,
struct perf_evsel *evsel
__maybe_unused, double avg)
__maybe_unused, double avg,
struct perf_stat_output_ctx *out)
{
double total, ratio = 0.0;
const char *color;
@ -172,14 +176,13 @@ static void print_stalled_cycles_backend(FILE *out, int cpu,
color = get_ratio_color(GRC_STALLED_CYCLES_BE, ratio);
fprintf(out, " # ");
color_fprintf(out, color, "%6.2f%%", ratio);
fprintf(out, " backend cycles idle ");
out->print_metric(out->ctx, color, "%6.2f%%", "backend cycles idle", ratio);
}
static void print_branch_misses(FILE *out, int cpu,
static void print_branch_misses(int cpu,
struct perf_evsel *evsel __maybe_unused,
double avg)
double avg,
struct perf_stat_output_ctx *out)
{
double total, ratio = 0.0;
const char *color;
@ -192,14 +195,13 @@ static void print_branch_misses(FILE *out, int cpu,
color = get_ratio_color(GRC_CACHE_MISSES, ratio);
fprintf(out, " # ");
color_fprintf(out, color, "%6.2f%%", ratio);
fprintf(out, " of all branches ");
out->print_metric(out->ctx, color, "%7.2f%%", "of all branches", ratio);
}
static void print_l1_dcache_misses(FILE *out, int cpu,
static void print_l1_dcache_misses(int cpu,
struct perf_evsel *evsel __maybe_unused,
double avg)
double avg,
struct perf_stat_output_ctx *out)
{
double total, ratio = 0.0;
const char *color;
@ -212,14 +214,13 @@ static void print_l1_dcache_misses(FILE *out, int cpu,
color = get_ratio_color(GRC_CACHE_MISSES, ratio);
fprintf(out, " # ");
color_fprintf(out, color, "%6.2f%%", ratio);
fprintf(out, " of all L1-dcache hits ");
out->print_metric(out->ctx, color, "%7.2f%%", "of all L1-dcache hits", ratio);
}
static void print_l1_icache_misses(FILE *out, int cpu,
static void print_l1_icache_misses(int cpu,
struct perf_evsel *evsel __maybe_unused,
double avg)
double avg,
struct perf_stat_output_ctx *out)
{
double total, ratio = 0.0;
const char *color;
@ -231,15 +232,13 @@ static void print_l1_icache_misses(FILE *out, int cpu,
ratio = avg / total * 100.0;
color = get_ratio_color(GRC_CACHE_MISSES, ratio);
fprintf(out, " # ");
color_fprintf(out, color, "%6.2f%%", ratio);
fprintf(out, " of all L1-icache hits ");
out->print_metric(out->ctx, color, "%7.2f%%", "of all L1-icache hits", ratio);
}
static void print_dtlb_cache_misses(FILE *out, int cpu,
static void print_dtlb_cache_misses(int cpu,
struct perf_evsel *evsel __maybe_unused,
double avg)
double avg,
struct perf_stat_output_ctx *out)
{
double total, ratio = 0.0;
const char *color;
@ -251,15 +250,13 @@ static void print_dtlb_cache_misses(FILE *out, int cpu,
ratio = avg / total * 100.0;
color = get_ratio_color(GRC_CACHE_MISSES, ratio);
fprintf(out, " # ");
color_fprintf(out, color, "%6.2f%%", ratio);
fprintf(out, " of all dTLB cache hits ");
out->print_metric(out->ctx, color, "%7.2f%%", "of all dTLB cache hits", ratio);
}
static void print_itlb_cache_misses(FILE *out, int cpu,
static void print_itlb_cache_misses(int cpu,
struct perf_evsel *evsel __maybe_unused,
double avg)
double avg,
struct perf_stat_output_ctx *out)
{
double total, ratio = 0.0;
const char *color;
@ -271,15 +268,13 @@ static void print_itlb_cache_misses(FILE *out, int cpu,
ratio = avg / total * 100.0;
color = get_ratio_color(GRC_CACHE_MISSES, ratio);
fprintf(out, " # ");
color_fprintf(out, color, "%6.2f%%", ratio);
fprintf(out, " of all iTLB cache hits ");
out->print_metric(out->ctx, color, "%7.2f%%", "of all iTLB cache hits", ratio);
}
static void print_ll_cache_misses(FILE *out, int cpu,
static void print_ll_cache_misses(int cpu,
struct perf_evsel *evsel __maybe_unused,
double avg)
double avg,
struct perf_stat_output_ctx *out)
{
double total, ratio = 0.0;
const char *color;
@ -291,15 +286,15 @@ static void print_ll_cache_misses(FILE *out, int cpu,
ratio = avg / total * 100.0;
color = get_ratio_color(GRC_CACHE_MISSES, ratio);
fprintf(out, " # ");
color_fprintf(out, color, "%6.2f%%", ratio);
fprintf(out, " of all LL-cache hits ");
out->print_metric(out->ctx, color, "%7.2f%%", "of all LL-cache hits", ratio);
}
void perf_stat__print_shadow_stats(FILE *out, struct perf_evsel *evsel,
double avg, int cpu, enum aggr_mode aggr)
void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
double avg, int cpu,
struct perf_stat_output_ctx *out)
{
void *ctxp = out->ctx;
print_metric_t print_metric = out->print_metric;
double total, ratio = 0.0, total2;
int ctx = evsel_context(evsel);
@ -307,119 +302,145 @@ void perf_stat__print_shadow_stats(FILE *out, struct perf_evsel *evsel,
total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
if (total) {
ratio = avg / total;
fprintf(out, " # %5.2f insns per cycle ", ratio);
print_metric(ctxp, NULL, "%7.2f ",
"insn per cycle", ratio);
} else {
fprintf(out, " ");
print_metric(ctxp, NULL, NULL, "insn per cycle", 0);
}
total = avg_stats(&runtime_stalled_cycles_front_stats[ctx][cpu]);
total = max(total, avg_stats(&runtime_stalled_cycles_back_stats[ctx][cpu]));
out->new_line(ctxp);
if (total && avg) {
ratio = total / avg;
fprintf(out, "\n");
if (aggr == AGGR_NONE)
fprintf(out, " ");
fprintf(out, " # %5.2f stalled cycles per insn", ratio);
print_metric(ctxp, NULL, "%7.2f ",
"stalled cycles per insn",
ratio);
} else {
print_metric(ctxp, NULL, NULL,
"stalled cycles per insn", 0);
}
} else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES) &&
runtime_branches_stats[ctx][cpu].n != 0) {
print_branch_misses(out, cpu, evsel, avg);
} else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES)) {
if (runtime_branches_stats[ctx][cpu].n != 0)
print_branch_misses(cpu, evsel, avg, out);
else
print_metric(ctxp, NULL, NULL, "of all branches", 0);
} else if (
evsel->attr.type == PERF_TYPE_HW_CACHE &&
evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1D |
((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
runtime_l1_dcache_stats[ctx][cpu].n != 0) {
print_l1_dcache_misses(out, cpu, evsel, avg);
((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
if (runtime_l1_dcache_stats[ctx][cpu].n != 0)
print_l1_dcache_misses(cpu, evsel, avg, out);
else
print_metric(ctxp, NULL, NULL, "of all L1-dcache hits", 0);
} else if (
evsel->attr.type == PERF_TYPE_HW_CACHE &&
evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1I |
((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
runtime_l1_icache_stats[ctx][cpu].n != 0) {
print_l1_icache_misses(out, cpu, evsel, avg);
((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
if (runtime_l1_icache_stats[ctx][cpu].n != 0)
print_l1_icache_misses(cpu, evsel, avg, out);
else
print_metric(ctxp, NULL, NULL, "of all L1-icache hits", 0);
} else if (
evsel->attr.type == PERF_TYPE_HW_CACHE &&
evsel->attr.config == ( PERF_COUNT_HW_CACHE_DTLB |
((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
runtime_dtlb_cache_stats[ctx][cpu].n != 0) {
print_dtlb_cache_misses(out, cpu, evsel, avg);
((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
if (runtime_dtlb_cache_stats[ctx][cpu].n != 0)
print_dtlb_cache_misses(cpu, evsel, avg, out);
else
print_metric(ctxp, NULL, NULL, "of all dTLB cache hits", 0);
} else if (
evsel->attr.type == PERF_TYPE_HW_CACHE &&
evsel->attr.config == ( PERF_COUNT_HW_CACHE_ITLB |
((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
runtime_itlb_cache_stats[ctx][cpu].n != 0) {
print_itlb_cache_misses(out, cpu, evsel, avg);
((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
if (runtime_itlb_cache_stats[ctx][cpu].n != 0)
print_itlb_cache_misses(cpu, evsel, avg, out);
else
print_metric(ctxp, NULL, NULL, "of all iTLB cache hits", 0);
} else if (
evsel->attr.type == PERF_TYPE_HW_CACHE &&
evsel->attr.config == ( PERF_COUNT_HW_CACHE_LL |
((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
runtime_ll_cache_stats[ctx][cpu].n != 0) {
print_ll_cache_misses(out, cpu, evsel, avg);
} else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES) &&
runtime_cacherefs_stats[ctx][cpu].n != 0) {
((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
if (runtime_ll_cache_stats[ctx][cpu].n != 0)
print_ll_cache_misses(cpu, evsel, avg, out);
else
print_metric(ctxp, NULL, NULL, "of all LL-cache hits", 0);
} else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES)) {
total = avg_stats(&runtime_cacherefs_stats[ctx][cpu]);
if (total)
ratio = avg * 100 / total;
fprintf(out, " # %8.3f %% of all cache refs ", ratio);
if (runtime_cacherefs_stats[ctx][cpu].n != 0)
print_metric(ctxp, NULL, "%8.3f %%",
"of all cache refs", ratio);
else
print_metric(ctxp, NULL, NULL, "of all cache refs", 0);
} else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) {
print_stalled_cycles_frontend(out, cpu, evsel, avg);
print_stalled_cycles_frontend(cpu, evsel, avg, out);
} else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) {
print_stalled_cycles_backend(out, cpu, evsel, avg);
print_stalled_cycles_backend(cpu, evsel, avg, out);
} else if (perf_evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) {
total = avg_stats(&runtime_nsecs_stats[cpu]);
if (total) {
ratio = avg / total;
fprintf(out, " # %8.3f GHz ", ratio);
print_metric(ctxp, NULL, "%8.3f", "GHz", ratio);
} else {
fprintf(out, " ");
print_metric(ctxp, NULL, NULL, "Ghz", 0);
}
} else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX)) {
total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
if (total)
fprintf(out,
" # %5.2f%% transactional cycles ",
100.0 * (avg / total));
print_metric(ctxp, NULL,
"%7.2f%%", "transactional cycles",
100.0 * (avg / total));
else
print_metric(ctxp, NULL, NULL, "transactional cycles",
0);
} else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX_CP)) {
total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
total2 = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]);
if (total2 < avg)
total2 = avg;
if (total)
fprintf(out,
" # %5.2f%% aborted cycles ",
print_metric(ctxp, NULL, "%7.2f%%", "aborted cycles",
100.0 * ((total2-avg) / total));
} else if (perf_stat_evsel__is(evsel, TRANSACTION_START) &&
runtime_cycles_in_tx_stats[ctx][cpu].n != 0) {
else
print_metric(ctxp, NULL, NULL, "aborted cycles", 0);
} else if (perf_stat_evsel__is(evsel, TRANSACTION_START)) {
total = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]);
if (avg)
ratio = total / avg;
fprintf(out, " # %8.0f cycles / transaction ", ratio);
} else if (perf_stat_evsel__is(evsel, ELISION_START) &&
runtime_cycles_in_tx_stats[ctx][cpu].n != 0) {
if (runtime_cycles_in_tx_stats[ctx][cpu].n != 0)
print_metric(ctxp, NULL, "%8.0f",
"cycles / transaction", ratio);
else
print_metric(ctxp, NULL, NULL, "cycles / transaction",
0);
} else if (perf_stat_evsel__is(evsel, ELISION_START)) {
total = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]);
if (avg)
ratio = total / avg;
fprintf(out, " # %8.0f cycles / elision ", ratio);
print_metric(ctxp, NULL, "%8.0f", "cycles / elision", ratio);
} else if (perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK)) {
if ((ratio = avg_stats(&walltime_nsecs_stats)) != 0)
fprintf(out, " # %8.3f CPUs utilized ", avg / ratio);
print_metric(ctxp, NULL, "%8.3f", "CPUs utilized",
avg / ratio);
else
fprintf(out, " ");
print_metric(ctxp, NULL, NULL, "CPUs utilized", 0);
} else if (runtime_nsecs_stats[cpu].n != 0) {
char unit = 'M';
char unit_buf[10];
total = avg_stats(&runtime_nsecs_stats[cpu]);
@ -429,9 +450,9 @@ void perf_stat__print_shadow_stats(FILE *out, struct perf_evsel *evsel,
ratio *= 1000;
unit = 'K';
}
fprintf(out, " # %8.3f %c/sec ", ratio, unit);
snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit);
print_metric(ctxp, NULL, "%8.3f", unit_buf, ratio);
} else {
fprintf(out, " ");
print_metric(ctxp, NULL, NULL, NULL, 0);
}
}

Просмотреть файл

@ -68,11 +68,22 @@ void perf_stat_evsel_id_init(struct perf_evsel *evsel);
extern struct stats walltime_nsecs_stats;
/*
 * Callback that emits one shadow metric.
 *
 * @ctx:   opaque state supplied by whoever installed the callback
 * @color: colour for the value, or NULL for none
 * @fmt:   printf-style format consuming @val; callers pass NULL when there
 *         is no value to show
 * @unit:  text describing the metric; may be NULL
 * @val:   metric value
 *
 * The parameter names are listed in the order the callers and
 * print_metric_std() actually use: format third, unit fourth.
 */
typedef void (*print_metric_t)(void *ctx, const char *color, const char *fmt,
			       const char *unit, double val);
/* Callback that asks the output backend to start a new line. */
typedef void (*new_line_t)(void *ctx);
void perf_stat__reset_shadow_stats(void);
void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 *count,
int cpu);
void perf_stat__print_shadow_stats(FILE *out, struct perf_evsel *evsel,
double avg, int cpu, enum aggr_mode aggr);
/*
 * Output callbacks plus their shared state, handed to
 * perf_stat__print_shadow_stats().
 */
struct perf_stat_output_ctx {
void *ctx; /* opaque pointer passed back as the first argument of both callbacks */
print_metric_t print_metric; /* called to print one metric */
new_line_t new_line; /* called to request a new output line */
};
/*
 * Print the shadow metric for @evsel, given its value @avg on @cpu.
 * All output goes through the print_metric/new_line callbacks in @out,
 * which receive @out->ctx as their first argument.
 */
void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
double avg, int cpu,
struct perf_stat_output_ctx *out);
int perf_evlist__alloc_stats(struct perf_evlist *evlist, bool alloc_raw);
void perf_evlist__free_stats(struct perf_evlist *evlist);