WSL2-Linux-Kernel/tools/perf/util/block-info.c

457 строки
11 KiB
C
Исходник Обычный вид История

// SPDX-License-Identifier: GPL-2.0
#include <stdlib.h>
#include <string.h>
#include <linux/zalloc.h>
#include "block-info.h"
#include "sort.h"
#include "annotate.h"
#include "symbol.h"
#include "dso.h"
#include "map.h"
#include "srcline.h"
#include "evlist.h"
/*
 * Header text and print width of every block report column, indexed by the
 * PERF_HPP_REPORT__BLOCK_* column id.
 */
static struct block_header_column {
	const char *name;
	int width;
} block_columns[PERF_HPP_REPORT__BLOCK_MAX_INDEX] = {
	[PERF_HPP_REPORT__BLOCK_TOTAL_CYCLES_PCT] = { .name = "Sampled Cycles%", .width = 15 },
	[PERF_HPP_REPORT__BLOCK_LBR_CYCLES]       = { .name = "Sampled Cycles", .width = 14 },
	[PERF_HPP_REPORT__BLOCK_CYCLES_PCT]       = { .name = "Avg Cycles%", .width = 11 },
	[PERF_HPP_REPORT__BLOCK_AVG_CYCLES]       = { .name = "Avg Cycles", .width = 10 },
	[PERF_HPP_REPORT__BLOCK_RANGE]            = { .name = "[Program Block Range]", .width = 70 },
	[PERF_HPP_REPORT__BLOCK_DSO]              = { .name = "Shared Object", .width = 20 },
};
/*
 * Take an additional reference on @bi.
 *
 * A NULL @bi is accepted and simply returned. Otherwise the reference count
 * is incremented and the same pointer is handed back to the caller.
 */
struct block_info *block_info__get(struct block_info *bi)
{
	if (!bi)
		return NULL;

	refcount_inc(&bi->refcnt);
	return bi;
}
/*
 * Drop one reference on @bi; a NULL @bi is ignored.
 *
 * The object is freed when refcount_dec_and_test() reports that this was the
 * last reference.
 */
void block_info__put(struct block_info *bi)
{
	if (!bi)
		return;

	if (refcount_dec_and_test(&bi->refcnt))
		free(bi);
}
/*
 * Allocate a zero-filled block_info with its reference count set to 1.
 *
 * Returns NULL when the allocation fails.
 */
struct block_info *block_info__new(void)
{
	struct block_info *bi = zalloc(sizeof(*bi));

	if (!bi)
		return NULL;

	refcount_set(&bi->refcnt, 1);
	return bi;
}
/*
 * Compare the blocks attached to two hist entries.
 *
 * @fmt:   unused.
 * @left:  first entry; its ->block_info is dereferenced.
 * @right: second entry; its ->block_info is dereferenced.
 *
 * Ordering:
 *  - blocks without a symbol compare equal to each other and sort before
 *    blocks that have one;
 *  - blocks of different symbols are ordered by symbol name and, when the
 *    names are identical, by the symbols' start and then end address, so two
 *    distinct symbols that merely share a name are not reported as equal;
 *  - remaining ties (including blocks of the very same symbol) are ordered by
 *    block start and then block end.
 *
 * Returns zero when the blocks are considered equal, otherwise a non-zero
 * value whose sign gives the order.
 */
int64_t block_info__cmp(struct perf_hpp_fmt *fmt __maybe_unused,
			struct hist_entry *left, struct hist_entry *right)
{
	struct block_info *bi_l = left->block_info;
	struct block_info *bi_r = right->block_info;
	int cmp;

	if (!bi_l->sym || !bi_r->sym) {
		if (!bi_l->sym && !bi_r->sym)
			return 0;
		return !bi_l->sym ? -1 : 1;
	}

	if (bi_l->sym != bi_r->sym) {
		cmp = strcmp(bi_l->sym->name, bi_r->sym->name);
		if (cmp)
			return cmp;

		/* Same name but different symbols: tell them apart by range. */
		if (bi_l->sym->start != bi_r->sym->start)
			return (int64_t)(bi_r->sym->start - bi_l->sym->start);
		if (bi_l->sym->end != bi_r->sym->end)
			return (int64_t)(bi_r->sym->end - bi_l->sym->end);
	}

	if (bi_l->start != bi_r->start)
		return (int64_t)(bi_r->start - bi_l->start);

	return (int64_t)(bi_r->end - bi_l->end);
}
/*
 * Fill @bi from one cycles-histogram slot.
 *
 * @bi:           block to populate.
 * @sym:          symbol stored in the block.
 * @ch:           histogram slot supplying the start value, the counters and
 *                the spark values.
 * @offset:       stored as the block end.
 * @total_cycles: stored as-is in the block.
 */
static void init_block_info(struct block_info *bi, struct symbol *sym,
			    struct cyc_hist *ch, int offset,
			    u64 total_cycles)
{
	/* Block identity. */
	bi->sym = sym;
	bi->start = ch->start;
	bi->end = offset;

	/* Counters copied from the histogram slot. */
	bi->num = ch->num;
	bi->num_aggr = ch->num_aggr;
	bi->cycles = ch->cycles;
	bi->cycles_aggr = ch->cycles_aggr;
	bi->total_cycles = total_cycles;

	memcpy(bi->cycles_spark, ch->cycles_spark, sizeof(u64) * NUM_SPARKS);
}
/*
 * Create one block hist entry for every cycles-histogram slot of @he's symbol
 * that has a non-zero num_aggr.
 *
 * @he:                entry whose map/symbol are inspected.
 * @bh:                block histogram receiving the new entries.
 * @block_cycles_aggr: optional; incremented by the sum of the per-block
 *                     averages (cycles_aggr / num_aggr) on success.
 * @total_cycles:      stored in every created block.
 *
 * Returns 0 on success, or when @he has no map, no symbol or no cycles
 * histogram. Returns -1 when a block cannot be allocated or added; in that
 * case @block_cycles_aggr is left untouched.
 */
int block_info__process_sym(struct hist_entry *he, struct block_hist *bh,
			    u64 *block_cycles_aggr, u64 total_cycles)
{
	static struct addr_location al;
	struct annotation *notes;
	struct cyc_hist *hist;
	u64 avg_sum = 0;

	if (!he->ms.map || !he->ms.sym)
		return 0;

	memset(&al, 0, sizeof(al));
	al.map = he->ms.map;
	al.sym = he->ms.sym;

	notes = symbol__annotation(he->ms.sym);
	if (!notes || !notes->src || !notes->src->cycles_hist)
		return 0;

	hist = notes->src->cycles_hist;

	for (unsigned int off = 0; off < symbol__size(he->ms.sym); off++) {
		struct block_info *bi;

		/* Slots without aggregated samples do not describe a block. */
		if (!hist[off].num_aggr)
			continue;

		bi = block_info__new();
		if (!bi)
			return -1;

		init_block_info(bi, he->ms.sym, &hist[off], off, total_cycles);
		avg_sum += bi->cycles_aggr / bi->num_aggr;

		if (!hists__add_entry_block(&bh->block_hists, &al, bi)) {
			block_info__put(bi);
			return -1;
		}
	}

	if (block_cycles_aggr)
		*block_cycles_aggr += avg_sum;

	return 0;
}
/*
 * Header callback: print the column title into @hpp's buffer, padded to the
 * column width. Returns what scnprintf() returns.
 */
static int block_column_header(struct perf_hpp_fmt *fmt,
			       struct perf_hpp *hpp,
			       struct hists *hists __maybe_unused,
			       int line __maybe_unused,
			       int *span __maybe_unused)
{
	struct block_fmt *col = container_of(fmt, struct block_fmt, fmt);
	const int width = col->width;

	return scnprintf(hpp->buf, hpp->size, "%*s", width, col->header);
}
/* Width callback: report the width stored in the enclosing block_fmt. */
static int block_column_width(struct perf_hpp_fmt *fmt,
			      struct perf_hpp *hpp __maybe_unused,
			      struct hists *hists __maybe_unused)
{
	return container_of(fmt, struct block_fmt, fmt)->width;
}
/*
 * Entry callback for the "Sampled Cycles%" column.
 *
 * Prints bi->cycles as a percentage of the column's total_cycles with two
 * decimals ("0.00%" when total_cycles is zero), padded to the column width.
 * Returns what scnprintf() returns for the final write into @hpp.
 */
static int block_total_cycles_pct_entry(struct perf_hpp_fmt *fmt,
					struct perf_hpp *hpp,
					struct hist_entry *he)
{
	struct block_fmt *block_fmt = container_of(fmt, struct block_fmt, fmt);
	struct block_info *bi = he->block_info;
	double ratio = 0.0;
	char buf[16];

	if (block_fmt->total_cycles)
		ratio = (double)bi->cycles / (double)block_fmt->total_cycles;

	/*
	 * Bounded write: nothing limits the ratio, so a large value must be
	 * truncated rather than overflow the 16-byte scratch buffer.
	 */
	scnprintf(buf, sizeof(buf), "%.2f%%", 100.0 * ratio);

	return scnprintf(hpp->buf, hpp->size, "%*s", block_fmt->width, buf);
}
/*
 * Sort callback for the "Sampled Cycles%" column.
 *
 * Each side's cycles / total_cycles ratio is scaled by 100000.0 and truncated
 * to int64_t; the result is left minus right. Returns 0 when the column's
 * total_cycles is zero.
 */
static int64_t block_total_cycles_pct_sort(struct perf_hpp_fmt *fmt,
					   struct hist_entry *left,
					   struct hist_entry *right)
{
	struct block_fmt *col = container_of(fmt, struct block_fmt, fmt);
	struct block_info *lhs = left->block_info;
	struct block_info *rhs = right->block_info;
	double total, scaled_l, scaled_r;

	if (!col->total_cycles)
		return 0;

	total = (double)col->total_cycles;
	scaled_l = ((double)lhs->cycles / total) * 100000.0;
	scaled_r = ((double)rhs->cycles / total) * 100000.0;

	return (int64_t)scaled_l - (int64_t)scaled_r;
}
/*
 * Render a cycle count in short human-readable form into @buf.
 *
 * @cycles: value to print.
 * @buf:    destination buffer.
 * @size:   size of @buf in bytes; output is bounded by scnprintf().
 *
 * Values of at least 1000000 are printed in millions with an "M" suffix,
 * values of at least 1000 in thousands with a "K" suffix (both with one
 * decimal), and smaller values as a plain integer.
 */
static void cycles_string(u64 cycles, char *buf, int size)
{
	if (cycles >= 1000000)
		scnprintf(buf, size, "%.1fM", (double)cycles / 1000000.0);
	else if (cycles >= 1000)
		scnprintf(buf, size, "%.1fK", (double)cycles / 1000.0);
	else
		/* cycles < 1000 here, so the cast is lossless and matches %d. */
		scnprintf(buf, size, "%1d", (int)cycles);
}
/*
 * Entry callback for the "Sampled Cycles" column: prints the block's
 * cycles_aggr, shortened by cycles_string(), padded to the column width.
 */
static int block_cycles_lbr_entry(struct perf_hpp_fmt *fmt,
				  struct perf_hpp *hpp, struct hist_entry *he)
{
	struct block_fmt *col = container_of(fmt, struct block_fmt, fmt);
	char text[16];

	cycles_string(he->block_info->cycles_aggr, text, sizeof(text));

	return scnprintf(hpp->buf, hpp->size, "%*s", col->width, text);
}
/*
 * Entry callback for the "Avg Cycles%" column.
 *
 * Prints the block's average (cycles_aggr / num_aggr) as a percentage of the
 * column's block_cycles with two decimals, padded to the column width.
 * "0.00%" is printed when either block_cycles or num_aggr is zero.
 * Returns what scnprintf() returns for the final write into @hpp.
 */
static int block_cycles_pct_entry(struct perf_hpp_fmt *fmt,
				  struct perf_hpp *hpp, struct hist_entry *he)
{
	struct block_fmt *block_fmt = container_of(fmt, struct block_fmt, fmt);
	struct block_info *bi = he->block_info;
	double ratio = 0.0;
	u64 avg;
	char buf[16];

	if (block_fmt->block_cycles && bi->num_aggr) {
		avg = bi->cycles_aggr / bi->num_aggr;
		ratio = (double)avg / (double)block_fmt->block_cycles;
	}

	/*
	 * Bounded write: nothing limits the ratio, so a large value must be
	 * truncated rather than overflow the 16-byte scratch buffer.
	 */
	scnprintf(buf, sizeof(buf), "%.2f%%", 100.0 * ratio);

	return scnprintf(hpp->buf, hpp->size, "%*s", block_fmt->width, buf);
}
static int block_avg_cycles_entry(struct perf_hpp_fmt *fmt,
struct perf_hpp *hpp,
struct hist_entry *he)
{
struct block_fmt *block_fmt = container_of(fmt, struct block_fmt, fmt);
struct block_info *bi = he->block_info;
char cycles_buf[16];
cycles_string(bi->cycles_aggr / bi->num_aggr, cycles_buf,
sizeof(cycles_buf));
return scnprintf(hpp->buf, hpp->size, "%*s", block_fmt->width,
cycles_buf);
}
/*
 * Entry callback for the "[Program Block Range]" column.
 *
 * Looks up the source lines of the block's first and last address
 * (bi->sym->start plus bi->start / bi->end). When both lookups return
 * something other than SRCLINE_UNKNOWN the range is printed as
 * "[start_line -> end_line]"; otherwise the raw start/end values are printed
 * in hex. The result is padded to the column width.
 *
 * Side effect: sets symbol_conf.disable_add2line_warn to true.
 */
static int block_range_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
			     struct hist_entry *he)
{
	struct block_fmt *block_fmt = container_of(fmt, struct block_fmt, fmt);
	struct block_info *bi = he->block_info;
	char buf[128];
	char *start_line, *end_line;

	symbol_conf.disable_add2line_warn = true;

	start_line = map__srcline(he->ms.map, bi->sym->start + bi->start,
				  he->ms.sym);
	end_line = map__srcline(he->ms.map, bi->sym->start + bi->end,
				he->ms.sym);

	if ((start_line != SRCLINE_UNKNOWN) && (end_line != SRCLINE_UNKNOWN)) {
		scnprintf(buf, sizeof(buf), "[%s -> %s]",
			  start_line, end_line);
	} else {
		/*
		 * Cast explicitly so the arguments match the conversion on
		 * every ABI, including those where long is narrower than the
		 * block offsets.
		 */
		scnprintf(buf, sizeof(buf), "[%7llx -> %7llx]",
			  (unsigned long long)bi->start,
			  (unsigned long long)bi->end);
	}

	free_srcline(start_line);
	free_srcline(end_line);

	return scnprintf(hpp->buf, hpp->size, "%*s", block_fmt->width, buf);
}
/*
 * Entry callback for the "Shared Object" column: prints the short name of the
 * entry's map DSO, or "[unknown]" when the map or its DSO is missing, padded
 * to the column width.
 */
static int block_dso_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
			   struct hist_entry *he)
{
	struct block_fmt *col = container_of(fmt, struct block_fmt, fmt);
	struct map *map = he->ms.map;
	const char *dso_name = "[unknown]";

	if (map && map->dso)
		dso_name = map->dso->short_name;

	return scnprintf(hpp->buf, hpp->size, "%*s", col->width, dso_name);
}
static void init_block_header(struct block_fmt *block_fmt)
{
struct perf_hpp_fmt *fmt = &block_fmt->fmt;
BUG_ON(block_fmt->idx >= PERF_HPP_REPORT__BLOCK_MAX_INDEX);
block_fmt->header = block_columns[block_fmt->idx].name;
block_fmt->width = block_columns[block_fmt->idx].width;
fmt->header = block_column_header;
fmt->width = block_column_width;
}
/*
 * Set up @block_fmt as column @idx and register it on @hpp_list.
 *
 * The index and both list heads are always initialised. The entry callback
 * is chosen by @idx; the "Sampled Cycles%" column additionally gets cmp and
 * sort callbacks. An @idx that matches no known column returns early, without
 * header initialisation or registration.
 */
static void hpp_register(struct block_fmt *block_fmt, int idx,
			 struct perf_hpp_list *hpp_list)
{
	struct perf_hpp_fmt *col = &block_fmt->fmt;

	block_fmt->idx = idx;
	INIT_LIST_HEAD(&col->list);
	INIT_LIST_HEAD(&col->sort_list);

	switch (idx) {
	case PERF_HPP_REPORT__BLOCK_TOTAL_CYCLES_PCT:
		col->entry = block_total_cycles_pct_entry;
		break;
	case PERF_HPP_REPORT__BLOCK_LBR_CYCLES:
		col->entry = block_cycles_lbr_entry;
		break;
	case PERF_HPP_REPORT__BLOCK_CYCLES_PCT:
		col->entry = block_cycles_pct_entry;
		break;
	case PERF_HPP_REPORT__BLOCK_AVG_CYCLES:
		col->entry = block_avg_cycles_entry;
		break;
	case PERF_HPP_REPORT__BLOCK_RANGE:
		col->entry = block_range_entry;
		break;
	case PERF_HPP_REPORT__BLOCK_DSO:
		col->entry = block_dso_entry;
		break;
	default:
		return;
	}

	/* Only the total-cycles percentage column can compare and sort. */
	if (idx == PERF_HPP_REPORT__BLOCK_TOTAL_CYCLES_PCT) {
		col->cmp = block_info__cmp;
		col->sort = block_total_cycles_pct_sort;
	}

	init_block_header(block_fmt);
	perf_hpp_list__column_register(hpp_list, col);
}
/*
 * Register every column index below PERF_HPP_REPORT__BLOCK_MAX_INDEX, using
 * the matching slot of @block_fmts for each.
 */
static void register_block_columns(struct perf_hpp_list *hpp_list,
				   struct block_fmt *block_fmts)
{
	int idx = 0;

	while (idx < PERF_HPP_REPORT__BLOCK_MAX_INDEX) {
		hpp_register(block_fmts + idx, idx, hpp_list);
		idx++;
	}
}
static void init_block_hist(struct block_hist *bh, struct block_fmt *block_fmts)
{
__hists__init(&bh->block_hists, &bh->block_list);
perf_hpp_list__init(&bh->block_list);
bh->block_list.nr_header_lines = 1;
register_block_columns(&bh->block_list, block_fmts);
perf_hpp_list__register_sort_field(&bh->block_list,
&block_fmts[PERF_HPP_REPORT__BLOCK_TOTAL_CYCLES_PCT].fmt);
}
/*
 * Build the block report for one hists.
 *
 * Initialises the report's block histogram, feeds every entry of @hists
 * through block_info__process_sym() (its return value is not checked),
 * copies @total_cycles and the accumulated block cycles into every column
 * format, and finally re-sorts the block histogram for output.
 */
static void process_block_report(struct hists *hists,
				 struct block_report *block_report,
				 u64 total_cycles)
{
	struct rb_node *nd = rb_first_cached(&hists->entries);
	struct block_hist *bh = &block_report->hist;
	int col;

	init_block_hist(bh, block_report->fmts);

	for (; nd; nd = rb_next(nd)) {
		struct hist_entry *he = rb_entry(nd, struct hist_entry, rb_node);

		block_info__process_sym(he, bh, &block_report->cycles,
					total_cycles);
	}

	for (col = 0; col < PERF_HPP_REPORT__BLOCK_MAX_INDEX; col++) {
		struct block_fmt *bf = &block_report->fmts[col];

		bf->total_cycles = total_cycles;
		bf->block_cycles = block_report->cycles;
	}

	hists__output_resort(&bh->block_hists, NULL);
}
/*
 * Allocate and fill one block_report per entry of @evlist.
 *
 * The array is sized from evlist->core.nr_entries and zero-initialised by
 * calloc(); each evsel's hists is then processed into the next slot.
 * Returns the array, or NULL when the allocation fails.
 */
struct block_report *block_info__create_report(struct evlist *evlist,
					       u64 total_cycles)
{
	struct block_report *reports;
	struct evsel *pos;
	int nr_reports = evlist->core.nr_entries;
	int slot = 0;

	reports = calloc(nr_reports, sizeof(*reports));
	if (!reports)
		return NULL;

	evlist__for_each_entry(evlist, pos) {
		process_block_report(evsel__hists(pos), &reports[slot],
				     total_cycles);
		slot++;
	}

	return reports;
}
perf report: Sort by sampled cycles percent per block for stdio It would be useful to support sorting for all blocks by the sampled cycles percent per block. This is useful to concentrate on the globally hottest blocks. This patch implements a new option "--total-cycles" which sorts all blocks by 'Sampled Cycles%'. The 'Sampled Cycles%' is the percent: percent = block sampled cycles aggregation / total sampled cycles Note that, this patch only supports "--stdio" mode. For example, # perf record -b ./div # perf report --total-cycles --stdio # To display the perf.data header info, please use --header/--header-only options. # # Total Lost Samples: 0 # # Samples: 2M of event 'cycles' # Event count (approx.): 2753248 # # Sampled Cycles% Sampled Cycles Avg Cycles% Avg Cycles [Program Block Range] Shared Object # ............... .............. ........... .......... ................................................ ................. # 26.04% 2.8M 0.40% 18 [div.c:42 -> div.c:39] div 15.17% 1.2M 0.16% 7 [random_r.c:357 -> random_r.c:380] libc-2.27.so 5.11% 402.0K 0.04% 2 [div.c:27 -> div.c:28] div 4.87% 381.6K 0.04% 2 [random.c:288 -> random.c:291] libc-2.27.so 4.53% 381.0K 0.04% 2 [div.c:40 -> div.c:40] div 3.85% 300.9K 0.02% 1 [div.c:22 -> div.c:25] div 3.08% 241.1K 0.02% 1 [rand.c:26 -> rand.c:27] libc-2.27.so 3.06% 240.0K 0.02% 1 [random.c:291 -> random.c:291] libc-2.27.so 2.78% 215.7K 0.02% 1 [random.c:298 -> random.c:298] libc-2.27.so 2.52% 198.3K 0.02% 1 [random.c:293 -> random.c:293] libc-2.27.so 2.36% 184.8K 0.02% 1 [rand.c:28 -> rand.c:28] libc-2.27.so 2.33% 180.5K 0.02% 1 [random.c:295 -> random.c:295] libc-2.27.so 2.28% 176.7K 0.02% 1 [random.c:295 -> random.c:295] libc-2.27.so 2.20% 168.8K 0.02% 1 [rand@plt+0 -> rand@plt+0] div 1.98% 158.2K 0.02% 1 [random_r.c:388 -> random_r.c:388] libc-2.27.so 1.57% 123.3K 0.02% 1 [div.c:42 -> div.c:44] div 1.44% 116.0K 0.42% 19 [random_r.c:357 -> random_r.c:394] libc-2.27.so 0.25% 182.5K 0.02% 1 [random_r.c:388 -> 
random_r.c:391] libc-2.27.so 0.00% 48 1.07% 48 [x86_pmu_enable+284 -> x86_pmu_enable+298] [kernel.kallsyms] 0.00% 74 1.64% 74 [vm_mmap_pgoff+0 -> vm_mmap_pgoff+92] [kernel.kallsyms] 0.00% 73 1.62% 73 [vm_mmap+0 -> vm_mmap+48] [kernel.kallsyms] 0.00% 63 0.69% 31 [up_write+0 -> up_write+34] [kernel.kallsyms] 0.00% 13 0.29% 13 [setup_arg_pages+396 -> setup_arg_pages+413] [kernel.kallsyms] 0.00% 3 0.07% 3 [setup_arg_pages+418 -> setup_arg_pages+450] [kernel.kallsyms] 0.00% 616 6.84% 308 [security_mmap_file+0 -> security_mmap_file+72] [kernel.kallsyms] 0.00% 23 0.51% 23 [security_mmap_file+77 -> security_mmap_file+87] [kernel.kallsyms] 0.00% 4 0.02% 1 [sched_clock+0 -> sched_clock+4] [kernel.kallsyms] 0.00% 4 0.02% 1 [sched_clock+9 -> sched_clock+12] [kernel.kallsyms] 0.00% 1 0.02% 1 [rcu_nmi_exit+0 -> rcu_nmi_exit+9] [kernel.kallsyms] Committer testing: This should provide material for hours of endless joy, both from looking for suspicious things in the implementation of this patch, such as the top one: # Sampled Cycles% Sampled Cycles Avg Cycles% Avg Cycles [Program Block Range] Shared Object 2.17% 1.7M 0.08% 607 [compiler.h:199 -> common.c:221] [kernel.vmlinux] As well from things that look legit: # Sampled Cycles% Sampled Cycles Avg Cycles% Avg Cycles [Program Block Range] Shared Object 0.16% 123.0K 0.60% 4.7K [nospec-branch.h:265 -> nospec-branch.h:278] [kernel.vmlinux] :-) Very short system wide taken branches session: # perf record -h -b Usage: perf record [<options>] [<command>] or: perf record [<options>] -- <command> [<options>] -b, --branch-any sample any taken branches # # perf record -b ^C[ perf record: Woken up 595 times to write data ] [ perf record: Captured and wrote 156.672 MB perf.data (196873 samples) ] # # perf evlist -v cycles: size: 112, { sample_period, sample_freq }: 4000, sample_type: IP|TID|TIME|CPU|PERIOD|BRANCH_STACK, read_format: ID, disabled: 1, inherit: 1, mmap: 1, comm: 1, freq: 1, task: 1, precise_ip: 3, sample_id_all: 1, exclude_guest: 
1, mmap2: 1, comm_exec: 1, ksymbol: 1, bpf_event: 1, branch_sample_type: ANY # # perf report --total-cycles --stdio # To display the perf.data header info, please use --header/--header-only options. # # Total Lost Samples: 0 # # Samples: 6M of event 'cycles' # Event count (approx.): 6299936 # # Sampled Cycles% Sampled Cycles Avg Cycles% Avg Cycles [Program Block Range] Shared Object # ............... .............. ........... .......... ...................................................................... .................... # 2.17% 1.7M 0.08% 607 [compiler.h:199 -> common.c:221] [kernel.vmlinux] 1.75% 1.3M 8.34% 65.5K [memset-vec-unaligned-erms.S:147 -> memset-vec-unaligned-erms.S:151] libc-2.29.so 0.72% 544.5K 0.03% 230 [entry_64.S:657 -> entry_64.S:662] [kernel.vmlinux] 0.56% 541.8K 0.09% 672 [compiler.h:199 -> common.c:300] [kernel.vmlinux] 0.39% 293.2K 0.01% 104 [list_debug.c:43 -> list_debug.c:61] [kernel.vmlinux] 0.36% 278.6K 0.03% 272 [entry_64.S:1289 -> entry_64.S:1308] [kernel.vmlinux] 0.30% 260.8K 0.07% 564 [clear_page_64.S:47 -> clear_page_64.S:50] [kernel.vmlinux] 0.28% 215.3K 0.05% 369 [traps.c:623 -> traps.c:628] [kernel.vmlinux] 0.23% 178.1K 0.04% 278 [entry_64.S:271 -> entry_64.S:275] [kernel.vmlinux] 0.20% 152.6K 0.09% 706 [paravirt.c:177 -> paravirt.c:179] [kernel.vmlinux] 0.20% 155.8K 0.05% 373 [entry_64.S:153 -> entry_64.S:175] [kernel.vmlinux] 0.18% 136.6K 0.03% 222 [msr.h:105 -> msr.h:166] [kernel.vmlinux] 0.16% 123.0K 0.60% 4.7K [nospec-branch.h:265 -> nospec-branch.h:278] [kernel.vmlinux] 0.16% 118.3K 0.01% 44 [entry_64.S:632 -> entry_64.S:657] [kernel.vmlinux] 0.14% 104.5K 0.00% 28 [rwsem.c:1541 -> rwsem.c:1544] [kernel.vmlinux] 0.13% 99.2K 0.01% 53 [spinlock.c:150 -> spinlock.c:152] [kernel.vmlinux] 0.13% 95.5K 0.00% 35 [swap.c:456 -> swap.c:471] [kernel.vmlinux] 0.12% 96.2K 0.05% 407 [copy_user_64.S:175 -> copy_user_64.S:209] [kernel.vmlinux] 0.11% 85.9K 0.00% 31 [swap.c:400 -> page-flags.h:188] [kernel.vmlinux] 0.10% 73.0K 0.01% 52 
[paravirt.h:763 -> list.h:131] [kernel.vmlinux] 0.07% 56.2K 0.03% 214 [filemap.c:1524 -> filemap.c:1557] [kernel.vmlinux] 0.07% 54.2K 0.02% 145 [memory.c:1032 -> memory.c:1049] [kernel.vmlinux] 0.07% 50.3K 0.00% 39 [mmzone.c:49 -> mmzone.c:69] [kernel.vmlinux] 0.06% 48.3K 0.01% 40 [paravirt.h:768 -> page_alloc.c:3304] [kernel.vmlinux] 0.06% 46.7K 0.02% 155 [memory.c:1032 -> memory.c:1056] [kernel.vmlinux] 0.06% 46.9K 0.01% 103 [swap.c:867 -> swap.c:902] [kernel.vmlinux] 0.06% 47.8K 0.00% 34 [entry_64.S:1201 -> entry_64.S:1202] [kernel.vmlinux] ----------------------------------------------------------- v7: --- Use use_browser in report__browse_block_hists for supporting stdio and potential tui mode. v6: --- Create report__browse_block_hists in block-info.c (codes are moved from builtin-report.c). It's called from perf_evlist__tty_browse_hists. v5: --- 1. Move all block functions to block-info.c 2. Move the code of setting ms in block hist_entry to other patch. v4: --- 1. Use new option '--total-cycles' to replace '-s total_cycles' in v3. 2. Move block info collection out of block info printing. v3: --- 1. Use common function block_info__process_sym to process the blocks per symbol. 2. Remove the nasty hack for skipping calculation of column length 3. Some minor cleanup Signed-off-by: Jin Yao <yao.jin@linux.intel.com> Reviewed-by: Jiri Olsa <jolsa@kernel.org> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> Cc: Andi Kleen <ak@linux.intel.com> Cc: Jin Yao <yao.jin@intel.com> Cc: Kan Liang <kan.liang@linux.intel.com> Cc: Peter Zijlstra <peterz@infradead.org> Link: http://lore.kernel.org/lkml/20191107074719.26139-6-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2019-11-07 10:47:17 +03:00
/*
 * Print the block histogram when use_browser is 0.
 *
 * In that case symbol_conf.report_individual_block is set, the block hists
 * are printed to stdout with @min_percent, their entries are deleted, and 0
 * is returned. Any other use_browser value returns -1 without doing anything.
 * @evsel is unused.
 */
int report__browse_block_hists(struct block_hist *bh, float min_percent,
			       struct evsel *evsel __maybe_unused)
{
	if (use_browser != 0)
		return -1;

	symbol_conf.report_individual_block = true;
	hists__fprintf(&bh->block_hists, true, 0, 0, min_percent,
		       stdout, true);
	hists__delete_entries(&bh->block_hists);

	return 0;
}