perf stat: Use affinity for reading

Restructure event reading to use affinity to minimize the number of IPIs
needed.

Before, on a large test case with 94 CPUs:

  % time     seconds  usecs/call     calls    errors syscall
  ------ ----------- ----------- --------- --------- ----------------
    3.16    0.106079           4     22082           read

After:

    3.43    0.081295           3     22082           read
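
The idea behind the restructuring, shown here as a standalone sketch rather than the actual tools/perf code (the fd layout, NR_EVENTS, and read_all_counters() are hypothetical): pin the reading thread to one CPU at a time, then read every event that was opened on that CPU before moving on, so the kernel can read the PMU locally instead of sending an IPI to the target CPU for each read.

#define _GNU_SOURCE
#include <sched.h>
#include <stdint.h>
#include <unistd.h>

#define NR_EVENTS 4	/* hypothetical: number of events opened per CPU */

/*
 * Sketch only: fds[cpu][ev] is assumed to hold perf_event_open() fds
 * that were opened on 'cpu'.  Pinning ourselves to that CPU first means
 * each read() is satisfied locally, with no cross-CPU IPI.
 */
static int read_all_counters(int ncpus, int fds[][NR_EVENTS],
			     uint64_t counts[][NR_EVENTS])
{
	cpu_set_t mask;
	int cpu, ev;

	for (cpu = 0; cpu < ncpus; cpu++) {
		/* Migrate this thread to the CPU whose counters are read next. */
		CPU_ZERO(&mask);
		CPU_SET(cpu, &mask);
		if (sched_setaffinity(0, sizeof(mask), &mask))
			return -1;

		/* Read every event belonging to this CPU while pinned here. */
		for (ev = 0; ev < NR_EVENTS; ev++) {
			if (read(fds[cpu][ev], &counts[cpu][ev],
				 sizeof(counts[cpu][ev])) < 0)
				return -1;
		}
	}
	return 0;
}

The actual patch does the same thing with perf's own helpers (affinity__set() plus the evlist CPU iterator), as the diff below shows.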

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Link: http://lore.kernel.org/lkml/20191121001522.180827-11-andi@firstfloor.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Andi Kleen 2019-11-20 16:15:20 -08:00, committed by Arnaldo Carvalho de Melo
Parent: 4804e01116
Commit: 4b49ab708d
2 changed files with 56 additions and 40 deletions

tools/perf/builtin-stat.c

@@ -266,15 +266,10 @@ static int read_single_counter(struct evsel *counter, int cpu,
  * Read out the results of a single counter:
  * do not aggregate counts across CPUs in system-wide mode
  */
-static int read_counter(struct evsel *counter, struct timespec *rs)
+static int read_counter_cpu(struct evsel *counter, struct timespec *rs, int cpu)
 {
 	int nthreads = perf_thread_map__nr(evsel_list->core.threads);
-	int ncpus, cpu, thread;
-
-	if (target__has_cpu(&target) && !target__has_per_thread(&target))
-		ncpus = perf_evsel__nr_cpus(counter);
-	else
-		ncpus = 1;
+	int thread;
 
 	if (!counter->supported)
 		return -ENOENT;
@@ -283,39 +278,37 @@ static int read_counter(struct evsel *counter, struct timespec *rs)
 		nthreads = 1;
 
 	for (thread = 0; thread < nthreads; thread++) {
-		for (cpu = 0; cpu < ncpus; cpu++) {
-			struct perf_counts_values *count;
+		struct perf_counts_values *count;
 
-			count = perf_counts(counter->counts, cpu, thread);
+		count = perf_counts(counter->counts, cpu, thread);
 
-			/*
-			 * The leader's group read loads data into its group members
-			 * (via perf_evsel__read_counter) and sets threir count->loaded.
-			 */
-			if (!perf_counts__is_loaded(counter->counts, cpu, thread) &&
-			    read_single_counter(counter, cpu, thread, rs)) {
-				counter->counts->scaled = -1;
-				perf_counts(counter->counts, cpu, thread)->ena = 0;
-				perf_counts(counter->counts, cpu, thread)->run = 0;
-				return -1;
-			}
+		/*
+		 * The leader's group read loads data into its group members
+		 * (via perf_evsel__read_counter()) and sets their count->loaded.
+		 */
+		if (!perf_counts__is_loaded(counter->counts, cpu, thread) &&
+		    read_single_counter(counter, cpu, thread, rs)) {
+			counter->counts->scaled = -1;
+			perf_counts(counter->counts, cpu, thread)->ena = 0;
+			perf_counts(counter->counts, cpu, thread)->run = 0;
+			return -1;
+		}
 
-			perf_counts__set_loaded(counter->counts, cpu, thread, false);
+		perf_counts__set_loaded(counter->counts, cpu, thread, false);
 
-			if (STAT_RECORD) {
-				if (perf_evsel__write_stat_event(counter, cpu, thread, count)) {
-					pr_err("failed to write stat event\n");
-					return -1;
-				}
-			}
+		if (STAT_RECORD) {
+			if (perf_evsel__write_stat_event(counter, cpu, thread, count)) {
+				pr_err("failed to write stat event\n");
+				return -1;
+			}
+		}
 
-			if (verbose > 1) {
-				fprintf(stat_config.output,
-					"%s: %d: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n",
-						perf_evsel__name(counter),
-						cpu,
-						count->val, count->ena, count->run);
-			}
+		if (verbose > 1) {
+			fprintf(stat_config.output,
+				"%s: %d: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n",
+					perf_evsel__name(counter),
+					cpu,
+					count->val, count->ena, count->run);
 		}
 	}
 
@@ -325,15 +318,37 @@ static int read_counter(struct evsel *counter, struct timespec *rs)
 static void read_counters(struct timespec *rs)
 {
 	struct evsel *counter;
-	int ret;
+	struct affinity affinity;
+	int i, ncpus, cpu;
+
+	if (affinity__setup(&affinity) < 0)
+		return;
+
+	ncpus = perf_cpu_map__nr(evsel_list->core.all_cpus);
+	if (!target__has_cpu(&target) || target__has_per_thread(&target))
+		ncpus = 1;
+	evlist__for_each_cpu(evsel_list, i, cpu) {
+		if (i >= ncpus)
+			break;
+		affinity__set(&affinity, cpu);
+
+		evlist__for_each_entry(evsel_list, counter) {
+			if (evsel__cpu_iter_skip(counter, cpu))
+				continue;
+			if (!counter->err) {
+				counter->err = read_counter_cpu(counter, rs,
+								counter->cpu_iter - 1);
+			}
+		}
+	}
+	affinity__cleanup(&affinity);
 
 	evlist__for_each_entry(evsel_list, counter) {
-		ret = read_counter(counter, rs);
-		if (ret)
+		if (counter->err)
 			pr_debug("failed to read counter %s\n", counter->name);
-
-		if (ret == 0 && perf_stat_process_counter(&stat_config, counter))
+		if (counter->err == 0 && perf_stat_process_counter(&stat_config, counter))
 			pr_warning("failed to process counter %s\n", counter->name);
+		counter->err = 0;
 	}
 }
 

tools/perf/util/evsel.h

@@ -86,6 +86,7 @@ struct evsel {
 	struct list_head	config_terms;
 	struct bpf_object	*bpf_obj;
 	int			bpf_fd;
+	int			err;
 	bool			auto_merge_stats;
 	bool			merged_stat;
 	const char		*metric_expr;