2009-04-20 17:37:32 +04:00
|
|
|
/*
 * builtin-stat.c
 *
 * Builtin stat command: Give a precise performance counters summary
 * overview about any workload, CPU or specific PID.
 *
 * Sample output:

   $ perf stat ~/hackbench 10
   Time: 0.104

    Performance counter stats for '/home/mingo/hackbench':

       1255.538611  task clock ticks     #      10.143 CPU utilization factor
             54011  context switches     #       0.043 M/sec
               385  CPU migrations       #       0.000 M/sec
             17755  pagefaults           #       0.014 M/sec
        3808323185  CPU cycles           #    3033.219 M/sec
        1575111190  instructions         #    1254.530 M/sec
          17367895  cache references     #      13.833 M/sec
           7674421  cache misses         #       6.112 M/sec

    Wall-clock time elapsed:   123.786620 msecs

 *
 * Copyright (C) 2008, Red Hat Inc, Ingo Molnar <mingo@redhat.com>
 *
 * Improvements and fixes by:
 *
 *   Arjan van de Ven <arjan@linux.intel.com>
 *   Yanmin Zhang <yanmin.zhang@intel.com>
 *   Wu Fengguang <fengguang.wu@intel.com>
 *   Mike Galbraith <efault@gmx.de>
 *   Paul Mackerras <paulus@samba.org>
 *   Jaswinder Singh Rajput <jaswinder@kernel.org>
 *
 * Released under the GPL v2. (and only v2, not any later version)
 */
|
|
|
|
|
2009-05-23 20:28:58 +04:00
|
|
|
#include "perf.h"
|
2009-05-27 11:10:38 +04:00
|
|
|
#include "builtin.h"
|
2009-04-27 10:02:14 +04:00
|
|
|
#include "util/util.h"
|
2009-05-26 11:17:18 +04:00
|
|
|
#include "util/parse-options.h"
|
|
|
|
#include "util/parse-events.h"
|
2009-08-17 00:05:48 +04:00
|
|
|
#include "util/event.h"
|
|
|
|
#include "util/debug.h"
|
2009-04-20 17:37:32 +04:00
|
|
|
|
|
|
|
#include <sys/prctl.h>
|
2009-06-13 16:57:28 +04:00
|
|
|
#include <math.h>
|
2009-05-05 19:50:27 +04:00
|
|
|
|
2009-06-27 22:19:09 +04:00
|
|
|
static struct perf_counter_attr default_attrs[] = {
|
2009-04-20 17:37:32 +04:00
|
|
|
|
2009-06-11 16:06:28 +04:00
|
|
|
{ .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK },
|
|
|
|
{ .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES},
|
|
|
|
{ .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_MIGRATIONS },
|
|
|
|
{ .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS },
|
|
|
|
|
|
|
|
{ .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES },
|
|
|
|
{ .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS },
|
|
|
|
{ .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_REFERENCES},
|
|
|
|
{ .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_MISSES },
|
|
|
|
|
2009-04-20 17:37:32 +04:00
|
|
|
};
|
2009-05-26 11:17:18 +04:00
|
|
|
|
2009-06-24 16:49:34 +04:00
|
|
|
#define MAX_RUN 100
|
|
|
|
|
2009-06-06 11:58:57 +04:00
|
|
|
static int system_wide = 0;
|
2009-07-01 14:37:06 +04:00
|
|
|
static unsigned int nr_cpus = 0;
|
2009-06-24 16:49:34 +04:00
|
|
|
static int run_idx = 0;
|
2009-04-20 17:37:32 +04:00
|
|
|
|
2009-06-24 16:49:34 +04:00
|
|
|
static int run_count = 1;
|
|
|
|
static int inherit = 1;
|
2009-04-30 15:53:33 +04:00
|
|
|
static int scale = 1;
|
2009-06-24 16:49:34 +04:00
|
|
|
static int target_pid = -1;
|
2009-06-27 08:10:30 +04:00
|
|
|
static int null_run = 0;
|
2009-04-20 17:37:32 +04:00
|
|
|
|
2009-06-24 16:49:34 +04:00
|
|
|
static int fd[MAX_NR_CPUS][MAX_COUNTERS];
|
2009-06-13 16:57:28 +04:00
|
|
|
|
perf_counter tools: Define and use our own u64, s64 etc. definitions
On 64-bit powerpc, __u64 is defined to be unsigned long rather than
unsigned long long. This causes compiler warnings every time we
print a __u64 value with %Lx.
Rather than changing __u64, we define our own u64 to be unsigned long
long on all architectures, and similarly s64 as signed long long.
For consistency we also define u32, s32, u16, s16, u8 and s8. These
definitions are put in a new header, types.h, because these definitions
are needed in util/string.h and util/symbol.h.
The main change here is the mechanical change of __[us]{64,32,16,8}
to remove the "__". The other changes are:
* Create types.h
* Include types.h in perf.h, util/string.h and util/symbol.h
* Add types.h to the LIB_H definition in Makefile
* Added (u64) casts in process_overflow_event() and print_sym_table()
to kill two remaining warnings.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: benh@kernel.crashing.org
LKML-Reference: <19003.33494.495844.956580@cargo.ozlabs.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-06-19 16:21:42 +04:00
|
|
|
static u64 runtime_nsecs[MAX_RUN];
|
|
|
|
static u64 walltime_nsecs[MAX_RUN];
|
|
|
|
static u64 runtime_cycles[MAX_RUN];
|
2009-06-13 16:57:28 +04:00
|
|
|
|
2009-06-24 16:49:34 +04:00
|
|
|
static u64 event_res[MAX_RUN][MAX_COUNTERS][3];
|
|
|
|
static u64 event_scaled[MAX_RUN][MAX_COUNTERS];
|
|
|
|
|
2009-09-04 17:36:12 +04:00
|
|
|
/*
 * Accumulator for a series of samples: the plain sum and the sum of
 * squares, enough to derive the mean and the standard deviation.
 */
struct stats {
	double	sum;		/* sum of all samples */
	double	sum_sq;		/* sum of all squared samples */
};
|
2009-06-13 16:57:28 +04:00
|
|
|
|
2009-09-04 17:36:12 +04:00
|
|
|
static double avg_stats(struct stats *stats)
|
|
|
|
{
|
|
|
|
return stats->sum / run_count;
|
|
|
|
}
|
2009-06-13 16:57:28 +04:00
|
|
|
|
2009-09-04 17:36:12 +04:00
|
|
|
/*
|
|
|
|
* stddev = sqrt(1/N (\Sum n_i^2) - avg(n)^2)
|
|
|
|
*/
|
|
|
|
static double stddev_stats(struct stats *stats)
|
|
|
|
{
|
|
|
|
double avg = stats->sum / run_count;
|
2009-06-13 16:57:28 +04:00
|
|
|
|
2009-09-04 17:36:12 +04:00
|
|
|
return sqrt(stats->sum_sq/run_count - avg*avg);
|
|
|
|
}
|
2009-06-13 16:57:28 +04:00
|
|
|
|
2009-09-04 17:36:12 +04:00
|
|
|
struct stats event_res_stats[MAX_COUNTERS][3];
|
|
|
|
struct stats event_scaled_stats[MAX_COUNTERS];
|
|
|
|
struct stats runtime_nsecs_stats;
|
|
|
|
struct stats walltime_nsecs_stats;
|
|
|
|
struct stats runtime_cycles_stats;
|
2009-05-29 11:10:54 +04:00
|
|
|
|
2009-07-01 13:35:09 +04:00
|
|
|
/*
 * True when attrs[counter] describes event PERF_COUNT_<c> of type
 * PERF_TYPE_<t>.  Note: @counter is evaluated twice.
 */
#define MATCH_EVENT(t, c, counter)				\
	(attrs[counter].type   == PERF_TYPE_##t &&		\
	 attrs[counter].config == PERF_COUNT_##c)
|
|
|
|
|
2009-06-23 15:42:49 +04:00
|
|
|
/*
 * fprintf() format used when opening a counter fails.
 * Arguments: counter index (int), returned value (int), error text (char *).
 */
#define ERR_PERF_OPEN						\
	"Error: counter %d, sys_perf_counter_open() "		\
	"syscall returned with %d (%s)\n"
|
|
|
|
|
2009-06-29 15:13:21 +04:00
|
|
|
static void create_perf_stat_counter(int counter, int pid)
|
2009-04-20 17:37:32 +04:00
|
|
|
{
|
2009-06-06 11:58:57 +04:00
|
|
|
struct perf_counter_attr *attr = attrs + counter;
|
2009-05-05 19:50:27 +04:00
|
|
|
|
2009-04-20 17:37:32 +04:00
|
|
|
if (scale)
|
2009-06-06 11:58:57 +04:00
|
|
|
attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
|
|
|
|
PERF_FORMAT_TOTAL_TIME_RUNNING;
|
2009-04-20 17:37:32 +04:00
|
|
|
|
|
|
|
if (system_wide) {
|
2009-07-01 14:37:06 +04:00
|
|
|
unsigned int cpu;
|
|
|
|
|
2009-06-23 15:42:49 +04:00
|
|
|
for (cpu = 0; cpu < nr_cpus; cpu++) {
|
2009-06-06 11:58:57 +04:00
|
|
|
fd[cpu][counter] = sys_perf_counter_open(attr, -1, cpu, -1, 0);
|
2009-06-23 15:42:49 +04:00
|
|
|
if (fd[cpu][counter] < 0 && verbose)
|
|
|
|
fprintf(stderr, ERR_PERF_OPEN, counter,
|
|
|
|
fd[cpu][counter], strerror(errno));
|
2009-04-20 17:37:32 +04:00
|
|
|
}
|
|
|
|
} else {
|
2009-06-30 10:07:19 +04:00
|
|
|
attr->inherit = inherit;
|
|
|
|
attr->disabled = 1;
|
|
|
|
attr->enable_on_exec = 1;
|
2009-04-20 17:37:32 +04:00
|
|
|
|
2009-06-29 15:13:21 +04:00
|
|
|
fd[0][counter] = sys_perf_counter_open(attr, pid, -1, -1, 0);
|
2009-06-23 15:42:49 +04:00
|
|
|
if (fd[0][counter] < 0 && verbose)
|
|
|
|
fprintf(stderr, ERR_PERF_OPEN, counter,
|
|
|
|
fd[0][counter], strerror(errno));
|
2009-04-20 17:37:32 +04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2009-05-29 11:10:54 +04:00
|
|
|
/*
|
|
|
|
* Does the counter have nsecs as a unit?
|
|
|
|
*/
|
|
|
|
static inline int nsec_counter(int counter)
|
|
|
|
{
|
2009-07-01 13:35:09 +04:00
|
|
|
if (MATCH_EVENT(SOFTWARE, SW_CPU_CLOCK, counter) ||
|
|
|
|
MATCH_EVENT(SOFTWARE, SW_TASK_CLOCK, counter))
|
2009-05-29 11:10:54 +04:00
|
|
|
return 1;
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
2009-05-29 11:10:54 +04:00
|
|
|
* Read out the results of a single counter:
|
2009-05-29 11:10:54 +04:00
|
|
|
*/
|
2009-05-29 11:10:54 +04:00
|
|
|
static void read_counter(int counter)
|
2009-05-29 11:10:54 +04:00
|
|
|
{
|
perf_counter tools: Define and use our own u64, s64 etc. definitions
On 64-bit powerpc, __u64 is defined to be unsigned long rather than
unsigned long long. This causes compiler warnings every time we
print a __u64 value with %Lx.
Rather than changing __u64, we define our own u64 to be unsigned long
long on all architectures, and similarly s64 as signed long long.
For consistency we also define u32, s32, u16, s16, u8 and s8. These
definitions are put in a new header, types.h, because these definitions
are needed in util/string.h and util/symbol.h.
The main change here is the mechanical change of __[us]{64,32,16,8}
to remove the "__". The other changes are:
* Create types.h
* Include types.h in perf.h, util/string.h and util/symbol.h
* Add types.h to the LIB_H definition in Makefile
* Added (u64) casts in process_overflow_event() and print_sym_table()
to kill two remaining warnings.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: benh@kernel.crashing.org
LKML-Reference: <19003.33494.495844.956580@cargo.ozlabs.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-06-19 16:21:42 +04:00
|
|
|
u64 *count, single_count[3];
|
2009-07-01 14:37:06 +04:00
|
|
|
unsigned int cpu;
|
|
|
|
size_t res, nv;
|
2009-05-29 11:10:54 +04:00
|
|
|
int scaled;
|
|
|
|
|
2009-06-13 16:57:28 +04:00
|
|
|
count = event_res[run_idx][counter];
|
2009-05-29 11:10:54 +04:00
|
|
|
|
2009-05-29 11:10:54 +04:00
|
|
|
count[0] = count[1] = count[2] = 0;
|
2009-05-29 11:10:54 +04:00
|
|
|
|
2009-05-29 11:10:54 +04:00
|
|
|
nv = scale ? 3 : 1;
|
2009-06-23 15:42:49 +04:00
|
|
|
for (cpu = 0; cpu < nr_cpus; cpu++) {
|
2009-06-07 19:06:46 +04:00
|
|
|
if (fd[cpu][counter] < 0)
|
|
|
|
continue;
|
|
|
|
|
perf_counter tools: Define and use our own u64, s64 etc. definitions
On 64-bit powerpc, __u64 is defined to be unsigned long rather than
unsigned long long. This causes compiler warnings every time we
print a __u64 value with %Lx.
Rather than changing __u64, we define our own u64 to be unsigned long
long on all architectures, and similarly s64 as signed long long.
For consistency we also define u32, s32, u16, s16, u8 and s8. These
definitions are put in a new header, types.h, because these definitions
are needed in util/string.h and util/symbol.h.
The main change here is the mechanical change of __[us]{64,32,16,8}
to remove the "__". The other changes are:
* Create types.h
* Include types.h in perf.h, util/string.h and util/symbol.h
* Add types.h to the LIB_H definition in Makefile
* Added (u64) casts in process_overflow_event() and print_sym_table()
to kill two remaining warnings.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: benh@kernel.crashing.org
LKML-Reference: <19003.33494.495844.956580@cargo.ozlabs.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-06-19 16:21:42 +04:00
|
|
|
res = read(fd[cpu][counter], single_count, nv * sizeof(u64));
|
|
|
|
assert(res == nv * sizeof(u64));
|
2009-07-01 14:37:06 +04:00
|
|
|
|
2009-06-13 16:57:28 +04:00
|
|
|
close(fd[cpu][counter]);
|
|
|
|
fd[cpu][counter] = -1;
|
2009-05-29 11:10:54 +04:00
|
|
|
|
|
|
|
count[0] += single_count[0];
|
|
|
|
if (scale) {
|
|
|
|
count[1] += single_count[1];
|
|
|
|
count[2] += single_count[2];
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
scaled = 0;
|
|
|
|
if (scale) {
|
|
|
|
if (count[2] == 0) {
|
2009-06-13 16:57:28 +04:00
|
|
|
event_scaled[run_idx][counter] = -1;
|
2009-05-29 11:10:54 +04:00
|
|
|
count[0] = 0;
|
2009-05-29 11:10:54 +04:00
|
|
|
return;
|
|
|
|
}
|
2009-05-29 11:10:54 +04:00
|
|
|
|
2009-05-29 11:10:54 +04:00
|
|
|
if (count[2] < count[1]) {
|
2009-06-13 16:57:28 +04:00
|
|
|
event_scaled[run_idx][counter] = 1;
|
2009-05-29 11:10:54 +04:00
|
|
|
count[0] = (unsigned long long)
|
|
|
|
((double)count[0] * count[1] / count[2] + 0.5);
|
|
|
|
}
|
|
|
|
}
|
2009-05-29 11:10:54 +04:00
|
|
|
/*
|
|
|
|
* Save the full runtime - to allow normalization during printout:
|
|
|
|
*/
|
2009-07-01 13:35:09 +04:00
|
|
|
if (MATCH_EVENT(SOFTWARE, SW_TASK_CLOCK, counter))
|
2009-06-13 16:57:28 +04:00
|
|
|
runtime_nsecs[run_idx] = count[0];
|
2009-07-01 13:35:09 +04:00
|
|
|
if (MATCH_EVENT(HARDWARE, HW_CPU_CYCLES, counter))
|
2009-06-13 16:57:28 +04:00
|
|
|
runtime_cycles[run_idx] = count[0];
|
2009-05-29 11:10:54 +04:00
|
|
|
}
|
|
|
|
|
2009-07-01 14:37:06 +04:00
|
|
|
/*
 * Execute the workload once and collect its counters.
 *
 * @argc: unused
 * @argv: NULL-terminated command vector handed to execvp()
 *
 * The child is forked first and parked on go_pipe until the parent has
 * created all counters for its pid; closing go_pipe[1] in the parent
 * releases it to exec the command.  The wall-clock time between that
 * release and the return of wait() is stored in walltime_nsecs[run_idx],
 * then every counter is read out.
 *
 * Exits the process if the pipes cannot be created or fork() fails.
 * Returns WEXITSTATUS() of the status collected by wait().
 */
static int run_perf_stat(int argc __used, const char **argv)
{
	unsigned long long t0, t1;
	int status = 0;
	int counter;
	int pid;
	int child_ready_pipe[2], go_pipe[2];
	char buf;

	if (!system_wide)
		nr_cpus = 1;

	if (pipe(child_ready_pipe) < 0 || pipe(go_pipe) < 0) {
		perror("failed to create pipes");
		exit(1);
	}

	/*
	 * Without a child there is nothing to measure: carrying on would
	 * open counters for pid -1 and report meaningless numbers.
	 */
	if ((pid = fork()) < 0) {
		perror("failed to fork");
		exit(1);
	}

	if (!pid) {
		close(child_ready_pipe[0]);
		close(go_pipe[1]);
		fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC);

		/*
		 * Do a dummy execvp to get the PLT entry resolved,
		 * so we avoid the resolver overhead on the real
		 * execvp call.
		 */
		execvp("", (char **)argv);

		/*
		 * Tell the parent we're ready to go
		 */
		close(child_ready_pipe[1]);

		/*
		 * Wait until the parent tells us to go.
		 */
		if (read(go_pipe[0], &buf, 1) == -1)
			perror("unable to read pipe");

		execvp(argv[0], (char **)argv);

		/* only reached when the exec failed */
		perror(argv[0]);
		exit(-1);
	}

	/*
	 * Wait for the child to be ready to exec.
	 */
	close(child_ready_pipe[1]);
	close(go_pipe[0]);
	if (read(child_ready_pipe[0], &buf, 1) == -1)
		perror("unable to read pipe");
	close(child_ready_pipe[0]);

	for (counter = 0; counter < nr_counters; counter++)
		create_perf_stat_counter(counter, pid);

	/*
	 * Enable counters and exec the command:
	 */
	t0 = rdclock();

	close(go_pipe[1]);
	wait(&status);

	t1 = rdclock();

	walltime_nsecs[run_idx] = t1 - t0;

	for (counter = 0; counter < nr_counters; counter++)
		read_counter(counter);

	return WEXITSTATUS(status);
}
|
|
|
|
|
2009-09-04 17:36:12 +04:00
|
|
|
static void print_noise(double avg, double stddev)
|
2009-06-13 16:57:28 +04:00
|
|
|
{
|
|
|
|
if (run_count > 1)
|
2009-09-04 17:36:12 +04:00
|
|
|
fprintf(stderr, " ( +- %7.3f%% )", 100*stddev / avg);
|
2009-06-13 16:57:28 +04:00
|
|
|
}
|
|
|
|
|
2009-09-04 17:36:12 +04:00
|
|
|
static void nsec_printout(int counter, double avg, double stddev)
|
2009-06-13 15:35:00 +04:00
|
|
|
{
|
2009-09-04 17:36:12 +04:00
|
|
|
double msecs = avg / 1e6;
|
2009-06-13 15:35:00 +04:00
|
|
|
|
2009-06-27 01:32:07 +04:00
|
|
|
fprintf(stderr, " %14.6f %-24s", msecs, event_name(counter));
|
2009-06-13 15:35:00 +04:00
|
|
|
|
2009-07-01 13:35:09 +04:00
|
|
|
if (MATCH_EVENT(SOFTWARE, SW_TASK_CLOCK, counter)) {
|
2009-09-04 17:36:12 +04:00
|
|
|
fprintf(stderr, " # %10.3f CPUs ",
|
|
|
|
avg / avg_stats(&walltime_nsecs_stats));
|
2009-06-13 15:35:00 +04:00
|
|
|
}
|
2009-09-04 17:36:12 +04:00
|
|
|
print_noise(avg, stddev);
|
2009-06-13 15:35:00 +04:00
|
|
|
}
|
|
|
|
|
2009-09-04 17:36:12 +04:00
|
|
|
static void abs_printout(int counter, double avg, double stddev)
|
2009-06-13 15:35:00 +04:00
|
|
|
{
|
2009-09-04 17:36:12 +04:00
|
|
|
fprintf(stderr, " %14.0f %-24s", avg, event_name(counter));
|
2009-06-13 15:35:00 +04:00
|
|
|
|
2009-09-04 17:36:12 +04:00
|
|
|
if (MATCH_EVENT(HARDWARE, HW_INSTRUCTIONS, counter)) {
|
2009-06-13 16:57:28 +04:00
|
|
|
fprintf(stderr, " # %10.3f IPC ",
|
2009-09-04 17:36:12 +04:00
|
|
|
avg / avg_stats(&runtime_cycles_stats));
|
2009-06-13 16:57:28 +04:00
|
|
|
} else {
|
2009-09-04 17:36:12 +04:00
|
|
|
fprintf(stderr, " # %10.3f M/sec",
|
|
|
|
1000.0 * avg / avg_stats(&runtime_nsecs_stats));
|
2009-06-13 15:35:00 +04:00
|
|
|
}
|
2009-09-04 17:36:12 +04:00
|
|
|
print_noise(avg, stddev);
|
2009-06-13 15:35:00 +04:00
|
|
|
}
|
|
|
|
|
2009-05-29 11:10:54 +04:00
|
|
|
/*
|
|
|
|
* Print out the results of a single counter:
|
|
|
|
*/
|
|
|
|
static void print_counter(int counter)
|
|
|
|
{
|
2009-09-04 17:36:12 +04:00
|
|
|
double avg, stddev;
|
2009-05-29 11:10:54 +04:00
|
|
|
int scaled;
|
|
|
|
|
2009-09-04 17:36:12 +04:00
|
|
|
avg = avg_stats(&event_res_stats[counter][0]);
|
|
|
|
stddev = stddev_stats(&event_res_stats[counter][0]);
|
|
|
|
scaled = avg_stats(&event_scaled_stats[counter]);
|
2009-05-29 11:10:54 +04:00
|
|
|
|
|
|
|
if (scaled == -1) {
|
2009-06-27 01:32:07 +04:00
|
|
|
fprintf(stderr, " %14s %-24s\n",
|
2009-05-29 11:10:54 +04:00
|
|
|
"<not counted>", event_name(counter));
|
|
|
|
return;
|
|
|
|
}
|
2009-05-29 11:10:54 +04:00
|
|
|
|
2009-06-13 15:35:00 +04:00
|
|
|
if (nsec_counter(counter))
|
2009-09-04 17:36:12 +04:00
|
|
|
nsec_printout(counter, avg, stddev);
|
2009-06-13 15:35:00 +04:00
|
|
|
else
|
2009-09-04 17:36:12 +04:00
|
|
|
abs_printout(counter, avg, stddev);
|
|
|
|
|
|
|
|
if (scaled) {
|
|
|
|
double avg_enabled, avg_running;
|
|
|
|
|
|
|
|
avg_enabled = avg_stats(&event_res_stats[counter][1]);
|
|
|
|
avg_running = avg_stats(&event_res_stats[counter][2]);
|
2009-05-30 14:38:51 +04:00
|
|
|
|
2009-06-29 23:50:54 +04:00
|
|
|
fprintf(stderr, " (scaled from %.2f%%)",
|
2009-09-04 17:36:12 +04:00
|
|
|
100 * avg_running / avg_enabled);
|
|
|
|
}
|
2009-06-13 15:35:00 +04:00
|
|
|
|
2009-05-29 11:10:54 +04:00
|
|
|
fprintf(stderr, "\n");
|
|
|
|
}
|
|
|
|
|
2009-09-04 17:36:12 +04:00
|
|
|
/*
 * Fold one sample into an accumulator.
 *
 * @name, @idx: only used to label the debug line printed when verbose > 1
 * @stats:      accumulator receiving the sample and its square
 * @val:        pointer to the sample
 */
static void update_stats(const char *name, int idx, struct stats *stats, u64 *val)
{
	double sample = *val;

	stats->sum    += *val;
	stats->sum_sq += sample * sample;

	if (verbose > 1)
		fprintf(stderr, "debug: %20s[%d]: %Ld\n", name, idx, *val);
}
|
2009-09-04 17:36:12 +04:00
|
|
|
|
2009-06-13 16:57:28 +04:00
|
|
|
/*
|
|
|
|
* Calculate the averages and noises:
|
|
|
|
*/
|
|
|
|
static void calc_avg(void)
|
|
|
|
{
|
|
|
|
int i, j;
|
|
|
|
|
2009-06-13 17:40:35 +04:00
|
|
|
if (verbose > 1)
|
|
|
|
fprintf(stderr, "\n");
|
|
|
|
|
2009-06-13 16:57:28 +04:00
|
|
|
for (i = 0; i < run_count; i++) {
|
2009-09-04 17:36:12 +04:00
|
|
|
update_stats("runtime", 0, &runtime_nsecs_stats, runtime_nsecs + i);
|
|
|
|
update_stats("walltime", 0, &walltime_nsecs_stats, walltime_nsecs + i);
|
|
|
|
update_stats("runtime_cycles", 0, &runtime_cycles_stats, runtime_cycles + i);
|
2009-06-13 16:57:28 +04:00
|
|
|
|
|
|
|
for (j = 0; j < nr_counters; j++) {
|
2009-09-04 17:36:12 +04:00
|
|
|
update_stats("counter/0", j,
|
|
|
|
event_res_stats[j]+0, event_res[i][j]+0);
|
|
|
|
update_stats("counter/1", j,
|
|
|
|
event_res_stats[j]+1, event_res[i][j]+1);
|
|
|
|
update_stats("counter/2", j,
|
|
|
|
event_res_stats[j]+2, event_res[i][j]+2);
|
2009-07-01 14:37:06 +04:00
|
|
|
if (event_scaled[i][j] != (u64)-1)
|
2009-09-04 17:36:12 +04:00
|
|
|
update_stats("scaled", j,
|
|
|
|
event_scaled_stats + j, event_scaled[i]+j);
|
2009-06-13 16:57:28 +04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static void print_stat(int argc, const char **argv)
|
|
|
|
{
|
|
|
|
int i, counter;
|
|
|
|
|
|
|
|
calc_avg();
|
2009-04-20 17:37:32 +04:00
|
|
|
|
|
|
|
fflush(stdout);
|
|
|
|
|
|
|
|
fprintf(stderr, "\n");
|
2009-06-03 21:36:07 +04:00
|
|
|
fprintf(stderr, " Performance counter stats for \'%s", argv[0]);
|
|
|
|
|
|
|
|
for (i = 1; i < argc; i++)
|
|
|
|
fprintf(stderr, " %s", argv[i]);
|
|
|
|
|
2009-06-13 16:57:28 +04:00
|
|
|
fprintf(stderr, "\'");
|
|
|
|
if (run_count > 1)
|
|
|
|
fprintf(stderr, " (%d runs)", run_count);
|
|
|
|
fprintf(stderr, ":\n\n");
|
2009-05-29 11:10:54 +04:00
|
|
|
|
2009-05-29 11:10:54 +04:00
|
|
|
for (counter = 0; counter < nr_counters; counter++)
|
|
|
|
print_counter(counter);
|
2009-04-20 17:37:32 +04:00
|
|
|
|
|
|
|
fprintf(stderr, "\n");
|
2009-06-27 08:24:32 +04:00
|
|
|
fprintf(stderr, " %14.9f seconds time elapsed",
|
2009-09-04 17:36:12 +04:00
|
|
|
avg_stats(&walltime_nsecs_stats)/1e9);
|
2009-06-27 08:24:32 +04:00
|
|
|
if (run_count > 1) {
|
|
|
|
fprintf(stderr, " ( +- %7.3f%% )",
|
2009-09-04 17:36:12 +04:00
|
|
|
100*stddev_stats(&walltime_nsecs_stats) /
|
|
|
|
avg_stats(&walltime_nsecs_stats));
|
2009-06-27 08:24:32 +04:00
|
|
|
}
|
|
|
|
fprintf(stderr, "\n\n");
|
2009-04-20 17:37:32 +04:00
|
|
|
}
|
|
|
|
|
2009-06-10 17:55:59 +04:00
|
|
|
/* Number of the last signal caught by skip_signal(), -1 if none. */
static volatile int signr = -1;

/*
 * Signal handler: only remembers which signal arrived, so that
 * sig_atexit() can act on it later.
 */
static void skip_signal(int signo)
{
	signr = signo;
}
|
|
|
|
|
|
|
|
static void sig_atexit(void)
|
|
|
|
{
|
|
|
|
if (signr == -1)
|
|
|
|
return;
|
|
|
|
|
|
|
|
signal(signr, SIG_DFL);
|
|
|
|
kill(getpid(), signr);
|
2009-05-26 11:17:18 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
/* NULL-terminated usage lines handed to the option parser. */
static const char * const stat_usage[] = {
	"perf stat [<options>] <command>",
	NULL
};
|
|
|
|
|
|
|
|
static const struct option options[] = {
|
|
|
|
OPT_CALLBACK('e', "event", NULL, "event",
|
2009-06-06 14:24:17 +04:00
|
|
|
"event selector. use 'perf list' to list available events",
|
|
|
|
parse_events),
|
2009-05-26 11:17:18 +04:00
|
|
|
OPT_BOOLEAN('i', "inherit", &inherit,
|
|
|
|
"child tasks inherit counters"),
|
|
|
|
OPT_INTEGER('p', "pid", &target_pid,
|
|
|
|
"stat events on existing pid"),
|
|
|
|
OPT_BOOLEAN('a', "all-cpus", &system_wide,
|
2009-06-24 16:49:34 +04:00
|
|
|
"system-wide collection from all CPUs"),
|
2009-08-07 12:18:39 +04:00
|
|
|
OPT_BOOLEAN('c', "scale", &scale,
|
2009-06-24 16:49:34 +04:00
|
|
|
"scale/normalize counters"),
|
2009-06-07 19:06:46 +04:00
|
|
|
OPT_BOOLEAN('v', "verbose", &verbose,
|
|
|
|
"be more verbose (show counter open errors, etc)"),
|
2009-06-13 16:57:28 +04:00
|
|
|
OPT_INTEGER('r', "repeat", &run_count,
|
|
|
|
"repeat command and print average + stddev (max: 100)"),
|
2009-06-27 08:10:30 +04:00
|
|
|
OPT_BOOLEAN('n', "null", &null_run,
|
|
|
|
"null run - dont start any counters"),
|
2009-05-26 11:17:18 +04:00
|
|
|
OPT_END()
|
|
|
|
};
|
|
|
|
|
2009-07-01 14:37:06 +04:00
|
|
|
/*
 * Entry point of 'perf stat'.
 *
 * Parses the options, falls back to default_attrs[] when no event was
 * selected (unless a null run was requested), runs the command
 * run_count times and prints the statistics.
 *
 * Returns the value of the last run_perf_stat() call.
 */
int cmd_stat(int argc, const char **argv, const char *prefix __used)
{
	int status = 0;

	argc = parse_options(argc, argv, options, stat_usage,
			     PARSE_OPT_STOP_AT_NON_OPTION);
	if (argc == 0)
		usage_with_options(stat_usage, options);
	if (run_count < 1 || MAX_RUN < run_count)
		usage_with_options(stat_usage, options);

	/* Set attrs and nr_counters if no event is selected and !null_run */
	if (!nr_counters && !null_run) {
		memcpy(attrs, default_attrs, sizeof(default_attrs));
		nr_counters = ARRAY_SIZE(default_attrs);
	}

	nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
	assert(nr_cpus <= MAX_NR_CPUS);
	assert((int)nr_cpus >= 0);

	/*
	 * We dont want to block the signals - that would cause
	 * child tasks to inherit that and Ctrl-C would not work.
	 * What we want is for Ctrl-C to work in the exec()-ed
	 * task, but being ignored by perf stat itself:
	 */
	atexit(sig_atexit);
	signal(SIGINT,  skip_signal);
	signal(SIGALRM, skip_signal);
	signal(SIGABRT, skip_signal);

	run_idx = 0;
	while (run_idx < run_count) {
		if (verbose && run_count != 1)
			fprintf(stderr, "[ perf stat: executing run #%d ... ]\n", run_idx + 1);
		status = run_perf_stat(argc, argv);
		run_idx++;
	}

	print_stat(argc, argv);

	return status;
}
|