From 9ae7d3351aac238eef9646479693105688fd9cc9 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 19 Jan 2012 14:07:23 -0200 Subject: [PATCH 1/9] perf tools: Add fprintf methods for thread_map and cpu_map classes For helping with debugging. Cc: David Ahern Cc: Frederic Weisbecker Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-m06n4rp7pwr6dlzwoq89cl69@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/cpumap.c | 11 +++++++++++ tools/perf/util/cpumap.h | 4 ++++ tools/perf/util/thread_map.c | 11 +++++++++++ tools/perf/util/thread_map.h | 4 ++++ 4 files changed, 30 insertions(+) diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c index 6893eec693ab..adc72f09914d 100644 --- a/tools/perf/util/cpumap.c +++ b/tools/perf/util/cpumap.c @@ -166,6 +166,17 @@ out: return cpus; } +size_t cpu_map__fprintf(struct cpu_map *map, FILE *fp) +{ + int i; + size_t printed = fprintf(fp, "%d cpu%s: ", + map->nr, map->nr > 1 ? "s" : ""); + for (i = 0; i < map->nr; ++i) + printed += fprintf(fp, "%s%d", i ? ", " : "", map->map[i]); + + return printed + fprintf(fp, "\n"); +} + struct cpu_map *cpu_map__dummy_new(void) { struct cpu_map *cpus = malloc(sizeof(*cpus) + sizeof(int)); diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h index 072c0a374794..c41518573c6a 100644 --- a/tools/perf/util/cpumap.h +++ b/tools/perf/util/cpumap.h @@ -1,6 +1,8 @@ #ifndef __PERF_CPUMAP_H #define __PERF_CPUMAP_H +#include + struct cpu_map { int nr; int map[]; @@ -10,4 +12,6 @@ struct cpu_map *cpu_map__new(const char *cpu_list); struct cpu_map *cpu_map__dummy_new(void); void cpu_map__delete(struct cpu_map *map); +size_t cpu_map__fprintf(struct cpu_map *map, FILE *fp); + #endif /* __PERF_CPUMAP_H */ diff --git a/tools/perf/util/thread_map.c b/tools/perf/util/thread_map.c index a5df131b77c3..894d52f65166 100644 --- a/tools/perf/util/thread_map.c +++ b/tools/perf/util/thread_map.c @@ -62,3 +62,14 @@ void thread_map__delete(struct thread_map *threads) { free(threads); } + +size_t thread_map__fprintf(struct thread_map *threads, FILE *fp) +{ + int i; + size_t printed = fprintf(fp, "%d thread%s: ", + threads->nr, threads->nr > 1 ? "s" : ""); + for (i = 0; i < threads->nr; ++i) + printed += fprintf(fp, "%s%d", i ? ", " : "", threads->map[i]); + + return printed + fprintf(fp, "\n"); +} diff --git a/tools/perf/util/thread_map.h b/tools/perf/util/thread_map.h index 3cb907311409..736ab4a26210 100644 --- a/tools/perf/util/thread_map.h +++ b/tools/perf/util/thread_map.h @@ -2,6 +2,7 @@ #define __PERF_THREAD_MAP_H #include +#include struct thread_map { int nr; @@ -12,4 +13,7 @@ struct thread_map *thread_map__new_by_pid(pid_t pid); struct thread_map *thread_map__new_by_tid(pid_t tid); struct thread_map *thread_map__new(pid_t pid, pid_t tid); void thread_map__delete(struct thread_map *threads); + +size_t thread_map__fprintf(struct thread_map *threads, FILE *fp); + #endif /* __PERF_THREAD_MAP_H */ From 0d37aa34f8806bb443dd3c8621fd9bdbb50c58bb Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 19 Jan 2012 14:08:15 -0200 Subject: [PATCH 2/9] perf tools: Introduce per user view The new --uid command line option will show only the tasks for a given user, using the proc interface to figure out the existing tasks. Kernel work is needed to close races at startup, but this should already be useful in many use cases. Cc: David Ahern Cc: Frederic Weisbecker Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-bdnspm000gw2l984a2t53o8z@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-record.txt | 4 + tools/perf/Documentation/perf-top.txt | 4 + tools/perf/builtin-record.c | 12 ++- tools/perf/builtin-stat.c | 2 +- tools/perf/builtin-test.c | 8 +- tools/perf/builtin-top.c | 22 +++++- tools/perf/perf.h | 1 + tools/perf/util/evlist.c | 6 +- tools/perf/util/evlist.h | 2 +- tools/perf/util/hist.h | 1 + tools/perf/util/python.c | 10 +-- tools/perf/util/thread_map.c | 98 +++++++++++++++++++++++- tools/perf/util/thread_map.h | 3 +- tools/perf/util/top.c | 3 + tools/perf/util/top.h | 2 + tools/perf/util/ui/browsers/hists.c | 3 + tools/perf/util/usage.c | 39 ++++++++++ tools/perf/util/util.h | 2 + 18 files changed, 200 insertions(+), 22 deletions(-) diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index 2937f7e14bb7..ff9a66e0d4e4 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -58,6 +58,10 @@ OPTIONS --tid=:: Record events on existing thread ID. +-u:: +--uid=:: + Record events in threads owned by uid. Name or number. + -r:: --realtime=:: Collect data with this RT SCHED_FIFO priority. diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt index b1a5bbbfebef..ab1454ed450f 100644 --- a/tools/perf/Documentation/perf-top.txt +++ b/tools/perf/Documentation/perf-top.txt @@ -78,6 +78,10 @@ Default is to monitor all CPUS. --tid=:: Profile events on existing thread ID. +-u:: +--uid=:: + Record events in threads owned by uid. Name or number. + -r :: --realtime=:: Collect data with this RT SCHED_FIFO priority. diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 0abfb18b911f..32870eef952f 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -44,6 +44,7 @@ struct perf_record { struct perf_evlist *evlist; struct perf_session *session; const char *progname; + const char *uid_str; int output; unsigned int page_size; int realtime_prio; @@ -727,6 +728,7 @@ const struct option record_options[] = { OPT_CALLBACK('G', "cgroup", &record.evlist, "name", "monitor event in cgroup name only", parse_cgroups), + OPT_STRING('u', "uid", &record.uid_str, "user", "user to profile"), OPT_END() }; @@ -748,7 +750,7 @@ int cmd_record(int argc, const char **argv, const char *prefix __used) argc = parse_options(argc, argv, record_options, record_usage, PARSE_OPT_STOP_AT_NON_OPTION); if (!argc && rec->opts.target_pid == -1 && rec->opts.target_tid == -1 && - !rec->opts.system_wide && !rec->opts.cpu_list) + !rec->opts.system_wide && !rec->opts.cpu_list && !rec->uid_str) usage_with_options(record_usage, record_options); if (rec->force && rec->append_file) { @@ -788,11 +790,17 @@ int cmd_record(int argc, const char **argv, const char *prefix __used) goto out_symbol_exit; } + rec->opts.uid = parse_target_uid(rec->uid_str, rec->opts.target_tid, + rec->opts.target_pid); + if (rec->uid_str != NULL && rec->opts.uid == UINT_MAX - 1) + goto out_free_fd; + if (rec->opts.target_pid != -1) rec->opts.target_tid = rec->opts.target_pid; if (perf_evlist__create_maps(evsel_list, rec->opts.target_pid, - rec->opts.target_tid, rec->opts.cpu_list) < 0) + rec->opts.target_tid, rec->opts.uid, + rec->opts.cpu_list) < 0) usage_with_options(record_usage, record_options); list_for_each_entry(pos, &evsel_list->entries, node) { diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index f5d2a63eba66..459b8620a5d9 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -1201,7 +1201,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used) if (target_pid != -1) target_tid = target_pid; - evsel_list->threads = thread_map__new(target_pid, target_tid); + evsel_list->threads = thread_map__new(target_pid, target_tid, UINT_MAX); if (evsel_list->threads == NULL) { pr_err("Problems finding threads of monitor\n"); usage_with_options(stat_usage, options); diff --git a/tools/perf/builtin-test.c b/tools/perf/builtin-test.c index 3854e869dce1..3ce709e97462 100644 --- a/tools/perf/builtin-test.c +++ b/tools/perf/builtin-test.c @@ -276,7 +276,7 @@ static int test__open_syscall_event(void) return -1; } - threads = thread_map__new(-1, getpid()); + threads = thread_map__new(-1, getpid(), UINT_MAX); if (threads == NULL) { pr_debug("thread_map__new\n"); return -1; @@ -342,7 +342,7 @@ static int test__open_syscall_event_on_all_cpus(void) return -1; } - threads = thread_map__new(-1, getpid()); + threads = thread_map__new(-1, getpid(), UINT_MAX); if (threads == NULL) { pr_debug("thread_map__new\n"); return -1; @@ -490,7 +490,7 @@ static int test__basic_mmap(void) expected_nr_events[i] = random() % 257; } - threads = thread_map__new(-1, getpid()); + threads = thread_map__new(-1, getpid(), UINT_MAX); if (threads == NULL) { pr_debug("thread_map__new\n"); return -1; @@ -1054,7 +1054,7 @@ static int test__PERF_RECORD(void) * we're monitoring, the one forked there. */ err = perf_evlist__create_maps(evlist, opts.target_pid, - opts.target_tid, opts.cpu_list); + opts.target_tid, UINT_MAX, opts.cpu_list); if (err < 0) { pr_debug("Not enough memory to create thread/cpu maps\n"); goto out_delete_evlist; diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 8f80df896038..e8b033c074f9 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -64,7 +64,6 @@ #include #include - void get_term_dimensions(struct winsize *ws) { char *s = getenv("LINES"); @@ -537,10 +536,20 @@ static void perf_top__sort_new_samples(void *arg) static void *display_thread_tui(void *arg) { + struct perf_evsel *pos; struct perf_top *top = arg; const char *help = "For a higher level overview, try: perf top --sort comm,dso"; perf_top__sort_new_samples(top); + + /* + * Initialize the uid_filter_str, in the future the TUI will allow + * Zooming in/out UIDs. For now juse use whatever the user passed + * via --uid. + */ + list_for_each_entry(pos, &top->evlist->entries, node) + pos->hists.uid_filter_str = top->uid_str; + perf_evlist__tui_browse_hists(top->evlist, help, perf_top__sort_new_samples, top, top->delay_secs); @@ -949,7 +958,7 @@ static int __cmd_top(struct perf_top *top) if (ret) goto out_delete; - if (top->target_tid != -1) + if (top->target_tid != -1 || top->uid != UINT_MAX) perf_event__synthesize_thread_map(&top->tool, top->evlist->threads, perf_event__process, &top->session->host_machine); @@ -1089,6 +1098,7 @@ int cmd_top(int argc, const char **argv, const char *prefix __used) .delay_secs = 2, .target_pid = -1, .target_tid = -1, + .uid = UINT_MAX, .freq = 1000, /* 1 KHz */ .sample_id_all_avail = true, .mmap_pages = 128, @@ -1162,6 +1172,7 @@ int cmd_top(int argc, const char **argv, const char *prefix __used) "Display raw encoding of assembly instructions (default)"), OPT_STRING('M', "disassembler-style", &disassembler_style, "disassembler style", "Specify disassembler style (e.g. -M intel for intel syntax)"), + OPT_STRING('u', "uid", &top.uid_str, "user", "user to profile"), OPT_END() }; @@ -1187,6 +1198,10 @@ int cmd_top(int argc, const char **argv, const char *prefix __used) setup_browser(false); + top.uid = parse_target_uid(top.uid_str, top.target_tid, top.target_pid); + if (top.uid_str != NULL && top.uid == UINT_MAX - 1) + goto out_delete_evlist; + /* CPU and PID are mutually exclusive */ if (top.target_tid > 0 && top.cpu_list) { printf("WARNING: PID switch overriding CPU\n"); @@ -1198,7 +1213,7 @@ int cmd_top(int argc, const char **argv, const char *prefix __used) top.target_tid = top.target_pid; if (perf_evlist__create_maps(top.evlist, top.target_pid, - top.target_tid, top.cpu_list) < 0) + top.target_tid, top.uid, top.cpu_list) < 0) usage_with_options(top_usage, options); if (!top.evlist->nr_entries && @@ -1262,6 +1277,7 @@ int cmd_top(int argc, const char **argv, const char *prefix __used) status = __cmd_top(&top); +out_delete_evlist: perf_evlist__delete(top.evlist); return status; diff --git a/tools/perf/perf.h b/tools/perf/perf.h index 64f8bee31ced..92af1688bae4 100644 --- a/tools/perf/perf.h +++ b/tools/perf/perf.h @@ -188,6 +188,7 @@ void pthread__unblock_sigwinch(void); struct perf_record_opts { pid_t target_pid; pid_t target_tid; + uid_t uid; bool call_graph; bool group; bool inherit_stat; diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 3f16e08a5c8d..a6d50e376257 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -594,14 +594,14 @@ int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages, } int perf_evlist__create_maps(struct perf_evlist *evlist, pid_t target_pid, - pid_t target_tid, const char *cpu_list) + pid_t target_tid, uid_t uid, const char *cpu_list) { - evlist->threads = thread_map__new(target_pid, target_tid); + evlist->threads = thread_map__new(target_pid, target_tid, uid); if (evlist->threads == NULL) return -1; - if (cpu_list == NULL && target_tid != -1) + if (uid != UINT_MAX || (cpu_list == NULL && target_tid != -1)) evlist->cpus = cpu_map__dummy_new(); else evlist->cpus = cpu_map__new(cpu_list); diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 8922aeed0467..9c516607ce20 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -107,7 +107,7 @@ static inline void perf_evlist__set_maps(struct perf_evlist *evlist, } int perf_evlist__create_maps(struct perf_evlist *evlist, pid_t target_pid, - pid_t target_tid, const char *cpu_list); + pid_t tid, uid_t uid, const char *cpu_list); void perf_evlist__delete_maps(struct perf_evlist *evlist); int perf_evlist__set_filters(struct perf_evlist *evlist); diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index f55f0a8d1f81..0d486135d10f 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -55,6 +55,7 @@ struct hists { u64 nr_entries; const struct thread *thread_filter; const struct dso *dso_filter; + const char *uid_filter_str; pthread_mutex_t lock; struct events_stats stats; u64 event_stream; diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index 9dd47a4f2596..e03b58a48424 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -425,14 +425,14 @@ struct pyrf_thread_map { static int pyrf_thread_map__init(struct pyrf_thread_map *pthreads, PyObject *args, PyObject *kwargs) { - static char *kwlist[] = { "pid", "tid", NULL }; - int pid = -1, tid = -1; + static char *kwlist[] = { "pid", "tid", "uid", NULL }; + int pid = -1, tid = -1, uid = UINT_MAX; - if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ii", - kwlist, &pid, &tid)) + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|iii", + kwlist, &pid, &tid, &uid)) return -1; - pthreads->threads = thread_map__new(pid, tid); + pthreads->threads = thread_map__new(pid, tid, uid); if (pthreads->threads == NULL) return -1; return 0; diff --git a/tools/perf/util/thread_map.c b/tools/perf/util/thread_map.c index 894d52f65166..3d4b6c5931b9 100644 --- a/tools/perf/util/thread_map.c +++ b/tools/perf/util/thread_map.c @@ -1,6 +1,11 @@ #include +#include +#include #include #include +#include +#include +#include #include "thread_map.h" /* Skip "." and ".." directories */ @@ -23,7 +28,7 @@ struct thread_map *thread_map__new_by_pid(pid_t pid) sprintf(name, "/proc/%d/task", pid); items = scandir(name, &namelist, filter, NULL); if (items <= 0) - return NULL; + return NULL; threads = malloc(sizeof(*threads) + sizeof(pid_t) * items); if (threads != NULL) { @@ -51,10 +56,99 @@ struct thread_map *thread_map__new_by_tid(pid_t tid) return threads; } -struct thread_map *thread_map__new(pid_t pid, pid_t tid) +struct thread_map *thread_map__new_by_uid(uid_t uid) +{ + DIR *proc; + int max_threads = 32, items, i; + char path[256]; + struct dirent dirent, *next, **namelist = NULL; + struct thread_map *threads = malloc(sizeof(*threads) + + max_threads * sizeof(pid_t)); + if (threads == NULL) + goto out; + + proc = opendir("/proc"); + if (proc == NULL) + goto out_free_threads; + + threads->nr = 0; + + while (!readdir_r(proc, &dirent, &next) && next) { + char *end; + bool grow = false; + struct stat st; + pid_t pid = strtol(dirent.d_name, &end, 10); + + if (*end) /* only interested in proper numerical dirents */ + continue; + + snprintf(path, sizeof(path), "/proc/%s", dirent.d_name); + + if (stat(path, &st) != 0) + continue; + + if (st.st_uid != uid) + continue; + + snprintf(path, sizeof(path), "/proc/%d/task", pid); + items = scandir(path, &namelist, filter, NULL); + if (items <= 0) + goto out_free_closedir; + + while (threads->nr + items >= max_threads) { + max_threads *= 2; + grow = true; + } + + if (grow) { + struct thread_map *tmp; + + tmp = realloc(threads, (sizeof(*threads) + + max_threads * sizeof(pid_t))); + if (tmp == NULL) + goto out_free_namelist; + + threads = tmp; + } + + for (i = 0; i < items; i++) + threads->map[threads->nr + i] = atoi(namelist[i]->d_name); + + for (i = 0; i < items; i++) + free(namelist[i]); + free(namelist); + + threads->nr += items; + } + +out_closedir: + closedir(proc); +out: + return threads; + +out_free_threads: + free(threads); + return NULL; + +out_free_namelist: + for (i = 0; i < items; i++) + free(namelist[i]); + free(namelist); + +out_free_closedir: + free(threads); + threads = NULL; + goto out_closedir; +} + +struct thread_map *thread_map__new(pid_t pid, pid_t tid, uid_t uid) { if (pid != -1) return thread_map__new_by_pid(pid); + + if (tid == -1 && uid != UINT_MAX) + return thread_map__new_by_uid(uid); + return thread_map__new_by_tid(tid); } diff --git a/tools/perf/util/thread_map.h b/tools/perf/util/thread_map.h index 736ab4a26210..c75ddbaba005 100644 --- a/tools/perf/util/thread_map.h +++ b/tools/perf/util/thread_map.h @@ -11,7 +11,8 @@ struct thread_map { struct thread_map *thread_map__new_by_pid(pid_t pid); struct thread_map *thread_map__new_by_tid(pid_t tid); -struct thread_map *thread_map__new(pid_t pid, pid_t tid); +struct thread_map *thread_map__new_by_uid(uid_t uid); +struct thread_map *thread_map__new(pid_t pid, pid_t tid, uid_t uid); void thread_map__delete(struct thread_map *threads); size_t thread_map__fprintf(struct thread_map *threads, FILE *fp); diff --git a/tools/perf/util/top.c b/tools/perf/util/top.c index 500471dffa4f..e4370ca27193 100644 --- a/tools/perf/util/top.c +++ b/tools/perf/util/top.c @@ -75,6 +75,9 @@ size_t perf_top__header_snprintf(struct perf_top *top, char *bf, size_t size) else if (top->target_tid != -1) ret += SNPRINTF(bf + ret, size - ret, " (target_tid: %d", top->target_tid); + else if (top->uid_str != NULL) + ret += SNPRINTF(bf + ret, size - ret, " (uid: %s", + top->uid_str); else ret += SNPRINTF(bf + ret, size - ret, " (all"); diff --git a/tools/perf/util/top.h b/tools/perf/util/top.h index a248f3c2c60d..def3e53e0fe0 100644 --- a/tools/perf/util/top.h +++ b/tools/perf/util/top.h @@ -24,6 +24,7 @@ struct perf_top { int print_entries, count_filter, delay_secs; int freq; pid_t target_pid, target_tid; + uid_t uid; bool hide_kernel_symbols, hide_user_symbols, zero; bool system_wide; bool use_tui, use_stdio; @@ -45,6 +46,7 @@ struct perf_top { int realtime_prio; int sym_pcnt_filter; const char *sym_filter; + const char *uid_str; }; size_t perf_top__header_snprintf(struct perf_top *top, char *bf, size_t size); diff --git a/tools/perf/util/ui/browsers/hists.c b/tools/perf/util/ui/browsers/hists.c index 1212a386a033..7b6669d1f11f 100644 --- a/tools/perf/util/ui/browsers/hists.c +++ b/tools/perf/util/ui/browsers/hists.c @@ -841,6 +841,9 @@ static int hists__browser_title(struct hists *self, char *bf, size_t size, nr_events = convert_unit(nr_events, &unit); printed = snprintf(bf, size, "Events: %lu%c %s", nr_events, unit, ev_name); + if (self->uid_filter_str) + printed += snprintf(bf + printed, size - printed, + ", UID: %s", self->uid_filter_str); if (thread) printed += snprintf(bf + printed, size - printed, ", Thread: %s(%d)", diff --git a/tools/perf/util/usage.c b/tools/perf/util/usage.c index d76d1c0ff98f..d0c013934f30 100644 --- a/tools/perf/util/usage.c +++ b/tools/perf/util/usage.c @@ -7,6 +7,7 @@ * Copyright (C) Linus Torvalds, 2005 */ #include "util.h" +#include "debug.h" static void report(const char *prefix, const char *err, va_list params) { @@ -81,3 +82,41 @@ void warning(const char *warn, ...) warn_routine(warn, params); va_end(params); } + +uid_t parse_target_uid(const char *str, pid_t tid, pid_t pid) +{ + struct passwd pwd, *result; + char buf[1024]; + + if (str == NULL) + return UINT_MAX; + + /* CPU and PID are mutually exclusive */ + if (tid > 0 || pid > 0) { + ui__warning("PID/TID switch overriding UID\n"); + sleep(1); + return UINT_MAX; + } + + getpwnam_r(str, &pwd, buf, sizeof(buf), &result); + + if (result == NULL) { + char *endptr; + int uid = strtol(str, &endptr, 10); + + if (*endptr != '\0') { + ui__error("Invalid user %s\n", str); + return UINT_MAX - 1; + } + + getpwuid_r(uid, &pwd, buf, sizeof(buf), &result); + + if (result == NULL) { + ui__error("Problems obtaining information for user %s\n", + str); + return UINT_MAX - 1; + } + } + + return result->pw_uid; +} diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index b9c530cce79a..061dbf8c038d 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -246,6 +246,8 @@ struct perf_event_attr; void event_attr_init(struct perf_event_attr *attr); +uid_t parse_target_uid(const char *str, pid_t tid, pid_t pid); + #define _STR(x) #x #define STR(x) _STR(x) From 9ea811973d49a1df0be04ff6e4df449e4fca4fb5 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Wed, 18 Jan 2012 13:28:13 +0000 Subject: [PATCH 3/9] perf bench: Make "default" memcpy() selection actually use glibc's implementation Since arch/x86/lib/memcpy_64.S implements not only __memcpy, but also memcpy, without further precautions this function will get chose by the static linker for resolving all references, and hence the "default" measurement didn't really measure anything else than the "x86-64-unrolled" one. Fix this by renaming (through the pre-processor) the conflicting symbol. On my Westmere system, the glibc variant turns out to require about 4% less instructions, but 15% more cycles for the default 1Mb block size measured. Cc: Ingo Molnar Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/4F16D6FD020000780006D72F@nat28.tlf.novell.com Signed-off-by: Jan Beulich Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/bench/mem-memcpy-x86-64-asm.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/bench/mem-memcpy-x86-64-asm.S b/tools/perf/bench/mem-memcpy-x86-64-asm.S index a57b66e853c2..384b60788ab9 100644 --- a/tools/perf/bench/mem-memcpy-x86-64-asm.S +++ b/tools/perf/bench/mem-memcpy-x86-64-asm.S @@ -1,2 +1,2 @@ - +#define memcpy MEMCPY /* don't hide glibc's memcpy() */ #include "../../../arch/x86/lib/memcpy_64.S" From 800eb01484b3ca1eaf4eb5186df13fb24de2db19 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Wed, 18 Jan 2012 13:28:56 +0000 Subject: [PATCH 4/9] perf bench: Also allow measuring alternative memcpy implementations Intended to be able to support the current selection of the preferred memcpy() implementation, this patch adds the ability to also measure the two alternative implementations, again by way of using some pre-processsor replacement. While on my Westmere system this proves that the movsb based variant is worse than the movsq based one (since the ERMS feature isn't there), it also shows that here for the default as well as small sizes the unrolled variant outperforms the movsq one. Cc: Ingo Molnar Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/4F16D728020000780006D732@nat28.tlf.novell.com Signed-off-by: Jan Beulich Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/bench/mem-memcpy-x86-64-asm-def.h | 8 ++++++++ tools/perf/bench/mem-memcpy-x86-64-asm.S | 4 ++++ 2 files changed, 12 insertions(+) diff --git a/tools/perf/bench/mem-memcpy-x86-64-asm-def.h b/tools/perf/bench/mem-memcpy-x86-64-asm-def.h index d588b87696fc..d66ab799b35f 100644 --- a/tools/perf/bench/mem-memcpy-x86-64-asm-def.h +++ b/tools/perf/bench/mem-memcpy-x86-64-asm-def.h @@ -2,3 +2,11 @@ MEMCPY_FN(__memcpy, "x86-64-unrolled", "unrolled memcpy() in arch/x86/lib/memcpy_64.S") + +MEMCPY_FN(memcpy_c, + "x86-64-movsq", + "movsq-based memcpy() in arch/x86/lib/memcpy_64.S") + +MEMCPY_FN(memcpy_c_e, + "x86-64-movsb", + "movsb-based memcpy() in arch/x86/lib/memcpy_64.S") diff --git a/tools/perf/bench/mem-memcpy-x86-64-asm.S b/tools/perf/bench/mem-memcpy-x86-64-asm.S index 384b60788ab9..a20780bd0771 100644 --- a/tools/perf/bench/mem-memcpy-x86-64-asm.S +++ b/tools/perf/bench/mem-memcpy-x86-64-asm.S @@ -1,2 +1,6 @@ #define memcpy MEMCPY /* don't hide glibc's memcpy() */ +#define altinstr_replacement text +#define globl p2align 4; .globl +#define Lmemcpy_c globl memcpy_c; memcpy_c +#define Lmemcpy_c_e globl memcpy_c_e; memcpy_c_e #include "../../../arch/x86/lib/memcpy_64.S" From be3de80dc2e671d9ee15e69fe9cd84d2b71e2225 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Tue, 24 Jan 2012 10:03:22 -0200 Subject: [PATCH 5/9] perf bench: Also allow measuring memset() This simply clones the respective memcpy() implementation. Cc: Ingo Molnar Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/4F16D743020000780006D735@nat28.tlf.novell.com Signed-off-by: Jan Beulich Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile | 4 +- tools/perf/bench/bench.h | 1 + tools/perf/bench/mem-memset-arch.h | 12 + tools/perf/bench/mem-memset-x86-64-asm-def.h | 12 + tools/perf/bench/mem-memset-x86-64-asm.S | 6 + tools/perf/bench/mem-memset.c | 291 +++++++++++++++++++ tools/perf/builtin-bench.c | 3 + tools/perf/util/include/asm/dwarf2.h | 4 +- 8 files changed, 331 insertions(+), 2 deletions(-) create mode 100644 tools/perf/bench/mem-memset-arch.h create mode 100644 tools/perf/bench/mem-memset-x86-64-asm-def.h create mode 100644 tools/perf/bench/mem-memset-x86-64-asm.S create mode 100644 tools/perf/bench/mem-memset.c diff --git a/tools/perf/Makefile b/tools/perf/Makefile index ac86d67b636e..599031ac69ac 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -61,7 +61,7 @@ ifeq ($(ARCH),x86_64) ifeq (${IS_X86_64}, 1) RAW_ARCH := x86_64 ARCH_CFLAGS := -DARCH_X86_64 - ARCH_INCLUDE = ../../arch/x86/lib/memcpy_64.S + ARCH_INCLUDE = ../../arch/x86/lib/memcpy_64.S ../../arch/x86/lib/memset_64.S endif endif @@ -362,8 +362,10 @@ BUILTIN_OBJS += $(OUTPUT)bench/sched-messaging.o BUILTIN_OBJS += $(OUTPUT)bench/sched-pipe.o ifeq ($(RAW_ARCH),x86_64) BUILTIN_OBJS += $(OUTPUT)bench/mem-memcpy-x86-64-asm.o +BUILTIN_OBJS += $(OUTPUT)bench/mem-memset-x86-64-asm.o endif BUILTIN_OBJS += $(OUTPUT)bench/mem-memcpy.o +BUILTIN_OBJS += $(OUTPUT)bench/mem-memset.o BUILTIN_OBJS += $(OUTPUT)builtin-diff.o BUILTIN_OBJS += $(OUTPUT)builtin-evlist.o diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h index f7781c6267c0..a09bece6dad2 100644 --- a/tools/perf/bench/bench.h +++ b/tools/perf/bench/bench.h @@ -4,6 +4,7 @@ extern int bench_sched_messaging(int argc, const char **argv, const char *prefix); extern int bench_sched_pipe(int argc, const char **argv, const char *prefix); extern int bench_mem_memcpy(int argc, const char **argv, const char *prefix __used); +extern int bench_mem_memset(int argc, const char **argv, const char *prefix); #define BENCH_FORMAT_DEFAULT_STR "default" #define BENCH_FORMAT_DEFAULT 0 diff --git a/tools/perf/bench/mem-memset-arch.h b/tools/perf/bench/mem-memset-arch.h new file mode 100644 index 000000000000..a040fa77665b --- /dev/null +++ b/tools/perf/bench/mem-memset-arch.h @@ -0,0 +1,12 @@ + +#ifdef ARCH_X86_64 + +#define MEMSET_FN(fn, name, desc) \ + extern void *fn(void *, int, size_t); + +#include "mem-memset-x86-64-asm-def.h" + +#undef MEMSET_FN + +#endif + diff --git a/tools/perf/bench/mem-memset-x86-64-asm-def.h b/tools/perf/bench/mem-memset-x86-64-asm-def.h new file mode 100644 index 000000000000..a71dff97c1f5 --- /dev/null +++ b/tools/perf/bench/mem-memset-x86-64-asm-def.h @@ -0,0 +1,12 @@ + +MEMSET_FN(__memset, + "x86-64-unrolled", + "unrolled memset() in arch/x86/lib/memset_64.S") + +MEMSET_FN(memset_c, + "x86-64-stosq", + "movsq-based memset() in arch/x86/lib/memset_64.S") + +MEMSET_FN(memset_c_e, + "x86-64-stosb", + "movsb-based memset() in arch/x86/lib/memset_64.S") diff --git a/tools/perf/bench/mem-memset-x86-64-asm.S b/tools/perf/bench/mem-memset-x86-64-asm.S new file mode 100644 index 000000000000..cb9217063776 --- /dev/null +++ b/tools/perf/bench/mem-memset-x86-64-asm.S @@ -0,0 +1,6 @@ +#define memset MEMSET /* don't hide glibc's memset() */ +#define altinstr_replacement text +#define globl p2align 4; .globl +#define Lmemset_c globl memset_c; memset_c +#define Lmemset_c_e globl memset_c_e; memset_c_e +#include "../../../arch/x86/lib/memset_64.S" diff --git a/tools/perf/bench/mem-memset.c b/tools/perf/bench/mem-memset.c new file mode 100644 index 000000000000..9c0c6f0cba9b --- /dev/null +++ b/tools/perf/bench/mem-memset.c @@ -0,0 +1,291 @@ +/* + * mem-memset.c + * + * memset: Simple memory set in various ways + * + * Trivial clone of mem-memcpy.c. + */ +#include + +#include "../perf.h" +#include "../util/util.h" +#include "../util/parse-options.h" +#include "../util/header.h" +#include "bench.h" +#include "mem-memset-arch.h" + +#include +#include +#include +#include +#include + +#define K 1024 + +static const char *length_str = "1MB"; +static const char *routine = "default"; +static bool use_clock; +static int clock_fd; +static bool only_prefault; +static bool no_prefault; + +static const struct option options[] = { + OPT_STRING('l', "length", &length_str, "1MB", + "Specify length of memory to copy. " + "available unit: B, MB, GB (upper and lower)"), + OPT_STRING('r', "routine", &routine, "default", + "Specify routine to copy"), + OPT_BOOLEAN('c', "clock", &use_clock, + "Use CPU clock for measuring"), + OPT_BOOLEAN('o', "only-prefault", &only_prefault, + "Show only the result with page faults before memset()"), + OPT_BOOLEAN('n', "no-prefault", &no_prefault, + "Show only the result without page faults before memset()"), + OPT_END() +}; + +typedef void *(*memset_t)(void *, int, size_t); + +struct routine { + const char *name; + const char *desc; + memset_t fn; +}; + +static const struct routine routines[] = { + { "default", + "Default memset() provided by glibc", + memset }, +#ifdef ARCH_X86_64 + +#define MEMSET_FN(fn, name, desc) { name, desc, fn }, +#include "mem-memset-x86-64-asm-def.h" +#undef MEMSET_FN + +#endif + + { NULL, + NULL, + NULL } +}; + +static const char * const bench_mem_memset_usage[] = { + "perf bench mem memset ", + NULL +}; + +static struct perf_event_attr clock_attr = { + .type = PERF_TYPE_HARDWARE, + .config = PERF_COUNT_HW_CPU_CYCLES +}; + +static void init_clock(void) +{ + clock_fd = sys_perf_event_open(&clock_attr, getpid(), -1, -1, 0); + + if (clock_fd < 0 && errno == ENOSYS) + die("No CONFIG_PERF_EVENTS=y kernel support configured?\n"); + else + BUG_ON(clock_fd < 0); +} + +static u64 get_clock(void) +{ + int ret; + u64 clk; + + ret = read(clock_fd, &clk, sizeof(u64)); + BUG_ON(ret != sizeof(u64)); + + return clk; +} + +static double timeval2double(struct timeval *ts) +{ + return (double)ts->tv_sec + + (double)ts->tv_usec / (double)1000000; +} + +static void alloc_mem(void **dst, size_t length) +{ + *dst = zalloc(length); + if (!dst) + die("memory allocation failed - maybe length is too large?\n"); +} + +static u64 do_memset_clock(memset_t fn, size_t len, bool prefault) +{ + u64 clock_start = 0ULL, clock_end = 0ULL; + void *dst = NULL; + + alloc_mem(&dst, len); + + if (prefault) + fn(dst, -1, len); + + clock_start = get_clock(); + fn(dst, 0, len); + clock_end = get_clock(); + + free(dst); + return clock_end - clock_start; +} + +static double do_memset_gettimeofday(memset_t fn, size_t len, bool prefault) +{ + struct timeval tv_start, tv_end, tv_diff; + void *dst = NULL; + + alloc_mem(&dst, len); + + if (prefault) + fn(dst, -1, len); + + BUG_ON(gettimeofday(&tv_start, NULL)); + fn(dst, 0, len); + BUG_ON(gettimeofday(&tv_end, NULL)); + + timersub(&tv_end, &tv_start, &tv_diff); + + free(dst); + return (double)((double)len / timeval2double(&tv_diff)); +} + +#define pf (no_prefault ? 0 : 1) + +#define print_bps(x) do { \ + if (x < K) \ + printf(" %14lf B/Sec", x); \ + else if (x < K * K) \ + printf(" %14lfd KB/Sec", x / K); \ + else if (x < K * K * K) \ + printf(" %14lf MB/Sec", x / K / K); \ + else \ + printf(" %14lf GB/Sec", x / K / K / K); \ + } while (0) + +int bench_mem_memset(int argc, const char **argv, + const char *prefix __used) +{ + int i; + size_t len; + double result_bps[2]; + u64 result_clock[2]; + + argc = parse_options(argc, argv, options, + bench_mem_memset_usage, 0); + + if (use_clock) + init_clock(); + + len = (size_t)perf_atoll((char *)length_str); + + result_clock[0] = result_clock[1] = 0ULL; + result_bps[0] = result_bps[1] = 0.0; + + if ((s64)len <= 0) { + fprintf(stderr, "Invalid length:%s\n", length_str); + return 1; + } + + /* same to without specifying either of prefault and no-prefault */ + if (only_prefault && no_prefault) + only_prefault = no_prefault = false; + + for (i = 0; routines[i].name; i++) { + if (!strcmp(routines[i].name, routine)) + break; + } + if (!routines[i].name) { + printf("Unknown routine:%s\n", routine); + printf("Available routines...\n"); + for (i = 0; routines[i].name; i++) { + printf("\t%s ... %s\n", + routines[i].name, routines[i].desc); + } + return 1; + } + + if (bench_format == BENCH_FORMAT_DEFAULT) + printf("# Copying %s Bytes ...\n\n", length_str); + + if (!only_prefault && !no_prefault) { + /* show both of results */ + if (use_clock) { + result_clock[0] = + do_memset_clock(routines[i].fn, len, false); + result_clock[1] = + do_memset_clock(routines[i].fn, len, true); + } else { + result_bps[0] = + do_memset_gettimeofday(routines[i].fn, + len, false); + result_bps[1] = + do_memset_gettimeofday(routines[i].fn, + len, true); + } + } else { + if (use_clock) { + result_clock[pf] = + do_memset_clock(routines[i].fn, + len, only_prefault); + } else { + result_bps[pf] = + do_memset_gettimeofday(routines[i].fn, + len, only_prefault); + } + } + + switch (bench_format) { + case BENCH_FORMAT_DEFAULT: + if (!only_prefault && !no_prefault) { + if (use_clock) { + printf(" %14lf Clock/Byte\n", + (double)result_clock[0] + / (double)len); + printf(" %14lf Clock/Byte (with prefault)\n ", + (double)result_clock[1] + / (double)len); + } else { + print_bps(result_bps[0]); + printf("\n"); + print_bps(result_bps[1]); + printf(" (with prefault)\n"); + } + } else { + if (use_clock) { + printf(" %14lf Clock/Byte", + (double)result_clock[pf] + / (double)len); + } else + print_bps(result_bps[pf]); + + printf("%s\n", only_prefault ? " (with prefault)" : ""); + } + break; + case BENCH_FORMAT_SIMPLE: + if (!only_prefault && !no_prefault) { + if (use_clock) { + printf("%lf %lf\n", + (double)result_clock[0] / (double)len, + (double)result_clock[1] / (double)len); + } else { + printf("%lf %lf\n", + result_bps[0], result_bps[1]); + } + } else { + if (use_clock) { + printf("%lf\n", (double)result_clock[pf] + / (double)len); + } else + printf("%lf\n", result_bps[pf]); + } + break; + default: + /* reaching this means there's some disaster: */ + die("unknown format: %d\n", bench_format); + break; + } + + return 0; +} diff --git a/tools/perf/builtin-bench.c b/tools/perf/builtin-bench.c index fcb96269852a..b0e74ab2d7a2 100644 --- a/tools/perf/builtin-bench.c +++ b/tools/perf/builtin-bench.c @@ -52,6 +52,9 @@ static struct bench_suite mem_suites[] = { { "memcpy", "Simple memory copy in various ways", bench_mem_memcpy }, + { "memset", + "Simple memory set in various ways", + bench_mem_memset }, suite_all, { NULL, NULL, diff --git a/tools/perf/util/include/asm/dwarf2.h b/tools/perf/util/include/asm/dwarf2.h index bb4198e7837a..afe38199e922 100644 --- a/tools/perf/util/include/asm/dwarf2.h +++ b/tools/perf/util/include/asm/dwarf2.h @@ -2,10 +2,12 @@ #ifndef PERF_DWARF2_H #define PERF_DWARF2_H -/* dwarf2.h ... dummy header file for including arch/x86/lib/memcpy_64.S */ +/* dwarf2.h ... dummy header file for including arch/x86/lib/mem{cpy,set}_64.S */ #define CFI_STARTPROC #define CFI_ENDPROC +#define CFI_REMEMBER_STATE +#define CFI_RESTORE_STATE #endif /* PERF_DWARF2_H */ From e3e877e79b7c6a322f9f628e87052c13581238cc Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Wed, 18 Jan 2012 13:29:59 +0000 Subject: [PATCH 6/9] perf bench: Allow passing an iteration count to "bench mem mem{cpy,set}" "perf stat ... perf bench mem mem..." is pretty meaningless when using small block sizes (as the overhead of the invocation of each test run basically hides the actual test result in the noise). Repeating the actually interesting function's invocation a number of times allows the results to become meaningful. Cc: Ingo Molnar Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/4F16D767020000780006D738@nat28.tlf.novell.com Signed-off-by: Jan Beulich Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/bench/mem-memcpy.c | 11 +++++++++-- tools/perf/bench/mem-memset.c | 11 +++++++++-- 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/tools/perf/bench/mem-memcpy.c b/tools/perf/bench/mem-memcpy.c index db82021f4b91..6ad2b1c6b27b 100644 --- a/tools/perf/bench/mem-memcpy.c +++ b/tools/perf/bench/mem-memcpy.c @@ -24,6 +24,7 @@ static const char *length_str = "1MB"; static const char *routine = "default"; +static int iterations = 1; static bool use_clock; static int clock_fd; static bool only_prefault; @@ -35,6 +36,8 @@ static const struct option options[] = { "available unit: B, MB, GB (upper and lower)"), OPT_STRING('r', "routine", &routine, "default", "Specify routine to copy"), + OPT_INTEGER('i', "iterations", &iterations, + "repeat memcpy() invocation this number of times"), OPT_BOOLEAN('c', "clock", &use_clock, "Use CPU clock for measuring"), OPT_BOOLEAN('o', "only-prefault", &only_prefault, @@ -121,6 +124,7 @@ static u64 do_memcpy_clock(memcpy_t fn, size_t len, bool prefault) { u64 clock_start = 0ULL, clock_end = 0ULL; void *src = NULL, *dst = NULL; + int i; alloc_mem(&src, &dst, len); @@ -128,7 +132,8 @@ static u64 do_memcpy_clock(memcpy_t fn, size_t len, bool prefault) fn(dst, src, len); clock_start = get_clock(); - fn(dst, src, len); + for (i = 0; i < iterations; ++i) + fn(dst, src, len); clock_end = get_clock(); free(src); @@ -140,6 +145,7 @@ static double do_memcpy_gettimeofday(memcpy_t fn, size_t len, bool prefault) { struct timeval tv_start, tv_end, tv_diff; void *src = NULL, *dst = NULL; + int i; alloc_mem(&src, &dst, len); @@ -147,7 +153,8 @@ static double do_memcpy_gettimeofday(memcpy_t fn, size_t len, bool prefault) fn(dst, src, len); BUG_ON(gettimeofday(&tv_start, NULL)); - fn(dst, src, len); + for (i = 0; i < iterations; ++i) + fn(dst, src, len); BUG_ON(gettimeofday(&tv_end, NULL)); timersub(&tv_end, &tv_start, &tv_diff); diff --git a/tools/perf/bench/mem-memset.c b/tools/perf/bench/mem-memset.c index 9c0c6f0cba9b..59d4933eff44 100644 --- a/tools/perf/bench/mem-memset.c +++ b/tools/perf/bench/mem-memset.c @@ -24,6 +24,7 @@ static const char *length_str = "1MB"; static const char *routine = "default"; +static int iterations = 1; static bool use_clock; static int clock_fd; static bool only_prefault; @@ -35,6 +36,8 @@ static const struct option options[] = { "available unit: B, MB, GB (upper and lower)"), OPT_STRING('r', "routine", &routine, "default", "Specify routine to copy"), + OPT_INTEGER('i', "iterations", &iterations, + "repeat memset() invocation this number of times"), OPT_BOOLEAN('c', "clock", &use_clock, "Use CPU clock for measuring"), OPT_BOOLEAN('o', "only-prefault", &only_prefault, @@ -117,6 +120,7 @@ static u64 do_memset_clock(memset_t fn, size_t len, bool prefault) { u64 clock_start = 0ULL, clock_end = 0ULL; void *dst = NULL; + int i; alloc_mem(&dst, len); @@ -124,7 +128,8 @@ static u64 do_memset_clock(memset_t fn, size_t len, bool prefault) fn(dst, -1, len); clock_start = get_clock(); - fn(dst, 0, len); + for (i = 0; i < iterations; ++i) + fn(dst, i, len); clock_end = get_clock(); free(dst); @@ -135,6 +140,7 @@ static double do_memset_gettimeofday(memset_t fn, size_t len, bool prefault) { struct timeval tv_start, tv_end, tv_diff; void *dst = NULL; + int i; alloc_mem(&dst, len); @@ -142,7 +148,8 @@ static double do_memset_gettimeofday(memset_t fn, size_t len, bool prefault) fn(dst, -1, len); BUG_ON(gettimeofday(&tv_start, NULL)); - fn(dst, 0, len); + for (i = 0; i < iterations; ++i) + fn(dst, i, len); BUG_ON(gettimeofday(&tv_end, NULL)); timersub(&tv_end, &tv_start, &tv_diff); From 2ef1ea3826434bdebe17b2941356a8f764ff5fcd Mon Sep 17 00:00:00 2001 From: David Daney Date: Tue, 17 Jan 2012 13:41:01 -0800 Subject: [PATCH 7/9] perf tools: Fix broken build by defining _GNU_SOURCE in Makefile When building on my Debian/mips system, util/util.c fails to build because commit 1aed2671738785e8f5aea663a6fda91aa7ef59b5 (perf kvm: Do guest-only counting by default) indirectly includes stdio.h before the feature selection in util.h is done. This prevents _GNU_SOURCE in util.h from enabling the declaration of getline(), from now second inclusion of stdio.h, and the build is broken. There is another breakage in util/evsel.c caused by include ordering, but I didn't fully track down the commit that caused it. The root cause of all this is an inconsistent definition of _GNU_SOURCE, so I move the definition into the Makefile so that it is passed to all invocations of the compiler and used uniformly for all system header files. All other #define and #undef of _GNU_SOURCE are removed as they cause conflicts with the definition passed to the compiler. All the features.h definitions (_LARGEFILE64_SOURCE _FILE_OFFSET_BITS=64 and _GNU_SOURCE) are needed by the python glue code too, so they are moved to BASIC_CFLAGS, and the misleading comments about BASIC_CFLAGS are removed. This gives me a clean build on x86_64 (fc12) and mips (Debian). Cc: David Daney Cc: Ingo Molnar Cc: Joerg Roedel Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1326836461-11952-1-git-send-email-ddaney.cavm@gmail.com Signed-off-by: David Daney Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile | 7 ++----- tools/perf/builtin-probe.c | 2 -- tools/perf/util/probe-event.c | 2 -- tools/perf/util/symbol.c | 1 - tools/perf/util/trace-event-parse.c | 3 +-- tools/perf/util/ui/browsers/hists.c | 2 -- tools/perf/util/ui/helpline.c | 1 - tools/perf/util/util.h | 1 - 8 files changed, 3 insertions(+), 16 deletions(-) diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 599031ac69ac..d64f5819a380 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -104,7 +104,7 @@ endif CFLAGS = -fno-omit-frame-pointer -ggdb3 -Wall -Wextra -std=gnu99 $(CFLAGS_WERROR) $(CFLAGS_OPTIMIZE) -D_FORTIFY_SOURCE=2 $(EXTRA_WARNINGS) $(EXTRA_CFLAGS) EXTLIBS = -lpthread -lrt -lelf -lm -ALL_CFLAGS = $(CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 +ALL_CFLAGS = $(CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE ALL_LDFLAGS = $(LDFLAGS) STRIP ?= strip @@ -168,10 +168,7 @@ endif ### --- END CONFIGURATION SECTION --- -# Those must not be GNU-specific; they are shared with perl/ which may -# be built by a different compiler. (Note that this is an artifact now -# but it still might be nice to keep that distinction.) -BASIC_CFLAGS = -Iutil/include -Iarch/$(ARCH)/include +BASIC_CFLAGS = -Iutil/include -Iarch/$(ARCH)/include -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE BASIC_LDFLAGS = # Guard against environment variables diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c index 59d43abfbfec..fb8566181f27 100644 --- a/tools/perf/builtin-probe.c +++ b/tools/perf/builtin-probe.c @@ -20,7 +20,6 @@ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * */ -#define _GNU_SOURCE #include #include #include @@ -31,7 +30,6 @@ #include #include -#undef _GNU_SOURCE #include "perf.h" #include "builtin.h" #include "util/util.h" diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index eb25900e2211..29cb65459811 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -19,7 +19,6 @@ * */ -#define _GNU_SOURCE #include #include #include @@ -33,7 +32,6 @@ #include #include -#undef _GNU_SOURCE #include "util.h" #include "event.h" #include "string.h" diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 215d50f2042e..0975438c3e72 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -1,4 +1,3 @@ -#define _GNU_SOURCE #include #include #include diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c index 6c164dc9ee95..1a8d4dc4f386 100644 --- a/tools/perf/util/trace-event-parse.c +++ b/tools/perf/util/trace-event-parse.c @@ -21,14 +21,13 @@ * The parts for function graph printing was taken and modified from the * Linux Kernel that were written by Frederic Weisbecker. */ -#define _GNU_SOURCE + #include #include #include #include #include -#undef _GNU_SOURCE #include "../perf.h" #include "util.h" #include "trace-event.h" diff --git a/tools/perf/util/ui/browsers/hists.c b/tools/perf/util/ui/browsers/hists.c index 7b6669d1f11f..bfba0490c098 100644 --- a/tools/perf/util/ui/browsers/hists.c +++ b/tools/perf/util/ui/browsers/hists.c @@ -1,6 +1,4 @@ -#define _GNU_SOURCE #include -#undef _GNU_SOURCE #include "../libslang.h" #include #include diff --git a/tools/perf/util/ui/helpline.c b/tools/perf/util/ui/helpline.c index 6ef3c5691762..4f48f5901b30 100644 --- a/tools/perf/util/ui/helpline.c +++ b/tools/perf/util/ui/helpline.c @@ -1,4 +1,3 @@ -#define _GNU_SOURCE #include #include #include diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index 061dbf8c038d..232d17ef3e60 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -40,7 +40,6 @@ #define decimal_length(x) ((int)(sizeof(x) * 2.56 + 0.5) + 1) #define _ALL_SOURCE 1 -#define _GNU_SOURCE 1 #define _BSD_SOURCE 1 #define HAS_BOOL From a844d1ef09872a8b8c66d431edd1b8a943e51c7a Mon Sep 17 00:00:00 2001 From: Srikar Dronamraju Date: Fri, 20 Jan 2012 17:43:54 +0530 Subject: [PATCH 8/9] perf probe: Usability fixes Ingo pointed out few perf probe usability related errors during his review of uprobes. Since these issues are independent of uprobes, fixing them in a separate patch. Suggested-by: Ingo Molnar Acked-by: Masami Hiramatsu Cc: Masami Hiramatsu Link: http://lkml.kernel.org/r/20120120121354.GL15447@linux.vnet.ibm.com Signed-off-by: Srikar Dronamraju Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/probe-event.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 29cb65459811..b9bbdd236357 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -1729,7 +1729,7 @@ static int __add_probe_trace_events(struct perf_probe_event *pev, } ret = 0; - printf("Add new event%s\n", (ntevs > 1) ? "s:" : ":"); + printf("Added new event%s\n", (ntevs > 1) ? "s:" : ":"); for (i = 0; i < ntevs; i++) { tev = &tevs[i]; if (pev->event) @@ -1784,7 +1784,7 @@ static int __add_probe_trace_events(struct perf_probe_event *pev, if (ret >= 0) { /* Show how to use the event. */ - printf("\nYou can now use it on all perf tools, such as:\n\n"); + printf("\nYou can now use it in all perf tools, such as:\n\n"); printf("\tperf record -e %s:%s -aR sleep 1\n\n", tev->group, tev->event); } @@ -1959,7 +1959,7 @@ static int __del_trace_probe_event(int fd, struct str_node *ent) goto error; } - printf("Remove event: %s\n", ent->s); + printf("Removed event: %s\n", ent->s); return 0; error: pr_warning("Failed to delete event: %s\n", strerror(-ret)); From f8f4b2872295dca88339ec0c403b2217b1197353 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Fri, 20 Jan 2012 10:49:12 +0100 Subject: [PATCH 9/9] perf tools: Fix strlen() bug in perf_event__synthesize_event_type() The event_type record has a max length for the event name. It's called MAX_EVENT_NAME. The name may be truncated to fit the max length. But the header.size still reflects the original name length. If that length is > MAX_EVENT_NAME, then the header.size field is bogus. Fix this by using the length of the name after the potential truncation. Cc: David Ahern Cc: Ingo Molnar Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20120120094912.GA4882@quad Signed-off-by: Stephane Eranian Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/header.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 3e7e0b09c12c..ecd7f4dd7eea 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -2105,7 +2105,7 @@ int perf_event__synthesize_event_type(struct perf_tool *tool, strncpy(ev.event_type.event_type.name, name, MAX_EVENT_NAME - 1); ev.event_type.header.type = PERF_RECORD_HEADER_EVENT_TYPE; - size = strlen(name); + size = strlen(ev.event_type.event_type.name); size = ALIGN(size, sizeof(u64)); ev.event_type.header.size = sizeof(ev.event_type) - (sizeof(ev.event_type.event_type.name) - size);