Merge branch 'perfcounters-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'perfcounters-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: perf_counter: Start documenting HAVE_PERF_COUNTERS requirements perf_counter: Add forward/backward attribute ABI compatibility perf record: Explicitly program a default counter perf_counter: Remove PERF_TYPE_RAW special casing perf_counter: PERF_TYPE_HW_CACHE is a hardware counter too powerpc, perf_counter: Fix performance counter event types perf_counter/x86: Add a quirk for Atom processors perf_counter tools: Remove one L1-data alias
This commit is contained in:
Коммит
4ddbac9898
|
@ -294,12 +294,12 @@ static void power7_disable_pmc(unsigned int pmc, u64 mmcr[])
|
||||||
}
|
}
|
||||||
|
|
||||||
static int power7_generic_events[] = {
|
static int power7_generic_events[] = {
|
||||||
[PERF_COUNT_CPU_CYCLES] = 0x1e,
|
[PERF_COUNT_HW_CPU_CYCLES] = 0x1e,
|
||||||
[PERF_COUNT_INSTRUCTIONS] = 2,
|
[PERF_COUNT_HW_INSTRUCTIONS] = 2,
|
||||||
[PERF_COUNT_CACHE_REFERENCES] = 0xc880, /* LD_REF_L1_LSU */
|
[PERF_COUNT_HW_CACHE_REFERENCES] = 0xc880, /* LD_REF_L1_LSU*/
|
||||||
[PERF_COUNT_CACHE_MISSES] = 0x400f0, /* LD_MISS_L1 */
|
[PERF_COUNT_HW_CACHE_MISSES] = 0x400f0, /* LD_MISS_L1 */
|
||||||
[PERF_COUNT_BRANCH_INSTRUCTIONS] = 0x10068, /* BRU_FIN */
|
[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x10068, /* BRU_FIN */
|
||||||
[PERF_COUNT_BRANCH_MISSES] = 0x400f6, /* BR_MPRED */
|
[PERF_COUNT_HW_BRANCH_MISSES] = 0x400f6, /* BR_MPRED */
|
||||||
};
|
};
|
||||||
|
|
||||||
#define C(x) PERF_COUNT_HW_CACHE_##x
|
#define C(x) PERF_COUNT_HW_CACHE_##x
|
||||||
|
|
|
@ -968,6 +968,13 @@ fixed_mode_idx(struct perf_counter *counter, struct hw_perf_counter *hwc)
|
||||||
if (!x86_pmu.num_counters_fixed)
|
if (!x86_pmu.num_counters_fixed)
|
||||||
return -1;
|
return -1;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Quirk, IA32_FIXED_CTRs do not work on current Atom processors:
|
||||||
|
*/
|
||||||
|
if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
|
||||||
|
boot_cpu_data.x86_model == 28)
|
||||||
|
return -1;
|
||||||
|
|
||||||
event = hwc->config & ARCH_PERFMON_EVENT_MASK;
|
event = hwc->config & ARCH_PERFMON_EVENT_MASK;
|
||||||
|
|
||||||
if (unlikely(event == x86_pmu.event_map(PERF_COUNT_HW_INSTRUCTIONS)))
|
if (unlikely(event == x86_pmu.event_map(PERF_COUNT_HW_INSTRUCTIONS)))
|
||||||
|
|
|
@ -120,6 +120,8 @@ enum perf_counter_sample_format {
|
||||||
PERF_SAMPLE_ID = 1U << 6,
|
PERF_SAMPLE_ID = 1U << 6,
|
||||||
PERF_SAMPLE_CPU = 1U << 7,
|
PERF_SAMPLE_CPU = 1U << 7,
|
||||||
PERF_SAMPLE_PERIOD = 1U << 8,
|
PERF_SAMPLE_PERIOD = 1U << 8,
|
||||||
|
|
||||||
|
PERF_SAMPLE_MAX = 1U << 9, /* non-ABI */
|
||||||
};
|
};
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -131,17 +133,26 @@ enum perf_counter_read_format {
|
||||||
PERF_FORMAT_TOTAL_TIME_ENABLED = 1U << 0,
|
PERF_FORMAT_TOTAL_TIME_ENABLED = 1U << 0,
|
||||||
PERF_FORMAT_TOTAL_TIME_RUNNING = 1U << 1,
|
PERF_FORMAT_TOTAL_TIME_RUNNING = 1U << 1,
|
||||||
PERF_FORMAT_ID = 1U << 2,
|
PERF_FORMAT_ID = 1U << 2,
|
||||||
|
|
||||||
|
PERF_FORMAT_MAX = 1U << 3, /* non-ABI */
|
||||||
};
|
};
|
||||||
|
|
||||||
|
#define PERF_ATTR_SIZE_VER0 64 /* sizeof first published struct */
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Hardware event to monitor via a performance monitoring counter:
|
* Hardware event to monitor via a performance monitoring counter:
|
||||||
*/
|
*/
|
||||||
struct perf_counter_attr {
|
struct perf_counter_attr {
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Major type: hardware/software/tracepoint/etc.
|
* Major type: hardware/software/tracepoint/etc.
|
||||||
*/
|
*/
|
||||||
__u32 type;
|
__u32 type;
|
||||||
__u32 __reserved_1;
|
|
||||||
|
/*
|
||||||
|
* Size of the attr structure, for fwd/bwd compat.
|
||||||
|
*/
|
||||||
|
__u32 size;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Type specific configuration information.
|
* Type specific configuration information.
|
||||||
|
@ -168,12 +179,12 @@ struct perf_counter_attr {
|
||||||
comm : 1, /* include comm data */
|
comm : 1, /* include comm data */
|
||||||
freq : 1, /* use freq, not period */
|
freq : 1, /* use freq, not period */
|
||||||
|
|
||||||
__reserved_2 : 53;
|
__reserved_1 : 53;
|
||||||
|
|
||||||
__u32 wakeup_events; /* wakeup every n events */
|
__u32 wakeup_events; /* wakeup every n events */
|
||||||
__u32 __reserved_3;
|
__u32 __reserved_2;
|
||||||
|
|
||||||
__u64 __reserved_4;
|
__u64 __reserved_3;
|
||||||
};
|
};
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -621,7 +632,8 @@ extern int perf_counter_overflow(struct perf_counter *counter, int nmi,
|
||||||
static inline int is_software_counter(struct perf_counter *counter)
|
static inline int is_software_counter(struct perf_counter *counter)
|
||||||
{
|
{
|
||||||
return (counter->attr.type != PERF_TYPE_RAW) &&
|
return (counter->attr.type != PERF_TYPE_RAW) &&
|
||||||
(counter->attr.type != PERF_TYPE_HARDWARE);
|
(counter->attr.type != PERF_TYPE_HARDWARE) &&
|
||||||
|
(counter->attr.type != PERF_TYPE_HW_CACHE);
|
||||||
}
|
}
|
||||||
|
|
||||||
extern void perf_swcounter_event(u32, u64, int, struct pt_regs *, u64);
|
extern void perf_swcounter_event(u32, u64, int, struct pt_regs *, u64);
|
||||||
|
|
|
@ -758,6 +758,6 @@ int kernel_execve(const char *filename, char *const argv[], char *const envp[]);
|
||||||
|
|
||||||
|
|
||||||
asmlinkage long sys_perf_counter_open(
|
asmlinkage long sys_perf_counter_open(
|
||||||
const struct perf_counter_attr __user *attr_uptr,
|
struct perf_counter_attr __user *attr_uptr,
|
||||||
pid_t pid, int cpu, int group_fd, unsigned long flags);
|
pid_t pid, int cpu, int group_fd, unsigned long flags);
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -936,6 +936,8 @@ config AIO
|
||||||
|
|
||||||
config HAVE_PERF_COUNTERS
|
config HAVE_PERF_COUNTERS
|
||||||
bool
|
bool
|
||||||
|
help
|
||||||
|
See tools/perf/design.txt for details.
|
||||||
|
|
||||||
menu "Performance Counters"
|
menu "Performance Counters"
|
||||||
|
|
||||||
|
|
|
@ -3570,12 +3570,8 @@ perf_counter_alloc(struct perf_counter_attr *attr,
|
||||||
if (attr->inherit && (attr->sample_type & PERF_SAMPLE_GROUP))
|
if (attr->inherit && (attr->sample_type & PERF_SAMPLE_GROUP))
|
||||||
goto done;
|
goto done;
|
||||||
|
|
||||||
if (attr->type == PERF_TYPE_RAW) {
|
|
||||||
pmu = hw_perf_counter_init(counter);
|
|
||||||
goto done;
|
|
||||||
}
|
|
||||||
|
|
||||||
switch (attr->type) {
|
switch (attr->type) {
|
||||||
|
case PERF_TYPE_RAW:
|
||||||
case PERF_TYPE_HARDWARE:
|
case PERF_TYPE_HARDWARE:
|
||||||
case PERF_TYPE_HW_CACHE:
|
case PERF_TYPE_HW_CACHE:
|
||||||
pmu = hw_perf_counter_init(counter);
|
pmu = hw_perf_counter_init(counter);
|
||||||
|
@ -3588,6 +3584,9 @@ perf_counter_alloc(struct perf_counter_attr *attr,
|
||||||
case PERF_TYPE_TRACEPOINT:
|
case PERF_TYPE_TRACEPOINT:
|
||||||
pmu = tp_perf_counter_init(counter);
|
pmu = tp_perf_counter_init(counter);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
done:
|
done:
|
||||||
err = 0;
|
err = 0;
|
||||||
|
@ -3614,6 +3613,85 @@ done:
|
||||||
return counter;
|
return counter;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * perf_copy_attr - copy a perf_counter_attr from user space into @attr,
 * accepting both shorter (older ABI) and longer (newer ABI) layouts.
 *
 * @uattr: user-space pointer to the attribute structure; its ->size field
 *         says how many bytes user space believes the structure has
 *         (0 is treated as PERF_ATTR_SIZE_VER0).
 * @attr:  kernel copy to fill in; it is fully zeroed first, so fields
 *         beyond a short user structure read as 0.
 *
 * Returns 0 on success, -EFAULT if user memory cannot be accessed,
 * -E2BIG if the size is unacceptable or a larger-than-known structure has
 * non-zero bytes in the part this kernel does not know about (in which
 * case the kernel's sizeof(*attr) is written back to uattr->size), and
 * -EINVAL if the type, a reserved field, sample_type or read_format holds
 * a value this kernel does not know.
 */
static int perf_copy_attr(struct perf_counter_attr __user *uattr,
			  struct perf_counter_attr *attr)
{
	int ret;
	u32 size;

	/* VERIFY_WRITE: the err_size path writes back to uattr->size. */
	if (!access_ok(VERIFY_WRITE, uattr, PERF_ATTR_SIZE_VER0))
		return -EFAULT;

	/*
	 * Zero the whole kernel structure so that a copy shorter than
	 * sizeof(*attr) leaves the remaining fields at 0.
	 */
	memset(attr, 0, sizeof(*attr));

	ret = get_user(size, &uattr->size);
	if (ret)
		return ret;

	if (size > PAGE_SIZE)	/* silly large */
		goto err_size;

	if (!size)		/* abi compat */
		size = PERF_ATTR_SIZE_VER0;

	if (size < PERF_ATTR_SIZE_VER0)
		goto err_size;

	/*
	 * If we're handed a bigger struct than we know of, ensure all the
	 * unknown bytes are 0.  The tail is scanned byte by byte: stepping
	 * in unsigned longs from aligned pointers could skip bytes at the
	 * start of the tail and read beyond its end.
	 */
	if (size > sizeof(*attr)) {
		unsigned char val;
		unsigned char __user *addr;
		unsigned char __user *end;

		addr = (void __user *)uattr + sizeof(*attr);
		end  = (void __user *)uattr + size;

		for (; addr < end; addr++) {
			ret = get_user(val, addr);
			if (ret)
				return ret;
			if (val)
				goto err_size;
		}

		/*
		 * Only the part we know about may be copied; copying
		 * 'size' bytes would overflow the kernel's *attr.
		 */
		size = sizeof(*attr);
	}

	ret = copy_from_user(attr, uattr, size);
	if (ret)
		return -EFAULT;

	/*
	 * If the type exists, the corresponding creation will verify
	 * the attr->config.
	 */
	if (attr->type >= PERF_TYPE_MAX)
		return -EINVAL;

	if (attr->__reserved_1 || attr->__reserved_2 || attr->__reserved_3)
		return -EINVAL;

	/* Reject bits at or above the *_MAX markers. */
	if (attr->sample_type & ~(PERF_SAMPLE_MAX-1))
		return -EINVAL;

	if (attr->read_format & ~(PERF_FORMAT_MAX-1))
		return -EINVAL;

out:
	return ret;

err_size:
	/* Best effort: report the size this kernel expects. */
	put_user(sizeof(*attr), &uattr->size);
	ret = -E2BIG;
	goto out;
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* sys_perf_counter_open - open a performance counter, associate it to a task/cpu
|
* sys_perf_counter_open - open a performance counter, associate it to a task/cpu
|
||||||
*
|
*
|
||||||
|
@ -3623,7 +3701,7 @@ done:
|
||||||
* @group_fd: group leader counter fd
|
* @group_fd: group leader counter fd
|
||||||
*/
|
*/
|
||||||
SYSCALL_DEFINE5(perf_counter_open,
|
SYSCALL_DEFINE5(perf_counter_open,
|
||||||
const struct perf_counter_attr __user *, attr_uptr,
|
struct perf_counter_attr __user *, attr_uptr,
|
||||||
pid_t, pid, int, cpu, int, group_fd, unsigned long, flags)
|
pid_t, pid, int, cpu, int, group_fd, unsigned long, flags)
|
||||||
{
|
{
|
||||||
struct perf_counter *counter, *group_leader;
|
struct perf_counter *counter, *group_leader;
|
||||||
|
@ -3639,8 +3717,9 @@ SYSCALL_DEFINE5(perf_counter_open,
|
||||||
if (flags)
|
if (flags)
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
|
|
||||||
if (copy_from_user(&attr, attr_uptr, sizeof(attr)) != 0)
|
ret = perf_copy_attr(attr_uptr, &attr);
|
||||||
return -EFAULT;
|
if (ret)
|
||||||
|
return ret;
|
||||||
|
|
||||||
if (!attr.exclude_kernel) {
|
if (!attr.exclude_kernel) {
|
||||||
if (perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN))
|
if (perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN))
|
||||||
|
|
|
@ -568,8 +568,11 @@ int cmd_record(int argc, const char **argv, const char *prefix)
|
||||||
if (!argc && target_pid == -1 && !system_wide)
|
if (!argc && target_pid == -1 && !system_wide)
|
||||||
usage_with_options(record_usage, options);
|
usage_with_options(record_usage, options);
|
||||||
|
|
||||||
if (!nr_counters)
|
if (!nr_counters) {
|
||||||
nr_counters = 1;
|
nr_counters = 1;
|
||||||
|
attrs[0].type = PERF_TYPE_HARDWARE;
|
||||||
|
attrs[0].config = PERF_COUNT_HW_CPU_CYCLES;
|
||||||
|
}
|
||||||
|
|
||||||
for (counter = 0; counter < nr_counters; counter++) {
|
for (counter = 0; counter < nr_counters; counter++) {
|
||||||
if (attrs[counter].sample_period)
|
if (attrs[counter].sample_period)
|
||||||
|
|
|
@ -440,3 +440,18 @@ by this process or by another, and doesn't affect any counters that
|
||||||
this process has created on other processes. It only enables or
|
this process has created on other processes. It only enables or
|
||||||
disables the group leaders, not any other members in the groups.
|
disables the group leaders, not any other members in the groups.
|
||||||
|
|
||||||
|
|
||||||
|
Arch requirements
|
||||||
|
-----------------
|
||||||
|
|
||||||
|
If your architecture does not have hardware performance metrics, you can
|
||||||
|
still use the generic software counters based on hrtimers for sampling.
|
||||||
|
|
||||||
|
So to start with, in order to add HAVE_PERF_COUNTERS to your Kconfig, you
|
||||||
|
will need at least this:
|
||||||
|
- asm/perf_counter.h - a basic stub will suffice at first
|
||||||
|
- support for atomic64 types (and associated helper functions)
|
||||||
|
- set_perf_counter_pending() implemented
|
||||||
|
|
||||||
|
If your architecture does have hardware capabilities, you can override the
|
||||||
|
weak stub hw_perf_counter_init() to register hardware counters.
|
||||||
|
|
|
@ -53,11 +53,12 @@ static inline unsigned long long rdclock(void)
|
||||||
_min1 < _min2 ? _min1 : _min2; })
|
_min1 < _min2 ? _min1 : _min2; })
|
||||||
|
|
||||||
static inline int
|
static inline int
|
||||||
sys_perf_counter_open(struct perf_counter_attr *attr_uptr,
|
sys_perf_counter_open(struct perf_counter_attr *attr,
|
||||||
pid_t pid, int cpu, int group_fd,
|
pid_t pid, int cpu, int group_fd,
|
||||||
unsigned long flags)
|
unsigned long flags)
|
||||||
{
|
{
|
||||||
return syscall(__NR_perf_counter_open, attr_uptr, pid, cpu,
|
attr->size = sizeof(*attr);
|
||||||
|
return syscall(__NR_perf_counter_open, attr, pid, cpu,
|
||||||
group_fd, flags);
|
group_fd, flags);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -75,7 +75,7 @@ static char *sw_event_names[] = {
|
||||||
#define MAX_ALIASES 8
|
#define MAX_ALIASES 8
|
||||||
|
|
||||||
static char *hw_cache [][MAX_ALIASES] = {
|
static char *hw_cache [][MAX_ALIASES] = {
|
||||||
{ "L1-data" , "l1-d", "l1d", "l1" },
|
{ "L1-data" , "l1-d", "l1d" },
|
||||||
{ "L1-instruction" , "l1-i", "l1i" },
|
{ "L1-instruction" , "l1-i", "l1i" },
|
||||||
{ "L2" , "l2" },
|
{ "L2" , "l2" },
|
||||||
{ "Data-TLB" , "dtlb", "d-tlb" },
|
{ "Data-TLB" , "dtlb", "d-tlb" },
|
||||||
|
|
Загрузка…
Ссылка в новой задаче