Merge branch 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf fixes from Ingo Molnar:
 "x86 PMU driver fixes plus a core code race fix"

* 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  perf/x86/intel: Fix incorrect lbr_sel_mask value
  perf/x86/intel/pt: Don't die on VMXON
  perf/core: Fix perf_event_open() vs. execve() race
  perf/x86/amd: Set the size of event map array to PERF_COUNT_HW_MAX
  perf/core: Make sysctl_perf_cpu_time_max_percent conform to documentation
  perf/x86/intel/rapl: Add missing Haswell model
  perf/x86/intel: Add model number for Skylake Server to perf
Commit 814dd9481d
@@ -115,7 +115,7 @@ static __initconst const u64 amd_hw_cache_event_ids
 /*
  * AMD Performance Monitor K7 and later.
  */
-static const u64 amd_perfmon_event_map[] =
+static const u64 amd_perfmon_event_map[PERF_COUNT_HW_MAX] =
 {
   [PERF_COUNT_HW_CPU_CYCLES]			= 0x0076,
   [PERF_COUNT_HW_INSTRUCTIONS]			= 0x00c0,
@@ -3639,6 +3639,7 @@ __init int intel_pmu_init(void)

 	case 78: /* 14nm Skylake Mobile */
 	case 94: /* 14nm Skylake Desktop */
+	case 85: /* 14nm Skylake Server */
 		x86_pmu.late_ack = true;
 		memcpy(hw_cache_event_ids, skl_hw_cache_event_ids, sizeof(hw_cache_event_ids));
 		memcpy(hw_cache_extra_regs, skl_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
@@ -63,7 +63,7 @@ static enum {

 #define LBR_PLM (LBR_KERNEL | LBR_USER)

-#define LBR_SEL_MASK	0x1ff	/* valid bits in LBR_SELECT */
+#define LBR_SEL_MASK	0x3ff	/* valid bits in LBR_SELECT */
 #define LBR_NOT_SUPP	-1	/* LBR filter not supported */
 #define LBR_IGN		0	/* ignored */
@@ -610,8 +610,10 @@ static int intel_pmu_setup_hw_lbr_filter(struct perf_event *event)
	 * The first 9 bits (LBR_SEL_MASK) in LBR_SELECT operate
	 * in suppress mode. So LBR_SELECT should be set to
	 * (~mask & LBR_SEL_MASK) | (mask & ~LBR_SEL_MASK)
+	 * But the 10th bit LBR_CALL_STACK does not operate
+	 * in suppress mode.
	 */
-	reg->config = mask ^ x86_pmu.lbr_sel_mask;
+	reg->config = mask ^ (x86_pmu.lbr_sel_mask & ~LBR_CALL_STACK);

	if ((br_type & PERF_SAMPLE_BRANCH_NO_CYCLES) &&
	    (br_type & PERF_SAMPLE_BRANCH_NO_FLAGS) &&
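Aside (not part of the patch): a minimal standalone C sketch of the suppress-mode arithmetic the comment above describes. The constant values mirror this diff; treating LBR_CALL_STACK as bit 9 is an assumption drawn from the 0x1ff to 0x3ff mask change.

/*
 * Sketch only: bits 0-8 of LBR_SELECT are suppress-mode bits and must
 * be inverted; bit 9 (assumed LBR_CALL_STACK) is a plain enable bit
 * and must be excluded from the inversion, which is the bug fixed here.
 */
#include <stdint.h>
#include <stdio.h>

#define LBR_SEL_MASK	0x3ffULL	/* valid bits in LBR_SELECT */
#define LBR_CALL_STACK	(1ULL << 9)	/* assumed: bit 9, not suppress-mode */

static uint64_t lbr_select_config(uint64_t mask)
{
	/* invert only the suppress-mode bits, keep bit 9 as-is */
	return mask ^ (LBR_SEL_MASK & ~LBR_CALL_STACK);
}

int main(void)
{
	/* e.g. a mask requesting call-stack mode plus two filter bits */
	printf("config = %#llx\n",
	       (unsigned long long)lbr_select_config(LBR_CALL_STACK | 0x3));
	return 0;
}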
@@ -136,9 +136,21 @@ static int __init pt_pmu_hw_init(void)
	struct dev_ext_attribute *de_attrs;
	struct attribute **attrs;
	size_t size;
+	u64 reg;
	int ret;
	long i;

+	if (boot_cpu_has(X86_FEATURE_VMX)) {
+		/*
+		 * Intel SDM, 36.5 "Tracing post-VMXON" says that
+		 * "IA32_VMX_MISC[bit 14]" being 1 means PT can trace
+		 * post-VMXON.
+		 */
+		rdmsrl(MSR_IA32_VMX_MISC, reg);
+		if (reg & BIT(14))
+			pt_pmu.vmx = true;
+	}
+
	attrs = NULL;

	for (i = 0; i < PT_CPUID_LEAVES; i++) {
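Aside (not part of the patch): the same capability probe can be approximated from user space through the msr driver. This is a hypothetical sketch, assuming the msr module is loaded and the tool runs as root; MSR_IA32_VMX_MISC is architectural MSR 0x485, and bit 14 is the SDM's "PT can trace post-VMXON" bit.

/* Sketch: read IA32_VMX_MISC via /dev/cpu/0/msr and test bit 14. */
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

#define MSR_IA32_VMX_MISC 0x485	/* file offset selects the MSR index */

int main(void)
{
	uint64_t reg;
	int fd = open("/dev/cpu/0/msr", O_RDONLY);

	if (fd < 0 || pread(fd, &reg, sizeof(reg), MSR_IA32_VMX_MISC) != sizeof(reg)) {
		perror("rdmsr");	/* needs root and the msr module */
		return 1;
	}
	printf("PT usable post-VMXON: %s\n",
	       (reg & (1ULL << 14)) ? "yes" : "no");
	close(fd);
	return 0;
}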
@@ -269,20 +281,23 @@ static void pt_config(struct perf_event *event)

	reg |= (event->attr.config & PT_CONFIG_MASK);

+	event->hw.config = reg;
	wrmsrl(MSR_IA32_RTIT_CTL, reg);
 }

-static void pt_config_start(bool start)
+static void pt_config_stop(struct perf_event *event)
 {
-	u64 ctl;
+	u64 ctl = READ_ONCE(event->hw.config);
+
+	/* may be already stopped by a PMI */
+	if (!(ctl & RTIT_CTL_TRACEEN))
+		return;

-	rdmsrl(MSR_IA32_RTIT_CTL, ctl);
-	if (start)
-		ctl |= RTIT_CTL_TRACEEN;
-	else
-		ctl &= ~RTIT_CTL_TRACEEN;
+	ctl &= ~RTIT_CTL_TRACEEN;
	wrmsrl(MSR_IA32_RTIT_CTL, ctl);

+	WRITE_ONCE(event->hw.config, ctl);
+
	/*
	 * A wrmsr that disables trace generation serializes other PT
	 * registers and causes all data packets to be written to memory,
@@ -291,7 +306,6 @@ static void pt_config_start(bool start)
	 * The below WMB, separating data store and aux_head store matches
	 * the consumer's RMB that separates aux_head load and data load.
	 */
-	if (!start)
-		wmb();
+	wmb();
 }

@@ -942,11 +956,17 @@ void intel_pt_interrupt(void)
	if (!ACCESS_ONCE(pt->handle_nmi))
		return;

-	pt_config_start(false);
+	/*
+	 * If VMX is on and PT does not support it, don't touch anything.
+	 */
+	if (READ_ONCE(pt->vmx_on))
+		return;

	if (!event)
		return;

+	pt_config_stop(event);
+
	buf = perf_get_aux(&pt->handle);
	if (!buf)
		return;
@@ -983,6 +1003,35 @@ void intel_pt_interrupt(void)
	}
 }

+void intel_pt_handle_vmx(int on)
+{
+	struct pt *pt = this_cpu_ptr(&pt_ctx);
+	struct perf_event *event;
+	unsigned long flags;
+
+	/* PT plays nice with VMX, do nothing */
+	if (pt_pmu.vmx)
+		return;
+
+	/*
+	 * VMXON will clear RTIT_CTL.TraceEn; we need to make
+	 * sure to not try to set it while VMX is on. Disable
+	 * interrupts to avoid racing with pmu callbacks;
+	 * concurrent PMI should be handled fine.
+	 */
+	local_irq_save(flags);
+	WRITE_ONCE(pt->vmx_on, on);
+
+	if (on) {
+		/* prevent pt_config_stop() from writing RTIT_CTL */
+		event = pt->handle.event;
+		if (event)
+			event->hw.config = 0;
+	}
+	local_irq_restore(flags);
+}
+EXPORT_SYMBOL_GPL(intel_pt_handle_vmx);
+
 /*
  * PMU callbacks
  */
@@ -992,6 +1041,9 @@ static void pt_event_start(struct perf_event *event, int mode)
	struct pt *pt = this_cpu_ptr(&pt_ctx);
	struct pt_buffer *buf = perf_get_aux(&pt->handle);

+	if (READ_ONCE(pt->vmx_on))
+		return;
+
	if (!buf || pt_buffer_is_full(buf, pt)) {
		event->hw.state = PERF_HES_STOPPED;
		return;
@@ -1014,7 +1066,8 @@ static void pt_event_stop(struct perf_event *event, int mode)
	 * see comment in intel_pt_interrupt().
	 */
	ACCESS_ONCE(pt->handle_nmi) = 0;
-	pt_config_start(false);
+
+	pt_config_stop(event);

	if (event->hw.state == PERF_HES_STOPPED)
		return;
@@ -65,6 +65,7 @@ enum pt_capabilities {
 struct pt_pmu {
	struct pmu		pmu;
	u32			caps[PT_CPUID_REGS_NUM * PT_CPUID_LEAVES];
+	bool			vmx;
 };

 /**
@@ -107,10 +108,12 @@ struct pt_buffer {
  * struct pt - per-cpu pt context
  * @handle:	perf output handle
  * @handle_nmi:	do handle PT PMI on this cpu, there's an active event
+ * @vmx_on:	1 if VMX is ON on this cpu
  */
 struct pt {
	struct perf_output_handle handle;
	int			handle_nmi;
+	int			vmx_on;
 };

 #endif /* __INTEL_PT_H__ */
@@ -718,6 +718,7 @@ static int __init rapl_pmu_init(void)
		break;
	case 60: /* Haswell */
	case 69: /* Haswell-Celeron */
+	case 70: /* Haswell GT3e */
	case 61: /* Broadwell */
	case 71: /* Broadwell-H */
		rapl_cntr_mask = RAPL_IDX_HSW;
@@ -285,6 +285,10 @@ static inline void perf_events_lapic_init(void) { }
 static inline void perf_check_microcode(void) { }
 #endif

+#ifdef CONFIG_CPU_SUP_INTEL
+extern void intel_pt_handle_vmx(int on);
+#endif
+
 #if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD)
 extern void amd_pmu_enable_virt(void);
 extern void amd_pmu_disable_virt(void);
@@ -3103,6 +3103,8 @@ static __init int vmx_disabled_by_bios(void)

 static void kvm_cpu_vmxon(u64 addr)
 {
+	intel_pt_handle_vmx(1);
+
	asm volatile (ASM_VMX_VMXON_RAX
			: : "a"(&addr), "m"(addr)
			: "memory", "cc");
@@ -3172,6 +3174,8 @@ static void vmclear_local_loaded_vmcss(void)
 static void kvm_cpu_vmxoff(void)
 {
	asm volatile (__ex(ASM_VMX_VMXOFF) : : : "cc");
+
+	intel_pt_handle_vmx(0);
 }

 static void hardware_disable(void)
@@ -412,7 +412,8 @@ int perf_cpu_time_max_percent_handler(struct ctl_table *table, int write,
	if (ret || !write)
		return ret;

-	if (sysctl_perf_cpu_time_max_percent == 100) {
+	if (sysctl_perf_cpu_time_max_percent == 100 ||
+	    sysctl_perf_cpu_time_max_percent == 0) {
		printk(KERN_WARNING
		       "perf: Dynamic interrupt throttling disabled, can hang your system!\n");
		WRITE_ONCE(perf_sample_allowed_ns, 0);
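Aside (not part of the patch): a minimal user-space C sketch of the boundary semantics this hunk enforces. After the fix, writing either 0 or 100 to the sysctl disables dynamic interrupt throttling, as the documentation already promised.

/* Sketch of the new boundary check, mirrored outside the kernel. */
#include <stdbool.h>
#include <stdio.h>

static bool throttling_disabled(int max_percent)
{
	/* after the fix: 100 (no limit) and 0 (off) both disable it */
	return max_percent == 100 || max_percent == 0;
}

int main(void)
{
	int v[] = { 0, 25, 100 };

	for (int i = 0; i < 3; i++)
		printf("perf_cpu_time_max_percent=%d -> throttling %s\n",
		       v[i], throttling_disabled(v[i]) ? "disabled" : "enabled");
	return 0;
}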
@@ -1105,6 +1106,7 @@ static void put_ctx(struct perf_event_context *ctx)
  * function.
  *
  * Lock order:
+ *    cred_guard_mutex
  *	task_struct::perf_event_mutex
  *	  perf_event_context::mutex
  *	    perf_event::child_mutex;
@@ -3420,7 +3422,6 @@ static struct task_struct *
 find_lively_task_by_vpid(pid_t vpid)
 {
	struct task_struct *task;
-	int err;

	rcu_read_lock();
	if (!vpid)
@@ -3434,16 +3435,7 @@ find_lively_task_by_vpid(pid_t vpid)
	if (!task)
		return ERR_PTR(-ESRCH);

-	/* Reuse ptrace permission checks for now. */
-	err = -EACCES;
-	if (!ptrace_may_access(task, PTRACE_MODE_READ_REALCREDS))
-		goto errout;
-
	return task;
-errout:
-	put_task_struct(task);
-	return ERR_PTR(err);
-
 }

 /*
@@ -8413,6 +8405,24 @@ SYSCALL_DEFINE5(perf_event_open,

	get_online_cpus();

+	if (task) {
+		err = mutex_lock_interruptible(&task->signal->cred_guard_mutex);
+		if (err)
+			goto err_cpus;
+
+		/*
+		 * Reuse ptrace permission checks for now.
+		 *
+		 * We must hold cred_guard_mutex across this and any potential
+		 * perf_install_in_context() call for this new event to
+		 * serialize against exec() altering our credentials (and the
+		 * perf_event_exit_task() that could imply).
+		 */
+		err = -EACCES;
+		if (!ptrace_may_access(task, PTRACE_MODE_READ_REALCREDS))
+			goto err_cred;
+	}
+
	if (flags & PERF_FLAG_PID_CGROUP)
		cgroup_fd = pid;

@@ -8420,7 +8430,7 @@ SYSCALL_DEFINE5(perf_event_open,
				 NULL, NULL, cgroup_fd);
	if (IS_ERR(event)) {
		err = PTR_ERR(event);
-		goto err_cpus;
+		goto err_cred;
	}

	if (is_sampling_event(event)) {
@@ -8479,11 +8489,6 @@ SYSCALL_DEFINE5(perf_event_open,
			goto err_context;
	}

-	if (task) {
-		put_task_struct(task);
-		task = NULL;
-	}
-
	/*
	 * Look up the group leader (we will attach this event to it):
	 */
@@ -8581,6 +8586,11 @@ SYSCALL_DEFINE5(perf_event_open,

	WARN_ON_ONCE(ctx->parent_ctx);

+	/*
+	 * This is the point on no return; we cannot fail hereafter. This is
+	 * where we start modifying current state.
+	 */
+
	if (move_group) {
		/*
		 * See perf_event_ctx_lock() for comments on the details
@@ -8652,6 +8662,11 @@ SYSCALL_DEFINE5(perf_event_open,
		mutex_unlock(&gctx->mutex);
	mutex_unlock(&ctx->mutex);

+	if (task) {
+		mutex_unlock(&task->signal->cred_guard_mutex);
+		put_task_struct(task);
+	}
+
	put_online_cpus();

	mutex_lock(&current->perf_event_mutex);
@@ -8684,6 +8699,9 @@ err_alloc:
	 */
	if (!event_file)
		free_event(event);
+err_cred:
+	if (task)
+		mutex_unlock(&task->signal->cred_guard_mutex);
 err_cpus:
	put_online_cpus();
 err_task:
@@ -8968,6 +8986,9 @@ static void perf_event_exit_task_context(struct task_struct *child, int ctxn)

 /*
  * When a child task exits, feed back event values to parent events.
+ *
+ * Can be called with cred_guard_mutex held when called from
+ * install_exec_creds().
  */
 void perf_event_exit_task(struct task_struct *child)
 {