perf_counters: make software counters work as per-cpu counters
Impact: kernel crash fix

Yanmin Zhang reported that using a PERF_COUNT_TASK_CLOCK software
counter as a per-cpu counter would reliably crash the system, because
it calls __task_delta_exec with a null pointer.  The page fault,
context switch and cpu migration counters also won't function
correctly as per-cpu counters since they reference the current task.

This fixes the problem by redirecting the task_clock counter to the
cpu_clock counter when used as a per-cpu counter, and by implementing
per-cpu page fault, context switch and cpu migration counters.

Along the way, this:

- Initializes counter->ctx earlier, in perf_counter_alloc, so that
  sw_perf_counter_init can use it

- Adds code to kernel/sched.c to count task migrations into each cpu,
  in rq->nr_migrations_in

- Exports the per-cpu context switch and task migration counts via
  new functions added to kernel/sched.c

- Makes sure that if sw_perf_counter_init fails, we don't try to
  initialize the counter as a hardware counter.  Since the user has
  passed a negative, non-raw event type, they clearly don't intend
  for it to be interpreted as a hardware event.

Reported-by: "Zhang Yanmin" <yanmin_zhang@linux.intel.com>
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Parent: 82aa9a1829
Commit: 23a185ca8a
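The pattern the patch applies to each software counter, summarized before the diffs: a counter reads the monitored task's own statistics when it is attached to a task (counter->ctx->task is set) and falls back to a per-cpu total when it is a per-cpu counter; task_clock is the one exception and is simply redirected to the cpu_clock ops at init time. A minimal stand-alone sketch of that dispatch follows — the struct names and numbers are illustrative stand-ins, not kernel code.

/*
 * Stand-alone sketch of the dispatch rule introduced by this patch: a
 * software counter reads the task's own statistics when it is bound to a
 * task (ctx->task != NULL) and falls back to a per-cpu total otherwise.
 * All types and numbers here are illustrative stand-ins, not kernel code.
 */
#include <stdint.h>
#include <stdio.h>

struct task_stats {                     /* stands in for task_struct fields */
        uint64_t nvcsw, nivcsw;         /* voluntary/involuntary switches   */
};

struct counter_ctx {                    /* stands in for perf_counter_context */
        struct task_stats *task;        /* NULL for a per-cpu counter */
        int cpu;
};

static uint64_t cpu_nr_switches(int cpu)
{
        /* illustrative per-cpu totals, e.g. rq->nr_switches in the kernel */
        static const uint64_t per_cpu_total[2] = { 123456, 654321 };
        return per_cpu_total[cpu & 1];
}

static uint64_t get_context_switches(const struct counter_ctx *ctx)
{
        if (ctx->task)                          /* per-task counter */
                return ctx->task->nvcsw + ctx->task->nivcsw;
        return cpu_nr_switches(ctx->cpu);       /* per-cpu counter */
}

int main(void)
{
        struct task_stats t = { .nvcsw = 10, .nivcsw = 3 };
        struct counter_ctx per_task = { .task = &t,   .cpu = 0 };
        struct counter_ctx per_cpu  = { .task = NULL, .cpu = 1 };

        printf("per-task: %llu\n",
               (unsigned long long)get_context_switches(&per_task));
        printf("per-cpu:  %llu\n",
               (unsigned long long)get_context_switches(&per_cpu));
        return 0;
}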
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -137,6 +137,8 @@ extern unsigned long nr_running(void);
 extern unsigned long nr_uninterruptible(void);
 extern unsigned long nr_active(void);
 extern unsigned long nr_iowait(void);
+extern u64 cpu_nr_switches(int cpu);
+extern u64 cpu_nr_migrations(int cpu);
 
 struct seq_file;
 struct cfs_rq;
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -20,6 +20,8 @@
 #include <linux/anon_inodes.h>
 #include <linux/kernel_stat.h>
 #include <linux/perf_counter.h>
+#include <linux/mm.h>
+#include <linux/vmstat.h>
 
 /*
  * Each CPU has a list of per CPU counters:
@@ -502,7 +504,6 @@ perf_install_in_context(struct perf_counter_context *ctx,
 {
        struct task_struct *task = ctx->task;
 
-       counter->ctx = ctx;
        if (!task) {
                /*
                 * Per cpu counters are installed via an smp call and
@@ -1417,11 +1418,19 @@ static const struct hw_perf_counter_ops perf_ops_task_clock = {
        .read           = task_clock_perf_counter_read,
 };
 
-static u64 get_page_faults(void)
+#ifdef CONFIG_VM_EVENT_COUNTERS
+#define cpu_page_faults()      __get_cpu_var(vm_event_states).event[PGFAULT]
+#else
+#define cpu_page_faults()      0
+#endif
+
+static u64 get_page_faults(struct perf_counter *counter)
 {
-       struct task_struct *curr = current;
+       struct task_struct *curr = counter->ctx->task;
 
-       return curr->maj_flt + curr->min_flt;
+       if (curr)
+               return curr->maj_flt + curr->min_flt;
+       return cpu_page_faults();
 }
 
 static void page_faults_perf_counter_update(struct perf_counter *counter)
@@ -1430,7 +1439,7 @@ static void page_faults_perf_counter_update(struct perf_counter *counter)
        s64 delta;
 
        prev = atomic64_read(&counter->hw.prev_count);
-       now = get_page_faults();
+       now = get_page_faults(counter);
 
        atomic64_set(&counter->hw.prev_count, now);
 
@@ -1446,11 +1455,7 @@ static void page_faults_perf_counter_read(struct perf_counter *counter)
 
 static int page_faults_perf_counter_enable(struct perf_counter *counter)
 {
-       /*
-        * page-faults is a per-task value already,
-        * so we dont have to clear it on switch-in.
-        */
-
+       atomic64_set(&counter->hw.prev_count, get_page_faults(counter));
        return 0;
 }
 
@@ -1465,11 +1470,13 @@ static const struct hw_perf_counter_ops perf_ops_page_faults = {
        .read           = page_faults_perf_counter_read,
 };
 
-static u64 get_context_switches(void)
+static u64 get_context_switches(struct perf_counter *counter)
 {
-       struct task_struct *curr = current;
+       struct task_struct *curr = counter->ctx->task;
 
-       return curr->nvcsw + curr->nivcsw;
+       if (curr)
+               return curr->nvcsw + curr->nivcsw;
+       return cpu_nr_switches(smp_processor_id());
 }
 
 static void context_switches_perf_counter_update(struct perf_counter *counter)
@@ -1478,7 +1485,7 @@ static void context_switches_perf_counter_update(struct perf_counter *counter)
        s64 delta;
 
        prev = atomic64_read(&counter->hw.prev_count);
-       now = get_context_switches();
+       now = get_context_switches(counter);
 
        atomic64_set(&counter->hw.prev_count, now);
 
@@ -1494,11 +1501,7 @@ static void context_switches_perf_counter_read(struct perf_counter *counter)
 
 static int context_switches_perf_counter_enable(struct perf_counter *counter)
 {
-       /*
-        * ->nvcsw + curr->nivcsw is a per-task value already,
-        * so we dont have to clear it on switch-in.
-        */
-
+       atomic64_set(&counter->hw.prev_count, get_context_switches(counter));
        return 0;
 }
 
@@ -1513,9 +1516,13 @@ static const struct hw_perf_counter_ops perf_ops_context_switches = {
        .read           = context_switches_perf_counter_read,
 };
 
-static inline u64 get_cpu_migrations(void)
+static inline u64 get_cpu_migrations(struct perf_counter *counter)
 {
-       return current->se.nr_migrations;
+       struct task_struct *curr = counter->ctx->task;
+
+       if (curr)
+               return curr->se.nr_migrations;
+       return cpu_nr_migrations(smp_processor_id());
 }
 
 static void cpu_migrations_perf_counter_update(struct perf_counter *counter)
@@ -1524,7 +1531,7 @@ static void cpu_migrations_perf_counter_update(struct perf_counter *counter)
        s64 delta;
 
        prev = atomic64_read(&counter->hw.prev_count);
-       now = get_cpu_migrations();
+       now = get_cpu_migrations(counter);
 
        atomic64_set(&counter->hw.prev_count, now);
 
@@ -1540,11 +1547,7 @@ static void cpu_migrations_perf_counter_read(struct perf_counter *counter)
 
 static int cpu_migrations_perf_counter_enable(struct perf_counter *counter)
 {
-       /*
-        * se.nr_migrations is a per-task value already,
-        * so we dont have to clear it on switch-in.
-        */
-
+       atomic64_set(&counter->hw.prev_count, get_cpu_migrations(counter));
        return 0;
 }
 
@@ -1569,7 +1572,14 @@ sw_perf_counter_init(struct perf_counter *counter)
                hw_ops = &perf_ops_cpu_clock;
                break;
        case PERF_COUNT_TASK_CLOCK:
-               hw_ops = &perf_ops_task_clock;
+               /*
+                * If the user instantiates this as a per-cpu counter,
+                * use the cpu_clock counter instead.
+                */
+               if (counter->ctx->task)
+                       hw_ops = &perf_ops_task_clock;
+               else
+                       hw_ops = &perf_ops_cpu_clock;
                break;
        case PERF_COUNT_PAGE_FAULTS:
                hw_ops = &perf_ops_page_faults;
@@ -1592,6 +1602,7 @@ sw_perf_counter_init(struct perf_counter *counter)
 static struct perf_counter *
 perf_counter_alloc(struct perf_counter_hw_event *hw_event,
                   int cpu,
+                  struct perf_counter_context *ctx,
                   struct perf_counter *group_leader,
                   gfp_t gfpflags)
 {
@@ -1623,6 +1634,7 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event,
        counter->wakeup_pending = 0;
        counter->group_leader   = group_leader;
        counter->hw_ops         = NULL;
+       counter->ctx            = ctx;
 
        counter->state = PERF_COUNTER_STATE_INACTIVE;
        if (hw_event->disabled)
@@ -1631,7 +1643,7 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event,
        hw_ops = NULL;
        if (!hw_event->raw && hw_event->type < 0)
                hw_ops = sw_perf_counter_init(counter);
-       if (!hw_ops)
+       else
                hw_ops = hw_perf_counter_init(counter);
 
        if (!hw_ops) {
@@ -1707,7 +1719,8 @@ sys_perf_counter_open(struct perf_counter_hw_event *hw_event_uptr __user,
        }
 
        ret = -EINVAL;
-       counter = perf_counter_alloc(&hw_event, cpu, group_leader, GFP_KERNEL);
+       counter = perf_counter_alloc(&hw_event, cpu, ctx, group_leader,
+                                    GFP_KERNEL);
        if (!counter)
                goto err_put_context;
 
@@ -1777,15 +1790,14 @@ inherit_counter(struct perf_counter *parent_counter,
                parent_counter = parent_counter->parent;
 
        child_counter = perf_counter_alloc(&parent_counter->hw_event,
-                                           parent_counter->cpu, group_leader,
-                                           GFP_KERNEL);
+                                          parent_counter->cpu, child_ctx,
+                                          group_leader, GFP_KERNEL);
        if (!child_counter)
                return NULL;
 
        /*
         * Link it up in the child's context:
         */
-       child_counter->ctx = child_ctx;
        child_counter->task = child;
        list_add_counter(child_counter, child_ctx);
        child_ctx->nr_counters++;
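The three software counters above all keep their state with the same prev_count/delta bookkeeping: enable() now primes hw.prev_count with the current per-task or per-cpu value, and each update samples the value again and folds the difference into the counter. A user-space sketch of that bookkeeping, assuming the delta is accumulated into counter->count (the accumulation line itself is not visible in the hunks above):

/*
 * Sketch of the prev_count/delta bookkeeping used by the software counters.
 * The accumulation into "count" mirrors what the kernel is assumed to do
 * with counter->count; treat it as an assumption, not kernel code.
 */
#include <stdint.h>
#include <stdio.h>

struct sw_counter {
        uint64_t prev_count;    /* last sampled raw value */
        uint64_t count;         /* accumulated delta      */
};

/* prime prev_count at enable time so the first delta starts from "now" */
static void sw_counter_enable(struct sw_counter *c, uint64_t now)
{
        c->prev_count = now;
}

/* fold the change since the last sample into the accumulated count */
static void sw_counter_update(struct sw_counter *c, uint64_t now)
{
        uint64_t delta = now - c->prev_count;

        c->prev_count = now;
        c->count += delta;
}

int main(void)
{
        struct sw_counter c = { 0 };

        sw_counter_enable(&c, 100);     /* e.g. cpu_nr_switches() at enable */
        sw_counter_update(&c, 130);     /* 30 switches since enable         */
        sw_counter_update(&c, 150);     /* 20 more                          */
        printf("count = %llu\n", (unsigned long long)c.count);  /* 50 */
        return 0;
}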
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -558,6 +558,7 @@ struct rq {
        struct load_weight load;
        unsigned long nr_load_updates;
        u64 nr_switches;
+       u64 nr_migrations_in;
 
        struct cfs_rq cfs;
        struct rt_rq rt;
@@ -1908,6 +1909,7 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
 #endif
        if (old_cpu != new_cpu) {
                p->se.nr_migrations++;
+               new_rq->nr_migrations_in++;
 #ifdef CONFIG_SCHEDSTATS
                if (task_hot(p, old_rq->clock, NULL))
                        schedstat_inc(p, se.nr_forced2_migrations);
@@ -2810,6 +2812,21 @@ unsigned long nr_active(void)
        return running + uninterruptible;
 }
 
+/*
+ * Externally visible per-cpu scheduler statistics:
+ * cpu_nr_switches(cpu) - number of context switches on that cpu
+ * cpu_nr_migrations(cpu) - number of migrations into that cpu
+ */
+u64 cpu_nr_switches(int cpu)
+{
+       return cpu_rq(cpu)->nr_switches;
+}
+
+u64 cpu_nr_migrations(int cpu)
+{
+       return cpu_rq(cpu)->nr_migrations_in;
+}
+
 /*
  * Update rq->cpu_load[] statistics. This function is usually called every
  * scheduler tick (TICK_NSEC).
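On the scheduler side the change is equally small: struct rq gains nr_migrations_in, set_task_cpu() increments it on the destination runqueue, and cpu_nr_switches()/cpu_nr_migrations() read the per-cpu totals. A toy user-space model of that accounting, with made-up runqueue and cpu numbers:

/*
 * Minimal model of the migration bookkeeping added to kernel/sched.c:
 * each runqueue counts migrations *into* its cpu, and cpu_nr_migrations()
 * simply reads that per-cpu total.  Illustrative only, not kernel code.
 */
#include <stdint.h>
#include <stdio.h>

#define NR_CPUS 4

struct rq {
        uint64_t nr_switches;
        uint64_t nr_migrations_in;      /* new field from the patch */
};

static struct rq runqueues[NR_CPUS];

/* analogue of set_task_cpu(): count the migration on the destination cpu */
static void migrate_task(int old_cpu, int new_cpu)
{
        if (old_cpu != new_cpu)
                runqueues[new_cpu].nr_migrations_in++;
}

static uint64_t cpu_nr_migrations(int cpu)
{
        return runqueues[cpu].nr_migrations_in;
}

int main(void)
{
        migrate_task(0, 1);
        migrate_task(2, 1);
        migrate_task(1, 1);             /* same cpu: not a migration */

        printf("migrations into cpu 1: %llu\n",
               (unsigned long long)cpu_nr_migrations(1));       /* 2 */
        return 0;
}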