trace,smp: Trace all smp_function_call*() invocations
(Ab)use the trace_ipi_send_cpu*() family to trace all smp_function_call*()
invocations, not only those that result in an actual IPI. The queued entries
log their callback function, while the actual IPIs are traced on
generic_smp_call_function_single_interrupt().

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Parent: 68e2d17c9e
Commit: 5c3124975e

Changed files: kernel/smp.c (66 lines changed)
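For reference, the ipi_send_cpu / ipi_send_cpumask trace events used by this patch are visible through tracefs. The userspace snippet below is only an illustrative sketch of how the resulting events can be watched; it assumes tracefs is mounted at /sys/kernel/tracing and that the events live under the "ipi" group. It is not part of the patch.

/*
 * Minimal sketch: enable the ipi_send_cpu / ipi_send_cpumask events and
 * stream the trace.  Assumes the tracefs paths shown below; adjust if your
 * system mounts tracefs elsewhere.  Run as root.
 */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

static int write_str(const char *path, const char *val)
{
        int fd = open(path, O_WRONLY);

        if (fd < 0)
                return -1;
        if (write(fd, val, strlen(val)) < 0) {
                close(fd);
                return -1;
        }
        return close(fd);
}

int main(void)
{
        char buf[4096];
        ssize_t n;
        int fd;

        /* Enable both IPI-send events. */
        write_str("/sys/kernel/tracing/events/ipi/ipi_send_cpu/enable", "1");
        write_str("/sys/kernel/tracing/events/ipi/ipi_send_cpumask/enable", "1");

        /*
         * Stream the trace: queued smp_call_function*() entries log their
         * callback, real IPIs log generic_smp_call_function_single_interrupt.
         */
        fd = open("/sys/kernel/tracing/trace_pipe", O_RDONLY);
        if (fd < 0) {
                perror("trace_pipe");
                return 1;
        }
        while ((n = read(fd, buf, sizeof(buf) - 1)) > 0) {
                buf[n] = '\0';
                fputs(buf, stdout);
        }
        close(fd);
        return 0;
}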
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -104,18 +104,20 @@ void __init call_function_init(void)
 }
 
 static __always_inline void
-send_call_function_single_ipi(int cpu, smp_call_func_t func)
+send_call_function_single_ipi(int cpu)
 {
         if (call_function_single_prep_ipi(cpu)) {
-                trace_ipi_send_cpu(cpu, _RET_IP_, func);
+                trace_ipi_send_cpu(cpu, _RET_IP_,
+                                   generic_smp_call_function_single_interrupt);
                 arch_send_call_function_single_ipi(cpu);
         }
 }
 
 static __always_inline void
-send_call_function_ipi_mask(struct cpumask *mask, smp_call_func_t func)
+send_call_function_ipi_mask(struct cpumask *mask)
 {
-        trace_ipi_send_cpumask(mask, _RET_IP_, func);
+        trace_ipi_send_cpumask(mask, _RET_IP_,
+                               generic_smp_call_function_single_interrupt);
         arch_send_call_function_ipi_mask(mask);
 }
 
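Reading the new side of this hunk on its own: both helpers now log the IPI handler rather than a callback, since the callback is already logged at queueing time. Below is a re-assembled view of the two helpers after this hunk, with explanatory comments added; it contains no code beyond what the hunk itself introduces.

/* New side of the hunk above, re-assembled, comments added. */
static __always_inline void
send_call_function_single_ipi(int cpu)
{
        if (call_function_single_prep_ipi(cpu)) {
                /* A real IPI goes out: log the handler, not the callback. */
                trace_ipi_send_cpu(cpu, _RET_IP_,
                                   generic_smp_call_function_single_interrupt);
                arch_send_call_function_single_ipi(cpu);
        }
}

static __always_inline void
send_call_function_ipi_mask(struct cpumask *mask)
{
        /* Same idea for the multi-CPU case. */
        trace_ipi_send_cpumask(mask, _RET_IP_,
                               generic_smp_call_function_single_interrupt);
        arch_send_call_function_ipi_mask(mask);
}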
@@ -316,25 +318,6 @@ static __always_inline void csd_unlock(struct __call_single_data *csd)
         smp_store_release(&csd->node.u_flags, 0);
 }
 
-static __always_inline void
-raw_smp_call_single_queue(int cpu, struct llist_node *node, smp_call_func_t func)
-{
-        /*
-         * The list addition should be visible to the target CPU when it pops
-         * the head of the list to pull the entry off it in the IPI handler
-         * because of normal cache coherency rules implied by the underlying
-         * llist ops.
-         *
-         * If IPIs can go out of order to the cache coherency protocol
-         * in an architecture, sufficient synchronisation should be added
-         * to arch code to make it appear to obey cache coherency WRT
-         * locking and barrier primitives. Generic code isn't really
-         * equipped to do the right thing...
-         */
-        if (llist_add(node, &per_cpu(call_single_queue, cpu)))
-                send_call_function_single_ipi(cpu, func);
-}
-
 static DEFINE_PER_CPU_SHARED_ALIGNED(call_single_data_t, csd_data);
 
 void __smp_call_single_queue(int cpu, struct llist_node *node)
@@ -354,10 +337,23 @@ void __smp_call_single_queue(int cpu, struct llist_node *node)
                 func = CSD_TYPE(csd) == CSD_TYPE_TTWU ?
                         sched_ttwu_pending : csd->func;
 
-                raw_smp_call_single_queue(cpu, node, func);
-        } else {
-                raw_smp_call_single_queue(cpu, node, NULL);
+                trace_ipi_send_cpu(cpu, _RET_IP_, func);
         }
+
+        /*
+         * The list addition should be visible to the target CPU when it pops
+         * the head of the list to pull the entry off it in the IPI handler
+         * because of normal cache coherency rules implied by the underlying
+         * llist ops.
+         *
+         * If IPIs can go out of order to the cache coherency protocol
+         * in an architecture, sufficient synchronisation should be added
+         * to arch code to make it appear to obey cache coherency WRT
+         * locking and barrier primitives. Generic code isn't really
+         * equipped to do the right thing...
+         */
+        if (llist_add(node, &per_cpu(call_single_queue, cpu)))
+                send_call_function_single_ipi(cpu);
 }
 
 /*
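After this hunk the old raw_smp_call_single_queue() helper is gone and its body lives directly in __smp_call_single_queue(). Below is a sketch of how the function reads afterwards; the enclosing conditional and the csd/func setup are outside the hunk's context and are elided rather than reconstructed.

void __smp_call_single_queue(int cpu, struct llist_node *node)
{
        /* ... enclosing conditional and csd/func setup elided ... */
                func = CSD_TYPE(csd) == CSD_TYPE_TTWU ?
                        sched_ttwu_pending : csd->func;

                /* Queued entry: log the callback that will eventually run. */
                trace_ipi_send_cpu(cpu, _RET_IP_, func);
        }

        /*
         * The llist ops make the addition visible to the target CPU; only
         * the first entry added to an empty queue triggers a real IPI,
         * which is traced separately in send_call_function_single_ipi().
         */
        if (llist_add(node, &per_cpu(call_single_queue, cpu)))
                send_call_function_single_ipi(cpu);
}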
@@ -732,9 +728,9 @@ static void smp_call_function_many_cond(const struct cpumask *mask,
         int cpu, last_cpu, this_cpu = smp_processor_id();
         struct call_function_data *cfd;
         bool wait = scf_flags & SCF_WAIT;
+        int nr_cpus = 0, nr_queued = 0;
         bool run_remote = false;
         bool run_local = false;
-        int nr_cpus = 0;
 
         lockdep_assert_preemption_disabled();
 
@@ -776,8 +772,10 @@ static void smp_call_function_many_cond(const struct cpumask *mask,
                 for_each_cpu(cpu, cfd->cpumask) {
                         call_single_data_t *csd = per_cpu_ptr(cfd->csd, cpu);
 
-                        if (cond_func && !cond_func(cpu, info))
+                        if (cond_func && !cond_func(cpu, info)) {
+                                __cpumask_clear_cpu(cpu, cfd->cpumask);
                                 continue;
+                        }
 
                         csd_lock(csd);
                         if (wait)
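Clearing skipped CPUs out of cfd->cpumask matters for the next hunk: the per-invocation ipi_send_cpumask event uses that mask, so CPUs rejected by cond_func must not appear in it. Purely for illustration, here is a hedged sketch of a caller that exercises this conditional path via on_each_cpu_cond_mask(); the helpers my_flush() and cpu_needs_flush() are hypothetical names, not kernel APIs.

#include <linux/cpumask.h>
#include <linux/smp.h>

/* Hypothetical per-CPU work and predicate, purely illustrative. */
static void my_flush(void *info)
{
        /* per-CPU work */
}

static bool cpu_needs_flush(int cpu, void *info)
{
        /* Only CPUs for which this returns true are queued (and traced). */
        return true;
}

static void flush_online_cpus(void)
{
        /*
         * smp_call_function_many_cond() runs underneath this call; CPUs
         * filtered out by cpu_needs_flush() are cleared from cfd->cpumask
         * and therefore excluded from the ipi_send_cpumask event.
         */
        on_each_cpu_cond_mask(cpu_needs_flush, my_flush, NULL, true,
                              cpu_online_mask);
}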
@@ -793,17 +791,25 @@ static void smp_call_function_many_cond(const struct cpumask *mask,
                                 nr_cpus++;
                                 last_cpu = cpu;
                         }
+                        nr_queued++;
                 }
 
+                /*
+                 * Trace each smp_function_call_*() as an IPI, actual IPIs
+                 * will be traced with func==generic_smp_call_function_single_ipi().
+                 */
+                if (nr_queued)
+                        trace_ipi_send_cpumask(cfd->cpumask, _RET_IP_, func);
+
                 /*
                  * Choose the most efficient way to send an IPI. Note that the
                  * number of CPUs might be zero due to concurrent changes to the
                  * provided mask.
                  */
                 if (nr_cpus == 1)
-                        send_call_function_single_ipi(last_cpu, func);
+                        send_call_function_single_ipi(last_cpu);
                 else if (likely(nr_cpus > 1))
-                        send_call_function_ipi_mask(cfd->cpumask_ipi, func);
+                        send_call_function_ipi_mask(cfd->cpumask_ipi);
         }
 
         if (run_local && (!cond_func || cond_func(this_cpu, info))) {
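Taken together with the earlier hunks, the remote path of smp_call_function_many_cond() now emits exactly one ipi_send_cpumask event per invocation, covering every CPU that got a csd queued, while real IPIs are still traced inside the send helpers. Below is a condensed, comment-annotated sketch of that path after the patch; the cfd/csd setup and the queueing test are not reconstructed, and queued_on_empty_list() is a hypothetical placeholder for the real llist_add()-based check.

        if (run_remote) {
                /* ... cfd setup elided ... */
                for_each_cpu(cpu, cfd->cpumask) {
                        /* ... cond_func filtering and csd setup elided ... */

                        /* Placeholder for the real llist_add()-based test. */
                        if (queued_on_empty_list(cpu)) {
                                nr_cpus++;      /* this CPU needs a real IPI */
                                last_cpu = cpu;
                        }
                        nr_queued++;            /* queued, with or without an IPI */
                }

                /* One trace per invocation, logging the callback for all queued CPUs. */
                if (nr_queued)
                        trace_ipi_send_cpumask(cfd->cpumask, _RET_IP_, func);

                /* Actual IPIs are traced inside these helpers (see the first hunk). */
                if (nr_cpus == 1)
                        send_call_function_single_ipi(last_cpu);
                else if (likely(nr_cpus > 1))
                        send_call_function_ipi_mask(cfd->cpumask_ipi);
        }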