Merge branch 'timers/nohz-irq-work-v7' of git://git.kernel.org/pub/scm/linux/kernel/git/frederic/linux-dynticks into timers/nohz
Pull nohz updates from Frederic Weisbecker:

 "This set moves the nohz kick, used to notify a full dynticks CPU when
  events require tick rescheduling, out of the scheduler tick and onto a
  dedicated IPI.

  This debloats the scheduler IPI a bit, removing off-topic work that was
  abusing that scheduler fast path for its convenient asynchronous
  properties. The nohz kick now uses irq work for its own needs.

  Of course this implied quite some background infrastructure rework,
  including:

   * Clean up some irq-work internals
   * Implement remote irq-work
   * Implement the nohz kick on top of remote irq-work
   * Move the full dynticks timer enqueue notification to the new kick
   * Move the multi-task notification to the new kick
   * Remove unnecessary barriers on the multi-task notification"

Signed-off-by: Ingo Molnar <mingo@kernel.org>
Commit: d490b3e2c2
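For context before the diffs below: the irq_work API that this series extends lets a context that cannot do much work directly (NMI, deep scheduler paths) defer a callback to a later, safe interrupt context on the same CPU. A minimal local-CPU usage sketch, with illustrative names that are not part of this series:

#include <linux/irq_work.h>
#include <linux/printk.h>
#include <linux/smp.h>

/* Runs later, from the irq_work interrupt or the next tick, with IRQs disabled. */
static void my_work_func(struct irq_work *work)
{
	pr_info("deferred work ran on CPU %d\n", smp_processor_id());
}

static struct irq_work my_work = { .func = my_work_func };

static void poke(void)
{
	/* Safe even from NMI; returns false if the entry is already pending. */
	irq_work_queue(&my_work);
}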
--- a/include/linux/irq_work.h
+++ b/include/linux/irq_work.h
@@ -33,6 +33,11 @@ void init_irq_work(struct irq_work *work, void (*func)(struct irq_work *))
 #define DEFINE_IRQ_WORK(name, _f) struct irq_work name = { .func = (_f), }
 
 bool irq_work_queue(struct irq_work *work);
+
+#ifdef CONFIG_SMP
+bool irq_work_queue_on(struct irq_work *work, int cpu);
+#endif
+
 void irq_work_run(void);
 void irq_work_sync(struct irq_work *work);
 
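The remote variant is declared only under CONFIG_SMP. A caller that must also build on UP kernels would need its own fallback; a hypothetical wrapper, purely illustrative and not part of this series:

#include <linux/irq_work.h>

/* Hypothetical helper: target @cpu on SMP builds, fall back to the
 * local queue on UP builds where irq_work_queue_on() is not declared.
 */
static bool queue_work_on_cpu(struct irq_work *work, int cpu)
{
#ifdef CONFIG_SMP
	return irq_work_queue_on(work, cpu);
#else
	return irq_work_queue(work);
#endif
}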
--- a/include/linux/tick.h
+++ b/include/linux/tick.h
@@ -181,7 +181,13 @@ static inline bool tick_nohz_full_cpu(int cpu)
 
 extern void tick_nohz_init(void);
 extern void __tick_nohz_full_check(void);
-extern void tick_nohz_full_kick(void);
+extern void tick_nohz_full_kick_cpu(int cpu);
+
+static inline void tick_nohz_full_kick(void)
+{
+	tick_nohz_full_kick_cpu(smp_processor_id());
+}
+
 extern void tick_nohz_full_kick_all(void);
 extern void __tick_nohz_task_switch(struct task_struct *tsk);
 #else
@@ -189,6 +195,7 @@ static inline void tick_nohz_init(void) { }
 static inline bool tick_nohz_full_enabled(void) { return false; }
 static inline bool tick_nohz_full_cpu(int cpu) { return false; }
 static inline void __tick_nohz_full_check(void) { }
+static inline void tick_nohz_full_kick_cpu(int cpu) { }
 static inline void tick_nohz_full_kick(void) { }
 static inline void tick_nohz_full_kick_all(void) { }
 static inline void __tick_nohz_task_switch(struct task_struct *tsk) { }
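With this, existing callers of tick_nohz_full_kick() keep their behaviour, since the inline simply targets the local CPU, while new code can aim the kick at any CPU. A tiny illustrative snippet (not part of this series):

#include <linux/preempt.h>
#include <linux/tick.h>

static void kick_examples(int target_cpu)
{
	preempt_disable();
	tick_nohz_full_kick();			/* local CPU, same semantics as before */
	preempt_enable();

	tick_nohz_full_kick_cpu(target_cpu);	/* new: kick a specific CPU */
}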
--- a/kernel/irq_work.c
+++ b/kernel/irq_work.c
@@ -16,11 +16,12 @@
 #include <linux/tick.h>
 #include <linux/cpu.h>
 #include <linux/notifier.h>
+#include <linux/smp.h>
 #include <asm/processor.h>
 
 
-static DEFINE_PER_CPU(struct llist_head, irq_work_list);
-static DEFINE_PER_CPU(int, irq_work_raised);
+static DEFINE_PER_CPU(struct llist_head, raised_list);
+static DEFINE_PER_CPU(struct llist_head, lazy_list);
 
 /*
  * Claim the entry so that no one else will poke at it.
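raised_list and lazy_list are per-CPU lockless lists: producers push with a single atomic operation (safe even from NMI) and the consumer detaches the whole list at once. The pattern they rely on, in schematic form with illustrative names:

#include <linux/llist.h>
#include <linux/percpu.h>
#include <linux/printk.h>

struct my_item {
	struct llist_node llnode;
	int payload;
};

static DEFINE_PER_CPU(struct llist_head, my_list);

/* Producer: one atomic push, usable from NMI context. */
static void my_push(struct my_item *item)
{
	llist_add(&item->llnode, this_cpu_ptr(&my_list));
}

/* Consumer: detach everything at once, then walk the detached chain. */
static void my_drain(void)
{
	struct llist_node *node = llist_del_all(this_cpu_ptr(&my_list));

	while (node) {
		struct my_item *item = llist_entry(node, struct my_item, llnode);

		node = node->next;
		pr_debug("payload=%d\n", item->payload);
	}
}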
@@ -55,12 +56,34 @@ void __weak arch_irq_work_raise(void)
 	 */
 }
 
+#ifdef CONFIG_SMP
 /*
- * Enqueue the irq_work @entry unless it's already pending
+ * Enqueue the irq_work @work on @cpu unless it's already pending
  * somewhere.
  *
  * Can be re-enqueued while the callback is still in progress.
  */
+bool irq_work_queue_on(struct irq_work *work, int cpu)
+{
+	/* All work should have been flushed before going offline */
+	WARN_ON_ONCE(cpu_is_offline(cpu));
+
+	/* Arch remote IPI send/receive backend aren't NMI safe */
+	WARN_ON_ONCE(in_nmi());
+
+	/* Only queue if not already pending */
+	if (!irq_work_claim(work))
+		return false;
+
+	if (llist_add(&work->llnode, &per_cpu(raised_list, cpu)))
+		arch_send_call_function_single_ipi(cpu);
+
+	return true;
+}
+EXPORT_SYMBOL_GPL(irq_work_queue_on);
+#endif
+
+/* Enqueue the irq work @work on the current CPU */
 bool irq_work_queue(struct irq_work *work)
 {
 	/* Only queue if not already pending */
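Because the remote queue rides the call-function-single IPI (see the kernel/smp.c hunk further down) rather than the NMI-safe local mechanism, irq_work_queue_on() must not be called from NMI context, and duplicate requests are dropped by the claim step. A hypothetical caller, not part of this series:

#include <linux/cpumask.h>
#include <linux/irq_work.h>

static void stats_fold(struct irq_work *work)
{
	/* runs on the chosen CPU, in interrupt context */
}

static struct irq_work stats_work = { .func = stats_fold };

static void kick_stats_on(int cpu)
{
	if (!cpu_online(cpu))
		return;

	/* Returns false if the entry is already claimed and pending. */
	irq_work_queue_on(&stats_work, cpu);
}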
@@ -70,15 +93,13 @@ bool irq_work_queue(struct irq_work *work)
 	/* Queue the entry and raise the IPI if needed. */
 	preempt_disable();
 
-	llist_add(&work->llnode, &__get_cpu_var(irq_work_list));
-
-	/*
-	 * If the work is not "lazy" or the tick is stopped, raise the irq
-	 * work interrupt (if supported by the arch), otherwise, just wait
-	 * for the next tick.
-	 */
-	if (!(work->flags & IRQ_WORK_LAZY) || tick_nohz_tick_stopped()) {
-		if (!this_cpu_cmpxchg(irq_work_raised, 0, 1))
+	/* If the work is "lazy", handle it from next tick if any */
+	if (work->flags & IRQ_WORK_LAZY) {
+		if (llist_add(&work->llnode, &__get_cpu_var(lazy_list)) &&
+		    tick_nohz_tick_stopped())
+			arch_irq_work_raise();
+	} else {
+		if (llist_add(&work->llnode, &__get_cpu_var(raised_list)))
 			arch_irq_work_raise();
 	}
 
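Which list an entry lands on is decided by IRQ_WORK_LAZY: lazy work waits for the next tick unless the tick is stopped, while normal work raises the irq_work interrupt immediately. A work item opts in through its flags, modeled loosely on how printk declares its klogd wakeup work (illustrative names, not part of this series):

#include <linux/irq_work.h>
#include <linux/percpu.h>

static void my_lazy_func(struct irq_work *work)
{
	/* low-urgency housekeeping; running at the next tick is fine */
}

static DEFINE_PER_CPU(struct irq_work, my_lazy_work) = {
	.func	= my_lazy_func,
	.flags	= IRQ_WORK_LAZY,
};

Such an entry is still queued with the usual irq_work_queue() on the local CPU; only its handling is deferred.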
@@ -90,10 +111,11 @@ EXPORT_SYMBOL_GPL(irq_work_queue);
 
 bool irq_work_needs_cpu(void)
 {
-	struct llist_head *this_list;
+	struct llist_head *raised, *lazy;
 
-	this_list = &__get_cpu_var(irq_work_list);
-	if (llist_empty(this_list))
+	raised = &__get_cpu_var(raised_list);
+	lazy = &__get_cpu_var(lazy_list);
+	if (llist_empty(raised) && llist_empty(lazy))
 		return false;
 
 	/* All work should have been flushed before going offline */
@@ -102,28 +124,18 @@ bool irq_work_needs_cpu(void)
 	return true;
 }
 
-static void __irq_work_run(void)
+static void irq_work_run_list(struct llist_head *list)
 {
 	unsigned long flags;
 	struct irq_work *work;
-	struct llist_head *this_list;
 	struct llist_node *llnode;
 
-
-	/*
-	 * Reset the "raised" state right before we check the list because
-	 * an NMI may enqueue after we find the list empty from the runner.
-	 */
-	__this_cpu_write(irq_work_raised, 0);
-	barrier();
-
-	this_list = &__get_cpu_var(irq_work_list);
-	if (llist_empty(this_list))
-		return;
-
 	BUG_ON(!irqs_disabled());
 
-	llnode = llist_del_all(this_list);
+	if (llist_empty(list))
+		return;
+
+	llnode = llist_del_all(list);
 	while (llnode != NULL) {
 		work = llist_entry(llnode, struct irq_work, llnode);
 
@@ -148,6 +160,12 @@ static void __irq_work_run(void)
 	}
 }
 
+static void __irq_work_run(void)
+{
+	irq_work_run_list(&__get_cpu_var(raised_list));
+	irq_work_run_list(&__get_cpu_var(lazy_list));
+}
+
 /*
  * Run the irq_work entries on this cpu. Requires to be ran from hardirq
  * context with local IRQs disabled.
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -684,10 +684,16 @@ static void wake_up_idle_cpu(int cpu)
 
 static bool wake_up_full_nohz_cpu(int cpu)
 {
+	/*
+	 * We just need the target to call irq_exit() and re-evaluate
+	 * the next tick. The nohz full kick at least implies that.
+	 * If needed we can still optimize that later with an
+	 * empty IRQ.
+	 */
 	if (tick_nohz_full_cpu(cpu)) {
 		if (cpu != smp_processor_id() ||
 		    tick_nohz_tick_stopped())
-			smp_send_reschedule(cpu);
+			tick_nohz_full_kick_cpu(cpu);
 		return true;
 	}
 
@@ -734,10 +740,11 @@ bool sched_can_stop_tick(void)
 
 	rq = this_rq();
 
-	/* Make sure rq->nr_running update is visible after the IPI */
-	smp_rmb();
-
-	/* More than one running task need preemption */
+	/*
+	 * More than one running task need preemption.
+	 * nr_running update is assumed to be visible
+	 * after IPI is sent from wakers.
+	 */
 	if (rq->nr_running > 1)
 		return false;
 
@@ -1568,9 +1575,7 @@ void scheduler_ipi(void)
 	 */
 	preempt_fold_need_resched();
 
-	if (llist_empty(&this_rq()->wake_list)
-			&& !tick_nohz_full_cpu(smp_processor_id())
-			&& !got_nohz_idle_kick())
+	if (llist_empty(&this_rq()->wake_list) && !got_nohz_idle_kick())
 		return;
 
 	/*
@@ -1587,7 +1592,6 @@ void scheduler_ipi(void)
 	 * somewhat pessimize the simple resched case.
 	 */
 	irq_enter();
-	tick_nohz_full_check();
 	sched_ttwu_pending();
 
 	/*
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1221,9 +1221,15 @@ static inline void add_nr_running(struct rq *rq, unsigned count)
 #ifdef CONFIG_NO_HZ_FULL
 	if (prev_nr < 2 && rq->nr_running >= 2) {
 		if (tick_nohz_full_cpu(rq->cpu)) {
-			/* Order rq->nr_running write against the IPI */
-			smp_wmb();
-			smp_send_reschedule(rq->cpu);
+			/*
+			 * Tick is needed if more than one task runs on a CPU.
+			 * Send the target an IPI to kick it out of nohz mode.
+			 *
+			 * We assume that IPI implies full memory barrier and the
+			 * new value of rq->nr_running is visible on reception
+			 * from the target.
+			 */
+			tick_nohz_full_kick_cpu(rq->cpu);
 		}
 	}
 #endif
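The new comment documents an ordering contract rather than new code: the waker publishes the updated rq->nr_running and then relies on the IPI send/receive path acting as a full memory barrier, so the kicked CPU observes the new count without an explicit smp_wmb()/smp_rmb() pair. Schematically, with illustrative names and a stand-in structure instead of the real struct rq:

#include <linux/tick.h>

struct busy_count {
	int nr_running;
	int cpu;
};

/* Waker side: publish first, then kick.  The IPI is assumed to imply a
 * full barrier, which is why the old smp_wmb() could be dropped.
 */
static void publish_and_kick(struct busy_count *bc)
{
	bc->nr_running++;
	tick_nohz_full_kick_cpu(bc->cpu);
}

/* Target side, from the kick's irq_work handler: expected to observe
 * the increment above, so no smp_rmb() is needed before this load.
 */
static bool needs_tick(struct busy_count *bc)
{
	return bc->nr_running > 1;
}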
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -3,6 +3,7 @@
  *
  * (C) Jens Axboe <jens.axboe@oracle.com> 2008
  */
+#include <linux/irq_work.h>
 #include <linux/rcupdate.h>
 #include <linux/rculist.h>
 #include <linux/kernel.h>
@@ -210,6 +211,14 @@ void generic_smp_call_function_single_interrupt(void)
 		csd->func(csd->info);
 		csd_unlock(csd);
 	}
+
+	/*
+	 * Handle irq works queued remotely by irq_work_queue_on().
+	 * Smp functions above are typically synchronous so they
+	 * better run first since some other CPUs may be busy waiting
+	 * for them.
+	 */
+	irq_work_run();
 }
 
 /*
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -224,13 +224,15 @@ static DEFINE_PER_CPU(struct irq_work, nohz_full_kick_work) = {
 };
 
 /*
- * Kick the current CPU if it's full dynticks in order to force it to
+ * Kick the CPU if it's full dynticks in order to force it to
  * re-evaluate its dependency on the tick and restart it if necessary.
  */
-void tick_nohz_full_kick(void)
+void tick_nohz_full_kick_cpu(int cpu)
 {
-	if (tick_nohz_full_cpu(smp_processor_id()))
-		irq_work_queue(&__get_cpu_var(nohz_full_kick_work));
+	if (!tick_nohz_full_cpu(cpu))
+		return;
+
+	irq_work_queue_on(&per_cpu(nohz_full_kick_work, cpu), cpu);
 }
 
 static void nohz_full_kick_ipi(void *info)
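With a per-CPU kick primitive available, kicking an arbitrary set of CPUs becomes a simple loop. A sketch in that spirit (illustrative; this is not how tick_nohz_full_kick_all() is implemented):

#include <linux/cpumask.h>
#include <linux/preempt.h>
#include <linux/tick.h>

static void kick_cpus(const struct cpumask *mask)
{
	int cpu;

	preempt_disable();
	for_each_cpu_and(cpu, mask, cpu_online_mask)
		tick_nohz_full_kick_cpu(cpu);
	preempt_enable();
}

tick_nohz_full_kick_cpu() already filters out CPUs that are not full dynticks, so the loop itself does not need to check tick_nohz_full_cpu().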