rcu: Switch synchronize_sched_expedited() to stop_one_cpu()

The synchronize_sched_expedited() function currently invokes try_stop_cpus(),
which schedules the stopper kthreads on each online non-idle CPU,
and waits until all those kthreads are running before letting any
of them stop.  This is disastrous for real-time workloads, which
get hit with a preemption that is as long as the longest scheduling
latency on any CPU, including any non-realtime housekeeping CPUs.
This commit therefore switches to using stop_one_cpu() on each CPU
in turn.  This avoids inflicting the worst-case scheduling latency
on the worst-case CPU onto all other CPUs, and also simplifies the
code a little bit.

Follow-up commits will simplify the counter-snapshotting algorithm
and convert a number of the counters that are now protected by the
new ->expedited_mutex to non-atomic.

Signed-off-by: Peter Zijlstra <peterz@infradead.org>
[ paulmck: Kept stop_one_cpu(), dropped disabling of "guardrails". ]
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
This commit is contained in:
Peter Zijlstra 2015-06-23 19:03:45 -07:00 коммит произвёл Paul E. McKenney
Родитель 75c27f119b
Коммит c190c3b16c
2 изменённых файлов: 15 добавлений и 27 удалений

Просмотреть файл

@ -103,6 +103,7 @@ struct rcu_state sname##_state = { \
.orphan_nxttail = &sname##_state.orphan_nxtlist, \ .orphan_nxttail = &sname##_state.orphan_nxtlist, \
.orphan_donetail = &sname##_state.orphan_donelist, \ .orphan_donetail = &sname##_state.orphan_donelist, \
.barrier_mutex = __MUTEX_INITIALIZER(sname##_state.barrier_mutex), \ .barrier_mutex = __MUTEX_INITIALIZER(sname##_state.barrier_mutex), \
.expedited_mutex = __MUTEX_INITIALIZER(sname##_state.expedited_mutex), \
.name = RCU_STATE_NAME(sname), \ .name = RCU_STATE_NAME(sname), \
.abbr = sabbr, \ .abbr = sabbr, \
} }
@ -3305,8 +3306,6 @@ static int synchronize_sched_expedited_cpu_stop(void *data)
*/ */
void synchronize_sched_expedited(void) void synchronize_sched_expedited(void)
{ {
cpumask_var_t cm;
bool cma = false;
int cpu; int cpu;
long firstsnap, s, snap; long firstsnap, s, snap;
int trycount = 0; int trycount = 0;
@ -3342,28 +3341,11 @@ void synchronize_sched_expedited(void)
} }
WARN_ON_ONCE(cpu_is_offline(raw_smp_processor_id())); WARN_ON_ONCE(cpu_is_offline(raw_smp_processor_id()));
/* Offline CPUs, idle CPUs, and any CPU we run on are quiescent. */
cma = zalloc_cpumask_var(&cm, GFP_KERNEL);
if (cma) {
cpumask_copy(cm, cpu_online_mask);
cpumask_clear_cpu(raw_smp_processor_id(), cm);
for_each_cpu(cpu, cm) {
struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);
if (!(atomic_add_return(0, &rdtp->dynticks) & 0x1))
cpumask_clear_cpu(cpu, cm);
}
if (cpumask_weight(cm) == 0)
goto all_cpus_idle;
}
/* /*
* Each pass through the following loop attempts to force a * Each pass through the following loop attempts to force a
* context switch on each CPU. * context switch on each CPU.
*/ */
while (try_stop_cpus(cma ? cm : cpu_online_mask, while (!mutex_trylock(&rsp->expedited_mutex)) {
synchronize_sched_expedited_cpu_stop,
NULL) == -EAGAIN) {
put_online_cpus(); put_online_cpus();
atomic_long_inc(&rsp->expedited_tryfail); atomic_long_inc(&rsp->expedited_tryfail);
@ -3373,7 +3355,6 @@ void synchronize_sched_expedited(void)
/* ensure test happens before caller kfree */ /* ensure test happens before caller kfree */
smp_mb__before_atomic(); /* ^^^ */ smp_mb__before_atomic(); /* ^^^ */
atomic_long_inc(&rsp->expedited_workdone1); atomic_long_inc(&rsp->expedited_workdone1);
free_cpumask_var(cm);
return; return;
} }
@ -3383,7 +3364,6 @@ void synchronize_sched_expedited(void)
} else { } else {
wait_rcu_gp(call_rcu_sched); wait_rcu_gp(call_rcu_sched);
atomic_long_inc(&rsp->expedited_normal); atomic_long_inc(&rsp->expedited_normal);
free_cpumask_var(cm);
return; return;
} }
@ -3393,7 +3373,6 @@ void synchronize_sched_expedited(void)
/* ensure test happens before caller kfree */ /* ensure test happens before caller kfree */
smp_mb__before_atomic(); /* ^^^ */ smp_mb__before_atomic(); /* ^^^ */
atomic_long_inc(&rsp->expedited_workdone2); atomic_long_inc(&rsp->expedited_workdone2);
free_cpumask_var(cm);
return; return;
} }
@ -3408,16 +3387,23 @@ void synchronize_sched_expedited(void)
/* CPU hotplug operation in flight, use normal GP. */ /* CPU hotplug operation in flight, use normal GP. */
wait_rcu_gp(call_rcu_sched); wait_rcu_gp(call_rcu_sched);
atomic_long_inc(&rsp->expedited_normal); atomic_long_inc(&rsp->expedited_normal);
free_cpumask_var(cm);
return; return;
} }
snap = atomic_long_read(&rsp->expedited_start); snap = atomic_long_read(&rsp->expedited_start);
smp_mb(); /* ensure read is before try_stop_cpus(). */ smp_mb(); /* ensure read is before try_stop_cpus(). */
} }
atomic_long_inc(&rsp->expedited_stoppedcpus);
all_cpus_idle: /* Stop each CPU that is online, non-idle, and not us. */
free_cpumask_var(cm); for_each_online_cpu(cpu) {
struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);
/* Skip our CPU and any idle CPUs. */
if (raw_smp_processor_id() == cpu ||
!(atomic_add_return(0, &rdtp->dynticks) & 0x1))
continue;
stop_one_cpu(cpu, synchronize_sched_expedited_cpu_stop, NULL);
}
atomic_long_inc(&rsp->expedited_stoppedcpus);
/* /*
* Everyone up to our most recent fetch is covered by our grace * Everyone up to our most recent fetch is covered by our grace
@ -3436,6 +3422,7 @@ all_cpus_idle:
} }
} while (atomic_long_cmpxchg(&rsp->expedited_done, s, snap) != s); } while (atomic_long_cmpxchg(&rsp->expedited_done, s, snap) != s);
atomic_long_inc(&rsp->expedited_done_exit); atomic_long_inc(&rsp->expedited_done_exit);
mutex_unlock(&rsp->expedited_mutex);
put_online_cpus(); put_online_cpus();
} }

Просмотреть файл

@ -480,6 +480,7 @@ struct rcu_state {
/* _rcu_barrier(). */ /* _rcu_barrier(). */
/* End of fields guarded by barrier_mutex. */ /* End of fields guarded by barrier_mutex. */
struct mutex expedited_mutex; /* Serializes expediting. */
atomic_long_t expedited_start; /* Starting ticket. */ atomic_long_t expedited_start; /* Starting ticket. */
atomic_long_t expedited_done; /* Done ticket. */ atomic_long_t expedited_done; /* Done ticket. */
atomic_long_t expedited_wrap; /* # near-wrap incidents. */ atomic_long_t expedited_wrap; /* # near-wrap incidents. */