nohz/full, sched/rt: Fix missed tick-reenabling bug in dequeue_task_rt()

[ Upstream commit 5c66d1b9b3 ] dequeue_task_rt() only decrements 'rt_rq->rt_nr_running' after having called sched_update_tick_dependency() preventing it from re-enabling the tick on systems that no longer have pending SCHED_RT tasks but have multiple runnable SCHED_OTHER tasks: dequeue_task_rt() dequeue_rt_entity() dequeue_rt_stack() dequeue_top_rt_rq() sub_nr_running() // decrements rq->nr_running sched_update_tick_dependency() sched_can_stop_tick() // checks rq->rt.rt_nr_running, ... __dequeue_rt_entity() dec_rt_tasks() // decrements rq->rt.rt_nr_running ... Every other scheduler class performs the operation in the opposite order, and sched_update_tick_dependency() expects the values to be updated as such. So avoid the misbehaviour by inverting the order in which the above operations are performed in the RT scheduler. Fixes: 76d92ac305 ("sched: Migrate sched to use new tick dependency mask model") Signed-off-by: Nicolas Saenz Julienne <nsaenzju@redhat.com> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Reviewed-by: Valentin Schneider <vschneid@redhat.com> Reviewed-by: Phil Auld <pauld@redhat.com> Link: https://lore.kernel.org/r/20220628092259.330171-1-nsaenzju@redhat.com Signed-off-by: Sasha Levin <sashal@kernel.org>
2022-06-28 11:22:59 +02:00 · 2022-06-28 11:22:59 +02:00 · b3d3069a6f
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@ -444,7 +444,7 @@ static inline void rt_queue_push_tasks(struct rq *rq)
 #endif /* CONFIG_SMP */

 static void enqueue_top_rt_rq(struct rt_rq *rt_rq);
-static void dequeue_top_rt_rq(struct rt_rq *rt_rq);
+static void dequeue_top_rt_rq(struct rt_rq *rt_rq, unsigned int count);

 static inline int on_rt_rq(struct sched_rt_entity *rt_se)
 {
@ -565,7 +565,7 @@ static void sched_rt_rq_dequeue(struct rt_rq *rt_rq)
 	rt_se = rt_rq->tg->rt_se[cpu];

 	if (!rt_se) {
-		dequeue_top_rt_rq(rt_rq);
+		dequeue_top_rt_rq(rt_rq, rt_rq->rt_nr_running);
 		/* Kick cpufreq (see the comment in kernel/sched/sched.h). */
 		cpufreq_update_util(rq_of_rt_rq(rt_rq), 0);
 	}
@ -651,7 +651,7 @@ static inline void sched_rt_rq_enqueue(struct rt_rq *rt_rq)

 static inline void sched_rt_rq_dequeue(struct rt_rq *rt_rq)
 {
-	dequeue_top_rt_rq(rt_rq);
+	dequeue_top_rt_rq(rt_rq, rt_rq->rt_nr_running);
 }

 static inline int rt_rq_throttled(struct rt_rq *rt_rq)
@ -1051,7 +1051,7 @@ static void update_curr_rt(struct rq *rq)
 }

 static void
-dequeue_top_rt_rq(struct rt_rq *rt_rq)
+dequeue_top_rt_rq(struct rt_rq *rt_rq, unsigned int count)
 {
 	struct rq *rq = rq_of_rt_rq(rt_rq);

@ -1062,7 +1062,7 @@ dequeue_top_rt_rq(struct rt_rq *rt_rq)

 	BUG_ON(!rq->nr_running);

-	sub_nr_running(rq, rt_rq->rt_nr_running);
+	sub_nr_running(rq, count);
 	rt_rq->rt_queued = 0;

 }
@ -1342,18 +1342,21 @@ static void __dequeue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flag
 static void dequeue_rt_stack(struct sched_rt_entity *rt_se, unsigned int flags)
 {
 	struct sched_rt_entity *back = NULL;
+	unsigned int rt_nr_running;

 	for_each_sched_rt_entity(rt_se) {
 		rt_se->back = back;
 		back = rt_se;
 	}

-	dequeue_top_rt_rq(rt_rq_of_se(back));
+	rt_nr_running = rt_rq_of_se(back)->rt_nr_running;

 	for (rt_se = back; rt_se; rt_se = rt_se->back) {
 		if (on_rt_rq(rt_se))
 			__dequeue_rt_entity(rt_se, flags);
 	}
+
+	dequeue_top_rt_rq(rt_rq_of_se(back), rt_nr_running);
 }

 static void enqueue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags)