- Have get_push_task() check whether current has migration disabled and

thus avoid useless invocations of the migration thread - Rework initialization flow so that all rq->core's are initialized, even of CPUs which have not been onlined yet, so that iterating over them all works as expected -----BEGIN PGP SIGNATURE----- iQIzBAABCgAdFiEEzv7L6UO9uDPlPSfHEsHwGGHeVUoFAmErbMkACgkQEsHwGGHe VUqHnA//XleDtzYa1oswLB2e/jfUU9OUIijLYWGxNxo+zwJ/cBGNZWIUd6EIN8G9 JXvuQEjKsHu68MsT/Ctwrs2C3TnROgIwX1l2p4PhZzGeUGoOhC1Ec11/9oobGN3l wqPYXsAAW3/tIm1KjkKEh2vfDomB51cjUYjn7cbEJHWX93m40dD3dHnerAqrfL3d /yZID5ANVCyMVgV9LDFXFA8ct0vMsM1lt8WsLI6s8zQI7uzmsP4PwIeMqmYl2bEZ nnRZnBRxTwZROyf5G+/8X+7mj9aiQ0T01D6+xOMtwN6IoSrw/05sArTB//mB8iOf xile4RJtzpJ+ukw/lKyNY+1nRM21NOnbPyhnmOxBom49U4ZkkEspwHOY0gubHzxN ka36t5ADSdbz8QQqO/f+wEgEHpHeFOOmQU6eMeHxZQ1ZRR3VWkkHnOyuXjhz8vH4 Kd8C0M/1p6oecHzJdpLFJxOFFvSd1awFQImJJ8UxY8OttPscnqbcy2SzHSIF4JtY hSacqzdI/Ppk7/ZCp5x1+PnLL9xtXMQhyyUUPCyqpoLNoPtFIzIZdOe9CrTX+FaM vf13LnQPG/K84OsXWFvqyISJwpcb9GP0kec2ZeiFPmdo/nkzuTCgZcOg5IaNSER0 //ItKZOfH2JK2PG8W3G0UOwLzhpsRkD3qIg48I2JM72ADJIIBUI= =ZUE4 -----END PGP SIGNATURE----- Merge tag 'sched_urgent_for_v5.14' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip Pull scheduler fixes from Borislav Petkov: - Have get_push_task() check whether current has migration disabled and thus avoid useless invocations of the migration thread - Rework initialization flow so that all rq->core's are initialized, even of CPUs which have not been onlined yet, so that iterating over them all works as expected * tag 'sched_urgent_for_v5.14' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: sched: Fix get_push_task() vs migrate_disable() sched: Fix Core-wide rq->lock for uninitialized CPUs
2021-08-29 10:54:14 -07:00 · 2021-08-29 10:54:14 -07:00 · 537b57bd5a
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@ -237,9 +237,30 @@ static DEFINE_MUTEX(sched_core_mutex);
 static atomic_t sched_core_count;
 static struct cpumask sched_core_mask;

+static void sched_core_lock(int cpu, unsigned long *flags)
+{
+	const struct cpumask *smt_mask = cpu_smt_mask(cpu);
+	int t, i = 0;
+
+	local_irq_save(*flags);
+	for_each_cpu(t, smt_mask)
+		raw_spin_lock_nested(&cpu_rq(t)->__lock, i++);
+}
+
+static void sched_core_unlock(int cpu, unsigned long *flags)
+{
+	const struct cpumask *smt_mask = cpu_smt_mask(cpu);
+	int t;
+
+	for_each_cpu(t, smt_mask)
+		raw_spin_unlock(&cpu_rq(t)->__lock);
+	local_irq_restore(*flags);
+}
+
 static void __sched_core_flip(bool enabled)
 {
-	int cpu, t, i;
+	unsigned long flags;
+	int cpu, t;

 	cpus_read_lock();

@ -250,19 +271,12 @@ static void __sched_core_flip(bool enabled)
 	for_each_cpu(cpu, &sched_core_mask) {
 		const struct cpumask *smt_mask = cpu_smt_mask(cpu);

-		i = 0;
-		local_irq_disable();
-		for_each_cpu(t, smt_mask) {
-			/* supports up to SMT8 */
-			raw_spin_lock_nested(&cpu_rq(t)->__lock, i++);
-		}
+		sched_core_lock(cpu, &flags);

 		for_each_cpu(t, smt_mask)
 			cpu_rq(t)->core_enabled = enabled;

-		for_each_cpu(t, smt_mask)
-			raw_spin_unlock(&cpu_rq(t)->__lock);
-		local_irq_enable();
+		sched_core_unlock(cpu, &flags);

 		cpumask_andnot(&sched_core_mask, &sched_core_mask, smt_mask);
 	}
@ -5736,35 +5750,109 @@ void queue_core_balance(struct rq *rq)
 	queue_balance_callback(rq, &per_cpu(core_balance_head, rq->cpu), sched_core_balance);
 }

-static inline void sched_core_cpu_starting(unsigned int cpu)
+static void sched_core_cpu_starting(unsigned int cpu)
 {
 	const struct cpumask *smt_mask = cpu_smt_mask(cpu);
-	struct rq *rq, *core_rq = NULL;
-	int i;
+	struct rq *rq = cpu_rq(cpu), *core_rq = NULL;
+	unsigned long flags;
+	int t;

-	core_rq = cpu_rq(cpu)->core;
+	sched_core_lock(cpu, &flags);

-	if (!core_rq) {
-		for_each_cpu(i, smt_mask) {
-			rq = cpu_rq(i);
-			if (rq->core && rq->core == rq)
-				core_rq = rq;
-		}
+	WARN_ON_ONCE(rq->core != rq);

-		if (!core_rq)
-			core_rq = cpu_rq(cpu);
+	/* if we're the first, we'll be our own leader */
+	if (cpumask_weight(smt_mask) == 1)
+		goto unlock;

-		for_each_cpu(i, smt_mask) {
-			rq = cpu_rq(i);
-
-			WARN_ON_ONCE(rq->core && rq->core != core_rq);
-			rq->core = core_rq;
+	/* find the leader */
+	for_each_cpu(t, smt_mask) {
+		if (t == cpu)
+			continue;
+		rq = cpu_rq(t);
+		if (rq->core == rq) {
+			core_rq = rq;
+			break;
 		}
 	}
+
+	if (WARN_ON_ONCE(!core_rq)) /* whoopsie */
+		goto unlock;
+
+	/* install and validate core_rq */
+	for_each_cpu(t, smt_mask) {
+		rq = cpu_rq(t);
+
+		if (t == cpu)
+			rq->core = core_rq;
+
+		WARN_ON_ONCE(rq->core != core_rq);
+	}
+
+unlock:
+	sched_core_unlock(cpu, &flags);
 }
+
+static void sched_core_cpu_deactivate(unsigned int cpu)
+{
+	const struct cpumask *smt_mask = cpu_smt_mask(cpu);
+	struct rq *rq = cpu_rq(cpu), *core_rq = NULL;
+	unsigned long flags;
+	int t;
+
+	sched_core_lock(cpu, &flags);
+
+	/* if we're the last man standing, nothing to do */
+	if (cpumask_weight(smt_mask) == 1) {
+		WARN_ON_ONCE(rq->core != rq);
+		goto unlock;
+	}
+
+	/* if we're not the leader, nothing to do */
+	if (rq->core != rq)
+		goto unlock;
+
+	/* find a new leader */
+	for_each_cpu(t, smt_mask) {
+		if (t == cpu)
+			continue;
+		core_rq = cpu_rq(t);
+		break;
+	}
+
+	if (WARN_ON_ONCE(!core_rq)) /* impossible */
+		goto unlock;
+
+	/* copy the shared state to the new leader */
+	core_rq->core_task_seq      = rq->core_task_seq;
+	core_rq->core_pick_seq      = rq->core_pick_seq;
+	core_rq->core_cookie        = rq->core_cookie;
+	core_rq->core_forceidle     = rq->core_forceidle;
+	core_rq->core_forceidle_seq = rq->core_forceidle_seq;
+
+	/* install new leader */
+	for_each_cpu(t, smt_mask) {
+		rq = cpu_rq(t);
+		rq->core = core_rq;
+	}
+
+unlock:
+	sched_core_unlock(cpu, &flags);
+}
+
+static inline void sched_core_cpu_dying(unsigned int cpu)
+{
+	struct rq *rq = cpu_rq(cpu);
+
+	if (rq->core != rq)
+		rq->core = rq;
+}
+
 #else /* !CONFIG_SCHED_CORE */

 static inline void sched_core_cpu_starting(unsigned int cpu) {}
+static inline void sched_core_cpu_deactivate(unsigned int cpu) {}
+static inline void sched_core_cpu_dying(unsigned int cpu) {}

 static struct task_struct *
 pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
@ -8707,6 +8795,8 @@ int sched_cpu_deactivate(unsigned int cpu)
 	 */
 	if (cpumask_weight(cpu_smt_mask(cpu)) == 2)
 		static_branch_dec_cpuslocked(&sched_smt_present);
+
+	sched_core_cpu_deactivate(cpu);
 #endif

 	if (!sched_smp_initialized)
@ -8811,6 +8901,7 @@ int sched_cpu_dying(unsigned int cpu)
 	calc_load_migrate(rq);
 	update_max_interval();
 	hrtick_clear(rq);
+	sched_core_cpu_dying(cpu);
 	return 0;
 }
 #endif
@ -9022,7 +9113,7 @@ void __init sched_init(void)
 		atomic_set(&rq->nr_iowait, 0);

 #ifdef CONFIG_SCHED_CORE
-		rq->core = NULL;
+		rq->core = rq;
 		rq->core_pick = NULL;
 		rq->core_enabled = 0;
 		rq->core_tree = RB_ROOT;
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@ -1093,7 +1093,7 @@ struct rq {
 	unsigned int		core_sched_seq;
 	struct rb_root		core_tree;

-	/* shared state */
+	/* shared state -- careful with sched_core_cpu_deactivate() */
 	unsigned int		core_task_seq;
 	unsigned int		core_pick_seq;
 	unsigned long		core_cookie;
@ -2255,6 +2255,9 @@ static inline struct task_struct *get_push_task(struct rq *rq)
 	if (p->nr_cpus_allowed == 1)
 		return NULL;

+	if (p->migration_disabled)
+		return NULL;
+
 	rq->push_busy = true;
 	return get_task_struct(p);
 }