sched/fair: unlink misfit task from cpu overutilized

By taking into account uclamp_min, the 1:1 relation between task misfit
and cpu overutilized is no more true as a task with a small util_avg may
not fit a high capacity cpu because of uclamp_min constraint.

Add a new state in util_fits_cpu() to reflect the case that task would fit
a CPU except for the uclamp_min hint which is a performance requirement.

Use -1 to reflect that a CPU doesn't fit only because of uclamp_min so we
can use this new value to take additional action to select the best CPU
that doesn't match uclamp_min hint.

When util_fits_cpu() returns -1, we will continue to look for a possible
CPU with better performance, which replaces Capacity Inversion detection
with capacity_orig_of() - thermal_load_avg to detect a capacity inversion.

Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org>
Reviewed-and-tested-by: Qais Yousef <qyousef@layalina.io>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Dietmar Eggemann <dietmar.eggemann@arm.com>
Tested-by: Kajetan Puchalski <kajetan.puchalski@arm.com>
Link: https://lore.kernel.org/r/20230201143628.270912-2-vincent.guittot@linaro.org
This commit is contained in:
Vincent Guittot 2023-02-01 15:36:27 +01:00 коммит произвёл Peter Zijlstra
Родитель 443ed4c302
Коммит e5ed0550c0
1 изменённых файлов: 84 добавлений и 25 удалений

Просмотреть файл

@ -4561,8 +4561,8 @@ static inline int util_fits_cpu(unsigned long util,
* handle the case uclamp_min > uclamp_max.
*/
uclamp_min = min(uclamp_min, uclamp_max);
if (util < uclamp_min && capacity_orig != SCHED_CAPACITY_SCALE)
fits = fits && (uclamp_min <= capacity_orig_thermal);
if (fits && (util < uclamp_min) && (uclamp_min > capacity_orig_thermal))
return -1;
return fits;
}
@ -4572,7 +4572,11 @@ static inline int task_fits_cpu(struct task_struct *p, int cpu)
unsigned long uclamp_min = uclamp_eff_value(p, UCLAMP_MIN);
unsigned long uclamp_max = uclamp_eff_value(p, UCLAMP_MAX);
unsigned long util = task_util_est(p);
return util_fits_cpu(util, uclamp_min, uclamp_max, cpu);
/*
* Return true only if the cpu fully fits the task requirements, which
* include the utilization but also the performance hints.
*/
return (util_fits_cpu(util, uclamp_min, uclamp_max, cpu) > 0);
}
static inline void update_misfit_status(struct task_struct *p, struct rq *rq)
@ -6138,6 +6142,7 @@ static inline bool cpu_overutilized(int cpu)
unsigned long rq_util_min = uclamp_rq_get(cpu_rq(cpu), UCLAMP_MIN);
unsigned long rq_util_max = uclamp_rq_get(cpu_rq(cpu), UCLAMP_MAX);
/* Return true only if the utilization doesn't fit CPU's capacity */
return !util_fits_cpu(cpu_util_cfs(cpu), rq_util_min, rq_util_max, cpu);
}
@ -6931,6 +6936,7 @@ static int
select_idle_capacity(struct task_struct *p, struct sched_domain *sd, int target)
{
unsigned long task_util, util_min, util_max, best_cap = 0;
int fits, best_fits = 0;
int cpu, best_cpu = -1;
struct cpumask *cpus;
@ -6946,12 +6952,28 @@ select_idle_capacity(struct task_struct *p, struct sched_domain *sd, int target)
if (!available_idle_cpu(cpu) && !sched_idle_cpu(cpu))
continue;
if (util_fits_cpu(task_util, util_min, util_max, cpu))
return cpu;
if (cpu_cap > best_cap) {
fits = util_fits_cpu(task_util, util_min, util_max, cpu);
/* This CPU fits with all requirements */
if (fits > 0)
return cpu;
/*
* Only the min performance hint (i.e. uclamp_min) doesn't fit.
* Look for the CPU with best capacity.
*/
else if (fits < 0)
cpu_cap = capacity_orig_of(cpu) - thermal_load_avg(cpu_rq(cpu));
/*
* First, select CPU which fits better (-1 being better than 0).
* Then, select the one with best capacity at same level.
*/
if ((fits < best_fits) ||
((fits == best_fits) && (cpu_cap > best_cap))) {
best_cap = cpu_cap;
best_cpu = cpu;
best_fits = fits;
}
}
@ -6964,7 +6986,11 @@ static inline bool asym_fits_cpu(unsigned long util,
int cpu)
{
if (sched_asym_cpucap_active())
return util_fits_cpu(util, util_min, util_max, cpu);
/*
* Return true only if the cpu fully fits the task requirements
* which include the utilization and the performance hints.
*/
return (util_fits_cpu(util, util_min, util_max, cpu) > 0);
return true;
}
@ -7331,6 +7357,9 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
unsigned long p_util_max = uclamp_is_used() ? uclamp_eff_value(p, UCLAMP_MAX) : 1024;
struct root_domain *rd = this_rq()->rd;
int cpu, best_energy_cpu, target = -1;
int prev_fits = -1, best_fits = -1;
unsigned long best_thermal_cap = 0;
unsigned long prev_thermal_cap = 0;
struct sched_domain *sd;
struct perf_domain *pd;
struct energy_env eenv;
@ -7366,6 +7395,7 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
unsigned long prev_spare_cap = 0;
int max_spare_cap_cpu = -1;
unsigned long base_energy;
int fits, max_fits = -1;
cpumask_and(cpus, perf_domain_span(pd), cpu_online_mask);
@ -7415,7 +7445,9 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
util_min = max(rq_util_min, p_util_min);
util_max = max(rq_util_max, p_util_max);
}
if (!util_fits_cpu(util, util_min, util_max, cpu))
fits = util_fits_cpu(util, util_min, util_max, cpu);
if (!fits)
continue;
lsub_positive(&cpu_cap, util);
@ -7423,7 +7455,9 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
if (cpu == prev_cpu) {
/* Always use prev_cpu as a candidate. */
prev_spare_cap = cpu_cap;
} else if (cpu_cap > max_spare_cap) {
prev_fits = fits;
} else if ((fits > max_fits) ||
((fits == max_fits) && (cpu_cap > max_spare_cap))) {
/*
* Find the CPU with the maximum spare capacity
* among the remaining CPUs in the performance
@ -7431,6 +7465,7 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
*/
max_spare_cap = cpu_cap;
max_spare_cap_cpu = cpu;
max_fits = fits;
}
}
@ -7449,26 +7484,50 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
if (prev_delta < base_energy)
goto unlock;
prev_delta -= base_energy;
prev_thermal_cap = cpu_thermal_cap;
best_delta = min(best_delta, prev_delta);
}
/* Evaluate the energy impact of using max_spare_cap_cpu. */
if (max_spare_cap_cpu >= 0 && max_spare_cap > prev_spare_cap) {
/* Current best energy cpu fits better */
if (max_fits < best_fits)
continue;
/*
* Both don't fit performance hint (i.e. uclamp_min)
* but best energy cpu has better capacity.
*/
if ((max_fits < 0) &&
(cpu_thermal_cap <= best_thermal_cap))
continue;
cur_delta = compute_energy(&eenv, pd, cpus, p,
max_spare_cap_cpu);
/* CPU utilization has changed */
if (cur_delta < base_energy)
goto unlock;
cur_delta -= base_energy;
if (cur_delta < best_delta) {
best_delta = cur_delta;
best_energy_cpu = max_spare_cap_cpu;
}
/*
* Both fit for the task but best energy cpu has lower
* energy impact.
*/
if ((max_fits > 0) && (best_fits > 0) &&
(cur_delta >= best_delta))
continue;
best_delta = cur_delta;
best_energy_cpu = max_spare_cap_cpu;
best_fits = max_fits;
best_thermal_cap = cpu_thermal_cap;
}
}
rcu_read_unlock();
if (best_delta < prev_delta)
if ((best_fits > prev_fits) ||
((best_fits > 0) && (best_delta < prev_delta)) ||
((best_fits < 0) && (best_thermal_cap > prev_thermal_cap)))
target = best_energy_cpu;
return target;
@ -10271,6 +10330,16 @@ static struct sched_group *find_busiest_group(struct lb_env *env)
*/
update_sd_lb_stats(env, &sds);
/* There is no busy sibling group to pull tasks from */
if (!sds.busiest)
goto out_balanced;
busiest = &sds.busiest_stat;
/* Misfit tasks should be dealt with regardless of the avg load */
if (busiest->group_type == group_misfit_task)
goto force_balance;
if (sched_energy_enabled()) {
struct root_domain *rd = env->dst_rq->rd;
@ -10278,17 +10347,6 @@ static struct sched_group *find_busiest_group(struct lb_env *env)
goto out_balanced;
}
local = &sds.local_stat;
busiest = &sds.busiest_stat;
/* There is no busy sibling group to pull tasks from */
if (!sds.busiest)
goto out_balanced;
/* Misfit tasks should be dealt with regardless of the avg load */
if (busiest->group_type == group_misfit_task)
goto force_balance;
/* ASYM feature bypasses nice load balance check */
if (busiest->group_type == group_asym_packing)
goto force_balance;
@ -10301,6 +10359,7 @@ static struct sched_group *find_busiest_group(struct lb_env *env)
if (busiest->group_type == group_imbalanced)
goto force_balance;
local = &sds.local_stat;
/*
* If the local group is busier than the selected busiest group
* don't try and pull any tasks.