Weird topologies can lead to asymmetric domain setups. This needs
further consideration since these setups are typically non-minimal
too.

For now, make it work by adding an extra mask selecting which CPUs
are allowed to iterate up.

The topology that triggered it is the one from David Rientjes:

	10 20 20 30
	20 10 20 20
	20 20 10 20
	30 20 20 10

resulting in boxes that wouldn't even boot.
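
To see the asymmetry, group nodes by each distinct distance value, roughly the
way the NUMA topology levels are derived: node 0 reaches {0,1,2} at distance 20
and all four nodes only at distance 30, while nodes 1 and 2 already reach all
four nodes at distance 20, so their domain trees end one level earlier and the
deeper trees can see sibling domains with empty spans. A minimal user-space
sketch of that per-distance grouping (plain C, node sets as bitmasks; only the
distance table above is taken as given, the rest is illustrative):

	#include <stdio.h>

	#define N 4

	static const int dist[N][N] = {
		{ 10, 20, 20, 30 },
		{ 20, 10, 20, 20 },
		{ 20, 20, 10, 20 },
		{ 30, 20, 20, 10 },
	};

	static const int level[] = { 10, 20, 30 };	/* distinct distances */

	int main(void)
	{
		for (int n = 0; n < N; n++) {
			printf("node %d:", n);
			for (int l = 0; l < 3; l++) {
				unsigned int span = 0;	/* nodes within level[l] of n */

				for (int j = 0; j < N; j++)
					if (dist[n][j] <= level[l])
						span |= 1u << j;
				printf("  <=%d: 0x%x", level[l], span);
			}
			printf("\n");
		}
		return 0;
	}

Nodes 1 and 2 hit the full span 0xf already at the distance-20 level, nodes 0
and 3 need the distance-30 level on top of it: domain trees of unequal depth,
which is what build_group_mask() below guards against.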

Reported-by: David Rientjes <rientjes@google.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/n/tip-3p86l9cuaqnxz7uxsojmz5rm@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>

Peter Zijlstra authored on 2012-05-31 14:47:33 +02:00; committed by Ingo Molnar
Parent: 7f1b43936f
Commit: c117487687
4 changed files, 72 additions and 10 deletions

include/linux/sched.h

@@ -876,6 +876,8 @@ struct sched_group_power {
 	 * Number of busy cpus in this group.
 	 */
 	atomic_t nr_busy_cpus;
+
+	unsigned long cpumask[0]; /* iteration mask */
 };
 
 struct sched_group {
@@ -900,6 +902,15 @@ static inline struct cpumask *sched_group_cpus(struct sched_group *sg)
 	return to_cpumask(sg->cpumask);
 }
 
+/*
+ * cpumask masking which cpus in the group are allowed to iterate up the domain
+ * tree.
+ */
+static inline struct cpumask *sched_group_mask(struct sched_group *sg)
+{
+	return to_cpumask(sg->sgp->cpumask);
+}
+
 /**
  * group_first_cpu - Returns the first cpu in the cpumask of a sched_group.
  * @group: The group whose first cpu is to be returned.

kernel/sched/core.c

@@ -5994,6 +5994,44 @@ struct sched_domain_topology_level {
 	struct sd_data      data;
 };
 
+/*
+ * Build an iteration mask that can exclude certain CPUs from the upwards
+ * domain traversal.
+ *
+ * Asymmetric node setups can result in situations where the domain tree is of
+ * unequal depth, make sure to skip domains that already cover the entire
+ * range.
+ *
+ * In that case build_sched_domains() will have terminated the iteration early
+ * and our sibling sd spans will be empty. Domains should always include the
+ * cpu they're built on, so check that.
+ *
+ */
+static void build_group_mask(struct sched_domain *sd, struct sched_group *sg)
+{
+	const struct cpumask *span = sched_domain_span(sd);
+	struct sd_data *sdd = sd->private;
+	struct sched_domain *sibling;
+	int i;
+
+	for_each_cpu(i, span) {
+		sibling = *per_cpu_ptr(sdd->sd, i);
+		if (!cpumask_test_cpu(i, sched_domain_span(sibling)))
+			continue;
+
+		cpumask_set_cpu(i, sched_group_mask(sg));
+	}
+}
+
+/*
+ * Return the canonical balance cpu for this group, this is the first cpu
+ * of this group that's also in the iteration mask.
+ */
+int group_balance_cpu(struct sched_group *sg)
+{
+	return cpumask_first_and(sched_group_cpus(sg), sched_group_mask(sg));
+}
+
 static int
 build_overlap_sched_groups(struct sched_domain *sd, int cpu)
 {
@@ -6012,6 +6050,12 @@ build_overlap_sched_groups(struct sched_domain *sd, int cpu)
 		if (cpumask_test_cpu(i, covered))
 			continue;
 
+		child = *per_cpu_ptr(sdd->sd, i);
+
+		/* See the comment near build_group_mask(). */
+		if (!cpumask_test_cpu(i, sched_domain_span(child)))
+			continue;
+
 		sg = kzalloc_node(sizeof(struct sched_group) + cpumask_size(),
 				GFP_KERNEL, cpu_to_node(cpu));
 
@@ -6019,8 +6063,6 @@ build_overlap_sched_groups(struct sched_domain *sd, int cpu)
 			goto fail;
 
 		sg_span = sched_group_cpus(sg);
-
-		child = *per_cpu_ptr(sdd->sd, i);
 		if (child->child) {
 			child = child->child;
 			cpumask_copy(sg_span, sched_domain_span(child));
@@ -6030,13 +6072,18 @@ build_overlap_sched_groups(struct sched_domain *sd, int cpu)
 		cpumask_or(covered, covered, sg_span);
 
 		sg->sgp = *per_cpu_ptr(sdd->sgp, i);
-		atomic_inc(&sg->sgp->ref);
+		if (atomic_inc_return(&sg->sgp->ref) == 1)
+			build_group_mask(sd, sg);
 
+		/*
+		 * Make sure the first group of this domain contains the
+		 * canonical balance cpu. Otherwise the sched_domain iteration
+		 * breaks. See update_sg_lb_stats().
+		 */
 		if ((!groups && cpumask_test_cpu(cpu, sg_span)) ||
-		    cpumask_first(sg_span) == cpu) {
-			WARN_ON_ONCE(!cpumask_test_cpu(cpu, sg_span));
+		    group_balance_cpu(sg) == cpu)
 			groups = sg;
-		}
 
 		if (!first)
 			first = sg;
@@ -6109,6 +6156,7 @@ build_sched_groups(struct sched_domain *sd, int cpu)
 
 		cpumask_clear(sched_group_cpus(sg));
 		sg->sgp->power = 0;
+		cpumask_setall(sched_group_mask(sg));
 
 		for_each_cpu(j, span) {
 			if (get_group(j, sdd, NULL) != group)
@@ -6150,7 +6198,7 @@ static void init_sched_groups_power(int cpu, struct sched_domain *sd)
 		sg = sg->next;
 	} while (sg != sd->groups);
 
-	if (cpu != group_first_cpu(sg))
+	if (cpu != group_balance_cpu(sg))
 		return;
 
 	update_group_power(sd, cpu);
@@ -6525,7 +6573,7 @@ static int __sdt_alloc(const struct cpumask *cpu_map)
 
 			*per_cpu_ptr(sdd->sg, j) = sg;
 
-			sgp = kzalloc_node(sizeof(struct sched_group_power),
+			sgp = kzalloc_node(sizeof(struct sched_group_power) + cpumask_size(),
 					GFP_KERNEL, cpu_to_node(j));
 			if (!sgp)
 				return -ENOMEM;

kernel/sched/fair.c

@@ -3652,7 +3652,7 @@ static inline void update_sg_lb_stats(struct lb_env *env,
 	int i;
 
 	if (local_group)
-		balance_cpu = group_first_cpu(group);
+		balance_cpu = group_balance_cpu(group);
 
 	/* Tally up the load of all CPUs in the group */
 	max_cpu_load = 0;
@@ -3667,7 +3667,8 @@ static inline void update_sg_lb_stats(struct lb_env *env,
 
 		/* Bias balancing toward cpus of our domain */
 		if (local_group) {
-			if (idle_cpu(i) && !first_idle_cpu) {
+			if (idle_cpu(i) && !first_idle_cpu &&
+					cpumask_test_cpu(i, sched_group_mask(group))) {
 				first_idle_cpu = 1;
 				balance_cpu = i;
 			}

kernel/sched/sched.h

@@ -526,6 +526,8 @@ static inline struct sched_domain *highest_flag_domain(int cpu, int flag)
 DECLARE_PER_CPU(struct sched_domain *, sd_llc);
 DECLARE_PER_CPU(int, sd_llc_id);
 
+extern int group_balance_cpu(struct sched_group *sg);
+
 #endif /* CONFIG_SMP */
 
 #include "stats.h"
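
As a footnote to the new helpers above: group_balance_cpu() picks the first
cpu that is in both the group span and the iteration mask, where
group_first_cpu() blindly took the first cpu of the span, and the
update_sg_lb_stats() hunk now only elects idle balance cpus out of that mask.
A tiny user-space model of just that selection (bitmask stand-ins with
illustrative values, not kernel code):

	#include <stdio.h>

	struct group {
		unsigned long span;	/* models sched_group_cpus() */
		unsigned long mask;	/* models sched_group_mask() */
	};

	/* Models group_balance_cpu(): first cpu set in both span and mask. */
	static int balance_cpu(const struct group *g)
	{
		unsigned long both = g->span & g->mask;

		return both ? __builtin_ctzl(both) : -1;	/* gcc builtin */
	}

	int main(void)
	{
		/* Group spans cpus 0-3, but only cpus 2-3 may iterate up. */
		struct group g = { .span = 0xf, .mask = 0xc };

		printf("first cpu of span: %d\n", (int)__builtin_ctzl(g.span));	/* 0 */
		printf("balance cpu:       %d\n", balance_cpu(&g));		/* 2 */
		return 0;
	}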