powerpc/smp: Cache CPU to chip lookup
On systems with a large number of CPUs per node, even with the filtered matching of related CPUs, there can be a large number of calls to cpu_to_chip_id() for the same CPU. For example, with a 4096 vCPU, 1 node QEMU configuration with 4 threads per core, the system could see up to 1024 calls to cpu_to_chip_id() for the same CPU. On a given system, cpu_to_chip_id() for a given CPU always returns the same value. Hence cache the result in a lookup table for use in subsequent calls. Since all CPUs sharing the same core belong to the same chip, the lookup table has one entry per core. chip_id_lookup_table is not freed; it continues to be used when a CPU is onlined again after being offlined. Reported-by: Daniel Henrique Barboza <danielhb413@gmail.com> Suggested-by: Michael Ellerman <mpe@ellerman.id.au> Signed-off-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com> Tested-by: Daniel Henrique Barboza <danielhb413@gmail.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20210415120934.232271-4-srikar@linux.vnet.ibm.com
This commit is contained in:
Родитель
131c82b6a1
Коммит
c1e53367da
|
@ -31,6 +31,7 @@ extern u32 *cpu_to_phys_id;
|
||||||
extern bool coregroup_enabled;
|
extern bool coregroup_enabled;
|
||||||
|
|
||||||
extern int cpu_to_chip_id(int cpu);
|
extern int cpu_to_chip_id(int cpu);
|
||||||
|
extern int *chip_id_lookup_table;
|
||||||
|
|
||||||
#ifdef CONFIG_SMP
|
#ifdef CONFIG_SMP
|
||||||
|
|
||||||
|
|
|
@ -65,6 +65,8 @@
|
||||||
#define DBG(fmt...)
|
#define DBG(fmt...)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
int *chip_id_lookup_table;
|
||||||
|
|
||||||
#ifdef CONFIG_PPC64
|
#ifdef CONFIG_PPC64
|
||||||
int __initdata iommu_is_off;
|
int __initdata iommu_is_off;
|
||||||
int __initdata iommu_force_on;
|
int __initdata iommu_force_on;
|
||||||
|
@ -914,13 +916,22 @@ EXPORT_SYMBOL(of_get_ibm_chip_id);
|
||||||
int cpu_to_chip_id(int cpu)
|
int cpu_to_chip_id(int cpu)
|
||||||
{
|
{
|
||||||
struct device_node *np;
|
struct device_node *np;
|
||||||
|
int ret = -1, idx;
|
||||||
|
|
||||||
|
idx = cpu / threads_per_core;
|
||||||
|
if (chip_id_lookup_table && chip_id_lookup_table[idx] != -1)
|
||||||
|
return chip_id_lookup_table[idx];
|
||||||
|
|
||||||
np = of_get_cpu_node(cpu, NULL);
|
np = of_get_cpu_node(cpu, NULL);
|
||||||
if (!np)
|
if (np) {
|
||||||
return -1;
|
ret = of_get_ibm_chip_id(np);
|
||||||
|
|
||||||
of_node_put(np);
|
of_node_put(np);
|
||||||
return of_get_ibm_chip_id(np);
|
|
||||||
|
if (chip_id_lookup_table)
|
||||||
|
chip_id_lookup_table[idx] = ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
return ret;
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL(cpu_to_chip_id);
|
EXPORT_SYMBOL(cpu_to_chip_id);
|
||||||
|
|
||||||
|
|
|
@ -1073,6 +1073,20 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
|
||||||
cpu_smallcore_mask(boot_cpuid));
|
cpu_smallcore_mask(boot_cpuid));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (cpu_to_chip_id(boot_cpuid) != -1) {
|
||||||
|
int idx = num_possible_cpus() / threads_per_core;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* All threads of a core will all belong to the same chip,
|
||||||
|
* chip_id_lookup_table will have one entry per core.
|
||||||
|
* Assumption: if boot_cpuid doesn't have a chip-id, then no
|
||||||
|
* other CPUs will have a chip-id either.
|
||||||
|
*/
|
||||||
|
chip_id_lookup_table = kcalloc(idx, sizeof(int), GFP_KERNEL);
|
||||||
|
if (chip_id_lookup_table)
|
||||||
|
memset(chip_id_lookup_table, -1, sizeof(int) * idx);
|
||||||
|
}
|
||||||
|
|
||||||
if (smp_ops && smp_ops->probe)
|
if (smp_ops && smp_ops->probe)
|
||||||
smp_ops->probe();
|
smp_ops->probe();
|
||||||
}
|
}
|
||||||
|
@ -1468,8 +1482,8 @@ static void add_cpu_to_masks(int cpu)
|
||||||
{
|
{
|
||||||
struct cpumask *(*submask_fn)(int) = cpu_sibling_mask;
|
struct cpumask *(*submask_fn)(int) = cpu_sibling_mask;
|
||||||
int first_thread = cpu_first_thread_sibling(cpu);
|
int first_thread = cpu_first_thread_sibling(cpu);
|
||||||
int chip_id = cpu_to_chip_id(cpu);
|
|
||||||
cpumask_var_t mask;
|
cpumask_var_t mask;
|
||||||
|
int chip_id = -1;
|
||||||
bool ret;
|
bool ret;
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
|
@ -1492,7 +1506,10 @@ static void add_cpu_to_masks(int cpu)
|
||||||
if (has_coregroup_support())
|
if (has_coregroup_support())
|
||||||
update_coregroup_mask(cpu, &mask);
|
update_coregroup_mask(cpu, &mask);
|
||||||
|
|
||||||
if (chip_id == -1 || !ret) {
|
if (chip_id_lookup_table && ret)
|
||||||
|
chip_id = cpu_to_chip_id(cpu);
|
||||||
|
|
||||||
|
if (chip_id == -1) {
|
||||||
cpumask_copy(per_cpu(cpu_core_map, cpu), cpu_cpu_mask(cpu));
|
cpumask_copy(per_cpu(cpu_core_map, cpu), cpu_cpu_mask(cpu));
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
|
|
Загрузка…
Ссылка в новой задаче