powerpc/smp: Add support for detecting thread-groups sharing L2 cache

On POWER systems, groups of threads within a core sharing the L2-cache can be indicated by the "ibm,thread-groups" property array with the identifier "2". This patch adds support for detecting this and, when present, populating the cpu_l2_cache_mask of every CPU with the core-siblings which share L2 with the CPU, as specified by the "ibm,thread-groups" property array. On a platform with the following "ibm,thread-groups" configuration 00000001 00000002 00000004 00000000 00000002 00000004 00000006 00000001 00000003 00000005 00000007 00000002 00000002 00000004 00000000 00000002 00000004 00000006 00000001 00000003 00000005 00000007 Without this patch, the sched-domain hierarchy for CPUs 0,1 would be CPU0 attaching sched-domain(s): domain-0: span=0,2,4,6 level=SMT domain-1: span=0-7 level=CACHE domain-2: span=0-15,24-39,48-55 level=MC domain-3: span=0-55 level=DIE CPU1 attaching sched-domain(s): domain-0: span=1,3,5,7 level=SMT domain-1: span=0-7 level=CACHE domain-2: span=0-15,24-39,48-55 level=MC domain-3: span=0-55 level=DIE The CACHE domain at 0-7 is incorrect since the ibm,thread-groups sub-array [00000002 00000002 00000004 00000000 00000002 00000004 00000006 00000001 00000003 00000005 00000007] indicates that L2 (Property "2") is shared only between the threads of a single group. There are "2" groups of threads where each group contains "4" threads. The groups are {0,2,4,6} and {1,3,5,7}. With this patch, the sched-domain hierarchy for CPUs 0,1 would be CPU0 attaching sched-domain(s): domain-0: span=0,2,4,6 level=SMT domain-1: span=0-15,24-39,48-55 level=MC domain-2: span=0-55 level=DIE CPU1 attaching sched-domain(s): domain-0: span=1,3,5,7 level=SMT domain-1: span=0-15,24-39,48-55 level=MC domain-2: span=0-55 level=DIE The CACHE domain with span=0,2,4,6 for CPU 0 (span=1,3,5,7 for CPU 1 resp.) gets degenerated into the SMT domain. Furthermore, the last-level-cache domain gets correctly set to the SMT sched-domain. Signed-off-by: Gautham R. 
Shenoy <ego@linux.vnet.ibm.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/1607596739-32439-5-git-send-email-ego@linux.vnet.ibm.com
2020-12-10 16:08:58 +05:30 · 2020-12-10 16:08:58 +05:30 · 9538abee18
--- a/arch/powerpc/include/asm/smp.h
+++ b/arch/powerpc/include/asm/smp.h
@ -134,6 +134,7 @@ static inline struct cpumask *cpu_smallcore_mask(int cpu)
 extern int cpu_to_core_id(int cpu);

 extern bool has_big_cores;
+extern bool thread_group_shares_l2;

 #define cpu_smt_mask cpu_smt_mask
 #ifdef CONFIG_SCHED_SMT
@ -187,6 +188,7 @@ extern void __cpu_die(unsigned int cpu);
 /* for UP */
 #define hard_smp_processor_id()		get_hard_smp_processor_id(0)
 #define smp_setup_cpu_maps()
+#define thread_group_shares_l2  0
 static inline void inhibit_secondary_onlining(void) {}
 static inline void uninhibit_secondary_onlining(void) {}
 static inline const struct cpumask *cpu_sibling_mask(int cpu)
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@ -76,6 +76,7 @@ static DEFINE_PER_CPU(int, cpu_state) = { 0 };
 struct task_struct *secondary_current;
 bool has_big_cores;
 bool coregroup_enabled;
+bool thread_group_shares_l2;

 DEFINE_PER_CPU(cpumask_var_t, cpu_sibling_map);
 DEFINE_PER_CPU(cpumask_var_t, cpu_smallcore_map);
@ -99,6 +100,7 @@ enum {

 #define MAX_THREAD_LIST_SIZE	8
 #define THREAD_GROUP_SHARE_L1   1
+#define THREAD_GROUP_SHARE_L2   2
 struct thread_groups {
 	unsigned int property;
 	unsigned int nr_groups;
@ -107,7 +109,7 @@ struct thread_groups {
 };

 /* Maximum number of properties that groups of threads within a core can share */
-#define MAX_THREAD_GROUP_PROPERTIES 1
+#define MAX_THREAD_GROUP_PROPERTIES 2

 struct thread_groups_list {
 	unsigned int nr_properties;
@ -121,6 +123,13 @@ static struct thread_groups_list tgl[NR_CPUS] __initdata;
 */
 DEFINE_PER_CPU(cpumask_var_t, thread_group_l1_cache_map);

+/*
+ * On some big-core systems, thread_group_l2_cache_map for each CPU
+ * corresponds to the set of its siblings within the core that share
+ * the L2-cache.
+ */
+DEFINE_PER_CPU(cpumask_var_t, thread_group_l2_cache_map);
+
 /* SMP operations for this machine */
 struct smp_ops_t *smp_ops;

@ -718,7 +727,9 @@ static void or_cpumasks_related(int i, int j, struct cpumask *(*srcmask)(int),
 *
 * ibm,thread-groups[i + 0] tells us the property based on which the
 * threads are being grouped together. If this value is 1, it implies
- * that the threads in the same group share L1, translation cache.
+ * that the threads in the same group share L1, translation cache. If
+ * the value is 2, it implies that the threads in the same group share
+ * the same L2 cache.
 *
 * ibm,thread-groups[i+1] tells us how many such thread groups exist for the
 * property ibm,thread-groups[i]
@ -872,9 +883,10 @@ static int __init init_thread_group_cache_map(int cpu, int cache_property)
 	int first_thread = cpu_first_thread_sibling(cpu);
 	int i, cpu_group_start = -1, err = 0;
 	struct thread_groups *tg = NULL;
-	cpumask_var_t *mask;
+	cpumask_var_t *mask = NULL;

-	if (cache_property != THREAD_GROUP_SHARE_L1)
+	if (cache_property != THREAD_GROUP_SHARE_L1 &&
+	    cache_property != THREAD_GROUP_SHARE_L2)
 		return -EINVAL;

 	tg = get_thread_groups(cpu, cache_property, &err);
@ -888,7 +900,11 @@ static int __init init_thread_group_cache_map(int cpu, int cache_property)
 		return -ENODATA;
 	}

-	mask = &per_cpu(thread_group_l1_cache_map, cpu);
+	if (cache_property == THREAD_GROUP_SHARE_L1)
+		mask = &per_cpu(thread_group_l1_cache_map, cpu);
+	else if (cache_property == THREAD_GROUP_SHARE_L2)
+		mask = &per_cpu(thread_group_l2_cache_map, cpu);
+
 	zalloc_cpumask_var_node(mask, GFP_KERNEL, cpu_to_node(cpu));

 	for (i = first_thread; i < first_thread + threads_per_core; i++) {
@ -990,6 +1006,16 @@ static int init_big_cores(void)
 	}

 	has_big_cores = true;
+
+	for_each_possible_cpu(cpu) {
+		int err = init_thread_group_cache_map(cpu, THREAD_GROUP_SHARE_L2);
+
+		if (err)
+			return err;
+	}
+
+	thread_group_shares_l2 = true;
+	pr_debug("L2 cache only shared by the threads in the small core\n");
 	return 0;
 }

@ -1304,6 +1330,28 @@ static bool update_mask_by_l2(int cpu, cpumask_var_t *mask)
 	if (has_big_cores)
 		submask_fn = cpu_smallcore_mask;

+	/*
+	 * If the threads in a thread-group share L2 cache, then the
+	 * L2-mask can be obtained from thread_group_l2_cache_map.
+	 */
+	if (thread_group_shares_l2) {
+		cpumask_set_cpu(cpu, cpu_l2_cache_mask(cpu));
+
+		for_each_cpu(i, per_cpu(thread_group_l2_cache_map, cpu)) {
+			if (cpu_online(i))
+				set_cpus_related(i, cpu, cpu_l2_cache_mask);
+		}
+
+		/* Verify that L1-cache siblings are a subset of L2 cache-siblings */
+		if (!cpumask_equal(submask_fn(cpu), cpu_l2_cache_mask(cpu)) &&
+		    !cpumask_subset(submask_fn(cpu), cpu_l2_cache_mask(cpu))) {
+			pr_warn_once("CPU %d : Inconsistent L1 and L2 cache siblings\n",
+				     cpu);
+		}
+
+		return true;
+	}
+
 	l2_cache = cpu_to_l2cache(cpu);
 	if (!l2_cache || !*mask) {
 		/* Assume only core siblings share cache with this CPU */