powerpc/smp: Add support detecting thread-groups sharing L2 cache

author Gautham R. Shenoy <ego@linux.vnet.ibm.com>

Thu, 10 Dec 2020 10:38:58 +0000 (16:08 +0530)

committer Michael Ellerman <mpe@ellerman.id.au>

Thu, 10 Dec 2020 13:10:25 +0000 (00:10 +1100)
author Gautham R. Shenoy <ego@linux.vnet.ibm.com>
Thu, 10 Dec 2020 10:38:58 +0000 (16:08 +0530)
committer Michael Ellerman <mpe@ellerman.id.au>
Thu, 10 Dec 2020 13:10:25 +0000 (00:10 +1100)
diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h

index b2035b2f57ce3f3254110dfc921c8b2d1259ac9e..035459ce6a1a43cf52f150497203db4bef15a6e5 100644 (file)
--- a/arch/powerpc/include/asm/smp.h
+++ b/arch/powerpc/include/asm/smp.h
@@ -134,6 +134,7 @@ static inline struct cpumask *cpu_smallcore_mask(int cpu)
  extern int cpu_to_core_id(int cpu);
  
  extern bool has_big_cores;
+extern bool thread_group_shares_l2;
  
  #define cpu_smt_mask cpu_smt_mask
  #ifdef CONFIG_SCHED_SMT
@@ -187,6 +188,7 @@ extern void __cpu_die(unsigned int cpu);
  /* for UP */
  #define hard_smp_processor_id()                get_hard_smp_processor_id(0)
  #define smp_setup_cpu_maps()
+#define thread_group_shares_l2  0
  static inline void inhibit_secondary_onlining(void) {}
  static inline void uninhibit_secondary_onlining(void) {}
  static inline const struct cpumask *cpu_sibling_mask(int cpu)
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c

index 9078b5b5d6e40db276aeda4da14b4ead5a5b09a5..2b9b1bb4c5f28bd1872c7d39cf22d2c4eaf5e387 100644 (file)
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -76,6 +76,7 @@ static DEFINE_PER_CPU(int, cpu_state) = { 0 };
  struct task_struct *secondary_current;
  bool has_big_cores;
  bool coregroup_enabled;
+bool thread_group_shares_l2;
  
  DEFINE_PER_CPU(cpumask_var_t, cpu_sibling_map);
  DEFINE_PER_CPU(cpumask_var_t, cpu_smallcore_map);
@@ -99,6 +100,7 @@ enum {
  
  #define MAX_THREAD_LIST_SIZE   8
  #define THREAD_GROUP_SHARE_L1   1
+#define THREAD_GROUP_SHARE_L2   2
  struct thread_groups {
         unsigned int property;
         unsigned int nr_groups;
@@ -107,7 +109,7 @@ struct thread_groups {
  };
  
  /* Maximum number of properties that groups of threads within a core can share */
-#define MAX_THREAD_GROUP_PROPERTIES 1
+#define MAX_THREAD_GROUP_PROPERTIES 2
  
  struct thread_groups_list {
         unsigned int nr_properties;
@@ -121,6 +123,13 @@ static struct thread_groups_list tgl[NR_CPUS] __initdata;
   */
  DEFINE_PER_CPU(cpumask_var_t, thread_group_l1_cache_map);
  
+/*
+ * On some big-core systems, thread_group_l2_cache_map for each CPU
+ * corresponds to the set of its siblings within the core that share
+ * the L2-cache.
+ */
+DEFINE_PER_CPU(cpumask_var_t, thread_group_l2_cache_map);
+
  /* SMP operations for this machine */
  struct smp_ops_t *smp_ops;
  
@@ -718,7 +727,9 @@ static void or_cpumasks_related(int i, int j, struct cpumask *(*srcmask)(int),
   *
   * ibm,thread-groups[i + 0] tells us the property based on which the
   * threads are being grouped together. If this value is 1, it implies
- * that the threads in the same group share L1, translation cache.
+ * that the threads in the same group share L1, translation cache. If
+ * the value is 2, it implies that the threads in the same group share
+ * the same L2 cache.
   *
   * ibm,thread-groups[i+1] tells us how many such thread groups exist for the
   * property ibm,thread-groups[i]
@@ -872,9 +883,10 @@ static int __init init_thread_group_cache_map(int cpu, int cache_property)
         int first_thread = cpu_first_thread_sibling(cpu);
         int i, cpu_group_start = -1, err = 0;
         struct thread_groups *tg = NULL;
-       cpumask_var_t *mask;
+       cpumask_var_t *mask = NULL;
  
-       if (cache_property != THREAD_GROUP_SHARE_L1)
+       if (cache_property != THREAD_GROUP_SHARE_L1 &&
+           cache_property != THREAD_GROUP_SHARE_L2)
                 return -EINVAL;
  
         tg = get_thread_groups(cpu, cache_property, &err);
@@ -888,7 +900,11 @@ static int __init init_thread_group_cache_map(int cpu, int cache_property)
                 return -ENODATA;
         }
  
-       mask = &per_cpu(thread_group_l1_cache_map, cpu);
+       if (cache_property == THREAD_GROUP_SHARE_L1)
+               mask = &per_cpu(thread_group_l1_cache_map, cpu);
+       else if (cache_property == THREAD_GROUP_SHARE_L2)
+               mask = &per_cpu(thread_group_l2_cache_map, cpu);
+
         zalloc_cpumask_var_node(mask, GFP_KERNEL, cpu_to_node(cpu));
  
         for (i = first_thread; i < first_thread + threads_per_core; i++) {
@@ -990,6 +1006,16 @@ static int init_big_cores(void)
         }
  
         has_big_cores = true;
+
+       for_each_possible_cpu(cpu) {
+               int err = init_thread_group_cache_map(cpu, THREAD_GROUP_SHARE_L2);
+
+               if (err)
+                       return err;
+       }
+
+       thread_group_shares_l2 = true;
+       pr_debug("L2 cache only shared by the threads in the small core\n");
         return 0;
  }
  
@@ -1304,6 +1330,28 @@ static bool update_mask_by_l2(int cpu, cpumask_var_t *mask)
         if (has_big_cores)
                 submask_fn = cpu_smallcore_mask;
  
+       /*
+        * If the threads in a thread-group share L2 cache, then the
+        * L2-mask can be obtained from thread_group_l2_cache_map.
+        */
+       if (thread_group_shares_l2) {
+               cpumask_set_cpu(cpu, cpu_l2_cache_mask(cpu));
+
+               for_each_cpu(i, per_cpu(thread_group_l2_cache_map, cpu)) {
+                       if (cpu_online(i))
+                               set_cpus_related(i, cpu, cpu_l2_cache_mask);
+               }
+
+               /* Verify that L1-cache siblings are a subset of L2 cache-siblings */
+               if (!cpumask_equal(submask_fn(cpu), cpu_l2_cache_mask(cpu)) &&
+                   !cpumask_subset(submask_fn(cpu), cpu_l2_cache_mask(cpu))) {
+                       pr_warn_once("CPU %d : Inconsistent L1 and L2 cache siblings\n",
+                                    cpu);
+               }
+
+               return true;
+       }
+
         l2_cache = cpu_to_l2cache(cpu);
         if (!l2_cache || !*mask) {
                 /* Assume only core siblings share cache with this CPU */
author	Gautham R. Shenoy <ego@linux.vnet.ibm.com>
	Thu, 10 Dec 2020 10:38:58 +0000 (16:08 +0530)
committer	Michael Ellerman <mpe@ellerman.id.au>
	Thu, 10 Dec 2020 13:10:25 +0000 (00:10 +1100)
arch/powerpc/include/asm/smp.h		patch \| blob \| history
arch/powerpc/kernel/smp.c		patch \| blob \| history