From: Peter Zijlstra <peterz@infradead.org>
Date: Mon, 9 May 2016 08:38:41 +0000 (+0200)
Subject: sched/core: Optimize SCHED_SMT
X-Git-Url: http://git.maquefel.me/?a=commitdiff_plain;h=1b568f0aabf2;p=linux.git

sched/core: Optimize SCHED_SMT

Avoid pointless SCHED_SMT code when running on !SMT hardware.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 75ecd4f29199a..94115453c1c4c 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -7412,6 +7412,22 @@ int sched_cpu_dying(unsigned int cpu)
 }
 #endif
 
+#ifdef CONFIG_SCHED_SMT
+DEFINE_STATIC_KEY_FALSE(sched_smt_present);
+
+static void sched_init_smt(void)
+{
+	/*
+	 * We've enumerated all CPUs and will assume that if any CPU
+	 * has SMT siblings, CPU0 will too.
+	 */
+	if (cpumask_weight(cpu_smt_mask(0)) > 1)
+		static_branch_enable(&sched_smt_present);
+}
+#else
+static inline void sched_init_smt(void) { }
+#endif
+
 void __init sched_init_smp(void)
 {
 	cpumask_var_t non_isolated_cpus;
@@ -7441,6 +7457,9 @@ void __init sched_init_smp(void)
 
 	init_sched_rt_class();
 	init_sched_dl_class();
+
+	sched_init_smt();
+
 	sched_smp_initialized = true;
 }
 
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 6b41589c41e43..87caf2bd26f08 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -5357,7 +5357,7 @@ static inline bool test_idle_cores(int cpu, bool def)
  * Since SMT siblings share all cache levels, inspecting this limited remote
  * state should be fairly cheap.
  */
-void update_idle_core(struct rq *rq)
+void __update_idle_core(struct rq *rq)
 {
 	int core = cpu_of(rq);
 	int cpu;
@@ -5389,6 +5389,9 @@ static int select_idle_core(struct task_struct *p, struct sched_domain *sd, int
 	struct cpumask *cpus = this_cpu_cpumask_var_ptr(select_idle_mask);
 	int core, cpu, wrap;
 
+	if (!static_branch_likely(&sched_smt_present))
+		return -1;
+
 	if (!test_idle_cores(target, false))
 		return -1;
 
@@ -5422,6 +5425,9 @@ static int select_idle_smt(struct task_struct *p, struct sched_domain *sd, int t
 {
 	int cpu;
 
+	if (!static_branch_likely(&sched_smt_present))
+		return -1;
+
 	for_each_cpu(cpu, cpu_smt_mask(target)) {
 		if (!cpumask_test_cpu(cpu, tsk_cpus_allowed(p)))
 			continue;
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index c917dcad82add..01b5189235f28 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -36,12 +36,6 @@ extern void cpu_load_update_active(struct rq *this_rq);
 static inline void cpu_load_update_active(struct rq *this_rq) { }
 #endif
 
-#ifdef CONFIG_SCHED_SMT
-extern void update_idle_core(struct rq *rq);
-#else
-static inline void update_idle_core(struct rq *rq) { }
-#endif
-
 /*
  * Helpers for converting nanosecond timing to jiffy resolution
  */
@@ -730,6 +724,23 @@ static inline int cpu_of(struct rq *rq)
 #endif
 }
 
+
+#ifdef CONFIG_SCHED_SMT
+
+extern struct static_key_false sched_smt_present;
+
+extern void __update_idle_core(struct rq *rq);
+
+static inline void update_idle_core(struct rq *rq)
+{
+	if (static_branch_unlikely(&sched_smt_present))
+		__update_idle_core(rq);
+}
+
+#else
+static inline void update_idle_core(struct rq *rq) { }
+#endif
+
 DECLARE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
 
 #define cpu_rq(cpu)		(&per_cpu(runqueues, (cpu)))