x86/percpu: Use compiler segment prefix qualifier
author Nadav Amit <namit@vmware.com>
Wed, 4 Oct 2023 14:49:43 +0000 (16:49 +0200)
committer Ingo Molnar <mingo@kernel.org>
Thu, 5 Oct 2023 07:01:52 +0000 (09:01 +0200)
Using a segment prefix qualifier is cleaner than using a segment prefix
in the inline assembly, and provides the compiler with more information,
telling it that __seg_gs:[addr] is different from [addr] when it
analyzes data dependencies. It also enables various optimizations that
will be implemented in the next patches.

Use segment prefix qualifiers when they are supported. Unfortunately,
GCC does not provide a way to remove segment qualifiers, which is needed
to use typeof() to create local instances of the per-CPU variable. For
this reason, do not declare per-CPU variables with the segment
qualifier; instead, apply the qualifier with a cast at each access.

Uros: Improve compiler support detection and update the patch
to the current mainline.

Signed-off-by: Nadav Amit <namit@vmware.com>
Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Link: https://lore.kernel.org/r/20231004145137.86537-4-ubizjak@gmail.com
arch/x86/include/asm/percpu.h
arch/x86/include/asm/preempt.h

index 20624b80f89041fdb8ac4bbdbdac9b60b66e9682..da451202a1b9bb536ab7c98217d8b7e72ececcd9 100644 (file)
 #include <linux/stringify.h>
 
 #ifdef CONFIG_SMP
+
+#ifdef CONFIG_CC_HAS_NAMED_AS
+
+#ifdef CONFIG_X86_64
+#define __percpu_seg_override  __seg_gs
+#else
+#define __percpu_seg_override  __seg_fs
+#endif
+
+#define __percpu_prefix                ""
+
+#else /* CONFIG_CC_HAS_NAMED_AS */
+
+#define __percpu_seg_override
 #define __percpu_prefix                "%%"__stringify(__percpu_seg)":"
+
+#endif /* CONFIG_CC_HAS_NAMED_AS */
+
+#define __force_percpu_prefix  "%%"__stringify(__percpu_seg)":"
 #define __my_cpu_offset                this_cpu_read(this_cpu_off)
 
 /*
  * Compared to the generic __my_cpu_offset version, the following
  * saves one instruction and avoids clobbering a temp register.
  */
-#define arch_raw_cpu_ptr(ptr)                          \
-({                                                     \
-       unsigned long tcp_ptr__;                        \
-       asm ("add " __percpu_arg(1) ", %0"              \
-            : "=r" (tcp_ptr__)                         \
-            : "m" (this_cpu_off), "0" (ptr));          \
-       (typeof(*(ptr)) __kernel __force *)tcp_ptr__;   \
+#define arch_raw_cpu_ptr(ptr)                                  \
+({                                                             \
+       unsigned long tcp_ptr__;                                \
+       asm ("add " __percpu_arg(1) ", %0"                      \
+            : "=r" (tcp_ptr__)                                 \
+            : "m" (__my_cpu_var(this_cpu_off)), "0" (ptr));    \
+       (typeof(*(ptr)) __kernel __force *)tcp_ptr__;           \
 })
-#else
+#else /* CONFIG_SMP */
+#define __percpu_seg_override
 #define __percpu_prefix                ""
-#endif
+#define __force_percpu_prefix  ""
+#endif /* CONFIG_SMP */
 
+#define __my_cpu_type(var)     typeof(var) __percpu_seg_override
+#define __my_cpu_ptr(ptr)      (__my_cpu_type(*ptr) *)(uintptr_t)(ptr)
+#define __my_cpu_var(var)      (*__my_cpu_ptr(&var))
 #define __percpu_arg(x)                __percpu_prefix "%" #x
+#define __force_percpu_arg(x)  __force_percpu_prefix "%" #x
 
 /*
  * Initialized pointers to per-cpu variables needed for the boot
@@ -107,14 +131,14 @@ do {                                                                      \
                (void)pto_tmp__;                                        \
        }                                                               \
        asm qual(__pcpu_op2_##size(op, "%[val]", __percpu_arg([var]))   \
-           : [var] "+m" (_var)                                         \
+           : [var] "+m" (__my_cpu_var(_var))                           \
            : [val] __pcpu_reg_imm_##size(pto_val__));                  \
 } while (0)
 
 #define percpu_unary_op(size, qual, op, _var)                          \
 ({                                                                     \
        asm qual (__pcpu_op1_##size(op, __percpu_arg([var]))            \
-           : [var] "+m" (_var));                                       \
+           : [var] "+m" (__my_cpu_var(_var)));                         \
 })
 
 /*
@@ -144,14 +168,14 @@ do {                                                                      \
        __pcpu_type_##size pfo_val__;                                   \
        asm qual (__pcpu_op2_##size(op, __percpu_arg([var]), "%[val]")  \
            : [val] __pcpu_reg_##size("=", pfo_val__)                   \
-           : [var] "m" (_var));                                        \
+           : [var] "m" (__my_cpu_var(_var)));                          \
        (typeof(_var))(unsigned long) pfo_val__;                        \
 })
 
 #define percpu_stable_op(size, op, _var)                               \
 ({                                                                     \
        __pcpu_type_##size pfo_val__;                                   \
-       asm(__pcpu_op2_##size(op, __percpu_arg(P[var]), "%[val]")       \
+       asm(__pcpu_op2_##size(op, __force_percpu_arg(P[var]), "%[val]") \
            : [val] __pcpu_reg_##size("=", pfo_val__)                   \
            : [var] "p" (&(_var)));                                     \
        (typeof(_var))(unsigned long) pfo_val__;                        \
@@ -166,7 +190,7 @@ do {                                                                        \
        asm qual (__pcpu_op2_##size("xadd", "%[tmp]",                   \
                                     __percpu_arg([var]))               \
                  : [tmp] __pcpu_reg_##size("+", paro_tmp__),           \
-                   [var] "+m" (_var)                                   \
+                   [var] "+m" (__my_cpu_var(_var))                     \
                  : : "memory");                                        \
        (typeof(_var))(unsigned long) (paro_tmp__ + _val);              \
 })
@@ -187,7 +211,7 @@ do {                                                                        \
                                    __percpu_arg([var]))                \
                  "\n\tjnz 1b"                                          \
                  : [oval] "=&a" (pxo_old__),                           \
-                   [var] "+m" (_var)                                   \
+                   [var] "+m" (__my_cpu_var(_var))                     \
                  : [nval] __pcpu_reg_##size(, pxo_new__)               \
                  : "memory");                                          \
        (typeof(_var))(unsigned long) pxo_old__;                        \
@@ -204,7 +228,7 @@ do {                                                                        \
        asm qual (__pcpu_op2_##size("cmpxchg", "%[nval]",               \
                                    __percpu_arg([var]))                \
                  : [oval] "+a" (pco_old__),                            \
-                   [var] "+m" (_var)                                   \
+                   [var] "+m" (__my_cpu_var(_var))                     \
                  : [nval] __pcpu_reg_##size(, pco_new__)               \
                  : "memory");                                          \
        (typeof(_var))(unsigned long) pco_old__;                        \
@@ -221,7 +245,7 @@ do {                                                                        \
                  CC_SET(z)                                             \
                  : CC_OUT(z) (success),                                \
                    [oval] "+a" (pco_old__),                            \
-                   [var] "+m" (_var)                                   \
+                   [var] "+m" (__my_cpu_var(_var))                     \
                  : [nval] __pcpu_reg_##size(, pco_new__)               \
                  : "memory");                                          \
        if (unlikely(!success))                                         \
@@ -244,7 +268,7 @@ do {                                                                        \
                                                                        \
        asm qual (ALTERNATIVE("call this_cpu_cmpxchg8b_emu",            \
                              "cmpxchg8b " __percpu_arg([var]), X86_FEATURE_CX8) \
-                 : [var] "+m" (_var),                                  \
+                 : [var] "+m" (__my_cpu_var(_var)),                    \
                    "+a" (old__.low),                                   \
                    "+d" (old__.high)                                   \
                  : "b" (new__.low),                                    \
@@ -276,7 +300,7 @@ do {                                                                        \
                              "cmpxchg8b " __percpu_arg([var]), X86_FEATURE_CX8) \
                  CC_SET(z)                                             \
                  : CC_OUT(z) (success),                                \
-                   [var] "+m" (_var),                                  \
+                   [var] "+m" (__my_cpu_var(_var)),                    \
                    "+a" (old__.low),                                   \
                    "+d" (old__.high)                                   \
                  : "b" (new__.low),                                    \
@@ -313,7 +337,7 @@ do {                                                                        \
                                                                        \
        asm qual (ALTERNATIVE("call this_cpu_cmpxchg16b_emu",           \
                              "cmpxchg16b " __percpu_arg([var]), X86_FEATURE_CX16) \
-                 : [var] "+m" (_var),                                  \
+                 : [var] "+m" (__my_cpu_var(_var)),                    \
                    "+a" (old__.low),                                   \
                    "+d" (old__.high)                                   \
                  : "b" (new__.low),                                    \
@@ -345,7 +369,7 @@ do {                                                                        \
                              "cmpxchg16b " __percpu_arg([var]), X86_FEATURE_CX16) \
                  CC_SET(z)                                             \
                  : CC_OUT(z) (success),                                \
-                   [var] "+m" (_var),                                  \
+                   [var] "+m" (__my_cpu_var(_var)),                    \
                    "+a" (old__.low),                                   \
                    "+d" (old__.high)                                   \
                  : "b" (new__.low),                                    \
@@ -494,7 +518,7 @@ static inline bool x86_this_cpu_variable_test_bit(int nr,
        asm volatile("btl "__percpu_arg(2)",%1"
                        CC_SET(c)
                        : CC_OUT(c) (oldbit)
-                       : "m" (*(unsigned long __percpu *)addr), "Ir" (nr));
+                       : "m" (*__my_cpu_ptr((unsigned long __percpu *)(addr))), "Ir" (nr));
 
        return oldbit;
 }
index 4527e1430c6dc13dbfd0f49a4edd26bf9fce007d..4b2a35d8d56a837e38017f2331307fe767b5e12f 100644 (file)
@@ -92,7 +92,7 @@ static __always_inline void __preempt_count_sub(int val)
  */
 static __always_inline bool __preempt_count_dec_and_test(void)
 {
-       return GEN_UNARY_RMWcc("decl", pcpu_hot.preempt_count, e,
+       return GEN_UNARY_RMWcc("decl", __my_cpu_var(pcpu_hot.preempt_count), e,
                               __percpu_arg([var]));
 }