x86/percpu: Define {raw,this}_cpu_try_cmpxchg{64,128}
authorUros Bizjak <ubizjak@gmail.com>
Wed, 6 Sep 2023 18:58:44 +0000 (20:58 +0200)
committerIngo Molnar <mingo@kernel.org>
Fri, 15 Sep 2023 11:16:35 +0000 (13:16 +0200)
Define target-specific {raw,this}_cpu_try_cmpxchg64() and
{raw,this}_cpu_try_cmpxchg128() macros. These definitions override
the generic fallback definitions and enable target-specific
optimized implementations.

Several places in mm/slub.o improve from e.g.:

    53bc: 48 8d 4f 40           lea    0x40(%rdi),%rcx
    53c0: 48 89 fa              mov    %rdi,%rdx
    53c3: 49 8b 5c 05 00        mov    0x0(%r13,%rax,1),%rbx
    53c8: 4c 89 e8              mov    %r13,%rax
    53cb: 49 8d 30              lea    (%r8),%rsi
    53ce: e8 00 00 00 00        call   53d3 <...>
53cf: R_X86_64_PLT32 this_cpu_cmpxchg16b_emu-0x4
    53d3: 48 31 d7              xor    %rdx,%rdi
    53d6: 4c 31 e8              xor    %r13,%rax
    53d9: 48 09 c7              or     %rax,%rdi
    53dc: 75 ae                 jne    538c <...>

to:

    53bc: 48 8d 4a 40           lea    0x40(%rdx),%rcx
    53c0: 49 8b 1c 07           mov    (%r15,%rax,1),%rbx
    53c4: 4c 89 f8              mov    %r15,%rax
    53c7: 48 8d 37              lea    (%rdi),%rsi
    53ca: e8 00 00 00 00        call   53cf <...>
53cb: R_X86_64_PLT32 this_cpu_cmpxchg16b_emu-0x4
    53cf: 75 bb                 jne    538c <...>

reducing the size of mm/slub.o by 80 bytes:

   text    data     bss     dec     hex filename
  39758    5337    4208   49303    c097 slub-new.o
  39838    5337    4208   49383    c0e7 slub-old.o

Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: https://lore.kernel.org/r/20230906185941.53527-1-ubizjak@gmail.com
arch/x86/include/asm/percpu.h

index 34734d73046397058357e113df1b26a8ce75f24b..4c3641927f397cc5f26c7db462c4d487f7a5a6ea 100644 (file)
@@ -237,12 +237,47 @@ do {                                                                      \
 
 #define raw_cpu_cmpxchg64(pcp, oval, nval)     percpu_cmpxchg64_op(8,         , pcp, oval, nval)
 #define this_cpu_cmpxchg64(pcp, oval, nval)    percpu_cmpxchg64_op(8, volatile, pcp, oval, nval)
+
+/*
+ * percpu_try_cmpxchg64_op() - try-cmpxchg on an 8-byte per-cpu variable
+ * (32-bit kernels).
+ *
+ * Compares the per-cpu variable against *_ovalp and, on match, installs
+ * _nval. On failure the freshly read value is stored back into *_ovalp,
+ * so the caller can retry without a separate reload. The statement
+ * expression evaluates to the (likely-annotated) success flag.
+ *
+ * CPUs without X86_FEATURE_CX8 take the ALTERNATIVE path through
+ * this_cpu_cmpxchg8b_emu, which receives the variable address in %esi --
+ * hence the explicit "esi" clobber.
+ */
+#define percpu_try_cmpxchg64_op(size, qual, _var, _ovalp, _nval)       \
+({                                                                     \
+       bool success;                                                   \
+       u64 *_oval = (u64 *)(_ovalp);                                   \
+       union {                                                         \
+               u64 var;                                                \
+               struct {                                                \
+                       u32 low, high;                                  \
+               };                                                      \
+       } old__, new__;                                                 \
+                                                                       \
+       old__.var = *_oval;                                             \
+       new__.var = _nval;                                              \
+                                                                       \
+       asm qual (ALTERNATIVE("leal %P[var], %%esi; call this_cpu_cmpxchg8b_emu", \
+                             "cmpxchg8b " __percpu_arg([var]), X86_FEATURE_CX8) \
+                 CC_SET(z)                                             \
+                 : CC_OUT(z) (success),                                \
+                   [var] "+m" (_var),                                  \
+                   "+a" (old__.low),                                   \
+                   "+d" (old__.high)                                   \
+                 : "b" (new__.low),                                    \
+                   "c" (new__.high)                                    \
+                 : "memory", "esi");                                   \
+       if (unlikely(!success))                                         \
+               *_oval = old__.var;                                     \
+       likely(success);                                                \
+})
+
+#define raw_cpu_try_cmpxchg64(pcp, ovalp, nval)                percpu_try_cmpxchg64_op(8,         , pcp, ovalp, nval)
+#define this_cpu_try_cmpxchg64(pcp, ovalp, nval)       percpu_try_cmpxchg64_op(8, volatile, pcp, ovalp, nval)
 #endif
 
 #ifdef CONFIG_X86_64
 #define raw_cpu_cmpxchg64(pcp, oval, nval)     percpu_cmpxchg_op(8,         , pcp, oval, nval);
 #define this_cpu_cmpxchg64(pcp, oval, nval)    percpu_cmpxchg_op(8, volatile, pcp, oval, nval);
 
+/*
+ * On 64-bit, 8-byte try-cmpxchg maps directly onto the generic
+ * percpu_try_cmpxchg_op(). No trailing semicolon: these function-like
+ * macros expand to a value and must remain usable inside expressions,
+ * e.g. if (this_cpu_try_cmpxchg64(...)) -- matching the semicolon-free
+ * 32-bit definitions above.
+ */
+#define raw_cpu_try_cmpxchg64(pcp, ovalp, nval)                percpu_try_cmpxchg_op(8,         , pcp, ovalp, nval)
+#define this_cpu_try_cmpxchg64(pcp, ovalp, nval)       percpu_try_cmpxchg_op(8, volatile, pcp, ovalp, nval)
+
 #define percpu_cmpxchg128_op(size, qual, _var, _oval, _nval)           \
 ({                                                                     \
        union {                                                         \
@@ -269,6 +304,38 @@ do {                                                                       \
 
 #define raw_cpu_cmpxchg128(pcp, oval, nval)    percpu_cmpxchg128_op(16,         , pcp, oval, nval)
 #define this_cpu_cmpxchg128(pcp, oval, nval)   percpu_cmpxchg128_op(16, volatile, pcp, oval, nval)
+
+/*
+ * percpu_try_cmpxchg128_op() - try-cmpxchg on a 16-byte per-cpu variable
+ * (64-bit kernels).
+ *
+ * Compares the per-cpu variable against *_ovalp and, on match, installs
+ * _nval. On failure the freshly read value is stored back into *_ovalp,
+ * so the caller can retry without a separate reload. The statement
+ * expression evaluates to the (likely-annotated) success flag.
+ *
+ * CPUs without X86_FEATURE_CX16 take the ALTERNATIVE path through
+ * this_cpu_cmpxchg16b_emu, which receives the variable address in %rsi --
+ * hence the explicit "rsi" clobber.
+ */
+#define percpu_try_cmpxchg128_op(size, qual, _var, _ovalp, _nval)      \
+({                                                                     \
+       bool success;                                                   \
+       u128 *_oval = (u128 *)(_ovalp);                                 \
+       union {                                                         \
+               u128 var;                                               \
+               struct {                                                \
+                       u64 low, high;                                  \
+               };                                                      \
+       } old__, new__;                                                 \
+                                                                       \
+       old__.var = *_oval;                                             \
+       new__.var = _nval;                                              \
+                                                                       \
+       asm qual (ALTERNATIVE("leaq %P[var], %%rsi; call this_cpu_cmpxchg16b_emu", \
+                             "cmpxchg16b " __percpu_arg([var]), X86_FEATURE_CX16) \
+                 CC_SET(z)                                             \
+                 : CC_OUT(z) (success),                                \
+                   [var] "+m" (_var),                                  \
+                   "+a" (old__.low),                                   \
+                   "+d" (old__.high)                                   \
+                 : "b" (new__.low),                                    \
+                   "c" (new__.high)                                    \
+                 : "memory", "rsi");                                   \
+       if (unlikely(!success))                                         \
+               *_oval = old__.var;                                     \
+       likely(success);                                                \
+})
+
+#define raw_cpu_try_cmpxchg128(pcp, ovalp, nval)       percpu_try_cmpxchg128_op(16,         , pcp, ovalp, nval)
+#define this_cpu_try_cmpxchg128(pcp, ovalp, nval)      percpu_try_cmpxchg128_op(16, volatile, pcp, ovalp, nval)
 #endif
 
 /*