x86/percpu: Do not clobber %rsi in percpu_{try_,}cmpxchg{64,128}_op
author Uros Bizjak <ubizjak@gmail.com>
Mon, 18 Sep 2023 15:14:10 +0000 (17:14 +0200)
committer Ingo Molnar <mingo@kernel.org>
Thu, 21 Sep 2023 07:35:50 +0000 (09:35 +0200)
The fallback alternative uses the %rsi register to manually load the pointer
to the percpu variable before the call to the emulation function.
This is suboptimal, because the load is hidden from the compiler.

Move the load of %rsi outside the inline asm, so the compiler can
reuse the value. The code in slub.o improves from:

    55ac: 49 8b 3c 24           mov    (%r12),%rdi
    55b0: 48 8d 4a 40           lea    0x40(%rdx),%rcx
    55b4: 49 8b 1c 07           mov    (%r15,%rax,1),%rbx
    55b8: 4c 89 f8              mov    %r15,%rax
    55bb: 48 8d 37              lea    (%rdi),%rsi
    55be: e8 00 00 00 00        callq  55c3 <...>
55bf: R_X86_64_PLT32 this_cpu_cmpxchg16b_emu-0x4
    55c3: 75 a3                 jne    5568 <...>
    55c5: ...

 0000000000000000 <.altinstr_replacement>:
   5: 65 48 0f c7 0f        cmpxchg16b %gs:(%rdi)

to:

    55ac: 49 8b 34 24           mov    (%r12),%rsi
    55b0: 48 8d 4a 40           lea    0x40(%rdx),%rcx
    55b4: 49 8b 1c 07           mov    (%r15,%rax,1),%rbx
    55b8: 4c 89 f8              mov    %r15,%rax
    55bb: e8 00 00 00 00        callq  55c0 <...>
55bc: R_X86_64_PLT32 this_cpu_cmpxchg16b_emu-0x4
    55c0: 75 a6                 jne    5568 <...>
    55c2: ...

Where the alternative replacement instruction now uses %rsi:

 0000000000000000 <.altinstr_replacement>:
   5: 65 48 0f c7 0e        cmpxchg16b %gs:(%rsi)

The instruction (effectively a reg-reg move) at 55bb: in the original
assembly is removed. Also, both the CALL and replacement CMPXCHG16B
are 5 bytes long, removing the need for NOPs in the asm code.

Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Link: https://lore.kernel.org/r/20230918151452.62344-1-ubizjak@gmail.com
arch/x86/include/asm/percpu.h

index a87db6140fe2ae8774b951368c653afc682b22b3..20624b80f89041fdb8ac4bbdbdac9b60b66e9682 100644 (file)
@@ -242,14 +242,15 @@ do {                                                                      \
        old__.var = _oval;                                              \
        new__.var = _nval;                                              \
                                                                        \
-       asm qual (ALTERNATIVE("leal %P[var], %%esi; call this_cpu_cmpxchg8b_emu", \
+       asm qual (ALTERNATIVE("call this_cpu_cmpxchg8b_emu",            \
                              "cmpxchg8b " __percpu_arg([var]), X86_FEATURE_CX8) \
                  : [var] "+m" (_var),                                  \
                    "+a" (old__.low),                                   \
                    "+d" (old__.high)                                   \
                  : "b" (new__.low),                                    \
-                   "c" (new__.high)                                    \
-                 : "memory", "esi");                                   \
+                   "c" (new__.high),                                   \
+                   "S" (&(_var))                                       \
+                 : "memory");                                          \
                                                                        \
        old__.var;                                                      \
 })
@@ -271,7 +272,7 @@ do {                                                                        \
        old__.var = *_oval;                                             \
        new__.var = _nval;                                              \
                                                                        \
-       asm qual (ALTERNATIVE("leal %P[var], %%esi; call this_cpu_cmpxchg8b_emu", \
+       asm qual (ALTERNATIVE("call this_cpu_cmpxchg8b_emu",            \
                              "cmpxchg8b " __percpu_arg([var]), X86_FEATURE_CX8) \
                  CC_SET(z)                                             \
                  : CC_OUT(z) (success),                                \
@@ -279,8 +280,9 @@ do {                                                                        \
                    "+a" (old__.low),                                   \
                    "+d" (old__.high)                                   \
                  : "b" (new__.low),                                    \
-                   "c" (new__.high)                                    \
-                 : "memory", "esi");                                   \
+                   "c" (new__.high),                                   \
+                   "S" (&(_var))                                       \
+                 : "memory");                                          \
        if (unlikely(!success))                                         \
                *_oval = old__.var;                                     \
        likely(success);                                                \
@@ -309,14 +311,15 @@ do {                                                                      \
        old__.var = _oval;                                              \
        new__.var = _nval;                                              \
                                                                        \
-       asm qual (ALTERNATIVE("leaq %P[var], %%rsi; call this_cpu_cmpxchg16b_emu", \
+       asm qual (ALTERNATIVE("call this_cpu_cmpxchg16b_emu",           \
                              "cmpxchg16b " __percpu_arg([var]), X86_FEATURE_CX16) \
                  : [var] "+m" (_var),                                  \
                    "+a" (old__.low),                                   \
                    "+d" (old__.high)                                   \
                  : "b" (new__.low),                                    \
-                   "c" (new__.high)                                    \
-                 : "memory", "rsi");                                   \
+                   "c" (new__.high),                                   \
+                   "S" (&(_var))                                       \
+                 : "memory");                                          \
                                                                        \
        old__.var;                                                      \
 })
@@ -338,7 +341,7 @@ do {                                                                        \
        old__.var = *_oval;                                             \
        new__.var = _nval;                                              \
                                                                        \
-       asm qual (ALTERNATIVE("leaq %P[var], %%rsi; call this_cpu_cmpxchg16b_emu", \
+       asm qual (ALTERNATIVE("call this_cpu_cmpxchg16b_emu",           \
                              "cmpxchg16b " __percpu_arg([var]), X86_FEATURE_CX16) \
                  CC_SET(z)                                             \
                  : CC_OUT(z) (success),                                \
@@ -346,8 +349,9 @@ do {                                                                        \
                    "+a" (old__.low),                                   \
                    "+d" (old__.high)                                   \
                  : "b" (new__.low),                                    \
-                   "c" (new__.high)                                    \
-                 : "memory", "rsi");                                   \
+                   "c" (new__.high),                                   \
+                   "S" (&(_var))                                       \
+                 : "memory");                                          \
        if (unlikely(!success))                                         \
                *_oval = old__.var;                                     \
        likely(success);                                                \