locking/atomic/x86: Introduce arch_try_cmpxchg64() for !CONFIG_X86_CMPXCHG64
author Uros Bizjak <ubizjak@gmail.com>
Mon, 8 Apr 2024 09:13:58 +0000 (11:13 +0200)
committer Ingo Molnar <mingo@kernel.org>
Tue, 9 Apr 2024 07:51:03 +0000 (09:51 +0200)
Commit:

  6d12c8d308e68 ("percpu: Wire up cmpxchg128")

improved the emulated cmpxchg8b_emu() library function to return
success/failure in the ZF flag.

Define arch_try_cmpxchg64() for !CONFIG_X86_CMPXCHG64 targets
to override the generic arch_try_cmpxchg64() with an optimized,
target-specific implementation that handles the ZF flag.

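Without this override, the generic try_cmpxchg64() fallback derives
success/failure by comparing the value returned from arch_cmpxchg64()
against the expected one, which on 32-bit x86 costs an extra 64-bit
compare at every call site. A simplified sketch of that generic pattern
(not the exact kernel macro; the function name is illustrative):

   /* Generic-style fallback: success is reconstructed with a 64-bit compare. */
   static __always_inline bool generic_try_cmpxchg64(volatile u64 *ptr, u64 *oldp, u64 new)
   {
           u64 old = *oldp;
           u64 ret = arch_cmpxchg64(ptr, old, new);  /* ends up calling cmpxchg8b_emu */

           if (unlikely(ret != old))                 /* the xor/xor/or/je sequence below */
                   *oldp = ret;

           return likely(ret == old);
   }

The ZF-flag based implementation avoids this extra comparison entirely.
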
The assembly code at the call sites improves from:

   bf56d: e8 fc ff ff ff        call   cmpxchg8b_emu
   bf572: 8b 74 24 28           mov    0x28(%esp),%esi
   bf576: 89 c3                 mov    %eax,%ebx
   bf578: 89 d1                 mov    %edx,%ecx
   bf57a: 8b 7c 24 2c           mov    0x2c(%esp),%edi
   bf57e: 89 f0                 mov    %esi,%eax
   bf580: 89 fa                 mov    %edi,%edx
   bf582: 31 d8                 xor    %ebx,%eax
   bf584: 31 ca                 xor    %ecx,%edx
   bf586: 09 d0                 or     %edx,%eax
   bf588: 0f 84 e3 01 00 00     je     bf771 <...>

to:

   bf572: e8 fc ff ff ff        call   cmpxchg8b_emu
   bf577: 0f 84 b6 01 00 00     je     bf733 <...>

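For reference, a typical call site uses try_cmpxchg64() in an update
loop of this shape (illustrative sketch; the function and variable
names are made up):

   /* Illustrative caller: atomically increment a 64-bit counter on 32-bit x86. */
   static void counter_inc(u64 *counter)
   {
           u64 old = READ_ONCE(*counter);

           /* On failure, try_cmpxchg64() refreshes 'old', so the loop simply retries. */
           do {
           } while (!try_cmpxchg64(counter, &old, old + 1));
   }

With the ZF-based implementation the success check can compile down to
the single 'je' following the cmpxchg8b_emu call shown above.
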
Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Link: https://lore.kernel.org/r/20240408091547.90111-4-ubizjak@gmail.com
arch/x86/include/asm/cmpxchg_32.h

index fe40d0681ea8dc1aa30ad4a3032c9c60f646ac29..9e0d330dd5d0add6ab74b46db715b64a623740a0 100644
@@ -122,6 +122,34 @@ static __always_inline u64 arch_cmpxchg64_local(volatile u64 *ptr, u64 old, u64
 }
 #define arch_cmpxchg64_local arch_cmpxchg64_local
 
+#define __arch_try_cmpxchg64_emu(_ptr, _oldp, _new)                    \
+({                                                                     \
+       union __u64_halves o = { .full = *(_oldp), },                   \
+                          n = { .full = (_new), };                     \
+       bool ret;                                                       \
+                                                                       \
+       asm volatile(ALTERNATIVE(LOCK_PREFIX_HERE                       \
+                                "call cmpxchg8b_emu",                  \
+                                "lock; cmpxchg8b %[ptr]", X86_FEATURE_CX8) \
+                    CC_SET(e)                                          \
+                    : CC_OUT(e) (ret),                                 \
+                      [ptr] "+m" (*(_ptr)),                            \
+                      "+a" (o.low), "+d" (o.high)                      \
+                    : "b" (n.low), "c" (n.high), "S" (_ptr)            \
+                    : "memory");                                       \
+                                                                       \
+       if (unlikely(!ret))                                             \
+               *(_oldp) = o.full;                                      \
+                                                                       \
+       likely(ret);                                                    \
+})
+
+static __always_inline bool arch_try_cmpxchg64(volatile u64 *ptr, u64 *oldp, u64 new)
+{
+       return __arch_try_cmpxchg64_emu(ptr, oldp, new);
+}
+#define arch_try_cmpxchg64 arch_try_cmpxchg64
+
 #endif
 
 #define system_has_cmpxchg64()         boot_cpu_has(X86_FEATURE_CX8)