locking/atomic/x86: Introduce arch_atomic64_try_cmpxchg() to x86_32
authorUros Bizjak <ubizjak@gmail.com>
Wed, 10 Apr 2024 06:29:33 +0000 (08:29 +0200)
committerIngo Molnar <mingo@kernel.org>
Wed, 10 Apr 2024 13:04:54 +0000 (15:04 +0200)
Introduce arch_atomic64_try_cmpxchg() for 32-bit targets so they use an
optimized, target-specific implementation instead of the generic one.
This implementation eliminates the dual-word compare after the
cmpxchg8b instruction and improves the generated asm code from:

    2273: f0 0f c7 0f           lock cmpxchg8b (%edi)
    2277: 8b 74 24 2c           mov    0x2c(%esp),%esi
    227b: 89 d3                 mov    %edx,%ebx
    227d: 89 c2                 mov    %eax,%edx
    227f: 89 5c 24 10           mov    %ebx,0x10(%esp)
    2283: 8b 7c 24 30           mov    0x30(%esp),%edi
    2287: 89 44 24 1c           mov    %eax,0x1c(%esp)
    228b: 31 f2                 xor    %esi,%edx
    228d: 89 d0                 mov    %edx,%eax
    228f: 89 da                 mov    %ebx,%edx
    2291: 31 fa                 xor    %edi,%edx
    2293: 09 d0                 or     %edx,%eax
    2295: 0f 85 a5 00 00 00     jne    2340 <...>

to:

    2270: f0 0f c7 0f           lock cmpxchg8b (%edi)
    2274: 0f 85 a6 00 00 00     jne    2320 <...>

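To illustrate where the dual-word compare comes from and why it
disappears, here is a minimal sketch of the two caller patterns. It
assumes the kernel context (atomic64_t, s64 and the arch_atomic64_*()
ops touched by this patch); the two helpers themselves are hypothetical
and exist only for illustration, they are not part of the patch:

    /*
     * Illustration only -- hypothetical helpers, not part of this patch.
     * Both add 'i' to *v, but the try_cmpxchg variant needs no explicit
     * 64-bit compare: success/failure comes from the ZF set by
     * cmpxchg8b, and the expected value is updated in place on failure.
     */
    static __always_inline void atomic64_add_cmpxchg(atomic64_t *v, s64 i)
    {
            s64 cur = arch_atomic64_read(v);
            s64 old;

            /* cmpxchg loop: an explicit dual-word compare on every pass */
            while ((old = arch_atomic64_cmpxchg(v, cur, cur + i)) != cur)
                    cur = old;
    }

    static __always_inline void atomic64_add_try_cmpxchg(atomic64_t *v, s64 i)
    {
            s64 old = arch_atomic64_read(v);

            /* try_cmpxchg loop: the branch can test the flags directly */
            do {
            } while (!arch_atomic64_try_cmpxchg(v, &old, old + i));
    }

In the try_cmpxchg form the compiler can branch on the flags produced by
cmpxchg8b itself, so the xor/or sequence that reassembled and compared
the two 32-bit halves in the first listing is no longer needed.
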
Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Link: https://lore.kernel.org/r/20240410062957.322614-1-ubizjak@gmail.com
arch/x86/include/asm/atomic64_32.h

index 3486d91b8595f1dffa03d718dc671dc8000c8770..ec217aaf41eb8028396a979a51f6c783771a3709 100644 (file)
@@ -61,12 +61,18 @@ ATOMIC64_DECL(add_unless);
 #undef __ATOMIC64_DECL
 #undef ATOMIC64_EXPORT
 
-static __always_inline s64 arch_atomic64_cmpxchg(atomic64_t *v, s64 o, s64 n)
+static __always_inline s64 arch_atomic64_cmpxchg(atomic64_t *v, s64 old, s64 new)
 {
-       return arch_cmpxchg64(&v->counter, o, n);
+       return arch_cmpxchg64(&v->counter, old, new);
 }
 #define arch_atomic64_cmpxchg arch_atomic64_cmpxchg
 
+static __always_inline bool arch_atomic64_try_cmpxchg(atomic64_t *v, s64 *old, s64 new)
+{
+       return arch_try_cmpxchg64(&v->counter, old, new);
+}
+#define arch_atomic64_try_cmpxchg arch_atomic64_try_cmpxchg
+
 static __always_inline s64 arch_atomic64_xchg(atomic64_t *v, s64 n)
 {
        s64 o;