riscv: Fix fully ordered LR/SC xchg[8|16]() implementations
author		Alexandre Ghiti <alexghiti@rivosinc.com>
		Thu, 30 May 2024 14:55:46 +0000 (16:55 +0200)
committer	Palmer Dabbelt <palmer@rivosinc.com>
		Thu, 30 May 2024 16:43:14 +0000 (09:43 -0700)
The fully ordered versions of xchg[8|16]() using LR/SC lack the
memory barriers needed to guarantee full ordering: arch_xchg() only
passes its ".aqrl" suffix to the AMO paths, so the masked LR/SC
sequence used for 8-bit and 16-bit accesses carries no ordering
annotations or fences at all.

Fix this by matching what is already implemented in the fully ordered
versions of cmpxchg() using LR/SC.
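
Concretely, the fix mirrors cmpxchg(): the SC instruction gets a ".rl"
suffix and a full fence is appended. As a rough sketch, the fully
ordered 8/16-bit exchange after this patch boils down to the sequence
below (assuming RISCV_FULL_BARRIER expands to "fence rw, rw", as in
asm/fence.h; operand names are illustrative):

	0:	lr.w     %0, (ptr32)      # load-reserve the aligned 32-bit word
		and      %1, %0, ~mask    # clear the target byte/halfword lane
		or       %1, %1, newval   # merge in the shifted new value
		sc.w.rl  %1, %1, (ptr32)  # store-conditional, release semantics
		bnez     %1, 0b           # retry if the reservation was lost
		fence    rw, rw           # trailing full barrier => fully ordered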

Suggested-by: Andrea Parri <parri.andrea@gmail.com>
Reported-by: Andrea Parri <parri.andrea@gmail.com>
Closes: https://lore.kernel.org/linux-riscv/ZlYbupL5XgzgA0MX@andrea/T/#u
Fixes: a8ed2b7a2c13 ("riscv/cmpxchg: Implement xchg for variables of size 1 and 2")
Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
Reviewed-by: Andrea Parri <parri.andrea@gmail.com>
Link: https://lore.kernel.org/r/20240530145546.394248-1-alexghiti@rivosinc.com
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
arch/riscv/include/asm/cmpxchg.h

index ddb002ed89dea0dbf4956a5161a0f426bcba557f..808b4c78462e5aa363fc55d7871497efa23d57b4 100644
@@ -10,7 +10,7 @@
 
 #include <asm/fence.h>
 
-#define __arch_xchg_masked(prepend, append, r, p, n)                   \
+#define __arch_xchg_masked(sc_sfx, prepend, append, r, p, n)           \
 ({                                                                     \
        u32 *__ptr32b = (u32 *)((ulong)(p) & ~0x3);                     \
        ulong __s = ((ulong)(p) & (0x4 - sizeof(*p))) * BITS_PER_BYTE;  \
@@ -25,7 +25,7 @@
               "0:      lr.w %0, %2\n"                                  \
               "        and  %1, %0, %z4\n"                             \
               "        or   %1, %1, %z3\n"                             \
-              "        sc.w %1, %1, %2\n"                              \
+              "        sc.w" sc_sfx " %1, %1, %2\n"                    \
               "        bnez %1, 0b\n"                                  \
               append                                                   \
               : "=&r" (__retx), "=&r" (__rc), "+A" (*(__ptr32b))       \
@@ -46,7 +46,8 @@
                : "memory");                                            \
 })
 
-#define _arch_xchg(ptr, new, sfx, prepend, append)                     \
+#define _arch_xchg(ptr, new, sc_sfx, swap_sfx, prepend,                        \
+                  sc_append, swap_append)                              \
 ({                                                                     \
        __typeof__(ptr) __ptr = (ptr);                                  \
        __typeof__(*(__ptr)) __new = (new);                             \
@@ -55,15 +56,15 @@
        switch (sizeof(*__ptr)) {                                       \
        case 1:                                                         \
        case 2:                                                         \
-               __arch_xchg_masked(prepend, append,                     \
+               __arch_xchg_masked(sc_sfx, prepend, sc_append,          \
                                   __ret, __ptr, __new);                \
                break;                                                  \
        case 4:                                                         \
-               __arch_xchg(".w" sfx, prepend, append,                  \
+               __arch_xchg(".w" swap_sfx, prepend, swap_append,        \
                              __ret, __ptr, __new);                     \
                break;                                                  \
        case 8:                                                         \
-               __arch_xchg(".d" sfx, prepend, append,                  \
+               __arch_xchg(".d" swap_sfx, prepend, swap_append,        \
                              __ret, __ptr, __new);                     \
                break;                                                  \
        default:                                                        \
@@ -73,16 +74,17 @@
 })
 
 #define arch_xchg_relaxed(ptr, x)                                      \
-       _arch_xchg(ptr, x, "", "", "")
+       _arch_xchg(ptr, x, "", "", "", "", "")
 
 #define arch_xchg_acquire(ptr, x)                                      \
-       _arch_xchg(ptr, x, "", "", RISCV_ACQUIRE_BARRIER)
+       _arch_xchg(ptr, x, "", "", "",                                  \
+                  RISCV_ACQUIRE_BARRIER, RISCV_ACQUIRE_BARRIER)
 
 #define arch_xchg_release(ptr, x)                                      \
-       _arch_xchg(ptr, x, "", RISCV_RELEASE_BARRIER, "")
+       _arch_xchg(ptr, x, "", "", RISCV_RELEASE_BARRIER, "", "")
 
 #define arch_xchg(ptr, x)                                              \
-       _arch_xchg(ptr, x, ".aqrl", "", "")
+       _arch_xchg(ptr, x, ".rl", ".aqrl", "", RISCV_FULL_BARRIER, "")
 
 #define xchg32(ptr, x)                                                 \
 ({                                                                     \
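
For reference, the argument split introduced above maps out as follows:
sc_sfx/sc_append drive the masked LR/SC path (sizes 1 and 2), while
swap_sfx/swap_append drive the AMO swap path (sizes 4 and 8):

	variant             sc_sfx  swap_sfx  prepend          sc_append        swap_append
	arch_xchg_relaxed   ""      ""        -                -                -
	arch_xchg_acquire   ""      ""        -                ACQUIRE_BARRIER  ACQUIRE_BARRIER
	arch_xchg_release   ""      ""        RELEASE_BARRIER  -                -
	arch_xchg           ".rl"   ".aqrl"   -                FULL_BARRIER     -

Only the LR/SC path needs the trailing fence: an AMO with both the aq
and rl bits set (amoswap.{w,d}.aqrl) is already fully ordered, so
swap_append stays empty for arch_xchg().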