From: Palmer Dabbelt <palmer@rivosinc.com>
Date: Mon, 8 Apr 2024 17:55:03 +0000 (-0700)
Subject: Merge patch series "Rework & improve riscv cmpxchg.h and atomic.h"
X-Git-Url: http://git.maquefel.me/?a=commitdiff_plain;h=300ce44cbe2924aa83330fc5f24e035665f51b03;p=linux.git

Merge patch series "Rework & improve riscv cmpxchg.h and atomic.h"

Leonardo Bras <leobras@redhat.com> says:

While studying riscv's cmpxchg.h file, I got really interested in
understanding how RISC-V asm implemented the different versions of
{cmp,}xchg.

When I understood the pattern, it made sense to me to remove the
duplication and create macros that make it easier to understand what
exactly changes between the versions: instruction suffixes & barriers.
I also did the same kind of work on atomic.h.

After that, I noted both cmpxchg and xchg only accept variables of
size 4 and 8, compared to x86 and arm64 which handle 1, 2, 4 and 8.
Now that the deduplication is done, it is quite straightforward to
implement them for variable sizes 1 and 2, so I did. Guo Ren then
presented me with some possible users :)

I compared the generated asm for a test.c that exercised every changed
function, and could not detect any change for patches 1 + 2 + 3
compared with upstream.

Patches 4 & 5 were compile-tested, merged with guoren/qspinlock_v11,
and booted just fine with qemu -machine virt -append "qspinlock".
(tree: https://gitlab.com/LeoBras/linux/-/commits/guo_qspinlock_v11)

Latest tests happened based on this tree:
https://github.com/guoren83/linux/tree/qspinlock_v12

* b4-shazam-lts:
  riscv/cmpxchg: Implement xchg for variables of size 1 and 2
  riscv/cmpxchg: Implement cmpxchg for variables of size 1 and 2
  riscv/atomic.h : Deduplicate arch_atomic.*
  riscv/cmpxchg: Deduplicate cmpxchg() asm and macros
  riscv/cmpxchg: Deduplicate xchg() asm functions

Link: https://lore.kernel.org/r/20240103163203.72768-2-leobras@redhat.com
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
---

300ce44cbe2924aa83330fc5f24e035665f51b03
diff --cc arch/riscv/include/asm/cmpxchg.h
index 2fee65cc84432,26cea2395aae8..4d23f0c35b949
--- a/arch/riscv/include/asm/cmpxchg.h
+++ b/arch/riscv/include/asm/cmpxchg.h
@@@ -8,27 -8,64 +8,63 @@@
  
  #include <linux/bug.h>
  
 -#include <asm/barrier.h>
  #include <asm/fence.h>
  
- #define __xchg_relaxed(ptr, new, size) \
+ #define __arch_xchg_masked(prepend, append, r, p, n) \
+ ({ \
+         u32 *__ptr32b = (u32 *)((ulong)(p) & ~0x3); \
+         ulong __s = ((ulong)(p) & (0x4 - sizeof(*p))) * BITS_PER_BYTE; \
+         ulong __mask = GENMASK(((sizeof(*p)) * BITS_PER_BYTE) - 1, 0) \
+                         << __s; \
+         ulong __newx = (ulong)(n) << __s; \
+         ulong __retx; \
+         ulong __rc; \
+  \
+         __asm__ __volatile__ ( \
+                prepend \
+                "0: lr.w %0, %2\n" \
+                "   and %1, %0, %z4\n" \
+                "   or  %1, %1, %z3\n" \
+                "   sc.w %1, %1, %2\n" \
+                "   bnez %1, 0b\n" \
+                append \
+                : "=&r" (__retx), "=&r" (__rc), "+A" (*(__ptr32b)) \
+                : "rJ" (__newx), "rJ" (~__mask) \
+                : "memory"); \
+  \
+         r = (__typeof__(*(p)))((__retx & __mask) >> __s); \
+ })
+ 
+ #define __arch_xchg(sfx, prepend, append, r, p, n) \
+ ({ \
+         __asm__ __volatile__ ( \
+                 prepend \
+                 " amoswap" sfx " %0, %2, %1\n" \
+                 append \
+                 : "=r" (r), "+A" (*(p)) \
+                 : "r" (n) \
+                 : "memory"); \
+ })
+ 
+ #define _arch_xchg(ptr, new, sfx, prepend, append) \
  ({ \
          __typeof__(ptr) __ptr = (ptr); \
-         __typeof__(new) __new = (new); \
-         __typeof__(*(ptr)) __ret; \
-         switch (size) { \
+         __typeof__(*(__ptr)) __new = (new); \
+         __typeof__(*(__ptr)) __ret; \
+  \
+         switch (sizeof(*__ptr)) { \
+         case 1: \
+         case 2: \
+                 __arch_xchg_masked(prepend, append, \
+                                    __ret, __ptr, __new); \
+                 break; \
          case 4: \
-                 __asm__ __volatile__ ( \
-                         " amoswap.w %0, %2, %1\n" \
-                         : "=r" (__ret), "+A" (*__ptr) \
-                         : "r" (__new) \
-                         : "memory"); \
+                 __arch_xchg(".w" sfx, prepend, append, \
+                             __ret, __ptr, __new); \
                  break; \
          case 8: \
-                 __asm__ __volatile__ ( \
-                         " amoswap.d %0, %2, %1\n" \
-                         : "=r" (__ret), "+A" (*__ptr) \
-                         : "r" (__new) \
-                         : "memory"); \
+                 __arch_xchg(".d" sfx, prepend, append, \
+                             __ret, __ptr, __new); \
                  break; \
          default: \
                  BUILD_BUG(); \
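
For readers unfamiliar with the masked sub-word technique that
__arch_xchg_masked implements above, here is a stand-alone C sketch of
the same idea using compiler builtins: a compare-exchange loop on the
aligned containing 32-bit word takes the place of the LR.W/SC.W retry
loop. The helper name xchg_u8_masked is hypothetical, the sketch
assumes little-endian byte order (as on RISC-V) and relaxed ordering,
and it illustrates the technique rather than reproducing the kernel's
implementation.

/*
 * Illustrative sketch only: emulate a 1-byte atomic xchg using the
 * aligned 32-bit word that contains it, mirroring __arch_xchg_masked.
 * Assumes little-endian byte order and relaxed memory ordering.
 */
#include <stdint.h>

static inline uint8_t xchg_u8_masked(uint8_t *p, uint8_t newval)
{
        /* Round p down to the aligned 32-bit word containing it. */
        uint32_t *ptr32 = (uint32_t *)((uintptr_t)p & ~(uintptr_t)0x3);
        /* Bit offset of the target byte within that word. */
        unsigned int s = ((uintptr_t)p & 0x3) * 8;
        uint32_t mask = 0xffu << s;
        uint32_t newx = (uint32_t)newval << s;
        uint32_t old = __atomic_load_n(ptr32, __ATOMIC_RELAXED);
        uint32_t tmp;

        do {
                /* Keep the neighbouring bytes, splice in the new one. */
                tmp = (old & ~mask) | newx;
                /* On failure, 'old' is refreshed with the current word. */
        } while (!__atomic_compare_exchange_n(ptr32, &old, tmp, 1,
                                              __ATOMIC_RELAXED,
                                              __ATOMIC_RELAXED));

        /* The previous byte value comes out of the old word. */
        return (uint8_t)((old & mask) >> s);
}

The kernel macro generalizes this in two ways: the shift and mask are
computed from sizeof(*p), so the same code serves both 1- and 2-byte
types, and the prepend/append fence strings let one loop body produce
the relaxed, acquire, release, and fully ordered variants.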