From e807c2a37044a51de89d6d4f8a1f5ecfb3752f36 Mon Sep 17 00:00:00 2001
From: Uros Bizjak
Date: Wed, 24 Jan 2024 11:58:16 +0100
Subject: [PATCH] locking/x86: Implement local_xchg() using CMPXCHG without the LOCK prefix

Implement local_xchg() using the CMPXCHG instruction without the
LOCK prefix. XCHG is expensive due to the implied LOCK prefix. The
processor cannot prefetch cachelines if XCHG is used.

Signed-off-by: Uros Bizjak
Signed-off-by: Ingo Molnar
Cc: Peter Zijlstra
Cc: Waiman Long
Cc: Will Deacon
Cc: Thomas Gleixner
Cc: Linus Torvalds
Cc: Paul E. McKenney
Cc: "H. Peter Anvin"
Link: https://lore.kernel.org/r/20240124105816.612670-1-ubizjak@gmail.com
---
 arch/x86/include/asm/local.h | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/local.h b/arch/x86/include/asm/local.h
index 73dba8b944430..59aa966dc2127 100644
--- a/arch/x86/include/asm/local.h
+++ b/arch/x86/include/asm/local.h
@@ -131,8 +131,20 @@ static inline bool local_try_cmpxchg(local_t *l, long *old, long new)
 				 (typeof(l->a.counter) *) old, new);
 }
 
-/* Always has a lock prefix */
-#define local_xchg(l, n) (xchg(&((l)->a.counter), (n)))
+/*
+ * Implement local_xchg using CMPXCHG instruction without the LOCK prefix.
+ * XCHG is expensive due to the implied LOCK prefix. The processor
+ * cannot prefetch cachelines if XCHG is used.
+ */
+static __always_inline long
+local_xchg(local_t *l, long n)
+{
+	long c = local_read(l);
+
+	do { } while (!local_try_cmpxchg(l, &c, n));
+
+	return c;
+}
 
 /**
  * local_add_unless - add unless the number is already a given value
-- 
2.30.2