From: Nick Desaulniers Date: Mon, 28 Aug 2023 18:53:57 +0000 (-0700) Subject: x86/asm/bitops: Use __builtin_clz{l|ll} to evaluate constant expressions X-Git-Url: http://git.maquefel.me/?a=commitdiff_plain;h=3dae5c43badf285e22f6d88388e8a232a83bdfec;p=linux.git x86/asm/bitops: Use __builtin_clz{l|ll} to evaluate constant expressions Micro-optimize the bitops code some more, similar to commits: fdb6649ab7c1 ("x86/asm/bitops: Use __builtin_ctzl() to evaluate constant expressions") 2fcff790dcb4 ("powerpc: Use builtin functions for fls()/__fls()/fls64()") From a recent discussion, I noticed that x86 is lacking an optimization that appears in arch/powerpc/include/asm/bitops.h related to constant folding. If you add a BUILD_BUG_ON(__builtin_constant_p(param)) to these functions, you'll find that there were cases where the use of inline asm pessimized the compiler's ability to perform constant folding resulting in runtime calculation of a value that could have been computed at compile time. Signed-off-by: Nick Desaulniers Signed-off-by: Ingo Molnar Cc: Linus Torvalds Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20230828-x86_fls-v1-1-e6a31b9f79c3@google.com --- diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h index 2edf68475fec4..50e5ebf9d0a0d 100644 --- a/arch/x86/include/asm/bitops.h +++ b/arch/x86/include/asm/bitops.h @@ -293,6 +293,9 @@ static __always_inline unsigned long variable_ffz(unsigned long word) */ static __always_inline unsigned long __fls(unsigned long word) { + if (__builtin_constant_p(word)) + return BITS_PER_LONG - 1 - __builtin_clzl(word); + asm("bsr %1,%0" : "=r" (word) : "rm" (word)); @@ -360,6 +363,9 @@ static __always_inline int fls(unsigned int x) { int r; + if (__builtin_constant_p(x)) + return x ? 32 - __builtin_clz(x) : 0; + #ifdef CONFIG_X86_64 /* * AMD64 says BSRL won't clobber the dest reg if x==0; Intel64 says the @@ -401,6 +407,9 @@ static __always_inline int fls(unsigned int x) static __always_inline int fls64(__u64 x) { int bitpos = -1; + + if (__builtin_constant_p(x)) + return x ? 64 - __builtin_clzll(x) : 0; /* * AMD64 says BSRQ won't clobber the dest reg if x==0; Intel64 says the * dest reg is undefined if x==0, but their CPU architect says its