bcachefs: bch2_bkey_cmp_packed_inlined()
author Kent Overstreet <kent.overstreet@linux.dev>
Fri, 21 Oct 2022 23:20:09 +0000 (19:20 -0400)
committer Kent Overstreet <kent.overstreet@linux.dev>
Sun, 22 Oct 2023 21:09:45 +0000 (17:09 -0400)
This adds an inlined version of bch2_bkey_cmp_packed(), and uses it in
bch2_sort_keys(), where it's part of the inner loop.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
fs/bcachefs/bkey.c
fs/bcachefs/bkey_cmp.h [new file with mode: 0644]
fs/bcachefs/bkey_sort.c

diff --git a/fs/bcachefs/bkey.c b/fs/bcachefs/bkey.c
index e09a5e3fd709724e63fad4362735e3ad59573aa9..161b5bd60a63b5f68fff367bb888658ca68195a7 100644 (file)
@@ -2,6 +2,7 @@
 
 #include "bcachefs.h"
 #include "bkey.h"
+#include "bkey_cmp.h"
 #include "bkey_methods.h"
 #include "bset.h"
 #include "util.h"
@@ -763,50 +764,6 @@ unsigned bch2_bkey_ffs(const struct btree *b, const struct bkey_packed *k)
 
 #ifdef HAVE_BCACHEFS_COMPILED_UNPACK
 
-static inline int __bkey_cmp_bits(const u64 *l, const u64 *r,
-                                 unsigned nr_key_bits)
-{
-       long d0, d1, d2, d3;
-       int cmp;
-
-       /* we shouldn't need asm for this, but gcc is being retarded: */
-
-       asm(".intel_syntax noprefix;"
-           "xor eax, eax;"
-           "xor edx, edx;"
-           "1:;"
-           "mov r8, [rdi];"
-           "mov r9, [rsi];"
-           "sub ecx, 64;"
-           "jl 2f;"
-
-           "cmp r8, r9;"
-           "jnz 3f;"
-
-           "lea rdi, [rdi - 8];"
-           "lea rsi, [rsi - 8];"
-           "jmp 1b;"
-
-           "2:;"
-           "not ecx;"
-           "shr r8, 1;"
-           "shr r9, 1;"
-           "shr r8, cl;"
-           "shr r9, cl;"
-           "cmp r8, r9;"
-
-           "3:\n"
-           "seta al;"
-           "setb dl;"
-           "sub eax, edx;"
-           ".att_syntax prefix;"
-           : "=&D" (d0), "=&S" (d1), "=&d" (d2), "=&c" (d3), "=&a" (cmp)
-           : "0" (l), "1" (r), "3" (nr_key_bits)
-           : "r8", "r9", "cc", "memory");
-
-       return cmp;
-}
-
 #define I(_x)                  (*(out)++ = (_x))
 #define I1(i0)                                         I(i0)
 #define I2(i0, i1)             (I1(i0),                I(i1))
@@ -1037,40 +994,6 @@ int bch2_compile_bkey_format(const struct bkey_format *format, void *_out)
 }
 
 #else
-static inline int __bkey_cmp_bits(const u64 *l, const u64 *r,
-                                 unsigned nr_key_bits)
-{
-       u64 l_v, r_v;
-
-       if (!nr_key_bits)
-               return 0;
-
-       /* for big endian, skip past header */
-       nr_key_bits += high_bit_offset;
-       l_v = *l & (~0ULL >> high_bit_offset);
-       r_v = *r & (~0ULL >> high_bit_offset);
-
-       while (1) {
-               if (nr_key_bits < 64) {
-                       l_v >>= 64 - nr_key_bits;
-                       r_v >>= 64 - nr_key_bits;
-                       nr_key_bits = 0;
-               } else {
-                       nr_key_bits -= 64;
-               }
-
-               if (!nr_key_bits || l_v != r_v)
-                       break;
-
-               l = next_word(l);
-               r = next_word(r);
-
-               l_v = *l;
-               r_v = *r;
-       }
-
-       return cmp_int(l_v, r_v);
-}
 #endif
 
 __pure
@@ -1078,19 +1001,7 @@ int __bch2_bkey_cmp_packed_format_checked(const struct bkey_packed *l,
                                          const struct bkey_packed *r,
                                          const struct btree *b)
 {
-       const struct bkey_format *f = &b->format;
-       int ret;
-
-       EBUG_ON(!bkey_packed(l) || !bkey_packed(r));
-       EBUG_ON(b->nr_key_bits != bkey_format_key_bits(f));
-
-       ret = __bkey_cmp_bits(high_word(f, l),
-                             high_word(f, r),
-                             b->nr_key_bits);
-
-       EBUG_ON(ret != bpos_cmp(bkey_unpack_pos(b, l),
-                               bkey_unpack_pos(b, r)));
-       return ret;
+       return __bch2_bkey_cmp_packed_format_checked_inlined(l, r, b);
 }
 
 __pure __flatten
@@ -1106,20 +1017,7 @@ int bch2_bkey_cmp_packed(const struct btree *b,
                         const struct bkey_packed *l,
                         const struct bkey_packed *r)
 {
-       struct bkey unpacked;
-
-       if (likely(bkey_packed(l) && bkey_packed(r)))
-               return __bch2_bkey_cmp_packed_format_checked(l, r, b);
-
-       if (bkey_packed(l)) {
-               __bkey_unpack_key_format_checked(b, &unpacked, l);
-               l = (void *) &unpacked;
-       } else if (bkey_packed(r)) {
-               __bkey_unpack_key_format_checked(b, &unpacked, r);
-               r = (void *) &unpacked;
-       }
-
-       return bpos_cmp(((struct bkey *) l)->p, ((struct bkey *) r)->p);
+       return bch2_bkey_cmp_packed_inlined(b, l, r);
 }
 
 __pure __flatten
diff --git a/fs/bcachefs/bkey_cmp.h b/fs/bcachefs/bkey_cmp.h
new file mode 100644 (file)
index 0000000..5f42a6e
--- /dev/null
@@ -0,0 +1,129 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_BKEY_CMP_H
+#define _BCACHEFS_BKEY_CMP_H
+
+#include "bkey.h"
+
+#ifdef CONFIG_X86_64
+static inline int __bkey_cmp_bits(const u64 *l, const u64 *r,
+                                 unsigned nr_key_bits)
+{
+       long d0, d1, d2, d3;
+       int cmp;
+
+       /* we shouldn't need asm for this, but gcc is being retarded: */
+
+       asm(".intel_syntax noprefix;"
+           "xor eax, eax;"
+           "xor edx, edx;"
+           "1:;"
+           "mov r8, [rdi];"
+           "mov r9, [rsi];"
+           "sub ecx, 64;"
+           "jl 2f;"
+
+           "cmp r8, r9;"
+           "jnz 3f;"
+
+           "lea rdi, [rdi - 8];"
+           "lea rsi, [rsi - 8];"
+           "jmp 1b;"
+
+           "2:;"
+           "not ecx;"
+           "shr r8, 1;"
+           "shr r9, 1;"
+           "shr r8, cl;"
+           "shr r9, cl;"
+           "cmp r8, r9;"
+
+           "3:\n"
+           "seta al;"
+           "setb dl;"
+           "sub eax, edx;"
+           ".att_syntax prefix;"
+           : "=&D" (d0), "=&S" (d1), "=&d" (d2), "=&c" (d3), "=&a" (cmp)
+           : "0" (l), "1" (r), "3" (nr_key_bits)
+           : "r8", "r9", "cc", "memory");
+
+       return cmp;
+}
+#else
+static inline int __bkey_cmp_bits(const u64 *l, const u64 *r,
+                                 unsigned nr_key_bits)
+{
+       u64 l_v, r_v;
+
+       if (!nr_key_bits)
+               return 0;
+
+       /* for big endian, skip past header */
+       nr_key_bits += high_bit_offset;
+       l_v = *l & (~0ULL >> high_bit_offset);
+       r_v = *r & (~0ULL >> high_bit_offset);
+
+       while (1) {
+               if (nr_key_bits < 64) {
+                       l_v >>= 64 - nr_key_bits;
+                       r_v >>= 64 - nr_key_bits;
+                       nr_key_bits = 0;
+               } else {
+                       nr_key_bits -= 64;
+               }
+
+               if (!nr_key_bits || l_v != r_v)
+                       break;
+
+               l = next_word(l);
+               r = next_word(r);
+
+               l_v = *l;
+               r_v = *r;
+       }
+
+       return cmp_int(l_v, r_v);
+}
+#endif
+
+static inline __pure __flatten
+int __bch2_bkey_cmp_packed_format_checked_inlined(const struct bkey_packed *l,
+                                         const struct bkey_packed *r,
+                                         const struct btree *b)
+{
+       const struct bkey_format *f = &b->format;
+       int ret;
+
+       EBUG_ON(!bkey_packed(l) || !bkey_packed(r));
+       EBUG_ON(b->nr_key_bits != bkey_format_key_bits(f));
+
+       ret = __bkey_cmp_bits(high_word(f, l),
+                             high_word(f, r),
+                             b->nr_key_bits);
+
+       EBUG_ON(ret != bpos_cmp(bkey_unpack_pos(b, l),
+                               bkey_unpack_pos(b, r)));
+       return ret;
+}
+
+static inline __pure __flatten
+int bch2_bkey_cmp_packed_inlined(const struct btree *b,
+                        const struct bkey_packed *l,
+                        const struct bkey_packed *r)
+{
+       struct bkey unpacked;
+
+       if (likely(bkey_packed(l) && bkey_packed(r)))
+               return __bch2_bkey_cmp_packed_format_checked_inlined(l, r, b);
+
+       if (bkey_packed(l)) {
+               __bkey_unpack_key_format_checked(b, &unpacked, l);
+               l = (void *) &unpacked;
+       } else if (bkey_packed(r)) {
+               __bkey_unpack_key_format_checked(b, &unpacked, r);
+               r = (void *) &unpacked;
+       }
+
+       return bpos_cmp(((struct bkey *) l)->p, ((struct bkey *) r)->p);
+}
+
+#endif /* _BCACHEFS_BKEY_CMP_H */
diff --git a/fs/bcachefs/bkey_sort.c b/fs/bcachefs/bkey_sort.c
index b1385a77da1146f6efd643d389a73aa999745244..be0d4bc1afd3404856f732de3e210a4b1124cac3 100644 (file)
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 #include "bcachefs.h"
 #include "bkey_buf.h"
+#include "bkey_cmp.h"
 #include "bkey_sort.h"
 #include "bset.h"
 #include "extents.h"
@@ -155,7 +156,7 @@ static inline int sort_keys_cmp(struct btree *b,
                                struct bkey_packed *l,
                                struct bkey_packed *r)
 {
-       return bch2_bkey_cmp_packed(b, l, r) ?:
+       return bch2_bkey_cmp_packed_inlined(b, l, r) ?:
                (int) bkey_deleted(r) - (int) bkey_deleted(l) ?:
                (int) l->needs_whiteout - (int) r->needs_whiteout;
 }
@@ -177,7 +178,7 @@ unsigned bch2_sort_keys(struct bkey_packed *dst,
                        continue;
 
                while ((next = sort_iter_peek(iter)) &&
-                      !bch2_bkey_cmp_packed(iter->b, in, next)) {
+                      !bch2_bkey_cmp_packed_inlined(iter->b, in, next)) {
                        BUG_ON(in->needs_whiteout &&
                               next->needs_whiteout);
                        needs_whiteout |= in->needs_whiteout;