bcachefs: Fix journal pins in btree write buffer
author Kent Overstreet <kent.overstreet@linux.dev>
Fri, 22 Mar 2024 08:01:27 +0000 (04:01 -0400)
committer Kent Overstreet <kent.overstreet@linux.dev>
Mon, 1 Apr 2024 00:36:10 +0000 (20:36 -0400)
The btree write buffer flush has two phases:
 - in natural key order, which is more efficient but may fail
 - then in journal order

The journal order flush was assuming that keys were still correctly
ordered by journal sequence number - but due to coalescing by the
previous phase, we need an additional sort.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
fs/bcachefs/btree_write_buffer.c

index 5cbad8445782c4006074365c13551b1c38b57849..baf63e2fddb64cd8f4c745d0cc80c864c86ffaa6 100644 (file)
@@ -11,6 +11,7 @@
 #include "journal_reclaim.h"
 
 #include <linux/prefetch.h>
+#include <linux/sort.h>
 
 static int bch2_btree_write_buffer_journal_flush(struct journal *,
                                struct journal_entry_pin *, u64);
@@ -46,6 +47,14 @@ static inline bool wb_key_ref_cmp(const struct wb_key_ref *l, const struct wb_ke
 #endif
 }
 
+static int wb_key_seq_cmp(const void *_l, const void *_r)
+{
+       const struct btree_write_buffered_key *l = _l;
+       const struct btree_write_buffered_key *r = _r;
+       /* sort() comparator: order buffered keys by journal_seq only */
+       return cmp_int(l->journal_seq, r->journal_seq);
+}
+
 /* Compare excluding idx, the low 24 bits: */
 static inline bool wb_key_eq(const void *_l, const void *_r)
 {
@@ -357,6 +366,11 @@ static int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans)
                 */
                trace_and_count(c, write_buffer_flush_slowpath, trans, slowpath, wb->flushing.keys.nr);
 
+               sort(wb->flushing.keys.data,
+                    wb->flushing.keys.nr,
+                    sizeof(wb->flushing.keys.data[0]),
+                    wb_key_seq_cmp, NULL);
+
                darray_for_each(wb->flushing.keys, i) {
                        if (!i->journal_seq)
                                continue;