From ceda1b9a179ffd8ece3f7d15d5b1379eb2552215 Mon Sep 17 00:00:00 2001
From: Kent Overstreet <kent.overstreet@gmail.com>
Date: Sun, 25 Apr 2021 16:24:03 -0400
Subject: [PATCH] bcachefs: Evict btree nodes we're deleting

There was a bug that led to duplicate btree node pointers being inserted
at the wrong level. The new topology repair code can fix that, except
that the btree cache code gets confused when we read in a btree node
from the pointer that was at the wrong level. This patch evicts nodes
that we're deleting, which nicely solves the problem.

Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
 fs/bcachefs/btree_cache.c | 30 ++++++++++++++++++++++++++++++
 fs/bcachefs/btree_cache.h |  2 ++
 fs/bcachefs/btree_gc.c    |  2 ++
 3 files changed, 34 insertions(+)

diff --git a/fs/bcachefs/btree_cache.c b/fs/bcachefs/btree_cache.c
index a42e0922f5e9f..85c19e4e52168 100644
--- a/fs/bcachefs/btree_cache.c
+++ b/fs/bcachefs/btree_cache.c
@@ -948,6 +948,36 @@ void bch2_btree_node_prefetch(struct bch_fs *c, struct btree_iter *iter,
 	bch2_btree_node_fill(c, iter, k, btree_id, level, SIX_LOCK_read, false);
 }
 
+void bch2_btree_node_evict(struct bch_fs *c, const struct bkey_i *k)
+{
+	struct btree_cache *bc = &c->btree_cache;
+	struct btree *b;
+
+	b = btree_cache_find(bc, k);
+	if (!b)
+		return;
+
+	six_lock_intent(&b->c.lock, NULL, NULL);
+	six_lock_write(&b->c.lock, NULL, NULL);
+
+	wait_on_bit_io(&b->flags, BTREE_NODE_read_in_flight,
+		       TASK_UNINTERRUPTIBLE);
+	__bch2_btree_node_write(c, b);
+
+	/* wait for any in flight btree write */
+	btree_node_wait_on_io(b);
+
+	BUG_ON(btree_node_dirty(b));
+
+	mutex_lock(&bc->lock);
+	btree_node_data_free(c, b);
+	bch2_btree_node_hash_remove(bc, b);
+	mutex_unlock(&bc->lock);
+
+	six_unlock_write(&b->c.lock);
+	six_unlock_intent(&b->c.lock);
+}
+
 void bch2_btree_node_to_text(struct printbuf *out, struct bch_fs *c,
 			     struct btree *b)
 {
diff --git a/fs/bcachefs/btree_cache.h b/fs/bcachefs/btree_cache.h
index c517cc0294540..40dd263a7caa7 100644
--- a/fs/bcachefs/btree_cache.h
+++ b/fs/bcachefs/btree_cache.h
@@ -30,6 +30,8 @@ struct btree *bch2_btree_node_get_noiter(struct bch_fs *, const struct bkey_i *,
 void bch2_btree_node_prefetch(struct bch_fs *, struct btree_iter *,
 			      const struct bkey_i *, enum btree_id, unsigned);
 
+void bch2_btree_node_evict(struct bch_fs *, const struct bkey_i *);
+
 void bch2_fs_btree_cache_exit(struct bch_fs *);
 int bch2_fs_btree_cache_init(struct bch_fs *);
 void bch2_fs_btree_cache_init_early(struct btree_cache *);
diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c
index 09a49dc631440..1c2eab41f7ca4 100644
--- a/fs/bcachefs/btree_gc.c
+++ b/fs/bcachefs/btree_gc.c
@@ -298,6 +298,7 @@ again:
 				bch2_btree_ids[b->c.btree_id],
 				b->c.level - 1,
 				(bch2_bkey_val_to_text(&PBUF(buf), c, bkey_i_to_s_c(tmp.k)), buf))) {
+			bch2_btree_node_evict(c, tmp.k);
 			ret = bch2_journal_key_delete(c, b->c.btree_id,
 						      b->c.level, tmp.k->k.p);
 			if (ret)
@@ -359,6 +360,7 @@ again:
 		cur = NULL;
 
 		if (ret == DROP_THIS_NODE) {
+			bch2_btree_node_evict(c, tmp.k);
 			ret = bch2_journal_key_delete(c, b->c.btree_id,
 						      b->c.level, tmp.k->k.p);
 			dropped_children = true;
-- 
2.30.2