From 8cbb0002509a605972781c0e747ae68112f94f54 Mon Sep 17 00:00:00 2001
From: Kent Overstreet <kent.overstreet@linux.dev>
Date: Sat, 1 Oct 2022 00:34:02 -0400
Subject: [PATCH] bcachefs: Write new btree nodes after parent update

In order to avoid locking all btree nodes up to the root for btree node
splits, we're going to have to introduce a new error path into
bch2_btree_insert_node(); this means we can't have done any writes or
modified global state before that point.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
 fs/bcachefs/btree_io.c              | 15 ++++------
 fs/bcachefs/btree_update_interior.c | 46 +++++++++++++++--------------
 2 files changed, 30 insertions(+), 31 deletions(-)

diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c
index 603b825ed6fee..a322a83676881 100644
--- a/fs/bcachefs/btree_io.c
+++ b/fs/bcachefs/btree_io.c
@@ -1908,6 +1908,8 @@ do_write:
 	u64s = bch2_sort_keys(i->start, &sort_iter, false);
 	le16_add_cpu(&i->u64s, u64s);
 
+	BUG_ON(!b->written && i->u64s != b->data->keys.u64s);
+
 	set_needs_whiteout(i, false);
 
 	/* do we have data to write? */
@@ -1917,6 +1919,10 @@ do_write:
 	bytes_to_write = vstruct_end(i) - data;
 	sectors_to_write = round_up(bytes_to_write, block_bytes(c)) >> 9;
 
+	if (!b->written &&
+	    b->key.k.type == KEY_TYPE_btree_ptr_v2)
+		BUG_ON(btree_ptr_sectors_written(&b->key) != sectors_to_write);
+
 	memset(data + bytes_to_write, 0,
 	       (sectors_to_write << 9) - bytes_to_write);
 
@@ -2005,11 +2011,6 @@ do_write:
 
 	b->written += sectors_to_write;
 
-	if (wbio->wbio.first_btree_write &&
-	    b->key.k.type == KEY_TYPE_btree_ptr_v2)
-		bkey_i_to_btree_ptr_v2(&b->key)->v.sectors_written =
-			cpu_to_le16(b->written);
-
 	if (wbio->key.k.type == KEY_TYPE_btree_ptr_v2)
 		bkey_i_to_btree_ptr_v2(&wbio->key)->v.sectors_written =
 			cpu_to_le16(b->written);
@@ -2022,10 +2023,6 @@ do_write:
 	return;
 err:
 	set_btree_node_noevict(b);
-	if (!b->written &&
-	    b->key.k.type == KEY_TYPE_btree_ptr_v2)
-		bkey_i_to_btree_ptr_v2(&b->key)->v.sectors_written =
-			cpu_to_le16(sectors_to_write);
 	b->written += sectors_to_write;
 nowrite:
 	btree_bounce_free(c, bytes, used_mempool, data);
diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c
index 247555dffa3f9..ac1e6d7286aa7 100644
--- a/fs/bcachefs/btree_update_interior.c
+++ b/fs/bcachefs/btree_update_interior.c
@@ -392,8 +392,6 @@ static struct btree *__btree_root_alloc(struct btree_update *as,
 
 	btree_node_set_format(b, b->data->format);
 	bch2_btree_build_aux_trees(b);
-
-	bch2_btree_update_add_new_node(as, b);
 	six_unlock_write(&b->c.lock);
 
 	return b;
@@ -860,6 +858,14 @@ static void bch2_btree_update_add_new_node(struct btree_update *as, struct btree
 	mutex_unlock(&c->btree_interior_update_lock);
 
 	btree_update_add_key(as, &as->new_keys, b);
+
+	if (b->key.k.type == KEY_TYPE_btree_ptr_v2) {
+		unsigned bytes = vstruct_end(&b->data->keys) - (void *) b->data;
+		unsigned sectors = round_up(bytes, block_bytes(c)) >> 9;
+
+		bkey_i_to_btree_ptr_v2(&b->key)->v.sectors_written =
+			cpu_to_le16(sectors);
+	}
 }
 
 /*
@@ -1192,7 +1198,6 @@ static void bch2_btree_set_root(struct btree_update *as,
 	struct btree *old;
 
 	trace_and_count(c, btree_node_set_root, c, b);
-	BUG_ON(!b->written);
 
 	old = btree_node_root(c, b);
 
@@ -1316,8 +1321,6 @@ static struct btree *__btree_split_node(struct btree_update *as,
 	SET_BTREE_NODE_SEQ(n2->data, BTREE_NODE_SEQ(n1->data));
 	n2->key.k.p = n1->key.k.p;
 
-	bch2_btree_update_add_new_node(as, n2);
-
 	set1 = btree_bset_first(n1);
 	set2 = btree_bset_first(n2);
 
@@ -1500,9 +1503,7 @@ static void btree_split(struct btree_update *as, struct btree_trans *trans,
 		bch2_btree_path_level_init(trans, path2, n2);
 
 		bch2_btree_update_add_new_node(as, n1);
-
-		bch2_btree_node_write(c, n1, SIX_LOCK_intent, 0);
-		bch2_btree_node_write(c, n2, SIX_LOCK_intent, 0);
+		bch2_btree_update_add_new_node(as, n2);
 
 		/*
 		 * Note that on recursive parent_keys == keys, so we
@@ -1525,9 +1526,9 @@ static void btree_split(struct btree_update *as, struct btree_trans *trans,
 			n3->sib_u64s[0] = U16_MAX;
 			n3->sib_u64s[1] = U16_MAX;
 
-			btree_split_insert_keys(as, trans, path, n3, &as->parent_keys);
+			bch2_btree_update_add_new_node(as, n3);
 
-			bch2_btree_node_write(c, n3, SIX_LOCK_intent, 0);
+			btree_split_insert_keys(as, trans, path, n3, &as->parent_keys);
 		}
 	} else {
 		trace_and_count(c, btree_node_compact, c, b);
@@ -1542,8 +1543,6 @@ static void btree_split(struct btree_update *as, struct btree_trans *trans,
 
 		bch2_btree_update_add_new_node(as, n1);
 
-		bch2_btree_node_write(c, n1, SIX_LOCK_intent, 0);
-
 		if (parent)
 			bch2_keylist_add(&as->parent_keys, &n1->key);
 	}
@@ -1560,11 +1559,16 @@ static void btree_split(struct btree_update *as, struct btree_trans *trans,
 		bch2_btree_set_root(as, trans, path, n1);
 	}
 
-	bch2_btree_update_get_open_buckets(as, n1);
-	if (n2)
-		bch2_btree_update_get_open_buckets(as, n2);
-	if (n3)
+	if (n3) {
 		bch2_btree_update_get_open_buckets(as, n3);
+		bch2_btree_node_write(c, n3, SIX_LOCK_intent, 0);
+	}
+	if (n2) {
+		bch2_btree_update_get_open_buckets(as, n2);
+		bch2_btree_node_write(c, n2, SIX_LOCK_intent, 0);
+	}
+	bch2_btree_update_get_open_buckets(as, n1);
+	bch2_btree_node_write(c, n1, SIX_LOCK_intent, 0);
 
 	/*
 	 * The old node must be freed (in memory) _before_ unlocking the new
@@ -1824,8 +1828,6 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans,
 	btree_set_min(n, prev->data->min_key);
 	btree_set_max(n, next->data->max_key);
 
-	bch2_btree_update_add_new_node(as, n);
-
 	n->data->format	 = new_f;
 	btree_node_set_format(n, new_f);
 
@@ -1835,13 +1837,13 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans,
 	bch2_btree_build_aux_trees(n);
 	six_unlock_write(&n->c.lock);
 
+	bch2_btree_update_add_new_node(as, n);
+
 	new_path = get_unlocked_mut_path(trans, path->btree_id, n->c.level, n->key.k.p);
 	six_lock_increment(&n->c.lock, SIX_LOCK_intent);
 	mark_btree_node_locked(trans, new_path, n->c.level, SIX_LOCK_intent);
 	bch2_btree_path_level_init(trans, new_path, n);
 
-	bch2_btree_node_write(c, n, SIX_LOCK_intent, 0);
-
 	bkey_init(&delete.k);
 	delete.k.p = prev->key.k.p;
 	bch2_keylist_add(&as->parent_keys, &delete);
@@ -1854,6 +1856,7 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans,
 	bch2_trans_verify_paths(trans);
 
 	bch2_btree_update_get_open_buckets(as, n);
+	bch2_btree_node_write(c, n, SIX_LOCK_intent, 0);
 
 	bch2_btree_node_free_inmem(trans, path, b);
 	bch2_btree_node_free_inmem(trans, sib_path, m);
@@ -1914,8 +1917,6 @@ int bch2_btree_node_rewrite(struct btree_trans *trans,
 
 	trace_and_count(c, btree_node_rewrite, c, b);
 
-	bch2_btree_node_write(c, n, SIX_LOCK_intent, 0);
-
 	if (parent) {
 		bch2_keylist_add(&as->parent_keys, &n->key);
 		bch2_btree_insert_node(as, trans, iter->path, parent,
@@ -1925,6 +1926,7 @@ int bch2_btree_node_rewrite(struct btree_trans *trans,
 	}
 
 	bch2_btree_update_get_open_buckets(as, n);
+	bch2_btree_node_write(c, n, SIX_LOCK_intent, 0);
 
 	bch2_btree_node_free_inmem(trans, iter->path, b);
 
-- 
2.30.2