bcachefs: Write new btree nodes after parent update
authorKent Overstreet <kent.overstreet@linux.dev>
Sat, 1 Oct 2022 04:34:02 +0000 (00:34 -0400)
committerKent Overstreet <kent.overstreet@linux.dev>
Sun, 22 Oct 2023 21:09:43 +0000 (17:09 -0400)
In order to avoid locking all btree nodes up to the root for btree node
splits, we're going to have to introduce a new error path into
bch2_btree_insert_node(); this mean we can't have done any writes or
modified global state before that point.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
fs/bcachefs/btree_io.c
fs/bcachefs/btree_update_interior.c

index 603b825ed6fee282637fafd34e8f46969da7098e..a322a83676881bdd3b2d08e361dd316482757972 100644 (file)
@@ -1908,6 +1908,8 @@ do_write:
        u64s = bch2_sort_keys(i->start, &sort_iter, false);
        le16_add_cpu(&i->u64s, u64s);
 
+       BUG_ON(!b->written && i->u64s != b->data->keys.u64s);
+
        set_needs_whiteout(i, false);
 
        /* do we have data to write? */
@@ -1917,6 +1919,10 @@ do_write:
        bytes_to_write = vstruct_end(i) - data;
        sectors_to_write = round_up(bytes_to_write, block_bytes(c)) >> 9;
 
+       if (!b->written &&
+           b->key.k.type == KEY_TYPE_btree_ptr_v2)
+               BUG_ON(btree_ptr_sectors_written(&b->key) != sectors_to_write);
+
        memset(data + bytes_to_write, 0,
               (sectors_to_write << 9) - bytes_to_write);
 
@@ -2005,11 +2011,6 @@ do_write:
 
        b->written += sectors_to_write;
 
-       if (wbio->wbio.first_btree_write &&
-           b->key.k.type == KEY_TYPE_btree_ptr_v2)
-               bkey_i_to_btree_ptr_v2(&b->key)->v.sectors_written =
-                       cpu_to_le16(b->written);
-
        if (wbio->key.k.type == KEY_TYPE_btree_ptr_v2)
                bkey_i_to_btree_ptr_v2(&wbio->key)->v.sectors_written =
                        cpu_to_le16(b->written);
@@ -2022,10 +2023,6 @@ do_write:
        return;
 err:
        set_btree_node_noevict(b);
-       if (!b->written &&
-           b->key.k.type == KEY_TYPE_btree_ptr_v2)
-               bkey_i_to_btree_ptr_v2(&b->key)->v.sectors_written =
-                       cpu_to_le16(sectors_to_write);
        b->written += sectors_to_write;
 nowrite:
        btree_bounce_free(c, bytes, used_mempool, data);
index 247555dffa3f909d227914a9e4860310d3eb82f1..ac1e6d7286aa72d82527f3ad8b16759e98d12cdf 100644 (file)
@@ -392,8 +392,6 @@ static struct btree *__btree_root_alloc(struct btree_update *as,
 
        btree_node_set_format(b, b->data->format);
        bch2_btree_build_aux_trees(b);
-
-       bch2_btree_update_add_new_node(as, b);
        six_unlock_write(&b->c.lock);
 
        return b;
@@ -860,6 +858,14 @@ static void bch2_btree_update_add_new_node(struct btree_update *as, struct btree
        mutex_unlock(&c->btree_interior_update_lock);
 
        btree_update_add_key(as, &as->new_keys, b);
+
+       if (b->key.k.type == KEY_TYPE_btree_ptr_v2) {
+               unsigned bytes = vstruct_end(&b->data->keys) - (void *) b->data;
+               unsigned sectors = round_up(bytes, block_bytes(c)) >> 9;
+
+               bkey_i_to_btree_ptr_v2(&b->key)->v.sectors_written =
+                       cpu_to_le16(sectors);
+       }
 }
 
 /*
@@ -1192,7 +1198,6 @@ static void bch2_btree_set_root(struct btree_update *as,
        struct btree *old;
 
        trace_and_count(c, btree_node_set_root, c, b);
-       BUG_ON(!b->written);
 
        old = btree_node_root(c, b);
 
@@ -1316,8 +1321,6 @@ static struct btree *__btree_split_node(struct btree_update *as,
        SET_BTREE_NODE_SEQ(n2->data, BTREE_NODE_SEQ(n1->data));
        n2->key.k.p = n1->key.k.p;
 
-       bch2_btree_update_add_new_node(as, n2);
-
        set1 = btree_bset_first(n1);
        set2 = btree_bset_first(n2);
 
@@ -1500,9 +1503,7 @@ static void btree_split(struct btree_update *as, struct btree_trans *trans,
                bch2_btree_path_level_init(trans, path2, n2);
 
                bch2_btree_update_add_new_node(as, n1);
-
-               bch2_btree_node_write(c, n1, SIX_LOCK_intent, 0);
-               bch2_btree_node_write(c, n2, SIX_LOCK_intent, 0);
+               bch2_btree_update_add_new_node(as, n2);
 
                /*
                 * Note that on recursive parent_keys == keys, so we
@@ -1525,9 +1526,9 @@ static void btree_split(struct btree_update *as, struct btree_trans *trans,
                        n3->sib_u64s[0] = U16_MAX;
                        n3->sib_u64s[1] = U16_MAX;
 
-                       btree_split_insert_keys(as, trans, path, n3, &as->parent_keys);
+                       bch2_btree_update_add_new_node(as, n3);
 
-                       bch2_btree_node_write(c, n3, SIX_LOCK_intent, 0);
+                       btree_split_insert_keys(as, trans, path, n3, &as->parent_keys);
                }
        } else {
                trace_and_count(c, btree_node_compact, c, b);
@@ -1542,8 +1543,6 @@ static void btree_split(struct btree_update *as, struct btree_trans *trans,
 
                bch2_btree_update_add_new_node(as, n1);
 
-               bch2_btree_node_write(c, n1, SIX_LOCK_intent, 0);
-
                if (parent)
                        bch2_keylist_add(&as->parent_keys, &n1->key);
        }
@@ -1560,11 +1559,16 @@ static void btree_split(struct btree_update *as, struct btree_trans *trans,
                bch2_btree_set_root(as, trans, path, n1);
        }
 
-       bch2_btree_update_get_open_buckets(as, n1);
-       if (n2)
-               bch2_btree_update_get_open_buckets(as, n2);
-       if (n3)
+       if (n3) {
                bch2_btree_update_get_open_buckets(as, n3);
+               bch2_btree_node_write(c, n3, SIX_LOCK_intent, 0);
+       }
+       if (n2) {
+               bch2_btree_update_get_open_buckets(as, n2);
+               bch2_btree_node_write(c, n2, SIX_LOCK_intent, 0);
+       }
+       bch2_btree_update_get_open_buckets(as, n1);
+       bch2_btree_node_write(c, n1, SIX_LOCK_intent, 0);
 
        /*
         * The old node must be freed (in memory) _before_ unlocking the new
@@ -1824,8 +1828,6 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans,
        btree_set_min(n, prev->data->min_key);
        btree_set_max(n, next->data->max_key);
 
-       bch2_btree_update_add_new_node(as, n);
-
        n->data->format  = new_f;
        btree_node_set_format(n, new_f);
 
@@ -1835,13 +1837,13 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans,
        bch2_btree_build_aux_trees(n);
        six_unlock_write(&n->c.lock);
 
+       bch2_btree_update_add_new_node(as, n);
+
        new_path = get_unlocked_mut_path(trans, path->btree_id, n->c.level, n->key.k.p);
        six_lock_increment(&n->c.lock, SIX_LOCK_intent);
        mark_btree_node_locked(trans, new_path, n->c.level, SIX_LOCK_intent);
        bch2_btree_path_level_init(trans, new_path, n);
 
-       bch2_btree_node_write(c, n, SIX_LOCK_intent, 0);
-
        bkey_init(&delete.k);
        delete.k.p = prev->key.k.p;
        bch2_keylist_add(&as->parent_keys, &delete);
@@ -1854,6 +1856,7 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans,
        bch2_trans_verify_paths(trans);
 
        bch2_btree_update_get_open_buckets(as, n);
+       bch2_btree_node_write(c, n, SIX_LOCK_intent, 0);
 
        bch2_btree_node_free_inmem(trans, path, b);
        bch2_btree_node_free_inmem(trans, sib_path, m);
@@ -1914,8 +1917,6 @@ int bch2_btree_node_rewrite(struct btree_trans *trans,
 
        trace_and_count(c, btree_node_rewrite, c, b);
 
-       bch2_btree_node_write(c, n, SIX_LOCK_intent, 0);
-
        if (parent) {
                bch2_keylist_add(&as->parent_keys, &n->key);
                bch2_btree_insert_node(as, trans, iter->path, parent,
@@ -1925,6 +1926,7 @@ int bch2_btree_node_rewrite(struct btree_trans *trans,
        }
 
        bch2_btree_update_get_open_buckets(as, n);
+       bch2_btree_node_write(c, n, SIX_LOCK_intent, 0);
 
        bch2_btree_node_free_inmem(trans, iter->path, b);