bcachefs: backpointers fsck no longer uses BTREE_ITER_ALL_LEVELS
authorKent Overstreet <kent.overstreet@linux.dev>
Mon, 13 Nov 2023 01:20:35 +0000 (20:20 -0500)
committerKent Overstreet <kent.overstreet@linux.dev>
Mon, 1 Jan 2024 16:47:37 +0000 (11:47 -0500)
It appears that BTREE_ITER_ALL_LEVELS is racy with concurrent interior
node btree updates; unfortunate but not terribly surprising it's a
difficult problem - that was the original reason for gc_lock.

BTREE_ITER_ALL_LEVELS will probably be deleted in a subsequent patch,
this changes backpointers fsck to instead walk keys at one level of the
btree at a time.

This fixes the tiering_drop_alloc test, which stopped working with the
patch to not flush the journal after journal replay.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
fs/bcachefs/backpointers.c

index 5025a71ad6851709957f1d27bf3e46a3b66d962b..69bd6c281d1a9811a463a8b50470434e6b7786b4 100644 (file)
@@ -426,11 +426,13 @@ static int check_bp_exists(struct btree_trans *trans,
            memcmp(bkey_s_c_to_backpointer(bp_k).v, &bp, sizeof(bp))) {
                if (last_flushed->level != bp.level ||
                    !bpos_eq(last_flushed->pos, orig_k.k->p)) {
+                       ret = bch2_btree_write_buffer_flush_sync(trans);
+                       if (ret)
+                               goto err;
+
                        last_flushed->level = bp.level;
                        last_flushed->pos = orig_k.k->p;
-
-                       ret = bch2_btree_write_buffer_flush_sync(trans) ?:
-                               -BCH_ERR_transaction_restart_write_buffer_flush;
+                       ret = -BCH_ERR_transaction_restart_write_buffer_flush;
                        goto out;
                }
                goto missing;
@@ -457,25 +459,18 @@ missing:
 }
 
 static int check_extent_to_backpointers(struct btree_trans *trans,
-                                       struct btree_iter *iter,
+                                       enum btree_id btree, unsigned level,
                                        struct bpos bucket_start,
                                        struct bpos bucket_end,
-                                       struct bpos_level *last_flushed)
+                                       struct bpos_level *last_flushed,
+                                       struct bkey_s_c k)
 {
        struct bch_fs *c = trans->c;
        struct bkey_ptrs_c ptrs;
        const union bch_extent_entry *entry;
        struct extent_ptr_decoded p;
-       struct bkey_s_c k;
        int ret;
 
-       k = bch2_btree_iter_peek_all_levels(iter);
-       ret = bkey_err(k);
-       if (ret)
-               return ret;
-       if (!k.k)
-               return 0;
-
        ptrs = bch2_bkey_ptrs_c(k);
        bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
                struct bpos bucket_pos;
@@ -484,7 +479,7 @@ static int check_extent_to_backpointers(struct btree_trans *trans,
                if (p.ptr.cached)
                        continue;
 
-               bch2_extent_ptr_to_bp(c, iter->btree_id, iter->path->level,
+               bch2_extent_ptr_to_bp(c, btree, level,
                                      k, p, &bucket_pos, &bp);
 
                ret = check_bp_exists(trans, bucket_pos, bp, k,
@@ -501,44 +496,33 @@ static int check_btree_root_to_backpointers(struct btree_trans *trans,
                                            enum btree_id btree_id,
                                            struct bpos bucket_start,
                                            struct bpos bucket_end,
-                                           struct bpos_level *last_flushed)
+                                           struct bpos_level *last_flushed,
+                                           int *level)
 {
        struct bch_fs *c = trans->c;
-       struct btree_root *r = bch2_btree_id_root(c, btree_id);
        struct btree_iter iter;
        struct btree *b;
        struct bkey_s_c k;
-       struct bkey_ptrs_c ptrs;
-       struct extent_ptr_decoded p;
-       const union bch_extent_entry *entry;
        int ret;
-
-       bch2_trans_node_iter_init(trans, &iter, btree_id, POS_MIN, 0, r->level, 0);
+retry:
+       bch2_trans_node_iter_init(trans, &iter, btree_id, POS_MIN,
+                                 0, bch2_btree_id_root(c, btree_id)->b->c.level, 0);
        b = bch2_btree_iter_peek_node(&iter);
        ret = PTR_ERR_OR_ZERO(b);
        if (ret)
                goto err;
 
-       BUG_ON(b != btree_node_root(c, b));
-
-       k = bkey_i_to_s_c(&b->key);
-       ptrs = bch2_bkey_ptrs_c(k);
-       bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
-               struct bpos bucket_pos;
-               struct bch_backpointer bp;
-
-               if (p.ptr.cached)
-                       continue;
+       if (b != btree_node_root(c, b)) {
+               bch2_trans_iter_exit(trans, &iter);
+               goto retry;
+       }
 
-               bch2_extent_ptr_to_bp(c, iter.btree_id, b->c.level + 1,
-                                     k, p, &bucket_pos, &bp);
+       *level = b->c.level;
 
-               ret = check_bp_exists(trans, bucket_pos, bp, k,
+       k = bkey_i_to_s_c(&b->key);
+       ret = check_extent_to_backpointers(trans, btree_id, b->c.level + 1,
                                      bucket_start, bucket_end,
-                                     last_flushed);
-               if (ret)
-                       goto err;
-       }
+                                     last_flushed, k);
 err:
        bch2_trans_iter_exit(trans, &iter);
        return ret;
@@ -616,43 +600,58 @@ static int bch2_check_extents_to_backpointers_pass(struct btree_trans *trans,
        struct bch_fs *c = trans->c;
        struct btree_iter iter;
        enum btree_id btree_id;
-       struct bpos_level last_flushed = { UINT_MAX, POS_MIN };
+       struct bkey_s_c k;
        int ret = 0;
 
        for (btree_id = 0; btree_id < btree_id_nr_alive(c); btree_id++) {
-               unsigned depth = btree_type_has_ptrs(btree_id) ? 0 : 1;
-
-               bch2_trans_node_iter_init(trans, &iter, btree_id, POS_MIN, 0,
-                                         depth,
-                                         BTREE_ITER_ALL_LEVELS|
-                                         BTREE_ITER_PREFETCH);
-
-               do {
-                       ret = commit_do(trans, NULL, NULL,
-                                       BCH_TRANS_COMMIT_lazy_rw|
-                                       BCH_TRANS_COMMIT_no_enospc,
-                                       check_extent_to_backpointers(trans, &iter,
-                                                               bucket_start, bucket_end,
-                                                               &last_flushed));
-                       if (ret)
-                               break;
-               } while (!bch2_btree_iter_advance(&iter));
-
-               bch2_trans_iter_exit(trans, &iter);
-
-               if (ret)
-                       break;
+               struct bpos_level last_flushed = { UINT_MAX, POS_MIN };
+               int level, depth = btree_type_has_ptrs(btree_id) ? 0 : 1;
 
                ret = commit_do(trans, NULL, NULL,
                                BCH_TRANS_COMMIT_lazy_rw|
                                BCH_TRANS_COMMIT_no_enospc,
                                check_btree_root_to_backpointers(trans, btree_id,
                                                        bucket_start, bucket_end,
-                                                       &last_flushed));
+                                                       &last_flushed, &level));
                if (ret)
-                       break;
+                       return ret;
+
+               while (level >= depth) {
+                       bch2_trans_node_iter_init(trans, &iter, btree_id, POS_MIN, 0,
+                                                 level,
+                                                 BTREE_ITER_PREFETCH);
+                       while (1) {
+                               bch2_trans_begin(trans);
+                               k = bch2_btree_iter_peek(&iter);
+                               if (!k.k)
+                                       break;
+                               ret = bkey_err(k) ?:
+                                       check_extent_to_backpointers(trans, btree_id, level,
+                                                                    bucket_start, bucket_end,
+                                                                    &last_flushed, k) ?:
+                                       bch2_trans_commit(trans, NULL, NULL,
+                                                         BCH_TRANS_COMMIT_lazy_rw|
+                                                         BCH_TRANS_COMMIT_no_enospc);
+                               if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) {
+                                       ret = 0;
+                                       continue;
+                               }
+                               if (ret)
+                                       break;
+                               if (bpos_eq(iter.pos, SPOS_MAX))
+                                       break;
+                               bch2_btree_iter_advance(&iter);
+                       }
+                       bch2_trans_iter_exit(trans, &iter);
+
+                       if (ret)
+                               return ret;
+
+                       --level;
+               }
        }
-       return ret;
+
+       return 0;
 }
 
 static struct bpos bucket_pos_to_bp_safe(const struct bch_fs *c,