bcachefs: bch2_propagate_key_to_snapshot_leaves()
authorKent Overstreet <kent.overstreet@linux.dev>
Sat, 19 Aug 2023 01:14:33 +0000 (21:14 -0400)
committerKent Overstreet <kent.overstreet@linux.dev>
Sun, 22 Oct 2023 21:10:11 +0000 (17:10 -0400)
If fsck finds a key that needs work done, the primary example being an
unlinked inode that needs to be deleted, and the key is in an internal
snapshot node, we have a bit of a conundrum.

The conundrum is that internal snapshot nodes are shared, and we in
general do updates in internal snapshot nodes because there may be
overwrites in some snapshots and not others, and this may affect other
keys referenced by this key (i.e. extents).

For example, we might be seeing an unlinked inode in an internal
snapshot node, but then in one child snapshot the inode might have been
reattached and might not be unlinked. Deleting the inode in the internal
snapshot node would be wrong, because then we'll delete all the extents
that the child snapshot references.

But if an unlinked inode does not have any overwrites in child
snapshots, we're fine: the inode is overwrritten in all child snapshots,
so we can do the deletion at the point of comonality in the snapshot
tree, i.e. the node where we found it.

This patch adds a new helper, bch2_propagate_key_to_snapshot_leaves(),
to handle the case where we need a to update a key that does have
overwrites in child snapshots: we copy the key to leaf snapshot nodes,
and then rewind fsck and process the needed updates there.

With this, fsck can now always correctly handle unlinked inodes found in
internal snapshot nodes.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
fs/bcachefs/fsck.c
fs/bcachefs/snapshot.c
fs/bcachefs/snapshot.h

index 9524bd621b2cf25df6c98ba9dc8e1d332c271df2..238caeeaf06ce411a13455b74218f83ebf1126a1 100644 (file)
@@ -853,14 +853,6 @@ static int check_inode(struct btree_trans *trans,
        if (ret)
                goto err;
 
-       /*
-        * if snapshot id isn't a leaf node, skip it - deletion in
-        * particular is not atomic, so on the internal snapshot nodes
-        * we can see inodes marked for deletion after a clean shutdown
-        */
-       if (bch2_snapshot_is_internal_node(c, k.k->p.snapshot))
-               return 0;
-
        if (!bkey_is_inode(k.k))
                return 0;
 
@@ -882,6 +874,27 @@ static int check_inode(struct btree_trans *trans,
                return -EINVAL;
        }
 
+       if ((u.bi_flags & (BCH_INODE_I_SIZE_DIRTY|BCH_INODE_UNLINKED)) &&
+           bch2_key_has_snapshot_overwrites(trans, BTREE_ID_inodes, k.k->p)) {
+               struct bpos new_min_pos;
+
+               ret = bch2_propagate_key_to_snapshot_leaves(trans, iter->btree_id, k, &new_min_pos);
+               if (ret)
+                       goto err;
+
+               u.bi_flags &= ~BCH_INODE_I_SIZE_DIRTY|BCH_INODE_UNLINKED;
+
+               ret = __write_inode(trans, &u, iter->pos.snapshot);
+               if (ret) {
+                       bch_err_msg(c, ret, "in fsck: error updating inode");
+                       return ret;
+               }
+
+               if (!bpos_eq(new_min_pos, POS_MIN))
+                       bch2_btree_iter_set_pos(iter, bpos_predecessor(new_min_pos));
+               return 0;
+       }
+
        if (u.bi_flags & BCH_INODE_UNLINKED &&
            (!c->sb.clean ||
             fsck_err(c, "filesystem marked clean, but inode %llu unlinked",
@@ -960,9 +973,10 @@ static int check_inode(struct btree_trans *trans,
 
        if (do_update) {
                ret = __write_inode(trans, &u, iter->pos.snapshot);
-               if (ret)
-                       bch_err(c, "error in fsck: error updating inode: %s",
-                               bch2_err_str(ret));
+               if (ret) {
+                       bch_err_msg(c, ret, "in fsck: error updating inode");
+                       return ret;
+               }
        }
 err:
 fsck_err:
index 25c888051ca4350b42c8de10e7e434e6e7450d71..07e5c1b44b067bcac3d523950fa2ef29b5550fe3 100644 (file)
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 
 #include "bcachefs.h"
+#include "bkey_buf.h"
 #include "btree_key_cache.h"
 #include "btree_update.h"
 #include "buckets.h"
@@ -1536,7 +1537,7 @@ void bch2_delete_dead_snapshots_async(struct bch_fs *c)
 }
 
 int bch2_delete_dead_snapshots_hook(struct btree_trans *trans,
-                                          struct btree_trans_commit_hook *h)
+                                   struct btree_trans_commit_hook *h)
 {
        struct bch_fs *c = trans->c;
 
@@ -1583,6 +1584,94 @@ int __bch2_key_has_snapshot_overwrites(struct btree_trans *trans,
        return ret;
 }
 
+static u32 bch2_snapshot_smallest_child(struct bch_fs *c, u32 id)
+{
+       const struct snapshot_t *s = snapshot_t(c, id);
+
+       return s->children[1] ?: s->children[0];
+}
+
+static u32 bch2_snapshot_smallest_descendent(struct bch_fs *c, u32 id)
+{
+       u32 child;
+
+       while ((child = bch2_snapshot_smallest_child(c, id)))
+               id = child;
+       return id;
+}
+
+static int bch2_propagate_key_to_snapshot_leaf(struct btree_trans *trans,
+                                              enum btree_id btree,
+                                              struct bkey_s_c interior_k,
+                                              u32 leaf_id, struct bpos *new_min_pos)
+{
+       struct btree_iter iter;
+       struct bpos pos = interior_k.k->p;
+       struct bkey_s_c k;
+       struct bkey_i *new;
+       int ret;
+
+       pos.snapshot = leaf_id;
+
+       bch2_trans_iter_init(trans, &iter, btree, pos, BTREE_ITER_INTENT);
+       k = bch2_btree_iter_peek_slot(&iter);
+       ret = bkey_err(k);
+       if (ret)
+               goto out;
+
+       /* key already overwritten in this snapshot? */
+       if (k.k->p.snapshot != interior_k.k->p.snapshot)
+               goto out;
+
+       if (bpos_eq(*new_min_pos, POS_MIN)) {
+               *new_min_pos = k.k->p;
+               new_min_pos->snapshot = leaf_id;
+       }
+
+       new = bch2_bkey_make_mut_noupdate(trans, interior_k);
+       ret = PTR_ERR_OR_ZERO(new);
+       if (ret)
+               goto out;
+
+       new->k.p.snapshot = leaf_id;
+       ret = bch2_trans_update(trans, &iter, new, 0);
+out:
+       bch2_trans_iter_exit(trans, &iter);
+       return ret;
+}
+
+int bch2_propagate_key_to_snapshot_leaves(struct btree_trans *trans,
+                                         enum btree_id btree,
+                                         struct bkey_s_c k,
+                                         struct bpos *new_min_pos)
+{
+       struct bch_fs *c = trans->c;
+       struct bkey_buf sk;
+       int ret;
+
+       bch2_bkey_buf_init(&sk);
+       bch2_bkey_buf_reassemble(&sk, c, k);
+       k = bkey_i_to_s_c(sk.k);
+
+       *new_min_pos = POS_MIN;
+
+       for (u32 id = bch2_snapshot_smallest_descendent(c, k.k->p.snapshot);
+            id < k.k->p.snapshot;
+            id++) {
+               if (!bch2_snapshot_is_ancestor(c, id, k.k->p.snapshot) ||
+                   !bch2_snapshot_is_leaf(c, id))
+                       continue;
+
+               ret = commit_do(trans, NULL, NULL, 0,
+                               bch2_propagate_key_to_snapshot_leaf(trans, btree, k, id, new_min_pos));
+               if (ret)
+                       break;
+       }
+
+       bch2_bkey_buf_exit(&sk, c);
+       return ret;
+}
+
 int bch2_snapshots_read(struct bch_fs *c)
 {
        struct btree_iter iter;
index 624a42d1c8b7a1c297b36ecafcfa7d9cbcf435d1..dabc9b9d921b4766be3f84d1b9bc4c494a518f8f 100644 (file)
@@ -263,6 +263,9 @@ static inline int bch2_key_has_snapshot_overwrites(struct btree_trans *trans,
        return __bch2_key_has_snapshot_overwrites(trans, id, pos);
 }
 
+int bch2_propagate_key_to_snapshot_leaves(struct btree_trans *, enum btree_id,
+                                         struct bkey_s_c, struct bpos *);
+
 int bch2_snapshots_read(struct bch_fs *);
 void bch2_fs_snapshots_exit(struct bch_fs *);