bcachefs: Fix corruption with writeable snapshots
author: Kent Overstreet <kent.overstreet@linux.dev>
Sun, 28 May 2023 03:19:13 +0000 (23:19 -0400)
committer: Kent Overstreet <kent.overstreet@linux.dev>
Sun, 22 Oct 2023 21:10:03 +0000 (17:10 -0400)
When partially overwriting an extent in an older snapshot, the existing
extent has to be split.

If the existing extent was overwritten in a different (sibling)
snapshot, we have to ensure that the split won't be visible in the
sibling snapshot.

data_update.c already has code for this,
insert_snapshot_whiteouts() - we just need to move it into
btree_update_leaf.c (as bch2_insert_snapshot_whiteouts()) and change
bch2_trans_update_extent() to use it as well.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
fs/bcachefs/btree_update.h
fs/bcachefs/btree_update_leaf.c
fs/bcachefs/data_update.c

index 1ac3a81e0af6fba206cc796449c34d81c445753a..e90cf292f80bf7155b80ddf30a4e825291ca6f6d 100644 (file)
@@ -4,6 +4,7 @@
 
 #include "btree_iter.h"
 #include "journal.h"
+#include "journal.h"
 
 struct bch_fs;
 struct btree;
@@ -83,6 +84,28 @@ int bch2_btree_node_update_key(struct btree_trans *, struct btree_iter *,
 int bch2_btree_node_update_key_get_iter(struct btree_trans *,
                                struct btree *, struct bkey_i *, bool);
 
+int __bch2_insert_snapshot_whiteouts(struct btree_trans *, enum btree_id,
+                                    struct bpos, struct bpos);
+
+/*
+ * Helper for splitting an extent that lives in an existing snapshot:
+ *
+ * When @old_pos belongs to an interior snapshot node, walk its descendant
+ * snapshots and, for each one in which @old_pos has been overwritten (and so
+ * is not visible), emit a whiteout at @new_pos.
+ */
+static inline int bch2_insert_snapshot_whiteouts(struct btree_trans *trans,
+                                                enum btree_id btree,
+                                                struct bpos old_pos,
+                                                struct bpos new_pos)
+{
+       /* Only snapshotted btrees, and only when the position really moves: */
+       if (btree_type_has_snapshots(btree) && !bkey_eq(old_pos, new_pos))
+               return __bch2_insert_snapshot_whiteouts(trans, btree,
+                                                       old_pos, new_pos);
+       return 0;
+}
+
 int bch2_trans_update_extent(struct btree_trans *, struct btree_iter *,
                             struct bkey_i *, enum btree_update_flags);
 
index 33693467810b20a9fc4fcbaf54ee595a51b95eca..25d73db9adc69ab4fc1b1cb272036dc23d2e7e76 100644 (file)
@@ -1343,6 +1343,85 @@ static int need_whiteout_for_snapshot(struct btree_trans *trans,
 
        return ret;
 }
+
+/*
+ * Out-of-line part of bch2_insert_snapshot_whiteouts(), which calls this once
+ * it has checked that the btree has snapshots and that the positions differ.
+ *
+ * Walks backwards from @old_pos across all snapshots while keys are still at
+ * @old_pos. For each key found in a descendant of @old_pos.snapshot for which
+ * snapshot_list_has_ancestor() does not already report an entry in the local
+ * list, a KEY_TYPE_whiteout is queued at @new_pos in that key's snapshot,
+ * provided the slot there is currently KEY_TYPE_deleted.
+ *
+ * Returns 0 on success or the first error returned by a helper it calls.
+ */
+int __bch2_insert_snapshot_whiteouts(struct btree_trans *trans,
+                                  enum btree_id id,
+                                  struct bpos old_pos,
+                                  struct bpos new_pos)
+{
+       struct bch_fs *c = trans->c;
+       struct btree_iter old_iter, new_iter;
+       struct bkey_s_c old_k, new_k;
+       snapshot_id_list s;
+       struct bkey_i *update;
+       /* Stays 0 if the loop condition fails before ever assigning ret: */
+       int ret = 0;
+
+       if (!bch2_snapshot_has_children(c, old_pos.snapshot))
+               return 0;
+
+       darray_init(&s);
+
+       bch2_trans_iter_init(trans, &old_iter, id, old_pos,
+                            BTREE_ITER_NOT_EXTENTS|
+                            BTREE_ITER_ALL_SNAPSHOTS);
+       while ((old_k = bch2_btree_iter_prev(&old_iter)).k &&
+              !(ret = bkey_err(old_k)) &&
+              bkey_eq(old_pos, old_k.k->p)) {
+               struct bpos whiteout_pos =
+                       SPOS(new_pos.inode, new_pos.offset, old_k.k->p.snapshot);
+
+               /* Only descendants of old_pos.snapshot not yet covered by the list: */
+               if (!bch2_snapshot_is_ancestor(c, old_k.k->p.snapshot, old_pos.snapshot) ||
+                   snapshot_list_has_ancestor(c, &s, old_k.k->p.snapshot))
+                       continue;
+
+               new_k = bch2_bkey_get_iter(trans, &new_iter, id, whiteout_pos,
+                                          BTREE_ITER_NOT_EXTENTS|
+                                          BTREE_ITER_INTENT);
+               ret = bkey_err(new_k);
+               if (ret)
+                       break;
+
+               if (new_k.k->type == KEY_TYPE_deleted) {
+                       update = bch2_trans_kmalloc(trans, sizeof(struct bkey_i));
+                       ret = PTR_ERR_OR_ZERO(update);
+                       if (!ret) {
+                               bkey_init(&update->k);
+                               update->k.p             = whiteout_pos;
+                               update->k.type          = KEY_TYPE_whiteout;
+
+                               ret = bch2_trans_update(trans, &new_iter, update,
+                                                       BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE);
+                       }
+               }
+               /* Single exit point for new_iter, on success and on failure: */
+               bch2_trans_iter_exit(trans, &new_iter);
+               if (ret)
+                       break;
+
+               ret = snapshot_list_add(c, &s, old_k.k->p.snapshot);
+               if (ret)
+                       break;
+       }
+       bch2_trans_iter_exit(trans, &old_iter);
+       darray_exit(&s);
+
+       return ret;
+}
+
 int bch2_trans_update_extent(struct btree_trans *trans,
                             struct btree_iter *orig_iter,
                             struct bkey_i *insert,
@@ -1396,8 +1459,10 @@ int bch2_trans_update_extent(struct btree_trans *trans,
 
                        bch2_cut_back(start, update);
 
-                       ret = bch2_btree_insert_nonextent(trans, btree_id, update,
-                                                 BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE|flags);
+                       ret =   bch2_insert_snapshot_whiteouts(trans, btree_id,
+                                               k.k->p, update->k.p) ?:
+                               bch2_btree_insert_nonextent(trans, btree_id, update,
+                                               BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE|flags);
                        if (ret)
                                goto err;
                }
@@ -1411,7 +1476,9 @@ int bch2_trans_update_extent(struct btree_trans *trans,
                        bch2_cut_front(start, update);
                        bch2_cut_back(insert->k.p, update);
 
-                       ret = bch2_btree_insert_nonextent(trans, btree_id, update,
+                       ret =   bch2_insert_snapshot_whiteouts(trans, btree_id,
+                                               k.k->p, update->k.p) ?:
+                               bch2_btree_insert_nonextent(trans, btree_id, update,
                                                  BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE|flags);
                        if (ret)
                                goto err;
index ae7e60d6e583671ec938ccd7aba8868b2641f0dd..c89ee14f8b6b8b5ce7fcbd5f5e963df65abdcfaf 100644 (file)
 #include "subvolume.h"
 #include "trace.h"
 
-static int insert_snapshot_whiteouts(struct btree_trans *trans,
-                                    enum btree_id id,
-                                    struct bpos old_pos,
-                                    struct bpos new_pos)
-{
-       struct bch_fs *c = trans->c;
-       struct btree_iter iter, iter2;
-       struct bkey_s_c k, k2;
-       snapshot_id_list s;
-       struct bkey_i *update;
-       int ret;
-
-       if (!btree_type_has_snapshots(id))
-               return 0;
-
-       darray_init(&s);
-
-       if (!bch2_snapshot_has_children(c, old_pos.snapshot))
-               return 0;
-
-       bch2_trans_iter_init(trans, &iter, id, old_pos,
-                            BTREE_ITER_NOT_EXTENTS|
-                            BTREE_ITER_ALL_SNAPSHOTS);
-       while (1) {
-               k = bch2_btree_iter_prev(&iter);
-               ret = bkey_err(k);
-               if (ret)
-                       break;
-
-               if (!k.k)
-                       break;
-
-               if (!bkey_eq(old_pos, k.k->p))
-                       break;
-
-               if (bch2_snapshot_is_ancestor(c, k.k->p.snapshot, old_pos.snapshot) &&
-                   !snapshot_list_has_ancestor(c, &s, k.k->p.snapshot)) {
-                       struct bpos whiteout_pos = new_pos;
-
-                       whiteout_pos.snapshot = k.k->p.snapshot;
-
-                       k2 = bch2_bkey_get_iter(trans, &iter2, id, whiteout_pos,
-                                               BTREE_ITER_NOT_EXTENTS|
-                                               BTREE_ITER_INTENT);
-                       ret = bkey_err(k2);
-
-                       if (!ret && k2.k->type == KEY_TYPE_deleted) {
-                               update = bch2_trans_kmalloc(trans, sizeof(struct bkey_i));
-                               ret = PTR_ERR_OR_ZERO(update);
-                               if (ret)
-                                       break;
-
-                               bkey_init(&update->k);
-                               update->k.p             = whiteout_pos;
-                               update->k.type          = KEY_TYPE_whiteout;
-
-                               ret = bch2_trans_update(trans, &iter2, update,
-                                                       BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE);
-                       }
-                       bch2_trans_iter_exit(trans, &iter2);
-
-                       if (ret)
-                               break;
-
-                       ret = snapshot_list_add(c, &s, k.k->p.snapshot);
-                       if (ret)
-                               break;
-               }
-       }
-       bch2_trans_iter_exit(trans, &iter);
-       darray_exit(&s);
-
-       return ret;
-}
-
 static void trace_move_extent_finish2(struct bch_fs *c, struct bkey_s_c k)
 {
        if (trace_move_extent_finish_enabled()) {
@@ -327,19 +252,12 @@ restart_drop_extra_replicas:
 
                next_pos = insert->k.p;
 
-               if (!bkey_eq(bkey_start_pos(&insert->k), bkey_start_pos(k.k))) {
-                       ret = insert_snapshot_whiteouts(trans, m->btree_id, k.k->p,
-                                                       bkey_start_pos(&insert->k));
-                       if (ret)
-                               goto err;
-               }
-
-               if (!bkey_eq(insert->k.p, k.k->p)) {
-                       ret = insert_snapshot_whiteouts(trans, m->btree_id,
-                                                       k.k->p, insert->k.p);
-                       if (ret)
-                               goto err;
-               }
+               ret =   bch2_insert_snapshot_whiteouts(trans, m->btree_id,
+                                               k.k->p, bkey_start_pos(&insert->k)) ?:
+                       bch2_insert_snapshot_whiteouts(trans, m->btree_id,
+                                               k.k->p, insert->k.p);
+               if (ret)
+                       goto err;
 
                ret   = bch2_trans_update(trans, &iter, insert,
                                BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?: