bcachefs: BTREE_ID_snapshot_tree
author Kent Overstreet <kent.overstreet@linux.dev>
Wed, 29 Mar 2023 15:18:52 +0000 (11:18 -0400)
committer Kent Overstreet <kent.overstreet@linux.dev>
Sun, 22 Oct 2023 21:10:01 +0000 (17:10 -0400)
This adds a new btree which gets us a persistent per-snapshot-tree
identifier.

 - BTREE_ID_snapshot_trees
 - KEY_TYPE_snapshot_tree
 - bch_snapshot now has a field that points to a snapshot_tree

This is going to be used to designate one snapshot ID/subvolume out of a
given tree of snapshots as the "main" subvolume, so that we can do quota
accounting in that subvolume and not the rest.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
fs/bcachefs/bcachefs.h
fs/bcachefs/bcachefs_format.h
fs/bcachefs/bkey_methods.c
fs/bcachefs/errcode.h
fs/bcachefs/fsck.c
fs/bcachefs/recovery.c
fs/bcachefs/subvolume.c
fs/bcachefs/subvolume.h
fs/bcachefs/subvolume_types.h

index e5834729b52a39e9b3683ae3636db79975600b94..39fd154477533aac55f90f3c48edd56852f64aef 100644 (file)
@@ -445,6 +445,7 @@ enum gc_phase {
        GC_PHASE_BTREE_need_discard,
        GC_PHASE_BTREE_backpointers,
        GC_PHASE_BTREE_bucket_gens,
+       GC_PHASE_BTREE_snapshot_trees,
 
        GC_PHASE_PENDING_DELETE,
 };
index 013d5e185d97b3c955eda90e582c33b1f31148f2..e9ac3aa6d91caeec061cfda04ded404b111fb1b9 100644 (file)
@@ -364,7 +364,8 @@ static inline void bkey_init(struct bkey *k)
        x(alloc_v4,             27)                     \
        x(backpointer,          28)                     \
        x(inode_v3,             29)                     \
-       x(bucket_gens,          30)
+       x(bucket_gens,          30)                     \
+       x(snapshot_tree,        31)
 
 enum bch_bkey_type {
 #define x(name, nr) KEY_TYPE_##name    = nr,
@@ -1123,7 +1124,7 @@ struct bch_snapshot {
        __le32                  parent;
        __le32                  children[2];
        __le32                  subvol;
-       __le32                  pad;
+       __le32                  tree;
 };
 
 LE32_BITMASK(BCH_SNAPSHOT_DELETED,     struct bch_snapshot, flags,  0,  1)
@@ -1131,6 +1132,19 @@ LE32_BITMASK(BCH_SNAPSHOT_DELETED,       struct bch_snapshot, flags,  0,  1)
 /* True if a subvolume points to this snapshot node: */
 LE32_BITMASK(BCH_SNAPSHOT_SUBVOL,      struct bch_snapshot, flags,  1,  2)
 
+/*
+ * Snapshot trees:
+ *
+ * The snapshot_trees btree gives us a persistent identifier for each tree of
+ * bch_snapshot nodes, and allows us to record and easily find the root/master
+ * subvolume that other snapshots were created from:
+ */
+struct bch_snapshot_tree {
+       struct bch_val          v;
+       __le32                  master_subvol;  /* ID of the tree's master subvolume */
+       __le32                  root_snapshot;  /* ID of the tree's root snapshot node */
+};
+
 /* LRU btree: */
 
 struct bch_lru {
@@ -1559,7 +1573,8 @@ struct bch_sb_field_journal_seq_blacklist {
        x(bucket_gens,                  25)             \
        x(lru_v2,                       26)             \
        x(fragmentation_lru,            27)             \
-       x(no_bps_in_alloc_keys,         28)
+       x(no_bps_in_alloc_keys,         28)             \
+       x(snapshot_trees,               29)
 
 enum bcachefs_metadata_version {
        bcachefs_metadata_version_min = 9,
@@ -1569,6 +1584,8 @@ enum bcachefs_metadata_version {
        bcachefs_metadata_version_max
 };
 
+static const unsigned bcachefs_metadata_required_upgrade_below = bcachefs_metadata_version_snapshot_trees;
+
 #define bcachefs_metadata_version_current      (bcachefs_metadata_version_max - 1)
 
 #define BCH_SB_SECTOR                  8
@@ -2095,7 +2112,8 @@ LE32_BITMASK(JSET_NO_FLUSH,       struct jset, flags, 5, 6);
        x(freespace,            11)             \
        x(need_discard,         12)             \
        x(backpointers,         13)             \
-       x(bucket_gens,          14)
+       x(bucket_gens,          14)             \
+       x(snapshot_trees,       15)
 
 enum btree_id {
 #define x(kwd, val) BTREE_ID_##kwd = val,
index 47f0ab023d641c1ee6d711b75a8fd3d81ba1dd19..79f3fbe925d5290235e15c6e4a44d270ba4c277b 100644 (file)
@@ -204,6 +204,9 @@ static unsigned bch2_key_types_allowed[] = {
        [BKEY_TYPE_bucket_gens] =
                (1U << KEY_TYPE_deleted)|
                (1U << KEY_TYPE_bucket_gens),
+       [BKEY_TYPE_snapshot_trees] =
+               (1U << KEY_TYPE_deleted)|
+               (1U << KEY_TYPE_snapshot_tree),
        [BKEY_TYPE_btree] =
                (1U << KEY_TYPE_deleted)|
                (1U << KEY_TYPE_btree_ptr)|
index c73a5e78e26038d5982220dddf7d79c9fb60da1c..c8ac08e5548bcd798c7f0de98e4a3b787a5a10b6 100644 (file)
@@ -93,6 +93,7 @@
        x(ENOSPC,                       ENOSPC_sb_members)                      \
        x(ENOSPC,                       ENOSPC_sb_crypt)                        \
        x(ENOSPC,                       ENOSPC_btree_slot)                      \
+       x(ENOSPC,                       ENOSPC_snapshot_tree)                   \
        x(0,                            open_buckets_empty)                     \
        x(0,                            freelist_empty)                         \
        x(BCH_ERR_freelist_empty,       no_buckets_found)                       \
index 4b28fc4f77c60c0a552d63f5601e920c3559b4f3..eb3609aa45933afcca1d37b3e3554b15540ba99e 100644 (file)
@@ -2427,7 +2427,8 @@ int bch2_fsck_full(struct bch_fs *c)
 {
        int ret;
 again:
-       ret =   bch2_fs_check_snapshots(c) ?:
+       ret =   bch2_fs_check_snapshot_trees(c);
+               bch2_fs_check_snapshots(c) ?:
                bch2_fs_check_subvols(c) ?:
                bch2_delete_dead_snapshots(c) ?:
                check_inodes(c, true) ?:
index 6214691fa441657e37e6f51aff5936211c4ce71c..af76c029fb6a530cfb5589dd1a7dfc1213a9c382 100644 (file)
@@ -1025,16 +1025,25 @@ fsck_err:
 
 static int bch2_fs_initialize_subvolumes(struct bch_fs *c)
 {
-       struct bkey_i_snapshot  root_snapshot;
-       struct bkey_i_subvolume root_volume;
+       struct bkey_i_snapshot_tree     root_tree;
+       struct bkey_i_snapshot          root_snapshot;
+       struct bkey_i_subvolume         root_volume;
        int ret;
 
+       bkey_snapshot_tree_init(&root_tree.k_i);
+       root_tree.k.p.offset            = 1;
+       root_tree.v.master_subvol       = cpu_to_le32(1);
+       root_tree.v.root_snapshot       = cpu_to_le32(U32_MAX);
+       ret = bch2_btree_insert(c, BTREE_ID_snapshot_trees, &root_tree.k_i, NULL, NULL, 0);
+       if (ret)        /* the next insert reuses ret; don't let it mask this failure */
+               return ret;
+
        bkey_snapshot_init(&root_snapshot.k_i);
        root_snapshot.k.p.offset = U32_MAX;
        root_snapshot.v.flags   = 0;
        root_snapshot.v.parent  = 0;
        root_snapshot.v.subvol  = BCACHEFS_ROOT_SUBVOL;
-       root_snapshot.v.pad     = 0;
+       root_snapshot.v.tree    = cpu_to_le32(1);
        SET_BCH_SNAPSHOT_SUBVOL(&root_snapshot.v, true);
 
        ret = bch2_btree_insert(c, BTREE_ID_snapshots,
@@ -1135,8 +1144,12 @@ int bch2_fs_recovery(struct bch_fs *c)
        }
 
        if (!c->opts.nochanges) {
-               if (c->sb.version < bcachefs_metadata_version_no_bps_in_alloc_keys) {
-                       bch_info(c, "version prior to no_bps_in_alloc_keys, upgrade and fsck required");
+               if (c->sb.version < bcachefs_metadata_required_upgrade_below) {
+                       bch_info(c, "version %s (%u) prior to %s (%u), upgrade and fsck required",
+                                bch2_metadata_versions[c->sb.version],
+                                c->sb.version,
+                                bch2_metadata_versions[bcachefs_metadata_required_upgrade_below],
+                                bcachefs_metadata_required_upgrade_below);
                        c->opts.version_upgrade = true;
                        c->opts.fsck            = true;
                        c->opts.fix_errors      = FSCK_OPT_YES;
index 8d87f90a0ac699f9650ad6fd34345c06395b6d33..b14da196e7fd8d382992e15c698a01d11560d633 100644 (file)
 
 /* Snapshot tree: */
 
+void bch2_snapshot_tree_to_text(struct printbuf *out, struct bch_fs *c,
+                               struct bkey_s_c k)
+{      /* Format a snapshot_tree key's value into @out; @c is not used here */
+       struct bkey_s_c_snapshot_tree t = bkey_s_c_to_snapshot_tree(k);
+
+       prt_printf(out, "subvol %u root snapshot %u",
+                  le32_to_cpu(t.v->master_subvol),     /* fields are stored as __le32 */
+                  le32_to_cpu(t.v->root_snapshot));
+}
+
+int bch2_snapshot_tree_invalid(const struct bch_fs *c, struct bkey_s_c k,
+                              unsigned flags, struct printbuf *err)
+{      /* Returns 0 if the key position is acceptable, else -BCH_ERR_invalid_bkey */
+       if (bkey_gt(k.k->p, POS(0, U32_MAX)) ||
+           bkey_lt(k.k->p, POS(0, 1))) {       /* accepted: POS(0, 1) .. POS(0, U32_MAX) */
+               prt_printf(err, "bad pos");
+               return -BCH_ERR_invalid_bkey;
+       }
+
+       return 0;
+}
+
+static int snapshot_tree_lookup(struct btree_trans *trans, u32 id,
+                               struct bch_snapshot_tree *s)
+{      /* Look up the snapshot_trees key at POS(0, @id); callers read @s only when this returns 0 */
+       return bch2_bkey_get_val_typed(trans, BTREE_ID_snapshot_trees, POS(0, id),
+                                      BTREE_ITER_WITH_UPDATES, snapshot_tree, s);
+}
+
+static struct bkey_i_snapshot_tree *
+__snapshot_tree_create(struct btree_trans *trans)
+{      /* Allocate a snapshot_tree key at the slot chosen by bch2_bkey_get_empty_slot(); returns it or an ERR_PTR */
+       struct btree_iter iter;
+       int ret = bch2_bkey_get_empty_slot(trans, &iter,
+                       BTREE_ID_snapshot_trees, POS(0, U32_MAX));
+       struct bkey_i_snapshot_tree *s_t;
+
+       if (ret == -BCH_ERR_ENOSPC_btree_slot)
+               ret = -BCH_ERR_ENOSPC_snapshot_tree;    /* report the snapshot_tree-specific code */
+       if (ret)
+               return ERR_PTR(ret);
+
+       s_t = bch2_bkey_alloc(trans, &iter, 0, snapshot_tree);
+       ret = PTR_ERR_OR_ZERO(s_t);
+       bch2_trans_iter_exit(trans, &iter);     /* iterator is released on success and failure alike */
+       return ret ? ERR_PTR(ret) : s_t;
+}
+
+static int snapshot_tree_create(struct btree_trans *trans,
+                               u32 root_id, u32 subvol_id, u32 *tree_id)
+{      /* Create a tree entry for root snapshot @root_id with master subvolume @subvol_id */
+       struct bkey_i_snapshot_tree *n_tree =
+               __snapshot_tree_create(trans);
+
+       if (IS_ERR(n_tree))
+               return PTR_ERR(n_tree);
+
+       n_tree->v.master_subvol = cpu_to_le32(subvol_id);
+       n_tree->v.root_snapshot = cpu_to_le32(root_id);
+       *tree_id = n_tree->k.p.offset;  /* the new tree's ID is the key's offset */
+       return 0;
+}
+
+/* Snapshot nodes: */
+
 void bch2_snapshot_to_text(struct printbuf *out, struct bch_fs *c,
                           struct bkey_s_c k)
 {
@@ -90,11 +155,13 @@ int bch2_mark_snapshot(struct btree_trans *trans,
                t->children[0]  = le32_to_cpu(s.v->children[0]);
                t->children[1]  = le32_to_cpu(s.v->children[1]);
                t->subvol       = BCH_SNAPSHOT_SUBVOL(s.v) ? le32_to_cpu(s.v->subvol) : 0;
+               t->tree         = le32_to_cpu(s.v->tree);
        } else {
                t->parent       = 0;
                t->children[0]  = 0;
                t->children[1]  = 0;
                t->subvol       = 0;
+               t->tree         = 0;
        }
 
        return 0;
@@ -116,7 +183,7 @@ static int snapshot_live(struct btree_trans *trans, u32 id)
                return 0;
 
        ret = snapshot_lookup(trans, id, &v);
-       if (ret == -ENOENT)
+       if (bch2_err_matches(ret, ENOENT))
                bch_err(trans->c, "snapshot node %u not found", id);
        if (ret)
                return ret;
@@ -157,6 +224,274 @@ static int bch2_snapshot_set_equiv(struct btree_trans *trans, struct bkey_s_c k)
 }
 
 /* fsck: */
+
+static u32 bch2_snapshot_child(struct bch_fs *c, u32 id, unsigned child)
+{      /* @child indexes children[]; the wrappers in this file pass 0 or 1 */
+       return snapshot_t(c, id)->children[child];
+}
+
+static u32 bch2_snapshot_left_child(struct bch_fs *c, u32 id)
+{      /* "left" is children[0] */
+       return bch2_snapshot_child(c, id, 0);
+}
+
+static u32 bch2_snapshot_right_child(struct bch_fs *c, u32 id)
+{      /* "right" is children[1] */
+       return bch2_snapshot_child(c, id, 1);
+}
+
+static u32 bch2_snapshot_tree_next(struct bch_fs *c, u32 id)
+{      /* Next node in a depth-first walk from @id, or 0 when there is none */
+       u32 n, parent;
+
+       n = bch2_snapshot_left_child(c, id);
+       if (n)
+               return n;       /* descend to the left child first */
+
+       while ((parent = bch2_snapshot_parent(c, id))) {
+               n = bch2_snapshot_right_child(c, parent);
+               if (n && n != id)
+                       return n;       /* right sibling of the subtree we just left */
+               id = parent;            /* came up from the right child: keep climbing */
+       }
+
+       return 0;
+}
+
+static u32 bch2_snapshot_tree_oldest_subvol(struct bch_fs *c, u32 snapshot_root)
+{      /* Smallest nonzero subvol ID among the nodes walked from @snapshot_root; 0 if none */
+       u32 id = snapshot_root;
+       u32 subvol = 0, s;
+
+       while (id) {
+               s = snapshot_t(c, id)->subvol;
+
+               if (s && (!subvol || s < subvol))       /* presumably lowest ID == oldest; TODO confirm */
+                       subvol = s;
+
+               id = bch2_snapshot_tree_next(c, id);
+       }
+
+       return subvol;
+}
+
+static int bch2_snapshot_tree_master_subvol(struct btree_trans *trans,
+                                           u32 snapshot_root, u32 *subvol_id)
+{      /* Pick the master subvolume for the tree rooted at @snapshot_root; ID is returned in @subvol_id */
+       struct bch_fs *c = trans->c;
+       struct btree_iter iter;
+       struct bkey_s_c k;
+       struct bkey_s_c_subvolume s;
+       int ret;
+
+       for_each_btree_key_norestart(trans, iter, BTREE_ID_subvolumes, POS_MIN,
+                                    0, k, ret) {
+               if (k.k->type != KEY_TYPE_subvolume)
+                       continue;
+
+               s = bkey_s_c_to_subvolume(k);
+               if (!bch2_snapshot_is_ancestor(c, le32_to_cpu(s.v->snapshot), snapshot_root))
+                       continue;       /* subvolume belongs to a different snapshot tree */
+               if (!BCH_SUBVOLUME_SNAP(s.v)) {
+                       *subvol_id = s.k->p.offset;     /* first non-snapshot subvolume wins */
+                       goto found;
+               }
+       }
+       ret = ret ?: -ENOENT;
+found:
+       bch2_trans_iter_exit(trans, &iter);
+
+       if (bch2_err_matches(ret, ENOENT)) {    /* none found: promote the oldest subvolume */
+               struct bkey_i_subvolume *u;     /* named u so it does not shadow the outer s */
+
+               *subvol_id = bch2_snapshot_tree_oldest_subvol(c, snapshot_root);
+
+               u = bch2_bkey_get_mut_typed(trans, &iter,
+                                           BTREE_ID_subvolumes, POS(0, *subvol_id),
+                                           0, subvolume);
+               ret = PTR_ERR_OR_ZERO(u);
+               if (ret)
+                       return ret;
+               SET_BCH_SUBVOLUME_SNAP(&u->v, false);
+               bch2_trans_iter_exit(trans, &iter);     /* release the iterator get_mut opened */
+       }
+
+       return ret;
+}
+
+static int check_snapshot_tree(struct btree_trans *trans,
+                              struct btree_iter *iter,
+                              struct bkey_s_c k)
+{      /* fsck one snapshot_trees key: delete it if its root snapshot is bad, else repair master_subvol */
+       struct bch_fs *c = trans->c;
+       struct bkey_s_c_snapshot_tree st;
+       struct bch_snapshot s;
+       struct bch_subvolume subvol;
+       struct printbuf buf = PRINTBUF;
+       u32 root_id;
+       int ret;
+
+       if (k.k->type != KEY_TYPE_snapshot_tree)
+               return 0;
+
+       st = bkey_s_c_to_snapshot_tree(k);
+       root_id = le32_to_cpu(st.v->root_snapshot);
+
+       ret = snapshot_lookup(trans, root_id, &s);
+       if (ret && !bch2_err_matches(ret, ENOENT))
+               goto err;
+
+       if (fsck_err_on(ret ||  /* nonzero ret is ENOENT here; s is read only when ret == 0 */
+                       root_id != bch2_snapshot_root(c, root_id) ||
+                       st.k->p.offset != le32_to_cpu(s.tree),
+                       c,
+                       "snapshot tree points to missing/incorrect snapshot:\n  %s",
+                       (bch2_bkey_val_to_text(&buf, c, st.s_c), buf.buf))) {
+               ret = bch2_btree_delete_at(trans, iter, 0);
+               goto err;
+       }
+
+       ret = bch2_subvolume_get(trans, le32_to_cpu(st.v->master_subvol),
+                                false, 0, &subvol);
+       if (ret && !bch2_err_matches(ret, ENOENT))
+               goto err;
+
+       if (fsck_err_on(ret, c, /* the || chain keeps subvol from being read after ENOENT */
+                       "snapshot tree points to missing subvolume:\n  %s",
+                       (printbuf_reset(&buf),
+                        bch2_bkey_val_to_text(&buf, c, st.s_c), buf.buf)) ||
+           fsck_err_on(!bch2_snapshot_is_ancestor(c,
+                                                  le32_to_cpu(subvol.snapshot),
+                                                  root_id), c,
+                       "snapshot tree points to subvolume that does not point to snapshot in this tree:\n  %s",
+                       (printbuf_reset(&buf),
+                        bch2_bkey_val_to_text(&buf, c, st.s_c), buf.buf)) ||
+           fsck_err_on(BCH_SUBVOLUME_SNAP(&subvol), c,
+                       "snapshot tree points to snapshot subvolume:\n  %s",
+                       (printbuf_reset(&buf),
+                        bch2_bkey_val_to_text(&buf, c, st.s_c), buf.buf))) {
+               struct bkey_i_snapshot_tree *u;
+               u32 subvol_id;
+
+               ret = bch2_snapshot_tree_master_subvol(trans, root_id, &subvol_id);
+               if (ret)
+                       goto err;
+
+               u = bch2_bkey_make_mut_typed(trans, iter, k, 0, snapshot_tree);
+               ret = PTR_ERR_OR_ZERO(u);
+               if (ret)
+                       goto err;
+
+               u->v.master_subvol = cpu_to_le32(subvol_id);
+               st = snapshot_tree_i_to_s_c(u);
+       }
+err:
+fsck_err:
+       printbuf_exit(&buf);    /* buf is released on every exit path after its initialization */
+       return ret;
+}
+
+/*
+ * For each snapshot_tree, make sure it points to the root of a snapshot tree
+ * and that snapshot entry points back to it, or delete it.
+ *
+ * And, make sure it points to a non-snapshot subvolume within that snapshot
+ * tree, or repoint it: to such a subvolume if one exists, else to the oldest.
+ */
+int bch2_fs_check_snapshot_trees(struct bch_fs *c)
+{
+       struct btree_iter iter;
+       struct bkey_s_c k;
+       int ret;
+
+       ret = bch2_trans_run(c, /* &trans below presumably comes from bch2_trans_run(); no local is declared */
+               for_each_btree_key_commit(&trans, iter,
+                       BTREE_ID_snapshot_trees, POS_MIN,
+                       BTREE_ITER_PREFETCH, k,
+                       NULL, NULL, BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL,
+               check_snapshot_tree(&trans, &iter, k)));
+
+       if (ret)        /* same message format as the other fsck passes in this file */
+               bch_err(c, "%s: error %s", __func__, bch2_err_str(ret));
+       return ret;
+}
+
+/*
+ * Look up snapshot tree @tree_id; returns nonzero if @snap_id descends from
+ * its root, 0 if the tree is missing or unrelated, or a negative error code:
+ */
+static int snapshot_tree_ptr_good(struct btree_trans *trans,
+                                 u32 snap_id, u32 tree_id)
+{
+       struct bch_snapshot_tree s_t;
+       int ret = snapshot_tree_lookup(trans, tree_id, &s_t);
+
+       if (bch2_err_matches(ret, ENOENT))
+               return 0;       /* missing tree counts as a bad pointer, not an error */
+       if (ret)
+               return ret;
+
+       return bch2_snapshot_is_ancestor(trans->c, snap_id, le32_to_cpu(s_t.root_snapshot));
+}
+
+/*
+ * snapshot_tree pointer was incorrect: look up root snapshot node, make sure
+ * its snapshot_tree pointer is correct (allocate new one if necessary), then
+ * update this node's pointer to root node's pointer:
+ */
+static int snapshot_tree_ptr_repair(struct btree_trans *trans,
+                                   struct btree_iter *iter,
+                                   struct bkey_s_c_snapshot *s)
+{      /* On success *s refers to the updated key for the node being checked */
+       struct bch_fs *c = trans->c;
+       struct btree_iter root_iter;
+       struct bch_snapshot_tree s_t;
+       struct bkey_s_c_snapshot root;
+       struct bkey_i_snapshot *u;
+       u32 root_id = bch2_snapshot_root(c, s->k->p.offset), tree_id;
+       int ret;
+
+       root = bch2_bkey_get_iter_typed(trans, &root_iter,
+                              BTREE_ID_snapshots, POS(0, root_id),
+                              BTREE_ITER_WITH_UPDATES, snapshot);
+       ret = bkey_err(root);
+       if (ret)
+               goto err;
+
+       tree_id = le32_to_cpu(root.v->tree);
+
+       ret = snapshot_tree_lookup(trans, tree_id, &s_t);
+       if (ret && !bch2_err_matches(ret, ENOENT))
+               goto err;       /* must still release root_iter */
+
+       if (ret || le32_to_cpu(s_t.root_snapshot) != root_id) {
+               u = bch2_bkey_make_mut_typed(trans, &root_iter, root.s_c, 0, snapshot);
+               ret =   PTR_ERR_OR_ZERO(u) ?:
+                       snapshot_tree_create(trans, root_id,
+                               bch2_snapshot_tree_oldest_subvol(c, root_id),
+                               &tree_id);
+               if (ret)
+                       goto err;
+
+               u->v.tree = cpu_to_le32(tree_id);
+               if (s->k->p.offset == root_id)  /* snapshot keys are at POS(0, id): the ID is p.offset */
+                       *s = snapshot_i_to_s_c(u);
+       }
+
+       if (s->k->p.offset != root_id) {        /* the root itself was already updated above */
+               u = bch2_bkey_make_mut_typed(trans, iter, s->s_c, 0, snapshot);
+               ret = PTR_ERR_OR_ZERO(u);
+               if (ret)
+                       goto err;
+
+               u->v.tree = cpu_to_le32(tree_id);
+               *s = snapshot_i_to_s_c(u);
+       }
+err:
+       bch2_trans_iter_exit(trans, &root_iter);
+       return ret;
+}
+
 static int check_snapshot(struct btree_trans *trans,
                          struct btree_iter *iter,
                          struct bkey_s_c k)
@@ -177,7 +512,7 @@ static int check_snapshot(struct btree_trans *trans,
        id = le32_to_cpu(s.v->parent);
        if (id) {
                ret = snapshot_lookup(trans, id, &v);
-               if (ret == -ENOENT)
+               if (bch2_err_matches(ret, ENOENT))
                        bch_err(c, "snapshot with nonexistent parent:\n  %s",
                                (bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf));
                if (ret)
@@ -196,7 +531,7 @@ static int check_snapshot(struct btree_trans *trans,
                id = le32_to_cpu(s.v->children[i]);
 
                ret = snapshot_lookup(trans, id, &v);
-               if (ret == -ENOENT)
+               if (bch2_err_matches(ret, ENOENT))
                        bch_err(c, "snapshot node %llu has nonexistent child %u",
                                s.k->p.offset, id);
                if (ret)
@@ -216,7 +551,7 @@ static int check_snapshot(struct btree_trans *trans,
        if (should_have_subvol) {
                id = le32_to_cpu(s.v->subvol);
                ret = bch2_subvolume_get(trans, id, 0, false, &subvol);
-               if (ret == -ENOENT)
+               if (bch2_err_matches(ret, ENOENT))
                        bch_err(c, "snapshot points to nonexistent subvolume:\n  %s",
                                (bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf));
                if (ret)
@@ -242,9 +577,23 @@ static int check_snapshot(struct btree_trans *trans,
                        ret = bch2_trans_update(trans, iter, &u->k_i, 0);
                        if (ret)
                                goto err;
+
+                       s = snapshot_i_to_s_c(u);
                }
        }
 
+       ret = snapshot_tree_ptr_good(trans, s.k->p.offset, le32_to_cpu(s.v->tree));
+       if (ret < 0)
+               goto err;
+
+       if (fsck_err_on(!ret, c, "snapshot points to missing/incorrect tree:\n  %s",
+                       (bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf))) {
+               ret = snapshot_tree_ptr_repair(trans, iter, &s);
+               if (ret)
+                       goto err;
+       }
+       ret = 0;
+
        if (BCH_SNAPSHOT_DELETED(s.v))
                set_bit(BCH_FS_HAVE_DELETED_SNAPSHOTS, &c->flags);
 err:
@@ -255,23 +604,18 @@ fsck_err:
 
 int bch2_fs_check_snapshots(struct bch_fs *c)
 {
-       struct btree_trans trans;
        struct btree_iter iter;
        struct bkey_s_c k;
        int ret;
 
-       bch2_trans_init(&trans, c, 0, 0);
-
-       ret = for_each_btree_key_commit(&trans, iter,
+       ret = bch2_trans_run(c, /* replaces the explicit bch2_trans_init()/bch2_trans_exit() pair */
+               for_each_btree_key_commit(&trans, iter,
                        BTREE_ID_snapshots, POS_MIN,
                        BTREE_ITER_PREFETCH, k,
                        NULL, NULL, BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL,
-               check_snapshot(&trans, &iter, k));
-
+               check_snapshot(&trans, &iter, k)));
        if (ret)
-               bch_err(c, "error %i checking snapshots", ret);
-
-       bch2_trans_exit(&trans);
+               bch_err(c, "%s: error %s", __func__, bch2_err_str(ret));
        return ret;
 }
 
@@ -279,10 +623,11 @@ static int check_subvol(struct btree_trans *trans,
                        struct btree_iter *iter,
                        struct bkey_s_c k)
 {
+       struct bch_fs *c = trans->c;
        struct bkey_s_c_subvolume subvol;
        struct bch_snapshot snapshot;
        unsigned snapid;
-       int ret;
+       int ret = 0;
 
        if (k.k->type != KEY_TYPE_subvolume)
                return 0;
@@ -291,8 +636,8 @@ static int check_subvol(struct btree_trans *trans,
        snapid = le32_to_cpu(subvol.v->snapshot);
        ret = snapshot_lookup(trans, snapid, &snapshot);
 
-       if (ret == -ENOENT)
-               bch_err(trans->c, "subvolume %llu points to nonexistent snapshot %u",
+       if (bch2_err_matches(ret, ENOENT))
+               bch_err(c, "subvolume %llu points to nonexistent snapshot %u",
                        k.k->p.offset, snapid);
        if (ret)
                return ret;
@@ -300,30 +645,55 @@ static int check_subvol(struct btree_trans *trans,
        if (BCH_SUBVOLUME_UNLINKED(subvol.v)) {
                ret = bch2_subvolume_delete(trans, iter->pos.offset);
                if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart))
-                       bch_err(trans->c, "error deleting subvolume %llu: %s",
+                       bch_err(c, "error deleting subvolume %llu: %s",
                                iter->pos.offset, bch2_err_str(ret));
                if (ret)
                        return ret;
        }
 
-       return 0;
+       if (!BCH_SUBVOLUME_SNAP(subvol.v)) {
+               u32 snapshot_root = bch2_snapshot_root(c, le32_to_cpu(subvol.v->snapshot));
+               u32 snapshot_tree = snapshot_t(c, snapshot_root)->tree;
+               struct bch_snapshot_tree st;
+
+               ret = snapshot_tree_lookup(trans, snapshot_tree, &st);
+
+               bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), c,
+                               "%s: snapshot tree %u not found", __func__, snapshot_tree);
+
+               if (ret)
+                       return ret;
+
+               if (fsck_err_on(le32_to_cpu(st.master_subvol) != subvol.k->p.offset, c,
+                               "subvolume %llu is not set as snapshot but is not master subvolume",
+                               k.k->p.offset)) {
+                       struct bkey_i_subvolume *s =
+                               bch2_bkey_make_mut_typed(trans, iter, subvol.s_c, 0, subvolume);
+                       ret = PTR_ERR_OR_ZERO(s);
+                       if (ret)
+                               return ret;
+
+                       SET_BCH_SUBVOLUME_SNAP(&s->v, true);
+               }
+       }
+
+fsck_err:
+       return ret;
 }
 
 int bch2_fs_check_subvols(struct bch_fs *c)
 {
-       struct btree_trans trans;
        struct btree_iter iter;
        struct bkey_s_c k;
        int ret;
 
-       bch2_trans_init(&trans, c, 0, 0);
-
-       ret = for_each_btree_key_commit(&trans, iter,
+       ret = bch2_trans_run(c, /* replaces the explicit bch2_trans_init()/bch2_trans_exit() pair */
+               for_each_btree_key_commit(&trans, iter,
                        BTREE_ID_subvolumes, POS_MIN, BTREE_ITER_PREFETCH, k,
                        NULL, NULL, BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL,
-               check_subvol(&trans, &iter, k));
-
-       bch2_trans_exit(&trans);
+               check_subvol(&trans, &iter, k)));
+       if (ret)        /* this pass previously returned errors without logging them */
+               bch_err(c, "%s: error %s", __func__, bch2_err_str(ret));
 
        return ret;
 }
@@ -335,20 +705,15 @@ void bch2_fs_snapshots_exit(struct bch_fs *c)
 
 int bch2_fs_snapshots_start(struct bch_fs *c)
 {
-       struct btree_trans trans;
        struct btree_iter iter;
        struct bkey_s_c k;
        int ret = 0;
 
-       bch2_trans_init(&trans, c, 0, 0);
-
-       for_each_btree_key2(&trans, iter, BTREE_ID_snapshots,
+       ret = bch2_trans_run(c,
+               for_each_btree_key2(&trans, iter, BTREE_ID_snapshots,
                           POS_MIN, 0, k,
-               bch2_mark_snapshot(&trans, BTREE_ID_snapshots, 0, bkey_s_c_null, k, 0) ?:
-               bch2_snapshot_set_equiv(&trans, k));
-
-       bch2_trans_exit(&trans);
-
+                       bch2_mark_snapshot(&trans, BTREE_ID_snapshots, 0, bkey_s_c_null, k, 0) ?:
+                       bch2_snapshot_set_equiv(&trans, k)));
        if (ret)
                bch_err(c, "error starting snapshots: %s", bch2_err_str(ret));
        return ret;
@@ -368,7 +733,8 @@ static int bch2_snapshot_node_set_deleted(struct btree_trans *trans, u32 id)
                                    0, snapshot);
        ret = PTR_ERR_OR_ZERO(s);
        if (unlikely(ret)) {
-               bch2_fs_inconsistent_on(ret == -ENOENT, trans->c, "missing snapshot %u", id);
+               bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT),
+                                       trans->c, "missing snapshot %u", id);
                return ret;
        }
 
@@ -388,6 +754,7 @@ static int bch2_snapshot_node_delete(struct btree_trans *trans, u32 id)
 {
        struct bch_fs *c = trans->c;
        struct btree_iter iter, p_iter = (struct btree_iter) { NULL };
+       struct btree_iter tree_iter = (struct btree_iter) { NULL };
        struct bkey_s_c_snapshot s;
        u32 parent_id;
        unsigned i;
@@ -396,7 +763,8 @@ static int bch2_snapshot_node_delete(struct btree_trans *trans, u32 id)
        s = bch2_bkey_get_iter_typed(trans, &iter, BTREE_ID_snapshots, POS(0, id),
                                     BTREE_ITER_INTENT, snapshot);
        ret = bkey_err(s);
-       bch2_fs_inconsistent_on(ret == -ENOENT, c, "missing snapshot %u", id);
+       bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), c,
+                               "missing snapshot %u", id);
 
        if (ret)
                goto err;
@@ -412,7 +780,8 @@ static int bch2_snapshot_node_delete(struct btree_trans *trans, u32 id)
                                     0, snapshot);
                ret = PTR_ERR_OR_ZERO(parent);
                if (unlikely(ret)) {
-                       bch2_fs_inconsistent_on(ret == -ENOENT, c, "missing snapshot %u", parent_id);
+                       bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), c,
+                                               "missing snapshot %u", parent_id);
                        goto err;
                }
 
@@ -430,25 +799,49 @@ static int bch2_snapshot_node_delete(struct btree_trans *trans, u32 id)
                    le32_to_cpu(parent->v.children[1]))
                        swap(parent->v.children[0],
                             parent->v.children[1]);
+       } else {
+               /*
+                * We're deleting the root of a snapshot tree: update the
+                * snapshot_tree entry to point to the new root, or delete it if
+                * this is the last snapshot ID in this tree:
+                */
+               struct bkey_i_snapshot_tree *s_t;
+
+               BUG_ON(s.v->children[1]);
+
+               s_t = bch2_bkey_get_mut_typed(trans, &tree_iter,
+                               BTREE_ID_snapshot_trees, POS(0, le32_to_cpu(s.v->tree)),
+                               0, snapshot_tree);
+               ret = PTR_ERR_OR_ZERO(s_t);
+               if (ret)
+                       goto err;
+
+               if (s.v->children[0]) {
+                       s_t->v.root_snapshot = s.v->children[0];        /* both __le32: copy, no byte swap */
+               } else {
+                       s_t->k.type = KEY_TYPE_deleted;
+                       set_bkey_val_u64s(&s_t->k, 0);
+               }
        }
 
        ret = bch2_btree_delete_at(trans, &iter, 0);
 err:
+       bch2_trans_iter_exit(trans, &tree_iter);
        bch2_trans_iter_exit(trans, &p_iter);
        bch2_trans_iter_exit(trans, &iter);
        return ret;
 }
 
-int bch2_snapshot_node_create(struct btree_trans *trans, u32 parent,
-                             u32 *new_snapids,
-                             u32 *snapshot_subvols,
-                             unsigned nr_snapids)
+static int create_snapids(struct btree_trans *trans, u32 parent, u32 tree,
+                         u32 *new_snapids,
+                         u32 *snapshot_subvols,
+                         unsigned nr_snapids)
 {
-       struct btree_iter iter, parent_iter = { NULL };
+       struct btree_iter iter;
        struct bkey_i_snapshot *n;
        struct bkey_s_c k;
        unsigned i;
-       int ret = 0;
+       int ret;
 
        bch2_trans_iter_init(trans, &iter, BTREE_ID_snapshots,
                             POS_MIN, BTREE_ITER_INTENT);
@@ -476,7 +869,7 @@ int bch2_snapshot_node_create(struct btree_trans *trans, u32 parent,
                n->v.flags      = 0;
                n->v.parent     = cpu_to_le32(parent);
                n->v.subvol     = cpu_to_le32(snapshot_subvols[i]);
-               n->v.pad        = 0;
+               n->v.tree       = cpu_to_le32(tree);
                SET_BCH_SNAPSHOT_SUBVOL(&n->v, true);
 
                ret = bch2_mark_snapshot(trans, BTREE_ID_snapshots, 0,
@@ -486,38 +879,92 @@ int bch2_snapshot_node_create(struct btree_trans *trans, u32 parent,
 
                new_snapids[i]  = iter.pos.offset;
        }
+err:
+       bch2_trans_iter_exit(trans, &iter);
+       return ret;
+}
 
-       if (parent) {
-               n = bch2_bkey_get_mut_typed(trans, &parent_iter,
-                               BTREE_ID_snapshots, POS(0, parent),
-                               0, snapshot);
-               ret = PTR_ERR_OR_ZERO(n);
-               if (unlikely(ret)) {
-                       if (ret == -ENOENT)
-                               bch_err(trans->c, "snapshot %u not found", parent);
-                       goto err;
-               }
+/*
+ * Create new snapshot IDs as children of an existing snapshot ID:
+ *
+ * Fetches the snapshot key for @parent for update, fails with -EINVAL if it
+ * already has a child, creates the new IDs via create_snapids() using the
+ * parent's tree ID, then records new_snapids[0] and new_snapids[1] as the
+ * parent's children and clears the parent's subvol field and SUBVOL flag.
+ *
+ * Both child slots are always written, so @nr_snapids is expected to be 2;
+ * bch2_snapshot_node_create() asserts this before calling.
+ *
+ * Returns 0 on success, or the error from the lookup or from create_snapids().
+ *
+ * NOTE(review): the early return after a failed lookup skips
+ * bch2_trans_iter_exit(); presumably bch2_bkey_get_mut_typed() leaves no live
+ * iterator on failure - confirm. Likewise there is no explicit
+ * bch2_trans_update() after n_parent is modified (the removed code had one);
+ * presumably the get_mut helper already queues the update - confirm.
+ */
+static int bch2_snapshot_node_create_children(struct btree_trans *trans, u32 parent,
+                             u32 *new_snapids,
+                             u32 *snapshot_subvols,
+                             unsigned nr_snapids)
+{
+       struct btree_iter iter;
+       struct bkey_i_snapshot *n_parent;
+       int ret = 0;
 
-               if (n->v.children[0] || n->v.children[1]) {
-                       bch_err(trans->c, "Trying to add child snapshot nodes to parent that already has children");
-                       ret = -EINVAL;
-                       goto err;
-               }
+       n_parent = bch2_bkey_get_mut_typed(trans, &iter,
+                       BTREE_ID_snapshots, POS(0, parent),
+                       0, snapshot);
+       ret = PTR_ERR_OR_ZERO(n_parent);
+       if (unlikely(ret)) {
+               if (bch2_err_matches(ret, ENOENT))
+                       bch_err(trans->c, "snapshot %u not found", parent);
+               return ret;
+       }
 
-               n->v.children[0] = cpu_to_le32(new_snapids[0]);
-               n->v.children[1] = cpu_to_le32(new_snapids[1]);
-               n->v.subvol = 0;
-               SET_BCH_SNAPSHOT_SUBVOL(&n->v, false);
-               ret   = bch2_trans_update(trans, &parent_iter, &n->k_i, 0);
-               if (ret)
-                       goto err;
+       if (n_parent->v.children[0] || n_parent->v.children[1]) {
+               bch_err(trans->c, "Trying to add child snapshot nodes to parent that already has children");
+               ret = -EINVAL;
+               goto err;
        }
+
+       ret = create_snapids(trans, parent, le32_to_cpu(n_parent->v.tree), /* children inherit the parent's tree ID */
+                            new_snapids, snapshot_subvols, nr_snapids);
+       if (ret)
+               goto err;
+
+       n_parent->v.children[0] = cpu_to_le32(new_snapids[0]);
+       n_parent->v.children[1] = cpu_to_le32(new_snapids[1]);
+       n_parent->v.subvol = 0;
+       SET_BCH_SNAPSHOT_SUBVOL(&n_parent->v, false);
 err:
-       bch2_trans_iter_exit(trans, &parent_iter);
        bch2_trans_iter_exit(trans, &iter);
        return ret;
 }
 
+/*
+ * Create a snapshot node that is the root of a new tree:
+ *
+ * Obtains a new snapshot_tree key from __snapshot_tree_create(), then creates
+ * the snapshot IDs with parent 0 and the new key's offset as their tree ID.
+ * On success the tree key is pointed at snapshot_subvols[0] (master_subvol)
+ * and new_snapids[0] (root_snapshot).
+ *
+ * Only index 0 of each array is recorded in the tree key;
+ * bch2_snapshot_node_create() asserts nr_snapids == 1 on this path.
+ *
+ * Returns 0 on success, or the error from either step.
+ */
+static int bch2_snapshot_node_create_tree(struct btree_trans *trans,
+                             u32 *new_snapids,
+                             u32 *snapshot_subvols,
+                             unsigned nr_snapids)
+{
+       struct bkey_i_snapshot_tree *n_tree;
+       int ret;
+
+       n_tree = __snapshot_tree_create(trans);
+       ret =   PTR_ERR_OR_ZERO(n_tree) ?: /* right-hand side runs only when n_tree is not an error pointer */
+               create_snapids(trans, 0, n_tree->k.p.offset,
+                            new_snapids, snapshot_subvols, nr_snapids);
+       if (ret)
+               return ret;
+
+       n_tree->v.master_subvol = cpu_to_le32(snapshot_subvols[0]);
+       n_tree->v.root_snapshot = cpu_to_le32(new_snapids[0]);
+       return 0;
+}
+
+int bch2_snapshot_node_create(struct btree_trans *trans, u32 parent,
+                             u32 *new_snapids,
+                             u32 *snapshot_subvols,
+                             unsigned nr_snapids)
+{
+       BUG_ON((parent == 0) != (nr_snapids == 1)); /* parent == 0 (new tree) iff exactly one ID is requested */
+       BUG_ON((parent != 0) != (nr_snapids == 2)); /* parent != 0 (add children) iff exactly two IDs are requested */
+
+       return parent /* nonzero: add children under @parent; zero: start a new snapshot tree */
+               ? bch2_snapshot_node_create_children(trans, parent,
+                               new_snapids, snapshot_subvols, nr_snapids)
+               : bch2_snapshot_node_create_tree(trans,
+                               new_snapids, snapshot_subvols, nr_snapids);
+
+}
+
 static int snapshot_delete_key(struct btree_trans *trans,
                               struct btree_iter *iter,
                               struct bkey_s_c k,
@@ -731,7 +1178,8 @@ bch2_subvolume_get_inlined(struct btree_trans *trans, unsigned subvol,
 {
        int ret = bch2_bkey_get_val_typed(trans, BTREE_ID_subvolumes, POS(0, subvol),
                                          iter_flags, subvolume, s);
-       bch2_fs_inconsistent_on(ret == -ENOENT && inconsistent_if_not_found,
+       bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT) &&
+                               inconsistent_if_not_found,
                                trans->c, "missing subvolume %u", subvol);
        return ret;
 }
@@ -785,7 +1233,8 @@ int bch2_subvolume_delete(struct btree_trans *trans, u32 subvolid)
                                BTREE_ITER_CACHED|BTREE_ITER_INTENT,
                                subvolume);
        ret = bkey_err(subvol);
-       bch2_fs_inconsistent_on(ret == -ENOENT, trans->c, "missing subvolume %u", subvolid);
+       bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), trans->c,
+                               "missing subvolume %u", subvolid);
        if (ret)
                return ret;
 
@@ -894,7 +1343,8 @@ int bch2_subvolume_unlink(struct btree_trans *trans, u32 subvolid)
                        BTREE_ITER_CACHED, subvolume);
        ret = PTR_ERR_OR_ZERO(n);
        if (unlikely(ret)) {
-               bch2_fs_inconsistent_on(ret == -ENOENT, trans->c, "missing subvolume %u", subvolid);
+               bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), trans->c,
+                                       "missing subvolume %u", subvolid);
                return ret;
        }
 
index 1f6f7862e48f73fa654f12ba70b2796d609e8e6e..dcd9f5f95535884a207187a949b418fccb4335dd 100644 (file)
@@ -5,6 +5,16 @@
 #include "darray.h"
 #include "subvolume_types.h"
 
+void bch2_snapshot_tree_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
+int bch2_snapshot_tree_invalid(const struct bch_fs *, struct bkey_s_c,
+                              unsigned, struct printbuf *);
+
+#define bch2_bkey_ops_snapshot_tree ((struct bkey_ops) {       \
+       .key_invalid    = bch2_snapshot_tree_invalid,           \
+       .val_to_text    = bch2_snapshot_tree_to_text,           \
+       .min_val_size   = 8, /* NOTE(review): presumably sizeof(struct bch_snapshot_tree) - confirm */ \
+})
+
 void bch2_snapshot_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
 int bch2_snapshot_invalid(const struct bch_fs *, struct bkey_s_c,
                          unsigned, struct printbuf *);
@@ -28,6 +38,15 @@ static inline u32 bch2_snapshot_parent(struct bch_fs *c, u32 id)
        return snapshot_t(c, id)->parent;
 }
 
+static inline u32 bch2_snapshot_root(struct bch_fs *c, u32 id)
+{
+       u32 parent;     /* parent of the current @id; 0 ends the walk */
+
+       while ((parent = bch2_snapshot_parent(c, id))) /* climb one level per iteration */
+               id = parent;
+       return id;      /* the ancestor whose parent is 0 (possibly @id itself) */
+}
+
 static inline u32 bch2_snapshot_equiv(struct bch_fs *c, u32 id)
 {
        return snapshot_t(c, id)->equiv;
@@ -107,6 +126,7 @@ static inline int snapshot_list_add(struct bch_fs *c, snapshot_id_list *s, u32 i
        return ret;
 }
 
+int bch2_fs_check_snapshot_trees(struct bch_fs *);
 int bch2_fs_check_snapshots(struct bch_fs *);
 int bch2_fs_check_subvols(struct bch_fs *);
 
index aa49c45a35ab2d2ee853b3119f349bb189919a39..c6c1cbad97816c6cf0b8974b5e53d64cc2468c8f 100644 (file)
@@ -10,6 +10,7 @@ struct snapshot_t {
        u32                     parent;
        u32                     children[2];
        u32                     subvol; /* Nonzero only if a subvolume points to this node: */
+       u32                     tree;
        u32                     equiv;
 };