bcachefs: btree_ptr_v2
authorKent Overstreet <kent.overstreet@gmail.com>
Fri, 7 Feb 2020 18:38:02 +0000 (13:38 -0500)
committerKent Overstreet <kent.overstreet@linux.dev>
Sun, 22 Oct 2023 21:08:35 +0000 (17:08 -0400)
Add a new btree ptr type which contains the sequence number (random 64
bit cookie, actually) for that btree node - this lets us verify that
when we read in a btree node it really is the btree node we wanted.

Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
fs/bcachefs/bcachefs_format.h
fs/bcachefs/bkey.h
fs/bcachefs/btree_cache.h
fs/bcachefs/btree_io.c
fs/bcachefs/btree_update_interior.c
fs/bcachefs/buckets.c
fs/bcachefs/buckets.h
fs/bcachefs/extents.c
fs/bcachefs/extents.h
fs/bcachefs/recovery.c
fs/bcachefs/replicas.c

index dbc9c15514bdbdfdf13b786f8eba39fa82791cb9..575fb7143cc03206d4e5518a155d26d2b6bdaef1 100644 (file)
@@ -343,7 +343,8 @@ static inline void bkey_init(struct bkey *k)
        x(stripe,               14)                     \
        x(reflink_p,            15)                     \
        x(reflink_v,            16)                     \
-       x(inline_data,          17)
+       x(inline_data,          17)                     \
+       x(btree_ptr_v2,         18)
 
 enum bch_bkey_type {
 #define x(name, nr) KEY_TYPE_##name    = nr,
@@ -599,6 +600,19 @@ struct bch_btree_ptr {
        struct bch_extent_ptr   start[];
 } __attribute__((packed, aligned(8)));
 
+struct bch_btree_ptr_v2 {
+       struct bch_val          v;
+
+       __u64                   mem_ptr;
+       __le64                  seq;
+       __le16                  sectors_written;
+       /* In case we ever decide to do variable size btree nodes: */
+       __le16                  sectors;
+       struct bpos             min_key;
+       __u64                   _data[0];
+       struct bch_extent_ptr   start[];
+} __attribute__((packed, aligned(8)));
+
 struct bch_extent {
        struct bch_val          v;
 
@@ -630,7 +644,8 @@ struct bch_reservation {
 
 /* Btree pointers don't carry around checksums: */
 #define BKEY_BTREE_PTR_VAL_U64s_MAX                            \
-       ((sizeof(struct bch_extent_ptr)) / sizeof(u64) * BCH_REPLICAS_MAX)
+       ((sizeof(struct bch_btree_ptr_v2) +                     \
+         sizeof(struct bch_extent_ptr) * BCH_REPLICAS_MAX) / sizeof(u64))
 #define BKEY_BTREE_PTR_U64s_MAX                                        \
        (BKEY_U64s + BKEY_BTREE_PTR_VAL_U64s_MAX)
 
@@ -1299,7 +1314,8 @@ LE64_BITMASK(BCH_SB_ERASURE_CODE, struct bch_sb, flags[3],  0, 16);
        x(new_siphash,                  7)      \
        x(inline_data,                  8)      \
        x(new_extent_overwrite,         9)      \
-       x(incompressible,               10)
+       x(incompressible,               10)     \
+       x(btree_ptr_v2,                 11)
 
 enum bch_sb_feature {
 #define x(f, n) BCH_FEATURE_##f,
index 36e6ecc045146ed249096c0baf5f663d94d08160..aa729347e4487f94941c8df72ab5d4e1393fd701 100644 (file)
@@ -573,6 +573,7 @@ BKEY_VAL_ACCESSORS(stripe);
 BKEY_VAL_ACCESSORS(reflink_p);
 BKEY_VAL_ACCESSORS(reflink_v);
 BKEY_VAL_ACCESSORS(inline_data);
+BKEY_VAL_ACCESSORS(btree_ptr_v2);
 
 /* byte order helpers */
 
index 270f7f8fb1403e33d0db87261d73bdc2109a0810..6e7edcaf66751daca66e790dd802dc50f8200f28 100644 (file)
@@ -40,6 +40,8 @@ static inline u64 btree_ptr_hash_val(const struct bkey_i *k)
        switch (k->k.type) {
        case KEY_TYPE_btree_ptr:
                return *((u64 *) bkey_i_to_btree_ptr_c(k)->v.start);
+       case KEY_TYPE_btree_ptr_v2:
+               return bkey_i_to_btree_ptr_v2_c(k)->v.seq;
        default:
                return 0;
        }
index 9df8d4f785bfa5fcb5e0d7b0b8e80e078c530d0d..5fa31698ed67f6c1b62f2ca292cb50fcac4b313d 100644 (file)
@@ -734,6 +734,15 @@ static int validate_bset(struct bch_fs *c, struct btree *b,
                        bch2_bpos_swab(&b->data->max_key);
                }
 
+               if (b->key.k.type == KEY_TYPE_btree_ptr_v2) {
+                       struct bch_btree_ptr_v2 *bp =
+                               &bkey_i_to_btree_ptr_v2(&b->key)->v;
+
+                       btree_err_on(bkey_cmp(b->data->min_key, bp->min_key),
+                                    BTREE_ERR_MUST_RETRY, c, b, NULL,
+                                    "incorrect min_key");
+               }
+
                btree_err_on(bkey_cmp(b->data->max_key, b->key.k.p),
                             BTREE_ERR_MUST_RETRY, c, b, i,
                             "incorrect max key");
@@ -897,6 +906,15 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct btree *b, bool have_retry
                     BTREE_ERR_MUST_RETRY, c, b, NULL,
                     "bad btree header");
 
+       if (b->key.k.type == KEY_TYPE_btree_ptr_v2) {
+               struct bch_btree_ptr_v2 *bp =
+                       &bkey_i_to_btree_ptr_v2(&b->key)->v;
+
+               btree_err_on(b->data->keys.seq != bp->seq,
+                            BTREE_ERR_MUST_RETRY, c, b, NULL,
+                            "got wrong btree node");
+       }
+
        while (b->written < c->opts.btree_node_size) {
                unsigned sectors, whiteout_u64s = 0;
                struct nonce nonce;
@@ -1004,15 +1022,15 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct btree *b, bool have_retry
        i = &b->data->keys;
        for (k = i->start; k != vstruct_last(i);) {
                struct bkey tmp;
-               struct bkey_s_c u = bkey_disassemble(b, k, &tmp);
-               const char *invalid = bch2_bkey_val_invalid(c, u);
+               struct bkey_s u = __bkey_disassemble(b, k, &tmp);
+               const char *invalid = bch2_bkey_val_invalid(c, u.s_c);
 
                if (invalid ||
                    (inject_invalid_keys(c) &&
                     !bversion_cmp(u.k->version, MAX_VERSION))) {
                        char buf[160];
 
-                       bch2_bkey_val_to_text(&PBUF(buf), c, u);
+                       bch2_bkey_val_to_text(&PBUF(buf), c, u.s_c);
                        btree_err(BTREE_ERR_FIXABLE, c, b, i,
                                  "invalid bkey %s: %s", buf, invalid);
 
@@ -1025,6 +1043,12 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct btree *b, bool have_retry
                        continue;
                }
 
+               if (u.k->type == KEY_TYPE_btree_ptr_v2) {
+                       struct bkey_s_btree_ptr_v2 bp = bkey_s_to_btree_ptr_v2(u);
+
+                       bp.v->mem_ptr = 0;
+               }
+
                k = bkey_next_skip_noops(k, vstruct_last(i));
        }
 
index ff8cb37ed9a150dbaf69f8948e2f7bcbd5ec53a3..3d8b6218c983df9a528e75380b9df75a45fcd5e7 100644 (file)
@@ -332,7 +332,11 @@ retry:
                goto retry;
        }
 
-       bkey_btree_ptr_init(&tmp.k);
+       if (c->sb.features & (1ULL << BCH_FEATURE_btree_ptr_v2))
+               bkey_btree_ptr_v2_init(&tmp.k);
+       else
+               bkey_btree_ptr_init(&tmp.k);
+
        bch2_alloc_sectors_append_ptrs(c, wp, &tmp.k, c->opts.btree_node_size);
 
        bch2_open_bucket_get(c, wp, &ob);
@@ -354,14 +358,13 @@ static struct btree *bch2_btree_node_alloc(struct btree_update *as, unsigned lev
 {
        struct bch_fs *c = as->c;
        struct btree *b;
+       int ret;
 
        BUG_ON(level >= BTREE_MAX_DEPTH);
        BUG_ON(!as->reserve->nr);
 
        b = as->reserve->b[--as->reserve->nr];
 
-       BUG_ON(bch2_btree_node_hash_insert(&c->btree_cache, b, level, as->btree_id));
-
        set_btree_node_accessed(b);
        set_btree_node_dirty(b);
        set_btree_node_need_write(b);
@@ -372,7 +375,16 @@ static struct btree *bch2_btree_node_alloc(struct btree_update *as, unsigned lev
        b->data->flags = 0;
        SET_BTREE_NODE_ID(b->data, as->btree_id);
        SET_BTREE_NODE_LEVEL(b->data, level);
-       b->data->ptr = bkey_i_to_btree_ptr(&b->key)->v.start[0];
+       b->data->ptr = bch2_bkey_ptrs_c(bkey_i_to_s_c(&b->key)).start->ptr;
+
+       if (b->key.k.type == KEY_TYPE_btree_ptr_v2) {
+               struct bkey_i_btree_ptr_v2 *bp = bkey_i_to_btree_ptr_v2(&b->key);
+
+               bp->v.mem_ptr           = 0;
+               bp->v.seq               = b->data->keys.seq;
+               bp->v.sectors_written   = 0;
+               bp->v.sectors           = cpu_to_le16(c->opts.btree_node_size);
+       }
 
        if (c->sb.features & (1ULL << BCH_FEATURE_new_extent_overwrite))
                SET_BTREE_NODE_NEW_EXTENT_OVERWRITE(b->data, true);
@@ -385,10 +397,26 @@ static struct btree *bch2_btree_node_alloc(struct btree_update *as, unsigned lev
 
        btree_node_will_make_reachable(as, b);
 
+       ret = bch2_btree_node_hash_insert(&c->btree_cache, b, level, as->btree_id);
+       BUG_ON(ret);
+
        trace_btree_node_alloc(c, b);
        return b;
 }
 
+static void btree_set_min(struct btree *b, struct bpos pos)
+{
+       if (b->key.k.type == KEY_TYPE_btree_ptr_v2)
+               bkey_i_to_btree_ptr_v2(&b->key)->v.min_key = pos;
+       b->data->min_key = pos;
+}
+
+static void btree_set_max(struct btree *b, struct bpos pos)
+{
+       b->key.k.p = pos;
+       b->data->max_key = pos;
+}
+
 struct btree *__bch2_btree_node_alloc_replacement(struct btree_update *as,
                                                  struct btree *b,
                                                  struct bkey_format format)
@@ -397,11 +425,12 @@ struct btree *__bch2_btree_node_alloc_replacement(struct btree_update *as,
 
        n = bch2_btree_node_alloc(as, b->c.level);
 
-       n->data->min_key        = b->data->min_key;
-       n->data->max_key        = b->data->max_key;
-       n->data->format         = format;
        SET_BTREE_NODE_SEQ(n->data, BTREE_NODE_SEQ(b->data) + 1);
 
+       btree_set_min(n, b->data->min_key);
+       btree_set_max(n, b->data->max_key);
+
+       n->data->format         = format;
        btree_node_set_format(n, format);
 
        bch2_btree_sort_into(as->c, n, b);
@@ -431,10 +460,9 @@ static struct btree *__btree_root_alloc(struct btree_update *as, unsigned level)
 {
        struct btree *b = bch2_btree_node_alloc(as, level);
 
-       b->data->min_key = POS_MIN;
-       b->data->max_key = POS_MAX;
+       btree_set_min(b, POS_MIN);
+       btree_set_max(b, POS_MAX);
        b->data->format = bch2_btree_calc_format(b);
-       b->key.k.p = POS_MAX;
 
        btree_node_set_format(b, b->data->format);
        bch2_btree_build_aux_trees(b);
@@ -1263,10 +1291,8 @@ static struct btree *__btree_split_node(struct btree_update *as,
 
        BUG_ON(!prev);
 
-       n1->key.k.p = bkey_unpack_pos(n1, prev);
-       n1->data->max_key = n1->key.k.p;
-       n2->data->min_key =
-               btree_type_successor(n1->c.btree_id, n1->key.k.p);
+       btree_set_max(n1, bkey_unpack_pos(n1, prev));
+       btree_set_min(n2, btree_type_successor(n1->c.btree_id, n1->key.k.p));
 
        set2->u64s = cpu_to_le16((u64 *) vstruct_end(set1) - (u64 *) k);
        set1->u64s = cpu_to_le16(le16_to_cpu(set1->u64s) - le16_to_cpu(set2->u64s));
@@ -1749,10 +1775,9 @@ retry:
 
        n = bch2_btree_node_alloc(as, b->c.level);
 
-       n->data->min_key        = prev->data->min_key;
-       n->data->max_key        = next->data->max_key;
+       btree_set_min(n, prev->data->min_key);
+       btree_set_max(n, next->data->max_key);
        n->data->format         = new_f;
-       n->key.k.p              = next->key.k.p;
 
        btree_node_set_format(n, new_f);
 
@@ -2202,8 +2227,8 @@ void bch2_btree_root_alloc(struct bch_fs *c, enum btree_id id)
        bch2_btree_build_aux_trees(b);
 
        b->data->flags = 0;
-       b->data->min_key = POS_MIN;
-       b->data->max_key = POS_MAX;
+       btree_set_min(b, POS_MIN);
+       btree_set_max(b, POS_MAX);
        b->data->format = bch2_btree_calc_format(b);
        btree_node_set_format(b, b->data->format);
 
index 60ad443bb5097527952b0639c275b74fb471358f..9fae7d9fb4955021ca0163322a7d58c4d373c3b8 100644 (file)
@@ -1194,6 +1194,7 @@ int bch2_mark_key_locked(struct bch_fs *c,
                ret = bch2_mark_alloc(c, k, fs_usage, journal_seq, flags);
                break;
        case KEY_TYPE_btree_ptr:
+       case KEY_TYPE_btree_ptr_v2:
                sectors = !(flags & BTREE_TRIGGER_OVERWRITE)
                        ?  c->opts.btree_node_size
                        : -c->opts.btree_node_size;
@@ -1729,6 +1730,7 @@ int bch2_trans_mark_key(struct btree_trans *trans, struct bkey_s_c k,
 
        switch (k.k->type) {
        case KEY_TYPE_btree_ptr:
+       case KEY_TYPE_btree_ptr_v2:
                sectors = !(flags & BTREE_TRIGGER_OVERWRITE)
                        ?  c->opts.btree_node_size
                        : -c->opts.btree_node_size;
index 2e49f2a8ccd9905b905a187a760698bb3865de81..4c84787575f51391b8ec616c6a1941c183cde0f2 100644 (file)
@@ -97,7 +97,8 @@ static inline struct bucket *PTR_BUCKET(struct bch_dev *ca,
 static inline enum bch_data_type ptr_data_type(const struct bkey *k,
                                               const struct bch_extent_ptr *ptr)
 {
-       if (k->type == KEY_TYPE_btree_ptr)
+       if (k->type == KEY_TYPE_btree_ptr ||
+           k->type == KEY_TYPE_btree_ptr_v2)
                return BCH_DATA_BTREE;
 
        return ptr->cached ? BCH_DATA_CACHED : BCH_DATA_USER;
index 10ca544317ba6b9c86f84b5673860e6690a237eb..cff4955d203b5bec24a975cc770d4b20e9152593 100644 (file)
@@ -748,6 +748,7 @@ void bch2_bkey_append_ptr(struct bkey_i *k,
 
        switch (k->k.type) {
        case KEY_TYPE_btree_ptr:
+       case KEY_TYPE_btree_ptr_v2:
        case KEY_TYPE_extent:
                EBUG_ON(bkey_val_u64s(&k->k) >= BKEY_EXTENT_VAL_U64s_MAX);
 
@@ -1030,6 +1031,8 @@ const char *bch2_bkey_ptrs_invalid(const struct bch_fs *c, struct bkey_s_c k)
 
        if (k.k->type == KEY_TYPE_btree_ptr)
                size_ondisk = c->opts.btree_node_size;
+       if (k.k->type == KEY_TYPE_btree_ptr_v2)
+               size_ondisk = le16_to_cpu(bkey_s_c_to_btree_ptr_v2(k).v->sectors);
 
        bkey_extent_entry_for_each(ptrs, entry) {
                if (__extent_entry_type(entry) >= BCH_EXTENT_ENTRY_MAX)
index 6e8119a8ad302a70599011735e274240d39d2b3d..70b7d70269dc14af02bd76c52100d42a4c9d6104 100644 (file)
@@ -225,6 +225,13 @@ static inline struct bkey_ptrs_c bch2_bkey_ptrs_c(struct bkey_s_c k)
                        bkey_val_end(r),
                };
        }
+       case KEY_TYPE_btree_ptr_v2: {
+               struct bkey_s_c_btree_ptr_v2 e = bkey_s_c_to_btree_ptr_v2(k);
+               return (struct bkey_ptrs_c) {
+                       to_entry(&e.v->start[0]),
+                       to_entry(extent_entry_last(e))
+               };
+       }
        default:
                return (struct bkey_ptrs_c) { NULL, NULL };
        }
@@ -372,6 +379,13 @@ void bch2_btree_ptr_to_text(struct printbuf *, struct bch_fs *,
        .swab           = bch2_ptr_swab,                        \
 }
 
+#define bch2_bkey_ops_btree_ptr_v2 (struct bkey_ops) {         \
+       .key_invalid    = bch2_btree_ptr_invalid,               \
+       .key_debugcheck = bch2_btree_ptr_debugcheck,            \
+       .val_to_text    = bch2_btree_ptr_to_text,               \
+       .swab           = bch2_ptr_swab,                        \
+}
+
 /* KEY_TYPE_extent: */
 
 const char *bch2_extent_invalid(const struct bch_fs *, struct bkey_s_c);
@@ -416,6 +430,7 @@ static inline bool bkey_extent_is_direct_data(const struct bkey *k)
 {
        switch (k->type) {
        case KEY_TYPE_btree_ptr:
+       case KEY_TYPE_btree_ptr_v2:
        case KEY_TYPE_extent:
        case KEY_TYPE_reflink_v:
                return true;
index 29e6f9f00bad9bba5ce8c47a6ff36142e15bfbcc..c9d12f7c180e88705109ccd1ba246b29ab0d637e 100644 (file)
@@ -1010,6 +1010,7 @@ int bch2_fs_recovery(struct bch_fs *c)
                c->disk_sb.sb->version = le16_to_cpu(bcachefs_metadata_version_current);
                c->disk_sb.sb->features[0] |= 1ULL << BCH_FEATURE_new_siphash;
                c->disk_sb.sb->features[0] |= 1ULL << BCH_FEATURE_new_extent_overwrite;
+               c->disk_sb.sb->features[0] |= 1ULL << BCH_FEATURE_btree_ptr_v2;
                write_sb = true;
        }
 
index 66787d0c5c632f2407592fd1f994a0732b92d365..f4851c8b8f8831f647188e9ef71257a4e1d61874 100644 (file)
@@ -112,6 +112,7 @@ void bch2_bkey_to_replicas(struct bch_replicas_entry *e,
 
        switch (k.k->type) {
        case KEY_TYPE_btree_ptr:
+       case KEY_TYPE_btree_ptr_v2:
                e->data_type = BCH_DATA_BTREE;
                extent_to_replicas(k, e);
                break;