bcachefs: KEY_TYPE_inode_v3, metadata_version_inode_v3
author Kent Overstreet <kent.overstreet@linux.dev>
Fri, 21 Oct 2022 17:21:03 +0000 (13:21 -0400)
committer Kent Overstreet <kent.overstreet@linux.dev>
Sun, 22 Oct 2023 21:09:51 +0000 (17:09 -0400)
Move bi_size and bi_sectors into the non-varint portion of the inode, so
that the write path can update them without going through the relatively
expensive unpack/pack operations.

Other changes:
 - Add a field for the offset of the varint section, so we can add new
   non-varint fields without needing a new inode type, like alloc_v3
 - Move bi_mode into the flags field, so that the varint section can be
   u64 aligned

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
fs/bcachefs/bcachefs_format.h
fs/bcachefs/bkey_methods.c
fs/bcachefs/buckets.c
fs/bcachefs/inode.c
fs/bcachefs/inode.h
fs/bcachefs/io.c
fs/bcachefs/recovery.c

index 66c88518616016f465071e960dc5259e7ca7b14e..e0e2219fb1cc2d2d0f87641c90f1dfe9ab8bad4c 100644 (file)
@@ -370,7 +370,8 @@ static inline void bkey_init(struct bkey *k)
        x(set,                  25)                     \
        x(lru,                  26)                     \
        x(alloc_v4,             27)                     \
-       x(backpointer,          28)
+       x(backpointer,          28)                     \
+       x(inode_v3,             29)
 
 enum bch_bkey_type {
 #define x(name, nr) KEY_TYPE_##name    = nr,
@@ -721,6 +722,21 @@ struct bch_inode_v2 {
        __u8                    fields[0];
 } __packed __aligned(8);
 
+struct bch_inode_v3 {
+       struct bch_val          v;
+
+       __le64                  bi_journal_seq;
+       __le64                  bi_hash_seed;
+       __le64                  bi_flags;
+       __le64                  bi_sectors;
+       __le64                  bi_size;
+       __le64                  bi_version;
+       __u8                    fields[0];
+} __packed __aligned(8);
+
+#define INODEv3_FIELDS_START_INITIAL   6
+#define INODEv3_FIELDS_START_CUR       (offsetof(struct bch_inode_v3, fields) / sizeof(u64))
+
 struct bch_inode_generation {
        struct bch_val          v;
 
@@ -732,7 +748,7 @@ struct bch_inode_generation {
  * bi_subvol and bi_parent_subvol are only set for subvolume roots:
  */
 
-#define BCH_INODE_FIELDS()                     \
+#define BCH_INODE_FIELDS_v2()                  \
        x(bi_atime,                     96)     \
        x(bi_ctime,                     96)     \
        x(bi_mtime,                     96)     \
@@ -759,6 +775,31 @@ struct bch_inode_generation {
        x(bi_subvol,                    32)     \
        x(bi_parent_subvol,             32)
 
+#define BCH_INODE_FIELDS_v3()                  \
+       x(bi_atime,                     96)     \
+       x(bi_ctime,                     96)     \
+       x(bi_mtime,                     96)     \
+       x(bi_otime,                     96)     \
+       x(bi_uid,                       32)     \
+       x(bi_gid,                       32)     \
+       x(bi_nlink,                     32)     \
+       x(bi_generation,                32)     \
+       x(bi_dev,                       32)     \
+       x(bi_data_checksum,             8)      \
+       x(bi_compression,               8)      \
+       x(bi_project,                   32)     \
+       x(bi_background_compression,    8)      \
+       x(bi_data_replicas,             8)      \
+       x(bi_promote_target,            16)     \
+       x(bi_foreground_target,         16)     \
+       x(bi_background_target,         16)     \
+       x(bi_erasure_code,              16)     \
+       x(bi_fields_set,                16)     \
+       x(bi_dir,                       64)     \
+       x(bi_dir_offset,                64)     \
+       x(bi_subvol,                    32)     \
+       x(bi_parent_subvol,             32)
+
 /* subset of BCH_INODE_FIELDS */
 #define BCH_INODE_OPTS()                       \
        x(data_checksum,                8)      \
@@ -815,6 +856,13 @@ LE32_BITMASK(INODE_NEW_VARINT,     struct bch_inode, bi_flags, 31, 32);
 LE64_BITMASK(INODEv2_STR_HASH, struct bch_inode_v2, bi_flags, 20, 24);
 LE64_BITMASK(INODEv2_NR_FIELDS,        struct bch_inode_v2, bi_flags, 24, 31);
 
+LE64_BITMASK(INODEv3_STR_HASH, struct bch_inode_v3, bi_flags, 20, 24);
+LE64_BITMASK(INODEv3_NR_FIELDS,        struct bch_inode_v3, bi_flags, 24, 31);
+
+LE64_BITMASK(INODEv3_FIELDS_START,
+                               struct bch_inode_v3, bi_flags, 31, 36);
+LE64_BITMASK(INODEv3_MODE,     struct bch_inode_v3, bi_flags, 36, 52);
+
 /* Dirents */
 
 /*
@@ -1499,7 +1547,8 @@ struct bch_sb_field_journal_seq_blacklist {
        x(freespace,                    19)             \
        x(alloc_v4,                     20)             \
        x(new_data_types,               21)             \
-       x(backpointers,                 22)
+       x(backpointers,                 22)             \
+       x(inode_v3,                     23)
 
 enum bcachefs_metadata_version {
        bcachefs_metadata_version_min = 9,
index 45c8b2c61c5b912fb911bcf1de3af0a1e2cb8257..c7c0a9781a35ffb9fe5fbe2e37f038def79991b0 100644 (file)
@@ -149,6 +149,7 @@ static unsigned bch2_key_types_allowed[] = {
                (1U << KEY_TYPE_whiteout)|
                (1U << KEY_TYPE_inode)|
                (1U << KEY_TYPE_inode_v2)|
+               (1U << KEY_TYPE_inode_v3)|
                (1U << KEY_TYPE_inode_generation),
        [BKEY_TYPE_dirents] =
                (1U << KEY_TYPE_deleted)|
index b657f8545a3b96fb13fe3456ef9a0eb2e65556bb..9dcdfca19d527c66dfb1b33b142acaac85755511 100644 (file)
@@ -1123,10 +1123,10 @@ int bch2_mark_inode(struct btree_trans *trans,
        u64 journal_seq = trans->journal_res.seq;
 
        if (flags & BTREE_TRIGGER_INSERT) {
-               struct bch_inode_v2 *v = (struct bch_inode_v2 *) new.v;
+               struct bch_inode_v3 *v = (struct bch_inode_v3 *) new.v;
 
                BUG_ON(!journal_seq);
-               BUG_ON(new.k->type != KEY_TYPE_inode_v2);
+               BUG_ON(new.k->type != KEY_TYPE_inode_v3);
 
                v->bi_journal_seq = cpu_to_le64(journal_seq);
        }
index 9eeabe70aec1c69469045c5b9453c6e6faaa5cda..f338cf6fd8b79ebd20fe42126ea95a8905cc4b8f 100644 (file)
@@ -60,11 +60,10 @@ static int inode_decode_field(const u8 *in, const u8 *end,
        return bytes;
 }
 
-static inline void bch2_inode_pack_inlined(struct bch_fs *c,
-                                          struct bkey_inode_buf *packed,
+static inline void bch2_inode_pack_inlined(struct bkey_inode_buf *packed,
                                           const struct bch_inode_unpacked *inode)
 {
-       struct bkey_i_inode_v2 *k = &packed->inode;
+       struct bkey_i_inode_v3 *k = &packed->inode;
        u8 *out = k->v.fields;
        u8 *end = (void *) &packed[1];
        u8 *last_nonzero_field = out;
@@ -72,13 +71,17 @@ static inline void bch2_inode_pack_inlined(struct bch_fs *c,
        unsigned bytes;
        int ret;
 
-       bkey_inode_v2_init(&packed->inode.k_i);
+       bkey_inode_v3_init(&packed->inode.k_i);
        packed->inode.k.p.offset        = inode->bi_inum;
        packed->inode.v.bi_journal_seq  = cpu_to_le64(inode->bi_journal_seq);
        packed->inode.v.bi_hash_seed    = inode->bi_hash_seed;
        packed->inode.v.bi_flags        = cpu_to_le64(inode->bi_flags);
-       packed->inode.v.bi_flags        = cpu_to_le64(inode->bi_flags);
-       packed->inode.v.bi_mode         = cpu_to_le16(inode->bi_mode);
+       packed->inode.v.bi_sectors      = cpu_to_le64(inode->bi_sectors);
+       packed->inode.v.bi_size         = cpu_to_le64(inode->bi_size);
+       packed->inode.v.bi_version      = cpu_to_le64(inode->bi_version);
+       SET_INODEv3_MODE(&packed->inode.v, inode->bi_mode);
+       SET_INODEv3_FIELDS_START(&packed->inode.v, INODEv3_FIELDS_START_CUR);
+
 
 #define x(_name, _bits)                                                        \
        nr_fields++;                                                    \
@@ -99,7 +102,7 @@ static inline void bch2_inode_pack_inlined(struct bch_fs *c,
                        *out++ = 0;                                     \
        }
 
-       BCH_INODE_FIELDS()
+       BCH_INODE_FIELDS_v3()
 #undef  x
        BUG_ON(out > end);
 
@@ -110,7 +113,7 @@ static inline void bch2_inode_pack_inlined(struct bch_fs *c,
        set_bkey_val_bytes(&packed->inode.k, bytes);
        memset_u64s_tail(&packed->inode.v, 0, bytes);
 
-       SET_INODEv2_NR_FIELDS(&k->v, nr_fields);
+       SET_INODEv3_NR_FIELDS(&k->v, nr_fields);
 
        if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG)) {
                struct bch_inode_unpacked unpacked;
@@ -120,21 +123,23 @@ static inline void bch2_inode_pack_inlined(struct bch_fs *c,
                BUG_ON(ret);
                BUG_ON(unpacked.bi_inum         != inode->bi_inum);
                BUG_ON(unpacked.bi_hash_seed    != inode->bi_hash_seed);
+               BUG_ON(unpacked.bi_sectors      != inode->bi_sectors);
+               BUG_ON(unpacked.bi_size         != inode->bi_size);
+               BUG_ON(unpacked.bi_version      != inode->bi_version);
                BUG_ON(unpacked.bi_mode         != inode->bi_mode);
 
 #define x(_name, _bits)        if (unpacked._name != inode->_name)             \
                        panic("unpacked %llu should be %llu",           \
                              (u64) unpacked._name, (u64) inode->_name);
-               BCH_INODE_FIELDS()
+               BCH_INODE_FIELDS_v3()
 #undef  x
        }
 }
 
-void bch2_inode_pack(struct bch_fs *c,
-                    struct bkey_inode_buf *packed,
+void bch2_inode_pack(struct bkey_inode_buf *packed,
                     const struct bch_inode_unpacked *inode)
 {
-       bch2_inode_pack_inlined(c, packed, inode);
+       bch2_inode_pack_inlined(packed, inode);
 }
 
 static noinline int bch2_inode_unpack_v1(struct bkey_s_c_inode inode,
@@ -164,7 +169,7 @@ static noinline int bch2_inode_unpack_v1(struct bkey_s_c_inode inode,
        unpacked->_name = field[1];                                     \
        in += ret;
 
-       BCH_INODE_FIELDS()
+       BCH_INODE_FIELDS_v2()
 #undef  x
 
        /* XXX: signal if there were more fields than expected? */
@@ -203,15 +208,66 @@ static int bch2_inode_unpack_v2(struct bch_inode_unpacked *unpacked,
                return -1;                                              \
        fieldnr++;
 
-       BCH_INODE_FIELDS()
+       BCH_INODE_FIELDS_v2()
 #undef  x
 
        /* XXX: signal if there were more fields than expected? */
        return 0;
 }
 
-int bch2_inode_unpack(struct bkey_s_c k,
-                     struct bch_inode_unpacked *unpacked)
+static int bch2_inode_unpack_v3(struct bkey_s_c k,
+                               struct bch_inode_unpacked *unpacked)
+{
+       struct bkey_s_c_inode_v3 inode = bkey_s_c_to_inode_v3(k);
+       const u8 *in = inode.v->fields;
+       const u8 *end = bkey_val_end(inode);
+       unsigned nr_fields = INODEv3_NR_FIELDS(inode.v);
+       unsigned fieldnr = 0;
+       int ret;
+       u64 v[2];
+
+       unpacked->bi_inum       = inode.k->p.offset;
+       unpacked->bi_journal_seq= le64_to_cpu(inode.v->bi_journal_seq);
+       unpacked->bi_hash_seed  = inode.v->bi_hash_seed;
+       unpacked->bi_flags      = le64_to_cpu(inode.v->bi_flags);
+       unpacked->bi_sectors    = le64_to_cpu(inode.v->bi_sectors);
+       unpacked->bi_size       = le64_to_cpu(inode.v->bi_size);
+       unpacked->bi_version    = le64_to_cpu(inode.v->bi_version);
+       unpacked->bi_mode       = INODEv3_MODE(inode.v);
+
+#define x(_name, _bits)                                                        \
+       if (fieldnr < nr_fields) {                                      \
+               ret = bch2_varint_decode_fast(in, end, &v[0]);          \
+               if (ret < 0)                                            \
+                       return ret;                                     \
+               in += ret;                                              \
+                                                                       \
+               if (_bits > 64) {                                       \
+                       ret = bch2_varint_decode_fast(in, end, &v[1]);  \
+                       if (ret < 0)                                    \
+                               return ret;                             \
+                       in += ret;                                      \
+               } else {                                                \
+                       v[1] = 0;                                       \
+               }                                                       \
+       } else {                                                        \
+               v[0] = v[1] = 0;                                        \
+       }                                                               \
+                                                                       \
+       unpacked->_name = v[0];                                         \
+       if (v[1] || v[0] != unpacked->_name)                            \
+               return -1;                                              \
+       fieldnr++;
+
+       BCH_INODE_FIELDS_v3()
+#undef  x
+
+       /* XXX: signal if there were more fields than expected? */
+       return 0;
+}
+
+static noinline int bch2_inode_unpack_slowpath(struct bkey_s_c k,
+                                              struct bch_inode_unpacked *unpacked)
 {
        memset(unpacked, 0, sizeof(*unpacked));
 
@@ -252,6 +308,14 @@ int bch2_inode_unpack(struct bkey_s_c k,
        }
 }
 
+int bch2_inode_unpack(struct bkey_s_c k,
+                     struct bch_inode_unpacked *unpacked)
+{
+       if (likely(k.k->type == KEY_TYPE_inode_v3))
+               return bch2_inode_unpack_v3(k, unpacked);
+       return bch2_inode_unpack_slowpath(k, unpacked);
+}
+
 int bch2_inode_peek(struct btree_trans *trans,
                    struct btree_iter *iter,
                    struct bch_inode_unpacked *inode,
@@ -297,11 +361,32 @@ int bch2_inode_write(struct btree_trans *trans,
        if (IS_ERR(inode_p))
                return PTR_ERR(inode_p);
 
-       bch2_inode_pack_inlined(trans->c, inode_p, inode);
+       bch2_inode_pack_inlined(inode_p, inode);
        inode_p->inode.k.p.snapshot = iter->snapshot;
        return bch2_trans_update(trans, iter, &inode_p->inode.k_i, 0);
 }
 
+struct bkey_i *bch2_inode_to_v3(struct btree_trans *trans, struct bkey_i *k)
+{
+       struct bch_inode_unpacked u;
+       struct bkey_inode_buf *inode_p;
+       int ret;
+
+       if (!bkey_is_inode(&k->k))
+               return ERR_PTR(-ENOENT);
+
+       inode_p = bch2_trans_kmalloc(trans, sizeof(*inode_p));
+       if (IS_ERR(inode_p))
+               return ERR_CAST(inode_p);
+
+       ret = bch2_inode_unpack(bkey_i_to_s_c(k), &u);
+       if (ret)
+               return ERR_PTR(ret);
+
+       bch2_inode_pack(inode_p, &u);
+       return &inode_p->inode.k_i;
+}
+
 static int __bch2_inode_invalid(struct bkey_s_c k, struct printbuf *err)
 {
        struct bch_inode_unpacked unpacked;
@@ -387,15 +472,48 @@ int bch2_inode_v2_invalid(const struct bch_fs *c, struct bkey_s_c k,
        return __bch2_inode_invalid(k, err);
 }
 
-static void __bch2_inode_unpacked_to_text(struct printbuf *out, struct bch_inode_unpacked *inode)
+int bch2_inode_v3_invalid(const struct bch_fs *c, struct bkey_s_c k,
+                         int rw, struct printbuf *err)
+{
+       struct bkey_s_c_inode_v3 inode = bkey_s_c_to_inode_v3(k);
+
+       if (bkey_val_bytes(k.k) < sizeof(*inode.v)) {
+               prt_printf(err, "incorrect value size (%zu < %zu)",
+                      bkey_val_bytes(k.k), sizeof(*inode.v));
+               return -BCH_ERR_invalid_bkey;
+       }
+
+       if (INODEv3_FIELDS_START(inode.v) < INODEv3_FIELDS_START_INITIAL ||
+           INODEv3_FIELDS_START(inode.v) > bkey_val_u64s(inode.k)) {
+               prt_printf(err, "invalid fields_start (got %llu, min %u max %zu)",
+                      INODEv3_FIELDS_START(inode.v),
+                      INODEv3_FIELDS_START_INITIAL,
+                      bkey_val_u64s(inode.k));
+               return -BCH_ERR_invalid_bkey;
+       }
+
+       if (INODEv3_STR_HASH(inode.v) >= BCH_STR_HASH_NR) {
+               prt_printf(err, "invalid str hash type (%llu >= %u)",
+                      INODEv3_STR_HASH(inode.v), BCH_STR_HASH_NR);
+               return -BCH_ERR_invalid_bkey;
+       }
+
+       return __bch2_inode_invalid(k, err);
+}
+
+static void __bch2_inode_unpacked_to_text(struct printbuf *out,
+                                         struct bch_inode_unpacked *inode)
 {
-       prt_printf(out, "mode %o flags %x journal_seq %llu",
+       prt_printf(out, "mode %o flags %x journal_seq %llu bi_size %llu bi_sectors %llu bi_version %llu",
               inode->bi_mode, inode->bi_flags,
-              inode->bi_journal_seq);
+              inode->bi_journal_seq,
+              inode->bi_size,
+              inode->bi_sectors,
+              inode->bi_version);
 
 #define x(_name, _bits)                                                \
        prt_printf(out, " "#_name " %llu", (u64) inode->_name);
-       BCH_INODE_FIELDS()
+       BCH_INODE_FIELDS_v3()
 #undef  x
 }
 
@@ -405,8 +523,7 @@ void bch2_inode_unpacked_to_text(struct printbuf *out, struct bch_inode_unpacked
        __bch2_inode_unpacked_to_text(out, inode);
 }
 
-void bch2_inode_to_text(struct printbuf *out, struct bch_fs *c,
-                      struct bkey_s_c k)
+void bch2_inode_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k)
 {
        struct bch_inode_unpacked inode;
 
index da78ed023a3038e3b1a70a76013e03bf62effc47..b753e1b254e4679c932f0f96a0e5f1c039e92b72 100644 (file)
@@ -9,6 +9,7 @@ extern const char * const bch2_inode_opts[];
 
 int bch2_inode_invalid(const struct bch_fs *, struct bkey_s_c, int, struct printbuf *);
 int bch2_inode_v2_invalid(const struct bch_fs *, struct bkey_s_c, int, struct printbuf *);
+int bch2_inode_v3_invalid(const struct bch_fs *, struct bkey_s_c, int, struct printbuf *);
 void bch2_inode_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
 
 #define bch2_bkey_ops_inode ((struct bkey_ops) {       \
@@ -25,10 +26,18 @@ void bch2_inode_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
        .atomic_trigger = bch2_mark_inode,              \
 })
 
+#define bch2_bkey_ops_inode_v3 ((struct bkey_ops) {    \
+       .key_invalid    = bch2_inode_v3_invalid,        \
+       .val_to_text    = bch2_inode_to_text,           \
+       .trans_trigger  = bch2_trans_mark_inode,        \
+       .atomic_trigger = bch2_mark_inode,              \
+})
+
 static inline bool bkey_is_inode(const struct bkey *k)
 {
        return  k->type == KEY_TYPE_inode ||
-               k->type == KEY_TYPE_inode_v2;
+               k->type == KEY_TYPE_inode_v2 ||
+               k->type == KEY_TYPE_inode_v3;
 }
 
 int bch2_inode_generation_invalid(const struct bch_fs *, struct bkey_s_c,
@@ -52,25 +61,28 @@ struct bch_inode_unpacked {
        u64                     bi_inum;
        u64                     bi_journal_seq;
        __le64                  bi_hash_seed;
+       u64                     bi_size;
+       u64                     bi_sectors;
+       u64                     bi_version;
        u32                     bi_flags;
        u16                     bi_mode;
 
 #define x(_name, _bits)        u##_bits _name;
-       BCH_INODE_FIELDS()
+       BCH_INODE_FIELDS_v3()
 #undef  x
 };
 
 struct bkey_inode_buf {
-       struct bkey_i_inode_v2  inode;
+       struct bkey_i_inode_v3  inode;
 
 #define x(_name, _bits)                + 8 + _bits / 8
-       u8              _pad[0 + BCH_INODE_FIELDS()];
+       u8              _pad[0 + BCH_INODE_FIELDS_v3()];
 #undef  x
 } __packed __aligned(8);
 
-void bch2_inode_pack(struct bch_fs *, struct bkey_inode_buf *,
-                    const struct bch_inode_unpacked *);
+void bch2_inode_pack(struct bkey_inode_buf *, const struct bch_inode_unpacked *);
 int bch2_inode_unpack(struct bkey_s_c, struct bch_inode_unpacked *);
+struct bkey_i *bch2_inode_to_v3(struct btree_trans *, struct bkey_i *);
 
 void bch2_inode_unpacked_to_text(struct printbuf *, struct bch_inode_unpacked *);
 
index a937940f509600ddf9410a68ace7102179b2a4e5..fb85c2bfd569ae76f51543e801a313d78ad5225d 100644 (file)
@@ -356,7 +356,7 @@ int bch2_extent_update(struct btree_trans *trans,
        }
 
        if (i_sectors_delta || new_i_size) {
-               bch2_inode_pack(trans->c, &inode_p, &inode_u);
+               bch2_inode_pack(&inode_p, &inode_u);
 
                inode_p.inode.k.p.snapshot = iter->snapshot;
 
index 2df1a541cb40f25743fd857e617e3ef98638dfea..b355902260379ec8f2108e04a3f30b9d1132634b 100644 (file)
@@ -1098,6 +1098,9 @@ int bch2_fs_recovery(struct bch_fs *c)
                        c->opts.version_upgrade = true;
                        c->opts.fsck            = true;
                        c->opts.fix_errors      = FSCK_OPT_YES;
+               } else if (c->sb.version < bcachefs_metadata_version_inode_v3) {
+                       bch_info(c, "version prior to inode_v3, upgrade required");
+                       c->opts.version_upgrade = true;
                }
        }
 
@@ -1482,7 +1485,7 @@ int bch2_fs_initialize(struct bch_fs *c)
        c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_extents_above_btree_updates_done);
        c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_bformat_overflow_done);
 
-       if (c->sb.version < bcachefs_metadata_version_backpointers)
+       if (c->sb.version < bcachefs_metadata_version_inode_v3)
                c->opts.version_upgrade = true;
 
        if (c->opts.version_upgrade) {
@@ -1563,7 +1566,7 @@ int bch2_fs_initialize(struct bch_fs *c)
        bch2_inode_init(c, &root_inode, 0, 0, S_IFDIR|0755, 0, NULL);
        root_inode.bi_inum      = BCACHEFS_ROOT_INO;
        root_inode.bi_subvol    = BCACHEFS_ROOT_SUBVOL;
-       bch2_inode_pack(c, &packed_inode, &root_inode);
+       bch2_inode_pack(&packed_inode, &root_inode);
        packed_inode.inode.k.p.snapshot = U32_MAX;
 
        err = "error creating root directory";