bcachefs: Make bkey types globally unique
authorKent Overstreet <kent.overstreet@gmail.com>
Thu, 1 Nov 2018 19:10:01 +0000 (15:10 -0400)
committerKent Overstreet <kent.overstreet@linux.dev>
Sun, 22 Oct 2023 21:08:12 +0000 (17:08 -0400)
This lets us get rid of a lot of extra switch statements - in a lot of
places we dispatch on the btree node type, and then the key type, so
this is a nice cleanup across a lot of code.

Also improve the on-disk format versioning.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
59 files changed:
fs/bcachefs/acl.c
fs/bcachefs/alloc_background.c
fs/bcachefs/alloc_background.h
fs/bcachefs/alloc_foreground.c
fs/bcachefs/alloc_foreground.h
fs/bcachefs/bcachefs.h
fs/bcachefs/bcachefs_format.h
fs/bcachefs/bkey.c
fs/bcachefs/bkey.h
fs/bcachefs/bkey_methods.c
fs/bcachefs/bkey_methods.h
fs/bcachefs/bkey_sort.c
fs/bcachefs/bkey_sort.h
fs/bcachefs/bset.h
fs/bcachefs/btree_cache.c
fs/bcachefs/btree_cache.h
fs/bcachefs/btree_gc.c
fs/bcachefs/btree_gc.h
fs/bcachefs/btree_io.c
fs/bcachefs/btree_iter.c
fs/bcachefs/btree_types.h
fs/bcachefs/btree_update.h
fs/bcachefs/btree_update_interior.c
fs/bcachefs/btree_update_leaf.c
fs/bcachefs/buckets.c
fs/bcachefs/buckets.h
fs/bcachefs/debug.c
fs/bcachefs/dirent.c
fs/bcachefs/dirent.h
fs/bcachefs/ec.c
fs/bcachefs/ec.h
fs/bcachefs/extents.c
fs/bcachefs/extents.h
fs/bcachefs/fs-io.c
fs/bcachefs/fs.c
fs/bcachefs/fsck.c
fs/bcachefs/inode.c
fs/bcachefs/inode.h
fs/bcachefs/io.c
fs/bcachefs/journal_io.c
fs/bcachefs/migrate.c
fs/bcachefs/move.c
fs/bcachefs/move.h
fs/bcachefs/movinggc.c
fs/bcachefs/opts.h
fs/bcachefs/quota.c
fs/bcachefs/quota.h
fs/bcachefs/rebalance.c
fs/bcachefs/recovery.c
fs/bcachefs/replicas.c
fs/bcachefs/replicas.h
fs/bcachefs/str_hash.h
fs/bcachefs/super-io.c
fs/bcachefs/super-io.h
fs/bcachefs/super.c
fs/bcachefs/sysfs.c
fs/bcachefs/trace.h
fs/bcachefs/xattr.c
fs/bcachefs/xattr.h

index eb6fa4d7c1f6d52d4510dfc0bf518acf7f382cc4..bcfc9fdce35e4141e343df352c587773c9dccc94 100644 (file)
@@ -24,9 +24,9 @@ static inline int acl_to_xattr_type(int type)
 {
        switch (type) {
        case ACL_TYPE_ACCESS:
-               return BCH_XATTR_INDEX_POSIX_ACL_ACCESS;
+               return KEY_TYPE_XATTR_INDEX_POSIX_ACL_ACCESS;
        case ACL_TYPE_DEFAULT:
-               return BCH_XATTR_INDEX_POSIX_ACL_DEFAULT;
+               return KEY_TYPE_XATTR_INDEX_POSIX_ACL_DEFAULT;
        default:
                BUG();
        }
@@ -355,7 +355,7 @@ int bch2_acl_chmod(struct btree_trans *trans,
 
        iter = bch2_hash_lookup(trans, bch2_xattr_hash_desc,
                        &inode->ei_str_hash, inode->v.i_ino,
-                       &X_SEARCH(BCH_XATTR_INDEX_POSIX_ACL_ACCESS, "", 0),
+                       &X_SEARCH(KEY_TYPE_XATTR_INDEX_POSIX_ACL_ACCESS, "", 0),
                        BTREE_ITER_INTENT);
        if (IS_ERR(iter))
                return PTR_ERR(iter) != -ENOENT ? PTR_ERR(iter) : 0;
index 390b008b0200f025335f2ba37ab142952d73b337..885aff511f97c7e6e3e36b1b332ab14f6625e6dd 100644 (file)
@@ -76,22 +76,15 @@ static unsigned bch_alloc_val_u64s(const struct bch_alloc *a)
 
 const char *bch2_alloc_invalid(const struct bch_fs *c, struct bkey_s_c k)
 {
+       struct bkey_s_c_alloc a = bkey_s_c_to_alloc(k);
+
        if (k.k->p.inode >= c->sb.nr_devices ||
            !c->devs[k.k->p.inode])
                return "invalid device";
 
-       switch (k.k->type) {
-       case BCH_ALLOC: {
-               struct bkey_s_c_alloc a = bkey_s_c_to_alloc(k);
-
-               /* allow for unknown fields */
-               if (bkey_val_u64s(a.k) < bch_alloc_val_u64s(a.v))
-                       return "incorrect value size";
-               break;
-       }
-       default:
-               return "invalid type";
-       }
+       /* allow for unknown fields */
+       if (bkey_val_u64s(a.k) < bch_alloc_val_u64s(a.v))
+               return "incorrect value size";
 
        return NULL;
 }
@@ -99,14 +92,9 @@ const char *bch2_alloc_invalid(const struct bch_fs *c, struct bkey_s_c k)
 void bch2_alloc_to_text(struct printbuf *out, struct bch_fs *c,
                        struct bkey_s_c k)
 {
-       switch (k.k->type) {
-       case BCH_ALLOC: {
-               struct bkey_s_c_alloc a = bkey_s_c_to_alloc(k);
+       struct bkey_s_c_alloc a = bkey_s_c_to_alloc(k);
 
-               pr_buf(out, "gen %u", a.v->gen);
-               break;
-       }
-       }
+       pr_buf(out, "gen %u", a.v->gen);
 }
 
 static inline unsigned get_alloc_field(const u8 **p, unsigned bytes)
@@ -158,7 +146,7 @@ static void bch2_alloc_read_key(struct bch_fs *c, struct bkey_s_c k)
        struct bucket *g;
        const u8 *d;
 
-       if (k.k->type != BCH_ALLOC)
+       if (k.k->type != KEY_TYPE_alloc)
                return;
 
        a = bkey_s_c_to_alloc(k);
index 59b6a5f2f89045f97880430818665b984299e4a6..8ced4e845281ad07db3adc9658be9a8f930d6909 100644 (file)
@@ -11,7 +11,7 @@
 const char *bch2_alloc_invalid(const struct bch_fs *, struct bkey_s_c);
 void bch2_alloc_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
 
-#define bch2_bkey_alloc_ops (struct bkey_ops) {                \
+#define bch2_bkey_ops_alloc (struct bkey_ops) {                \
        .key_invalid    = bch2_alloc_invalid,           \
        .val_to_text    = bch2_alloc_to_text,           \
 }
index 6e5f6e57da5652b58e5ed1047f3bdd87aec1faed..ddcf2c407764a90955a64d113384cb0df3ebe14f 100644 (file)
@@ -923,7 +923,8 @@ err:
  * as allocated out of @ob
  */
 void bch2_alloc_sectors_append_ptrs(struct bch_fs *c, struct write_point *wp,
-                                   struct bkey_i_extent *e, unsigned sectors)
+                                   struct bkey_i *k, unsigned sectors)
+
 {
        struct open_bucket *ob;
        unsigned i;
@@ -935,13 +936,11 @@ void bch2_alloc_sectors_append_ptrs(struct bch_fs *c, struct write_point *wp,
                struct bch_dev *ca = bch_dev_bkey_exists(c, ob->ptr.dev);
                struct bch_extent_ptr tmp = ob->ptr;
 
-               EBUG_ON(bch2_extent_has_device(extent_i_to_s_c(e), ob->ptr.dev));
-
-               tmp.cached = bkey_extent_is_cached(&e->k) ||
-                       (!ca->mi.durability && wp->type == BCH_DATA_USER);
+               tmp.cached = !ca->mi.durability &&
+                       wp->type == BCH_DATA_USER;
 
                tmp.offset += ca->mi.bucket_size - ob->sectors_free;
-               extent_ptr_append(e, tmp);
+               bch2_bkey_append_ptr(k, tmp);
 
                BUG_ON(sectors > ob->sectors_free);
                ob->sectors_free -= sectors;
index c71cf738172937b67cb24b21fd2d20928c221815..94389052fa94cbd403a07e6478ccfba53ae26dcf 100644 (file)
@@ -101,7 +101,7 @@ struct write_point *bch2_alloc_sectors_start(struct bch_fs *,
                                             struct closure *);
 
 void bch2_alloc_sectors_append_ptrs(struct bch_fs *, struct write_point *,
-                                   struct bkey_i_extent *, unsigned);
+                                   struct bkey_i *, unsigned);
 void bch2_alloc_sectors_done(struct bch_fs *, struct write_point *);
 
 void bch2_open_buckets_stop_dev(struct bch_fs *, struct bch_dev *,
index 258a67d4437bd8658ec1b697b2884119a9e8c1ed..cd2fff851bbe1b26df6ce9c1a6ce8a524ce3fb10 100644 (file)
@@ -541,6 +541,7 @@ struct bch_fs {
                __uuid_t        uuid;
                __uuid_t        user_uuid;
 
+               u16             version;
                u16             encoded_extent_max;
 
                u8              nr_devices;
index a00e77fa1d37950714311e7be104ab1d96f4db45..801156b74335251044e8fd842d3849439db9ab12 100644 (file)
@@ -307,15 +307,6 @@ static inline void bkey_init(struct bkey *k)
 #define __BKEY_PADDED(key, pad)                                        \
        struct { struct bkey_i key; __u64 key ## _pad[pad]; }
 
-#define BKEY_VAL_TYPE(name, nr)                                                \
-struct bkey_i_##name {                                                 \
-       union {                                                         \
-               struct bkey             k;                              \
-               struct bkey_i           k_i;                            \
-       };                                                              \
-       struct bch_##name               v;                              \
-}
-
 /*
  * - DELETED keys are used internally to mark keys that should be ignored but
  *   override keys in composition order.  Their version number is ignored.
@@ -330,19 +321,37 @@ struct bkey_i_##name {                                                    \
  *   by new writes or cluster-wide GC. Node repair can also overwrite them with
  *   the same or a more recent version number, but not with an older version
  *   number.
+ *
+ * - WHITEOUT: for hash table btrees
 */
-#define KEY_TYPE_DELETED               0
-#define KEY_TYPE_DISCARD               1
-#define KEY_TYPE_ERROR                 2
-#define KEY_TYPE_COOKIE                        3
-#define KEY_TYPE_PERSISTENT_DISCARD    4
-#define KEY_TYPE_GENERIC_NR            128
+#define BCH_BKEY_TYPES()                               \
+       x(deleted,              0)                      \
+       x(discard,              1)                      \
+       x(error,                2)                      \
+       x(cookie,               3)                      \
+       x(whiteout,             4)                      \
+       x(btree_ptr,            5)                      \
+       x(extent,               6)                      \
+       x(reservation,          7)                      \
+       x(inode,                8)                      \
+       x(inode_generation,     9)                      \
+       x(dirent,               10)                     \
+       x(xattr,                11)                     \
+       x(alloc,                12)                     \
+       x(quota,                13)                     \
+       x(stripe,               14)
+
+enum bch_bkey_type {
+#define x(name, nr) KEY_TYPE_##name    = nr,
+       BCH_BKEY_TYPES()
+#undef x
+       KEY_TYPE_MAX,
+};
 
 struct bch_cookie {
        struct bch_val          v;
        __le64                  cookie;
 };
-BKEY_VAL_TYPE(cookie,          KEY_TYPE_COOKIE);
 
 /* Extents */
 
@@ -620,21 +629,12 @@ union bch_extent_entry {
 #undef x
 };
 
-enum {
-       BCH_EXTENT              = 128,
-
-       /*
-        * This is kind of a hack, we're overloading the type for a boolean that
-        * really should be part of the value - BCH_EXTENT and BCH_EXTENT_CACHED
-        * have the same value type:
-        */
-       BCH_EXTENT_CACHED       = 129,
+struct bch_btree_ptr {
+       struct bch_val          v;
 
-       /*
-        * Persistent reservation:
-        */
-       BCH_RESERVATION         = 130,
-};
+       __u64                   _data[0];
+       struct bch_extent_ptr   start[];
+} __attribute__((packed, aligned(8)));
 
 struct bch_extent {
        struct bch_val          v;
@@ -642,7 +642,6 @@ struct bch_extent {
        __u64                   _data[0];
        union bch_extent_entry  start[];
 } __attribute__((packed, aligned(8)));
-BKEY_VAL_TYPE(extent,          BCH_EXTENT);
 
 struct bch_reservation {
        struct bch_val          v;
@@ -651,7 +650,6 @@ struct bch_reservation {
        __u8                    nr_replicas;
        __u8                    pad[3];
 } __attribute__((packed, aligned(8)));
-BKEY_VAL_TYPE(reservation,     BCH_RESERVATION);
 
 /* Maximum size (in u64s) a single pointer could be: */
 #define BKEY_EXTENT_PTR_U64s_MAX\
@@ -679,12 +677,6 @@ BKEY_VAL_TYPE(reservation, BCH_RESERVATION);
 
 #define BCACHEFS_ROOT_INO      4096
 
-enum bch_inode_types {
-       BCH_INODE_FS            = 128,
-       BCH_INODE_BLOCKDEV      = 129,
-       BCH_INODE_GENERATION    = 130,
-};
-
 struct bch_inode {
        struct bch_val          v;
 
@@ -693,7 +685,6 @@ struct bch_inode {
        __le16                  bi_mode;
        __u8                    fields[0];
 } __attribute__((packed, aligned(8)));
-BKEY_VAL_TYPE(inode,           BCH_INODE_FS);
 
 struct bch_inode_generation {
        struct bch_val          v;
@@ -701,7 +692,6 @@ struct bch_inode_generation {
        __le32                  bi_generation;
        __le32                  pad;
 } __attribute__((packed, aligned(8)));
-BKEY_VAL_TYPE(inode_generation,        BCH_INODE_GENERATION);
 
 #define BCH_INODE_FIELDS()                                     \
        BCH_INODE_FIELD(bi_atime,                       64)     \
@@ -766,24 +756,6 @@ enum {
 LE32_BITMASK(INODE_STR_HASH,   struct bch_inode, bi_flags, 20, 24);
 LE32_BITMASK(INODE_NR_FIELDS,  struct bch_inode, bi_flags, 24, 32);
 
-struct bch_inode_blockdev {
-       struct bch_val          v;
-
-       __le64                  i_size;
-       __le64                  i_flags;
-
-       /* Seconds: */
-       __le64                  i_ctime;
-       __le64                  i_mtime;
-
-       __uuid_t                i_uuid;
-       __u8                    i_label[32];
-} __attribute__((packed, aligned(8)));
-BKEY_VAL_TYPE(inode_blockdev,  BCH_INODE_BLOCKDEV);
-
-/* Thin provisioned volume, or cache for another block device? */
-LE64_BITMASK(CACHED_DEV,       struct bch_inode_blockdev, i_flags, 0,  1)
-
 /* Dirents */
 
 /*
@@ -797,11 +769,6 @@ LE64_BITMASK(CACHED_DEV,   struct bch_inode_blockdev, i_flags, 0,  1)
  * collision:
  */
 
-enum {
-       BCH_DIRENT              = 128,
-       BCH_DIRENT_WHITEOUT     = 129,
-};
-
 struct bch_dirent {
        struct bch_val          v;
 
@@ -816,7 +783,6 @@ struct bch_dirent {
 
        __u8                    d_name[];
 } __attribute__((packed, aligned(8)));
-BKEY_VAL_TYPE(dirent,          BCH_DIRENT);
 
 #define BCH_NAME_MAX   (U8_MAX * sizeof(u64) -                         \
                         sizeof(struct bkey) -                          \
@@ -825,16 +791,11 @@ BKEY_VAL_TYPE(dirent,             BCH_DIRENT);
 
 /* Xattrs */
 
-enum {
-       BCH_XATTR               = 128,
-       BCH_XATTR_WHITEOUT      = 129,
-};
-
-#define BCH_XATTR_INDEX_USER                   0
-#define BCH_XATTR_INDEX_POSIX_ACL_ACCESS       1
-#define BCH_XATTR_INDEX_POSIX_ACL_DEFAULT      2
-#define BCH_XATTR_INDEX_TRUSTED                        3
-#define BCH_XATTR_INDEX_SECURITY               4
+#define KEY_TYPE_XATTR_INDEX_USER                      0
+#define KEY_TYPE_XATTR_INDEX_POSIX_ACL_ACCESS  1
+#define KEY_TYPE_XATTR_INDEX_POSIX_ACL_DEFAULT 2
+#define KEY_TYPE_XATTR_INDEX_TRUSTED                   3
+#define KEY_TYPE_XATTR_INDEX_SECURITY          4
 
 struct bch_xattr {
        struct bch_val          v;
@@ -843,14 +804,9 @@ struct bch_xattr {
        __le16                  x_val_len;
        __u8                    x_name[];
 } __attribute__((packed, aligned(8)));
-BKEY_VAL_TYPE(xattr,           BCH_XATTR);
 
 /* Bucket/allocation information: */
 
-enum {
-       BCH_ALLOC               = 128,
-};
-
 enum {
        BCH_ALLOC_FIELD_READ_TIME       = 0,
        BCH_ALLOC_FIELD_WRITE_TIME      = 1,
@@ -862,14 +818,9 @@ struct bch_alloc {
        __u8                    gen;
        __u8                    data[];
 } __attribute__((packed, aligned(8)));
-BKEY_VAL_TYPE(alloc,   BCH_ALLOC);
 
 /* Quotas: */
 
-enum {
-       BCH_QUOTA               = 128,
-};
-
 enum quota_types {
        QTYP_USR                = 0,
        QTYP_GRP                = 1,
@@ -892,14 +843,9 @@ struct bch_quota {
        struct bch_val          v;
        struct bch_quota_counter c[Q_COUNTERS];
 } __attribute__((packed, aligned(8)));
-BKEY_VAL_TYPE(quota,   BCH_QUOTA);
 
 /* Erasure coding */
 
-enum {
-       BCH_STRIPE              = 128,
-};
-
 struct bch_stripe {
        struct bch_val          v;
        __le16                  sectors;
@@ -913,7 +859,6 @@ struct bch_stripe {
 
        struct bch_extent_ptr   ptrs[0];
 } __attribute__((packed, aligned(8)));
-BKEY_VAL_TYPE(stripe,  BCH_STRIPE);
 
 /* Optional/variable size superblock sections: */
 
@@ -1149,15 +1094,21 @@ struct bch_sb_field_clean {
 /* Superblock: */
 
 /*
- * Version 8:  BCH_SB_ENCODED_EXTENT_MAX_BITS
- *             BCH_MEMBER_DATA_ALLOWED
- * Version 9:  incompatible extent nonce change
+ * New versioning scheme:
+ * One common version number for all on disk data structures - superblock, btree
+ * nodes, journal entries
  */
+#define BCH_JSET_VERSION_OLD                   2
+#define BCH_BSET_VERSION_OLD                   3
+
+enum bcachefs_metadata_version {
+       bcachefs_metadata_version_min                   = 9,
+       bcachefs_metadata_version_new_versioning        = 10,
+       bcachefs_metadata_version_bkey_renumber         = 10,
+       bcachefs_metadata_version_max                   = 11,
+};
 
-#define BCH_SB_VERSION_MIN             7
-#define BCH_SB_VERSION_EXTENT_MAX      8
-#define BCH_SB_VERSION_EXTENT_NONCE_V1 9
-#define BCH_SB_VERSION_MAX             9
+#define bcachefs_metadata_version_current      (bcachefs_metadata_version_max - 1)
 
 #define BCH_SB_SECTOR                  8
 #define BCH_SB_MEMBERS_MAX             64 /* XXX kill */
@@ -1176,6 +1127,9 @@ struct bch_sb_layout {
 /*
  * @offset     - sector where this sb was written
  * @version    - on disk format version
+ * @version_min        - Oldest metadata version this filesystem contains; so we can
+ *               safely drop compatibility code and refuse to mount filesystems
+ *               we'd need it for
  * @magic      - identifies as a bcachefs superblock (BCACHE_MAGIC)
  * @seq                - incremented each time superblock is written
  * @uuid       - used for generating various magic numbers and identifying
@@ -1369,11 +1323,6 @@ static inline __u64 __bset_magic(struct bch_sb *sb)
 
 /* Journal */
 
-#define BCACHE_JSET_VERSION_UUIDv1     1
-#define BCACHE_JSET_VERSION_UUID       1       /* Always latest UUID format */
-#define BCACHE_JSET_VERSION_JKEYS      2
-#define BCACHE_JSET_VERSION            2
-
 #define JSET_KEYS_U64s (sizeof(struct jset_entry) / sizeof(__u64))
 
 #define BCH_JSET_ENTRY_TYPES()                 \
@@ -1453,35 +1402,26 @@ LE32_BITMASK(JSET_BIG_ENDIAN,   struct jset, flags, 4, 5);
 
 /* Btree: */
 
-#define DEFINE_BCH_BTREE_IDS()                                 \
-       DEF_BTREE_ID(EXTENTS,   0, "extents")                   \
-       DEF_BTREE_ID(INODES,    1, "inodes")                    \
-       DEF_BTREE_ID(DIRENTS,   2, "dirents")                   \
-       DEF_BTREE_ID(XATTRS,    3, "xattrs")                    \
-       DEF_BTREE_ID(ALLOC,     4, "alloc")                     \
-       DEF_BTREE_ID(QUOTAS,    5, "quotas")                    \
-       DEF_BTREE_ID(EC,        6, "erasure_coding")
-
-#define DEF_BTREE_ID(kwd, val, name) BTREE_ID_##kwd = val,
+#define BCH_BTREE_IDS()                                \
+       x(EXTENTS,      0, "extents")                   \
+       x(INODES,       1, "inodes")                    \
+       x(DIRENTS,      2, "dirents")                   \
+       x(XATTRS,       3, "xattrs")                    \
+       x(ALLOC,        4, "alloc")                     \
+       x(QUOTAS,       5, "quotas")                    \
+       x(EC,           6, "erasure_coding")
 
 enum btree_id {
-       DEFINE_BCH_BTREE_IDS()
+#define x(kwd, val, name) BTREE_ID_##kwd = val,
+       BCH_BTREE_IDS()
+#undef x
        BTREE_ID_NR
 };
 
-#undef DEF_BTREE_ID
-
 #define BTREE_MAX_DEPTH                4U
 
 /* Btree nodes */
 
-/* Version 1: Seed pointer into btree node checksum
- */
-#define BCACHE_BSET_CSUM               1
-#define BCACHE_BSET_KEY_v1             2
-#define BCACHE_BSET_JOURNAL_SEQ                3
-#define BCACHE_BSET_VERSION            3
-
 /*
  * Btree nodes
  *
index d7e022ba2027607ace1846983bdbc0012caa1e4f..d35cdde299c49985bd1ffcc62868e26330e63001 100644 (file)
@@ -488,7 +488,7 @@ enum bkey_pack_pos_ret bch2_bkey_pack_pos_lossy(struct bkey_packed *out,
        pack_state_finish(&state, out);
        out->u64s       = f->key_u64s;
        out->format     = KEY_FORMAT_LOCAL_BTREE;
-       out->type       = KEY_TYPE_DELETED;
+       out->type       = KEY_TYPE_deleted;
 
 #ifdef CONFIG_BCACHEFS_DEBUG
        if (exact) {
index 9679631a7e89205081d6fe6f8f6c7fe7614aa0ff..44044fcd6f9f10bbbbba3baa948750ccd0cf8c45 100644 (file)
@@ -61,10 +61,12 @@ static inline void set_bkey_val_bytes(struct bkey *k, unsigned bytes)
        k->u64s = BKEY_U64s + DIV_ROUND_UP(bytes, sizeof(u64));
 }
 
-#define bkey_deleted(_k)       ((_k)->type == KEY_TYPE_DELETED)
+#define bkey_val_end(_k)       vstruct_idx((_k).v, bkey_val_u64s((_k).k))
+
+#define bkey_deleted(_k)       ((_k)->type == KEY_TYPE_deleted)
 
 #define bkey_whiteout(_k)                              \
-       ((_k)->type == KEY_TYPE_DELETED || (_k)->type == KEY_TYPE_DISCARD)
+       ((_k)->type == KEY_TYPE_deleted || (_k)->type == KEY_TYPE_discard)
 
 #define bkey_packed_typecheck(_k)                                      \
 ({                                                                     \
@@ -439,7 +441,15 @@ static inline struct bkey_s_c bkey_i_to_s_c(const struct bkey_i *k)
  * bkey_i_extent to a bkey_i - since that's always safe, instead of conversion
  * functions.
  */
-#define __BKEY_VAL_ACCESSORS(name, nr, _assert)                                \
+#define BKEY_VAL_ACCESSORS(name)                                       \
+struct bkey_i_##name {                                                 \
+       union {                                                         \
+               struct bkey             k;                              \
+               struct bkey_i           k_i;                            \
+       };                                                              \
+       struct bch_##name               v;                              \
+};                                                                     \
+                                                                       \
 struct bkey_s_c_##name {                                               \
        union {                                                         \
        struct {                                                        \
@@ -464,20 +474,20 @@ struct bkey_s_##name {                                                    \
                                                                        \
 static inline struct bkey_i_##name *bkey_i_to_##name(struct bkey_i *k) \
 {                                                                      \
-       _assert(k->k.type, nr);                                         \
+       EBUG_ON(k->k.type != KEY_TYPE_##name);                          \
        return container_of(&k->k, struct bkey_i_##name, k);            \
 }                                                                      \
                                                                        \
 static inline const struct bkey_i_##name *                             \
 bkey_i_to_##name##_c(const struct bkey_i *k)                           \
 {                                                                      \
-       _assert(k->k.type, nr);                                         \
+       EBUG_ON(k->k.type != KEY_TYPE_##name);                          \
        return container_of(&k->k, struct bkey_i_##name, k);            \
 }                                                                      \
                                                                        \
 static inline struct bkey_s_##name bkey_s_to_##name(struct bkey_s k)   \
 {                                                                      \
-       _assert(k.k->type, nr);                                         \
+       EBUG_ON(k.k->type != KEY_TYPE_##name);                          \
        return (struct bkey_s_##name) {                                 \
                .k = k.k,                                               \
                .v = container_of(k.v, struct bch_##name, v),           \
@@ -486,7 +496,7 @@ static inline struct bkey_s_##name bkey_s_to_##name(struct bkey_s k)        \
                                                                        \
 static inline struct bkey_s_c_##name bkey_s_c_to_##name(struct bkey_s_c k)\
 {                                                                      \
-       _assert(k.k->type, nr);                                         \
+       EBUG_ON(k.k->type != KEY_TYPE_##name);                          \
        return (struct bkey_s_c_##name) {                               \
                .k = k.k,                                               \
                .v = container_of(k.v, struct bch_##name, v),           \
@@ -512,7 +522,7 @@ name##_i_to_s_c(const struct bkey_i_##name *k)                              \
                                                                        \
 static inline struct bkey_s_##name bkey_i_to_s_##name(struct bkey_i *k)        \
 {                                                                      \
-       _assert(k->k.type, nr);                                         \
+       EBUG_ON(k->k.type != KEY_TYPE_##name);                          \
        return (struct bkey_s_##name) {                                 \
                .k = &k->k,                                             \
                .v = container_of(&k->v, struct bch_##name, v),         \
@@ -522,27 +532,13 @@ static inline struct bkey_s_##name bkey_i_to_s_##name(struct bkey_i *k)   \
 static inline struct bkey_s_c_##name                                   \
 bkey_i_to_s_c_##name(const struct bkey_i *k)                           \
 {                                                                      \
-       _assert(k->k.type, nr);                                         \
+       EBUG_ON(k->k.type != KEY_TYPE_##name);                          \
        return (struct bkey_s_c_##name) {                               \
                .k = &k->k,                                             \
                .v = container_of(&k->v, struct bch_##name, v),         \
        };                                                              \
 }                                                                      \
                                                                        \
-static inline struct bch_##name *                                      \
-bkey_p_##name##_val(const struct bkey_format *f,                       \
-                   struct bkey_packed *k)                              \
-{                                                                      \
-       return container_of(bkeyp_val(f, k), struct bch_##name, v);     \
-}                                                                      \
-                                                                       \
-static inline const struct bch_##name *                                        \
-bkey_p_c_##name##_val(const struct bkey_format *f,                     \
-                     const struct bkey_packed *k)                      \
-{                                                                      \
-       return container_of(bkeyp_val(f, k), struct bch_##name, v);     \
-}                                                                      \
-                                                                       \
 static inline struct bkey_i_##name *bkey_##name##_init(struct bkey_i *_k)\
 {                                                                      \
        struct bkey_i_##name *k =                                       \
@@ -550,45 +546,23 @@ static inline struct bkey_i_##name *bkey_##name##_init(struct bkey_i *_k)\
                                                                        \
        bkey_init(&k->k);                                               \
        memset(&k->v, 0, sizeof(k->v));                                 \
-       k->k.type = nr;                                                 \
+       k->k.type = KEY_TYPE_##name;                                    \
        set_bkey_val_bytes(&k->k, sizeof(k->v));                        \
                                                                        \
        return k;                                                       \
 }
 
-#define __BKEY_VAL_ASSERT(_type, _nr)  EBUG_ON(_type != _nr)
-
-#define BKEY_VAL_ACCESSORS(name, _nr)                                  \
-       static inline void __bch_##name##_assert(u8 type, u8 nr)        \
-       {                                                               \
-               EBUG_ON(type != _nr);                                   \
-       }                                                               \
-                                                                       \
-       __BKEY_VAL_ACCESSORS(name, _nr, __bch_##name##_assert)
-
-BKEY_VAL_ACCESSORS(cookie,             KEY_TYPE_COOKIE);
-
-static inline void __bch2_extent_assert(u8 type, u8 nr)
-{
-       EBUG_ON(type != BCH_EXTENT && type != BCH_EXTENT_CACHED);
-}
-
-__BKEY_VAL_ACCESSORS(extent,           BCH_EXTENT, __bch2_extent_assert);
-BKEY_VAL_ACCESSORS(reservation,                BCH_RESERVATION);
-
-BKEY_VAL_ACCESSORS(inode,              BCH_INODE_FS);
-BKEY_VAL_ACCESSORS(inode_blockdev,     BCH_INODE_BLOCKDEV);
-BKEY_VAL_ACCESSORS(inode_generation,   BCH_INODE_GENERATION);
-
-BKEY_VAL_ACCESSORS(dirent,             BCH_DIRENT);
-
-BKEY_VAL_ACCESSORS(xattr,              BCH_XATTR);
-
-BKEY_VAL_ACCESSORS(alloc,              BCH_ALLOC);
-
-BKEY_VAL_ACCESSORS(quota,              BCH_QUOTA);
-
-BKEY_VAL_ACCESSORS(stripe,             BCH_STRIPE);
+BKEY_VAL_ACCESSORS(cookie);
+BKEY_VAL_ACCESSORS(btree_ptr);
+BKEY_VAL_ACCESSORS(extent);
+BKEY_VAL_ACCESSORS(reservation);
+BKEY_VAL_ACCESSORS(inode);
+BKEY_VAL_ACCESSORS(inode_generation);
+BKEY_VAL_ACCESSORS(dirent);
+BKEY_VAL_ACCESSORS(xattr);
+BKEY_VAL_ACCESSORS(alloc);
+BKEY_VAL_ACCESSORS(quota);
+BKEY_VAL_ACCESSORS(stripe);
 
 /* byte order helpers */
 
index 81c66950668c252b34cebacdb32b494fe57f9964..f518062d896b8d0cd88140d7a35d154721fa43a6 100644 (file)
 #include "quota.h"
 #include "xattr.h"
 
-const struct bkey_ops bch2_bkey_ops[] = {
-       [BKEY_TYPE_EXTENTS]     = bch2_bkey_extent_ops,
-       [BKEY_TYPE_INODES]      = bch2_bkey_inode_ops,
-       [BKEY_TYPE_DIRENTS]     = bch2_bkey_dirent_ops,
-       [BKEY_TYPE_XATTRS]      = bch2_bkey_xattr_ops,
-       [BKEY_TYPE_ALLOC]       = bch2_bkey_alloc_ops,
-       [BKEY_TYPE_QUOTAS]      = bch2_bkey_quota_ops,
-       [BKEY_TYPE_EC]          = bch2_bkey_ec_ops,
-       [BKEY_TYPE_BTREE]       = bch2_bkey_btree_ops,
+const char * const bch_bkey_types[] = {
+#define x(name, nr) #name,
+       BCH_BKEY_TYPES()
+#undef x
+       NULL
 };
 
-const char *bch2_bkey_val_invalid(struct bch_fs *c, enum bkey_type type,
-                                 struct bkey_s_c k)
+static const char *deleted_key_invalid(const struct bch_fs *c,
+                                       struct bkey_s_c k)
 {
-       const struct bkey_ops *ops = &bch2_bkey_ops[type];
+       return NULL;
+}
+
+const struct bkey_ops bch2_bkey_ops_deleted = {
+       .key_invalid = deleted_key_invalid,
+};
+
+const struct bkey_ops bch2_bkey_ops_discard = {
+       .key_invalid = deleted_key_invalid,
+};
 
-       switch (k.k->type) {
-       case KEY_TYPE_DELETED:
-       case KEY_TYPE_DISCARD:
-               return NULL;
+static const char *empty_val_key_invalid(const struct bch_fs *c, struct bkey_s_c k)
+{
+       if (bkey_val_bytes(k.k))
+               return "value size should be zero";
 
-       case KEY_TYPE_ERROR:
-               return bkey_val_bytes(k.k) != 0
-                       ? "value size should be zero"
-                       : NULL;
+       return NULL;
+}
 
-       case KEY_TYPE_COOKIE:
-               return bkey_val_bytes(k.k) != sizeof(struct bch_cookie)
-                       ? "incorrect value size"
-                       : NULL;
+const struct bkey_ops bch2_bkey_ops_error = {
+       .key_invalid = empty_val_key_invalid,
+};
 
-       default:
-               if (k.k->type < KEY_TYPE_GENERIC_NR)
-                       return "invalid type";
+static const char *key_type_cookie_invalid(const struct bch_fs *c,
+                                          struct bkey_s_c k)
+{
+       if (bkey_val_bytes(k.k) != sizeof(struct bch_cookie))
+               return "incorrect value size";
 
-               return ops->key_invalid(c, k);
-       }
+       return NULL;
 }
 
-const char *__bch2_bkey_invalid(struct bch_fs *c, enum bkey_type type,
-                             struct bkey_s_c k)
+const struct bkey_ops bch2_bkey_ops_cookie = {
+       .key_invalid = key_type_cookie_invalid,
+};
+
+const struct bkey_ops bch2_bkey_ops_whiteout = {
+       .key_invalid = empty_val_key_invalid,
+};
+
+static const struct bkey_ops bch2_bkey_ops[] = {
+#define x(name, nr) [KEY_TYPE_##name]  = bch2_bkey_ops_##name,
+       BCH_BKEY_TYPES()
+#undef x
+};
+
+const char *bch2_bkey_val_invalid(struct bch_fs *c, struct bkey_s_c k)
 {
-       const struct bkey_ops *ops = &bch2_bkey_ops[type];
+       if (k.k->type >= KEY_TYPE_MAX)
+               return "invalid type";
+
+       return bch2_bkey_ops[k.k->type].key_invalid(c, k);
+}
 
+const char *__bch2_bkey_invalid(struct bch_fs *c, struct bkey_s_c k,
+                               enum btree_node_type type)
+{
        if (k.k->u64s < BKEY_U64s)
                return "u64s too small";
 
-       if (!ops->is_extents) {
-               if (k.k->size)
-                       return "nonzero size field";
-       } else {
+       if (btree_node_type_is_extents(type)) {
                if ((k.k->size == 0) != bkey_deleted(k.k))
                        return "bad size field";
+       } else {
+               if (k.k->size)
+                       return "nonzero size field";
        }
 
-       if (ops->is_extents &&
-           !k.k->size &&
-           !bkey_deleted(k.k))
-               return "zero size field";
-
        if (k.k->p.snapshot)
                return "nonzero snapshot";
 
@@ -82,11 +100,11 @@ const char *__bch2_bkey_invalid(struct bch_fs *c, enum bkey_type type,
        return NULL;
 }
 
-const char *bch2_bkey_invalid(struct bch_fs *c, enum bkey_type type,
-                             struct bkey_s_c k)
+const char *bch2_bkey_invalid(struct bch_fs *c, struct bkey_s_c k,
+                             enum btree_node_type type)
 {
-       return __bch2_bkey_invalid(c, type, k) ?:
-               bch2_bkey_val_invalid(c, type, k);
+       return __bch2_bkey_invalid(c, k, type) ?:
+               bch2_bkey_val_invalid(c, k);
 }
 
 const char *bch2_bkey_in_btree_node(struct btree *b, struct bkey_s_c k)
@@ -102,24 +120,22 @@ const char *bch2_bkey_in_btree_node(struct btree *b, struct bkey_s_c k)
 
 void bch2_bkey_debugcheck(struct bch_fs *c, struct btree *b, struct bkey_s_c k)
 {
-       enum bkey_type type = btree_node_type(b);
-       const struct bkey_ops *ops = &bch2_bkey_ops[type];
+       const struct bkey_ops *ops = &bch2_bkey_ops[k.k->type];
        const char *invalid;
 
        BUG_ON(!k.k->u64s);
 
-       invalid = bch2_bkey_invalid(c, type, k) ?:
+       invalid = bch2_bkey_invalid(c, k, btree_node_type(b)) ?:
                bch2_bkey_in_btree_node(b, k);
        if (invalid) {
                char buf[160];
 
-               bch2_bkey_val_to_text(&PBUF(buf), c, type, k);
+               bch2_bkey_val_to_text(&PBUF(buf), c, k);
                bch2_fs_bug(c, "invalid bkey %s: %s", buf, invalid);
                return;
        }
 
-       if (k.k->type >= KEY_TYPE_GENERIC_NR &&
-           ops->key_debugcheck)
+       if (ops->key_debugcheck)
                ops->key_debugcheck(c, b, k);
 }
 
@@ -144,46 +160,90 @@ void bch2_bkey_to_text(struct printbuf *out, const struct bkey *k)
 }
 
 void bch2_val_to_text(struct printbuf *out, struct bch_fs *c,
-                     enum bkey_type type, struct bkey_s_c k)
-{
-       const struct bkey_ops *ops = &bch2_bkey_ops[type];
-
-       switch (k.k->type) {
-       case KEY_TYPE_DELETED:
-               pr_buf(out, " deleted");
-               break;
-       case KEY_TYPE_DISCARD:
-               pr_buf(out, " discard");
-               break;
-       case KEY_TYPE_ERROR:
-               pr_buf(out, " error");
-               break;
-       case KEY_TYPE_COOKIE:
-               pr_buf(out, " cookie");
-               break;
-       default:
-               if (k.k->type >= KEY_TYPE_GENERIC_NR && ops->val_to_text)
-                       ops->val_to_text(out, c, k);
-               break;
-       }
+                     struct bkey_s_c k)
+{
+       const struct bkey_ops *ops = &bch2_bkey_ops[k.k->type];
+
+       if (likely(ops->val_to_text))
+               ops->val_to_text(out, c, k);
+       else
+               pr_buf(out, " %s", bch_bkey_types[k.k->type]);
 }
 
 void bch2_bkey_val_to_text(struct printbuf *out, struct bch_fs *c,
-                          enum bkey_type type, struct bkey_s_c k)
+                          struct bkey_s_c k)
 {
        bch2_bkey_to_text(out, k.k);
        pr_buf(out, ": ");
-       bch2_val_to_text(out, c, type, k);
+       bch2_val_to_text(out, c, k);
 }
 
-void bch2_bkey_swab(enum bkey_type type,
-                  const struct bkey_format *f,
-                  struct bkey_packed *k)
+void bch2_bkey_swab(const struct bkey_format *f,
+                   struct bkey_packed *k)
 {
-       const struct bkey_ops *ops = &bch2_bkey_ops[type];
+       const struct bkey_ops *ops = &bch2_bkey_ops[k->type];
 
        bch2_bkey_swab_key(f, k);
 
        if (ops->swab)
                ops->swab(f, k);
 }
+
+bool bch2_bkey_normalize(struct bch_fs *c, struct bkey_s k)
+{
+       const struct bkey_ops *ops = &bch2_bkey_ops[k.k->type];
+
+       return ops->key_normalize
+               ? ops->key_normalize(c, k)
+               : false;
+}
+
+enum merge_result bch2_bkey_merge(struct bch_fs *c,
+                                 struct bkey_i *l, struct bkey_i *r)
+{
+       const struct bkey_ops *ops = &bch2_bkey_ops[l->k.type];
+
+       if (!key_merging_disabled(c) &&
+           ops->key_merge &&
+           l->k.type == r->k.type &&
+           !bversion_cmp(l->k.version, r->k.version) &&
+           !bkey_cmp(l->k.p, bkey_start_pos(&r->k)))
+               return ops->key_merge(c, l, r);
+
+       return BCH_MERGE_NOMERGE;
+}
+
+static const struct old_bkey_type {
+       u8              btree_node_type;
+       u8              old;
+       u8              new;
+} bkey_renumber_table[] = {
+       {BKEY_TYPE_BTREE,       128, KEY_TYPE_btree_ptr         },
+       {BKEY_TYPE_EXTENTS,     128, KEY_TYPE_extent            },
+       {BKEY_TYPE_EXTENTS,     129, KEY_TYPE_extent            },
+       {BKEY_TYPE_EXTENTS,     130, KEY_TYPE_reservation       },
+       {BKEY_TYPE_INODES,      128, KEY_TYPE_inode             },
+       {BKEY_TYPE_INODES,      130, KEY_TYPE_inode_generation  },
+       {BKEY_TYPE_DIRENTS,     128, KEY_TYPE_dirent            },
+       {BKEY_TYPE_DIRENTS,     129, KEY_TYPE_whiteout          },
+       {BKEY_TYPE_XATTRS,      128, KEY_TYPE_xattr             },
+       {BKEY_TYPE_XATTRS,      129, KEY_TYPE_whiteout          },
+       {BKEY_TYPE_ALLOC,       128, KEY_TYPE_alloc             },
+       {BKEY_TYPE_QUOTAS,      128, KEY_TYPE_quota             },
+};
+
+void bch2_bkey_renumber(enum btree_node_type btree_node_type,
+                       struct bkey_packed *k,
+                       int write)
+{
+       const struct old_bkey_type *i;
+
+       for (i = bkey_renumber_table;
+            i < bkey_renumber_table + ARRAY_SIZE(bkey_renumber_table);
+            i++)
+               if (btree_node_type == i->btree_node_type &&
+                   k->type == (write ? i->new : i->old)) {
+                       k->type = write ? i->old : i->new;
+                       break;
+               }
+}
index 62b86a8e2ba85950f25f1b3773191cd8ce7cd7b8..a4bfd2aef5bf6b81359e1c3de6d188224b0b1976 100644 (file)
@@ -4,24 +4,12 @@
 
 #include "bkey.h"
 
-#define DEF_BTREE_ID(kwd, val, name) BKEY_TYPE_##kwd = val,
-
-enum bkey_type {
-       DEFINE_BCH_BTREE_IDS()
-       BKEY_TYPE_BTREE,
-};
-
-#undef DEF_BTREE_ID
-
-/* Type of a key in btree @id at level @level: */
-static inline enum bkey_type bkey_type(unsigned level, enum btree_id id)
-{
-       return level ? BKEY_TYPE_BTREE : (enum bkey_type) id;
-}
-
 struct bch_fs;
 struct btree;
 struct bkey;
+enum btree_node_type;
+
+extern const char * const bch_bkey_types[];
 
 enum merge_result {
        BCH_MERGE_NOMERGE,
@@ -34,12 +22,6 @@ enum merge_result {
        BCH_MERGE_MERGE,
 };
 
-typedef bool (*key_filter_fn)(struct bch_fs *, struct btree *,
-                             struct bkey_s);
-typedef enum merge_result (*key_merge_fn)(struct bch_fs *,
-                                         struct btree *,
-                                         struct bkey_i *, struct bkey_i *);
-
 struct bkey_ops {
        /* Returns reason for being invalid if invalid, else NULL: */
        const char *    (*key_invalid)(const struct bch_fs *,
@@ -49,41 +31,34 @@ struct bkey_ops {
        void            (*val_to_text)(struct printbuf *, struct bch_fs *,
                                       struct bkey_s_c);
        void            (*swab)(const struct bkey_format *, struct bkey_packed *);
-       key_filter_fn   key_normalize;
-       key_merge_fn    key_merge;
-       bool            is_extents;
+       bool            (*key_normalize)(struct bch_fs *, struct bkey_s);
+       enum merge_result (*key_merge)(struct bch_fs *,
+                                      struct bkey_i *, struct bkey_i *);
 };
 
-static inline bool bkey_type_needs_gc(enum bkey_type type)
-{
-       switch (type) {
-       case BKEY_TYPE_BTREE:
-       case BKEY_TYPE_EXTENTS:
-       case BKEY_TYPE_EC:
-               return true;
-       default:
-               return false;
-       }
-}
-
-const char *bch2_bkey_val_invalid(struct bch_fs *, enum bkey_type,
-                                 struct bkey_s_c);
-const char *__bch2_bkey_invalid(struct bch_fs *, enum bkey_type, struct bkey_s_c);
-const char *bch2_bkey_invalid(struct bch_fs *, enum bkey_type, struct bkey_s_c);
+const char *bch2_bkey_val_invalid(struct bch_fs *, struct bkey_s_c);
+const char *__bch2_bkey_invalid(struct bch_fs *, struct bkey_s_c,
+                               enum btree_node_type);
+const char *bch2_bkey_invalid(struct bch_fs *, struct bkey_s_c,
+                             enum btree_node_type);
 const char *bch2_bkey_in_btree_node(struct btree *, struct bkey_s_c);
 
 void bch2_bkey_debugcheck(struct bch_fs *, struct btree *, struct bkey_s_c);
 
 void bch2_bpos_to_text(struct printbuf *, struct bpos);
 void bch2_bkey_to_text(struct printbuf *, const struct bkey *);
-void bch2_val_to_text(struct printbuf *, struct bch_fs *, enum bkey_type,
+void bch2_val_to_text(struct printbuf *, struct bch_fs *,
                      struct bkey_s_c);
 void bch2_bkey_val_to_text(struct printbuf *, struct bch_fs *,
-                          enum bkey_type, struct bkey_s_c);
+                          struct bkey_s_c);
+
+void bch2_bkey_swab(const struct bkey_format *, struct bkey_packed *);
+
+bool bch2_bkey_normalize(struct bch_fs *, struct bkey_s);
 
-void bch2_bkey_swab(enum bkey_type, const struct bkey_format *,
-                   struct bkey_packed *);
+enum merge_result bch2_bkey_merge(struct bch_fs *,
+                                 struct bkey_i *, struct bkey_i *);
 
-extern const struct bkey_ops bch2_bkey_ops[];
+void bch2_bkey_renumber(enum btree_node_type, struct bkey_packed *, int);
 
 #endif /* _BCACHEFS_BKEY_METHODS_H */
index 706ca77d4b172a8ebeb123c8f9603124949d69bb..12825c1b292ff8df33b53b7c45fb5930bf170ffd 100644 (file)
@@ -257,7 +257,7 @@ static void extent_sort_append(struct bch_fs *c,
        bch2_bkey_unpack(b, &tmp.k, k);
 
        if (*prev &&
-           bch2_extent_merge(c, b, (void *) *prev, &tmp.k))
+           bch2_bkey_merge(c, (void *) *prev, &tmp.k))
                return;
 
        if (*prev) {
@@ -375,7 +375,7 @@ struct btree_nr_keys bch2_extent_sort_fix_overlapping(struct bch_fs *c,
 }
 
 /* Sort + repack in a new format: */
-static struct btree_nr_keys
+struct btree_nr_keys
 bch2_sort_repack(struct bset *dst, struct btree *src,
                 struct btree_node_iter *src_iter,
                 struct bkey_format *out_f,
@@ -411,18 +411,12 @@ bch2_sort_repack_merge(struct bch_fs *c,
                       struct bset *dst, struct btree *src,
                       struct btree_node_iter *iter,
                       struct bkey_format *out_f,
-                      bool filter_whiteouts,
-                      key_filter_fn filter,
-                      key_merge_fn merge)
+                      bool filter_whiteouts)
 {
        struct bkey_packed *k, *prev = NULL, *out;
        struct btree_nr_keys nr;
        BKEY_PADDED(k) tmp;
 
-       if (!filter && !merge)
-               return bch2_sort_repack(dst, src, iter, out_f,
-                                       filter_whiteouts);
-
        memset(&nr, 0, sizeof(nr));
 
        while ((k = bch2_btree_node_iter_next_all(iter, src))) {
@@ -435,14 +429,15 @@ bch2_sort_repack_merge(struct bch_fs *c,
                 */
                bch2_bkey_unpack(src, &tmp.k, k);
 
-               if (filter && filter(c, src, bkey_i_to_s(&tmp.k)))
+               if (filter_whiteouts &&
+                   bch2_bkey_normalize(c, bkey_i_to_s(&tmp.k)))
                        continue;
 
                /* prev is always unpacked, for key merging: */
 
                if (prev &&
-                   merge &&
-                   merge(c, src, (void *) prev, &tmp.k) == BCH_MERGE_MERGE)
+                   bch2_bkey_merge(c, (void *) prev, &tmp.k) ==
+                   BCH_MERGE_MERGE)
                        continue;
 
                /*
@@ -606,7 +601,7 @@ unsigned bch2_sort_extent_whiteouts(struct bkey_packed *dst,
                        continue;
 
                EBUG_ON(bkeyp_val_u64s(f, in));
-               EBUG_ON(in->type != KEY_TYPE_DISCARD);
+               EBUG_ON(in->type != KEY_TYPE_discard);
 
                r.k = bkey_unpack_key(iter->b, in);
 
index 6b1661dd221a408a9fece2115f2982d6e315bec1..397009181eaee233d0cf820d9cd2a775d87e76da 100644 (file)
@@ -47,13 +47,14 @@ bch2_extent_sort_fix_overlapping(struct bch_fs *, struct bset *,
                                 struct btree_node_iter_large *);
 
 struct btree_nr_keys
+bch2_sort_repack(struct bset *, struct btree *,
+                struct btree_node_iter *,
+                struct bkey_format *, bool);
+struct btree_nr_keys
 bch2_sort_repack_merge(struct bch_fs *,
                       struct bset *, struct btree *,
                       struct btree_node_iter *,
-                      struct bkey_format *,
-                      bool,
-                      key_filter_fn,
-                      key_merge_fn);
+                      struct bkey_format *, bool);
 
 unsigned bch2_sort_keys(struct bkey_packed *,
                        struct sort_iter *, bool);
index 5d03036620b904e80e7b4fe137adf615fbf4a97e..329ffb0b6b3d8609dee95f5be98eff9f937349ac 100644 (file)
@@ -397,7 +397,7 @@ bch2_bkey_prev_all(struct btree *b, struct bset_tree *t, struct bkey_packed *k)
 static inline struct bkey_packed *
 bch2_bkey_prev(struct btree *b, struct bset_tree *t, struct bkey_packed *k)
 {
-       return bch2_bkey_prev_filter(b, t, k, KEY_TYPE_DISCARD + 1);
+       return bch2_bkey_prev_filter(b, t, k, KEY_TYPE_discard + 1);
 }
 
 enum bch_extent_overlap {
@@ -529,7 +529,7 @@ bch2_btree_node_iter_peek_all(struct btree_node_iter *iter,
 static inline struct bkey_packed *
 bch2_btree_node_iter_peek(struct btree_node_iter *iter, struct btree *b)
 {
-       return bch2_btree_node_iter_peek_filter(iter, b, KEY_TYPE_DISCARD + 1);
+       return bch2_btree_node_iter_peek_filter(iter, b, KEY_TYPE_discard + 1);
 }
 
 static inline struct bkey_packed *
@@ -555,7 +555,7 @@ bch2_btree_node_iter_prev_all(struct btree_node_iter *iter, struct btree *b)
 static inline struct bkey_packed *
 bch2_btree_node_iter_prev(struct btree_node_iter *iter, struct btree *b)
 {
-       return bch2_btree_node_iter_prev_filter(iter, b, KEY_TYPE_DISCARD + 1);
+       return bch2_btree_node_iter_prev_filter(iter, b, KEY_TYPE_discard + 1);
 }
 
 struct bkey_s_c bch2_btree_node_iter_peek_unpack(struct btree_node_iter *,
index 846d5e816aa27895298540cdf6ca8cb494d78a57..b748afc778f4d3589aaab6b01b76239283941c90 100644 (file)
@@ -6,20 +6,17 @@
 #include "btree_iter.h"
 #include "btree_locking.h"
 #include "debug.h"
-#include "extents.h"
 #include "trace.h"
 
 #include <linux/prefetch.h>
 
-#define DEF_BTREE_ID(kwd, val, name) name,
-
 const char * const bch2_btree_ids[] = {
-       DEFINE_BCH_BTREE_IDS()
+#define x(kwd, val, name) name,
+       BCH_BTREE_IDS()
+#undef x
        NULL
 };
 
-#undef DEF_BTREE_ID
-
 void bch2_recalc_btree_reserve(struct bch_fs *c)
 {
        unsigned i, reserve = 16;
@@ -100,7 +97,7 @@ static struct btree *btree_node_mem_alloc(struct bch_fs *c, gfp_t gfp)
        if (!b)
                return NULL;
 
-       bkey_extent_init(&b->key);
+       bkey_btree_ptr_init(&b->key);
        six_lock_init(&b->lock);
        lockdep_set_novalidate_class(&b->lock);
        INIT_LIST_HEAD(&b->list);
@@ -117,7 +114,7 @@ void bch2_btree_node_hash_remove(struct btree_cache *bc, struct btree *b)
        rhashtable_remove_fast(&bc->table, &b->hash, bch_btree_cache_params);
 
        /* Cause future lookups for this node to fail: */
-       bkey_i_to_extent(&b->key)->v._data[0] = 0;
+       PTR_HASH(&b->key) = 0;
 }
 
 int __bch2_btree_node_hash_insert(struct btree_cache *bc, struct btree *b)
@@ -604,7 +601,7 @@ static noinline struct btree *bch2_btree_node_fill(struct bch_fs *c,
                /* raced with another fill: */
 
                /* mark as unhashed... */
-               bkey_i_to_extent(&b->key)->v._data[0] = 0;
+               PTR_HASH(&b->key) = 0;
 
                mutex_lock(&bc->lock);
                list_add(&b->list, &bc->freeable);
@@ -906,8 +903,7 @@ void bch2_btree_node_to_text(struct printbuf *out, struct bch_fs *c,
               b->data->min_key.offset,
               b->data->max_key.inode,
               b->data->max_key.offset);
-       bch2_val_to_text(out, c, BKEY_TYPE_BTREE,
-                        bkey_i_to_s_c(&b->key));
+       bch2_val_to_text(out, c, bkey_i_to_s_c(&b->key));
        pr_buf(out, "\n"
               "    format: u64s %u fields %u %u %u %u %u\n"
               "    unpack fn len: %u\n"
index cb7f66fc8bd48f5b39bc7a25b93880fafe43ab20..7bd2bc84160d056aef6c4a575311f1541915774d 100644 (file)
@@ -4,7 +4,6 @@
 
 #include "bcachefs.h"
 #include "btree_types.h"
-#include "extents.h"
 
 struct btree_iter;
 
@@ -37,12 +36,13 @@ void bch2_fs_btree_cache_exit(struct bch_fs *);
 int bch2_fs_btree_cache_init(struct bch_fs *);
 void bch2_fs_btree_cache_init_early(struct btree_cache *);
 
-#define PTR_HASH(_k)   (bkey_i_to_extent_c(_k)->v._data[0])
+#define PTR_HASH(_k)   *((u64 *) &bkey_i_to_btree_ptr_c(_k)->v)
 
 /* is btree node in hash table? */
 static inline bool btree_node_hashed(struct btree *b)
 {
-       return bkey_extent_is_data(&b->key.k) && PTR_HASH(&b->key);
+       return b->key.k.type == KEY_TYPE_btree_ptr &&
+               PTR_HASH(&b->key);
 }
 
 #define for_each_cached_btree(_b, _c, _tbl, _iter, _pos)               \
index a849f9e320b3509c7bbafb42714a07aec676855d..85fc181e76a81de7253e0b06f0702008ea30b8f5 100644 (file)
@@ -112,137 +112,11 @@ static void btree_node_range_checks(struct bch_fs *c, struct btree *b,
 
 /* marking of btree keys/nodes: */
 
-static void ptr_gen_recalc_oldest(struct bch_fs *c,
-                                 const struct bch_extent_ptr *ptr,
-                                 u8 *max_stale)
-{
-       struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
-       size_t b = PTR_BUCKET_NR(ca, ptr);
-
-       if (gen_after(ca->oldest_gens[b], ptr->gen))
-               ca->oldest_gens[b] = ptr->gen;
-
-       *max_stale = max(*max_stale, ptr_stale(ca, ptr));
-}
-
-static void ptr_gens_recalc_oldest(struct bch_fs *c, enum bkey_type type,
-                                  struct bkey_s_c k, u8 *max_stale)
-{
-       const struct bch_extent_ptr *ptr;
-
-       switch (type) {
-       case BKEY_TYPE_BTREE:
-       case BKEY_TYPE_EXTENTS:
-               switch (k.k->type) {
-               case BCH_EXTENT:
-               case BCH_EXTENT_CACHED: {
-                       struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
-
-                       extent_for_each_ptr(e, ptr)
-                               ptr_gen_recalc_oldest(c, ptr, max_stale);
-                       break;
-               }
-               }
-               break;
-       case BKEY_TYPE_EC:
-               switch (k.k->type) {
-               case BCH_STRIPE: {
-                       struct bkey_s_c_stripe s = bkey_s_c_to_stripe(k);
-
-                       for (ptr = s.v->ptrs;
-                            ptr < s.v->ptrs + s.v->nr_blocks;
-                            ptr++)
-                               ptr_gen_recalc_oldest(c, ptr, max_stale);
-               }
-               }
-       default:
-               break;
-       }
-}
-
-static int ptr_gen_check(struct bch_fs *c,
-                        enum bkey_type type,
-                        const struct bch_extent_ptr *ptr)
-{
-       struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
-       size_t b = PTR_BUCKET_NR(ca, ptr);
-       struct bucket *g = PTR_BUCKET(ca, ptr);
-       int ret = 0;
-
-       if (mustfix_fsck_err_on(!g->mark.gen_valid, c,
-                               "found ptr with missing gen in alloc btree,\n"
-                               "type %u gen %u",
-                               type, ptr->gen)) {
-               g->_mark.gen = ptr->gen;
-               g->_mark.gen_valid = 1;
-               set_bit(b, ca->buckets_dirty);
-       }
-
-       if (mustfix_fsck_err_on(gen_cmp(ptr->gen, g->mark.gen) > 0, c,
-                               "%u ptr gen in the future: %u > %u",
-                               type, ptr->gen, g->mark.gen)) {
-               g->_mark.gen = ptr->gen;
-               g->_mark.gen_valid = 1;
-               set_bit(b, ca->buckets_dirty);
-               set_bit(BCH_FS_FIXED_GENS, &c->flags);
-       }
-fsck_err:
-       return ret;
-}
-
-static int ptr_gens_check(struct bch_fs *c, enum bkey_type type,
-                         struct bkey_s_c k)
-{
-       const struct bch_extent_ptr *ptr;
-       int ret = 0;
-
-       switch (type) {
-       case BKEY_TYPE_BTREE:
-       case BKEY_TYPE_EXTENTS:
-               switch (k.k->type) {
-               case BCH_EXTENT:
-               case BCH_EXTENT_CACHED: {
-                       struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
-
-                       extent_for_each_ptr(e, ptr) {
-                               ret = ptr_gen_check(c, type, ptr);
-                               if (ret)
-                                       return ret;
-
-                       }
-                       break;
-               }
-               }
-               break;
-       case BKEY_TYPE_EC:
-               switch (k.k->type) {
-               case BCH_STRIPE: {
-                       struct bkey_s_c_stripe s = bkey_s_c_to_stripe(k);
-
-                       for (ptr = s.v->ptrs;
-                            ptr < s.v->ptrs + s.v->nr_blocks;
-                            ptr++) {
-                               ret = ptr_gen_check(c, type, ptr);
-                               if (ret)
-                                       return ret;
-                       }
-               }
-               }
-               break;
-       default:
-               break;
-       }
-
-       return ret;
-}
-
-/*
- * For runtime mark and sweep:
- */
-static int bch2_gc_mark_key(struct bch_fs *c, enum bkey_type type,
-                           struct bkey_s_c k,
+static int bch2_gc_mark_key(struct bch_fs *c, struct bkey_s_c k,
                            u8 *max_stale, bool initial)
 {
+       struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+       const struct bch_extent_ptr *ptr;
        struct gc_pos pos = { 0 };
        unsigned flags =
                BCH_BUCKET_MARK_GC|
@@ -257,23 +131,50 @@ static int bch2_gc_mark_key(struct bch_fs *c, enum bkey_type type,
                        atomic64_set(&c->key_version, k.k->version.lo);
 
                if (test_bit(BCH_FS_REBUILD_REPLICAS, &c->flags) ||
-                   fsck_err_on(!bch2_bkey_replicas_marked(c, type, k,
-                                                          false), c,
+                   fsck_err_on(!bch2_bkey_replicas_marked(c, k, false), c,
                                "superblock not marked as containing replicas (type %u)",
-                               type)) {
-                       ret = bch2_mark_bkey_replicas(c, type, k);
+                               k.k->type)) {
+                       ret = bch2_mark_bkey_replicas(c, k);
                        if (ret)
                                return ret;
                }
 
-               ret = ptr_gens_check(c, type, k);
-               if (ret)
-                       return ret;
+               bkey_for_each_ptr(ptrs, ptr) {
+                       struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
+                       size_t b = PTR_BUCKET_NR(ca, ptr);
+                       struct bucket *g = PTR_BUCKET(ca, ptr);
+
+                       if (mustfix_fsck_err_on(!g->mark.gen_valid, c,
+                                       "found ptr with missing gen in alloc btree,\n"
+                                       "type %u gen %u",
+                                       k.k->type, ptr->gen)) {
+                               g->_mark.gen = ptr->gen;
+                               g->_mark.gen_valid = 1;
+                               set_bit(b, ca->buckets_dirty);
+                       }
+
+                       if (mustfix_fsck_err_on(gen_cmp(ptr->gen, g->mark.gen) > 0, c,
+                                       "%u ptr gen in the future: %u > %u",
+                                       k.k->type, ptr->gen, g->mark.gen)) {
+                               g->_mark.gen = ptr->gen;
+                               g->_mark.gen_valid = 1;
+                               set_bit(b, ca->buckets_dirty);
+                               set_bit(BCH_FS_FIXED_GENS, &c->flags);
+                       }
+               }
        }
 
-       bch2_mark_key(c, type, k, true, k.k->size, pos, NULL, 0, flags);
+       bkey_for_each_ptr(ptrs, ptr) {
+               struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
+               size_t b = PTR_BUCKET_NR(ca, ptr);
+
+               if (gen_after(ca->oldest_gens[b], ptr->gen))
+                       ca->oldest_gens[b] = ptr->gen;
+
+               *max_stale = max(*max_stale, ptr_stale(ca, ptr));
+       }
 
-       ptr_gens_recalc_oldest(c, type, k, max_stale);
+       bch2_mark_key(c, k, true, k.k->size, pos, NULL, 0, flags);
 fsck_err:
        return ret;
 }
@@ -281,7 +182,6 @@ fsck_err:
 static int btree_gc_mark_node(struct bch_fs *c, struct btree *b,
                              u8 *max_stale, bool initial)
 {
-       enum bkey_type type = btree_node_type(b);
        struct btree_node_iter iter;
        struct bkey unpacked;
        struct bkey_s_c k;
@@ -289,14 +189,14 @@ static int btree_gc_mark_node(struct bch_fs *c, struct btree *b,
 
        *max_stale = 0;
 
-       if (!bkey_type_needs_gc(type))
+       if (!btree_node_type_needs_gc(btree_node_type(b)))
                return 0;
 
        for_each_btree_node_key_unpack(b, k, &iter,
                                       &unpacked) {
                bch2_bkey_debugcheck(c, b, k);
 
-               ret = bch2_gc_mark_key(c, type, k, max_stale, initial);
+               ret = bch2_gc_mark_key(c, k, max_stale, initial);
                if (ret)
                        break;
        }
@@ -310,7 +210,7 @@ static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id,
        struct btree_iter iter;
        struct btree *b;
        struct range_checks r;
-       unsigned depth = bkey_type_needs_gc(btree_id) ? 0 : 1;
+       unsigned depth = btree_node_type_needs_gc(btree_id) ? 0 : 1;
        u8 max_stale;
        int ret = 0;
 
@@ -364,7 +264,7 @@ static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id,
 
        b = c->btree_roots[btree_id].b;
        if (!btree_node_fake(b))
-               bch2_gc_mark_key(c, BKEY_TYPE_BTREE, bkey_i_to_s_c(&b->key),
+               bch2_gc_mark_key(c, bkey_i_to_s_c(&b->key),
                                 &max_stale, initial);
        gc_pos_set(c, gc_pos_btree_root(b->btree_id));
 
@@ -391,13 +291,13 @@ static int bch2_gc_btrees(struct bch_fs *c, struct list_head *journal,
 
        for (i = 0; i < BTREE_ID_NR; i++) {
                enum btree_id id = ids[i];
-               enum bkey_type type = bkey_type(0, id);
+               enum btree_node_type type = __btree_node_type(0, id);
 
                int ret = bch2_gc_btree(c, id, initial);
                if (ret)
                        return ret;
 
-               if (journal && bkey_type_needs_gc(type)) {
+               if (journal && btree_node_type_needs_gc(type)) {
                        struct bkey_i *k, *n;
                        struct jset_entry *j;
                        struct journal_replay *r;
@@ -405,8 +305,8 @@ static int bch2_gc_btrees(struct bch_fs *c, struct list_head *journal,
 
                        list_for_each_entry(r, journal, list)
                                for_each_jset_key(k, n, j, &r->j) {
-                                       if (type == bkey_type(j->level, j->btree_id)) {
-                                               ret = bch2_gc_mark_key(c, type,
+                                       if (type == __btree_node_type(j->level, j->btree_id)) {
+                                               ret = bch2_gc_mark_key(c,
                                                        bkey_i_to_s_c(k),
                                                        &max_stale, initial);
                                                if (ret)
@@ -507,8 +407,7 @@ static void bch2_mark_pending_btree_node_frees(struct bch_fs *c)
 
        for_each_pending_btree_node_free(c, as, d)
                if (d->index_update_done)
-                       bch2_mark_key(c, BKEY_TYPE_BTREE,
-                                     bkey_i_to_s_c(&d->key),
+                       bch2_mark_key(c, bkey_i_to_s_c(&d->key),
                                      true, 0,
                                      pos, NULL, 0,
                                      BCH_BUCKET_MARK_GC);
index bb77564b9463c04efd04e3e540a5d63b5ec68f30..89ee72ac49f6fbf1f97823dc1213ea00321ea9c1 100644 (file)
@@ -4,8 +4,6 @@
 
 #include "btree_types.h"
 
-enum bkey_type;
-
 void bch2_coalesce(struct bch_fs *);
 int bch2_gc(struct bch_fs *, struct list_head *, bool);
 void bch2_gc_thread_stop(struct bch_fs *);
@@ -58,9 +56,9 @@ static inline int gc_pos_cmp(struct gc_pos l, struct gc_pos r)
 static inline enum gc_phase btree_id_to_gc_phase(enum btree_id id)
 {
        switch (id) {
-#define DEF_BTREE_ID(n, v, s) case BTREE_ID_##n: return GC_PHASE_BTREE_##n;
-       DEFINE_BCH_BTREE_IDS()
-#undef DEF_BTREE_ID
+#define x(n, v, s) case BTREE_ID_##n: return GC_PHASE_BTREE_##n;
+       BCH_BTREE_IDS()
+#undef x
        default:
                BUG();
        }
index 506bf9e8df380c601f4e2d2663bb2481afb1ac44..f205bddd814d1cc0db6f7ffb4b05a794c72796ff 100644 (file)
@@ -392,12 +392,16 @@ void bch2_btree_sort_into(struct bch_fs *c,
 
        bch2_btree_node_iter_init_from_start(&src_iter, src);
 
-       nr = bch2_sort_repack_merge(c, btree_bset_first(dst),
-                       src, &src_iter,
-                       &dst->format,
-                       true,
-                       btree_node_ops(src)->key_normalize,
-                       btree_node_ops(src)->key_merge);
+       if (btree_node_is_extents(src))
+               nr = bch2_sort_repack_merge(c, btree_bset_first(dst),
+                               src, &src_iter,
+                               &dst->format,
+                               true);
+       else
+               nr = bch2_sort_repack(btree_bset_first(dst),
+                               src, &src_iter,
+                               &dst->format,
+                               true);
 
        bch2_time_stats_update(&c->times[BCH_TIME_btree_sort], start_time);
 
@@ -598,8 +602,8 @@ static int validate_bset(struct bch_fs *c, struct btree *b,
 {
        struct bkey_packed *k, *prev = NULL;
        struct bpos prev_pos = POS_MIN;
-       enum bkey_type type = btree_node_type(b);
        bool seen_non_whiteout = false;
+       unsigned version;
        const char *err;
        int ret = 0;
 
@@ -645,13 +649,12 @@ static int validate_bset(struct bch_fs *c, struct btree *b,
                             "invalid bkey format: %s", err);
        }
 
-       if (btree_err_on(le16_to_cpu(i->version) != BCACHE_BSET_VERSION,
-                        BTREE_ERR_FIXABLE, c, b, i,
-                        "unsupported bset version")) {
-               i->version = cpu_to_le16(BCACHE_BSET_VERSION);
-               i->u64s = 0;
-               return 0;
-       }
+       version = le16_to_cpu(i->version);
+       btree_err_on((version != BCH_BSET_VERSION_OLD &&
+                     version < bcachefs_metadata_version_min) ||
+                    version >= bcachefs_metadata_version_max,
+                    BTREE_ERR_FATAL, c, b, i,
+                    "unsupported bset version");
 
        if (btree_err_on(b->written + sectors > c->opts.btree_node_size,
                         BTREE_ERR_FIXABLE, c, b, i,
@@ -700,17 +703,21 @@ static int validate_bset(struct bch_fs *c, struct btree *b,
                }
 
                if (BSET_BIG_ENDIAN(i) != CPU_BIG_ENDIAN)
-                       bch2_bkey_swab(type, &b->format, k);
+                       bch2_bkey_swab(&b->format, k);
+
+               if (!write &&
+                   version < bcachefs_metadata_version_bkey_renumber)
+                       bch2_bkey_renumber(btree_node_type(b), k, write);
 
                u = bkey_disassemble(b, k, &tmp);
 
-               invalid = __bch2_bkey_invalid(c, type, u) ?:
+               invalid = __bch2_bkey_invalid(c, u, btree_node_type(b)) ?:
                        bch2_bkey_in_btree_node(b, u) ?:
-                       (write ? bch2_bkey_val_invalid(c, type, u) : NULL);
+                       (write ? bch2_bkey_val_invalid(c, u) : NULL);
                if (invalid) {
                        char buf[160];
 
-                       bch2_bkey_val_to_text(&PBUF(buf), c, type, u);
+                       bch2_bkey_val_to_text(&PBUF(buf), c, u);
                        btree_err(BTREE_ERR_FIXABLE, c, b, i,
                                  "invalid bkey:\n%s\n%s", invalid, buf);
 
@@ -720,6 +727,10 @@ static int validate_bset(struct bch_fs *c, struct btree *b,
                        continue;
                }
 
+               if (write &&
+                   version < bcachefs_metadata_version_bkey_renumber)
+                       bch2_bkey_renumber(btree_node_type(b), k, write);
+
                /*
                 * with the separate whiteouts thing (used for extents), the
                 * second set of keys actually can have whiteouts too, so we
@@ -885,17 +896,16 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct btree *b, bool have_retry
 
        i = &b->data->keys;
        for (k = i->start; k != vstruct_last(i);) {
-               enum bkey_type type = btree_node_type(b);
                struct bkey tmp;
                struct bkey_s_c u = bkey_disassemble(b, k, &tmp);
-               const char *invalid = bch2_bkey_val_invalid(c, type, u);
+               const char *invalid = bch2_bkey_val_invalid(c, u);
 
                if (invalid ||
                    (inject_invalid_keys(c) &&
                     !bversion_cmp(u.k->version, MAX_VERSION))) {
                        char buf[160];
 
-                       bch2_bkey_val_to_text(&PBUF(buf), c, type, u);
+                       bch2_bkey_val_to_text(&PBUF(buf), c, u);
                        btree_err(BTREE_ERR_FIXABLE, c, b, i,
                                  "invalid bkey %s: %s", buf, invalid);
 
@@ -964,7 +974,9 @@ start:
 
                bch2_mark_io_failure(&failed, &rb->pick);
 
-               can_retry = bch2_btree_pick_ptr(c, b, &failed, &rb->pick) > 0;
+               can_retry = bch2_bkey_pick_read_device(c,
+                               bkey_i_to_s_c(&b->key),
+                               &failed, &rb->pick) > 0;
 
                if (!bio->bi_status &&
                    !bch2_btree_node_read_done(c, b, can_retry))
@@ -1007,7 +1019,8 @@ void bch2_btree_node_read(struct bch_fs *c, struct btree *b,
 
        trace_btree_read(c, b);
 
-       ret = bch2_btree_pick_ptr(c, b, NULL, &pick);
+       ret = bch2_bkey_pick_read_device(c, bkey_i_to_s_c(&b->key),
+                                        NULL, &pick);
        if (bch2_fs_fatal_err_on(ret <= 0, c,
                        "btree node read error: no device to read from")) {
                set_btree_node_read_error(b);
@@ -1135,8 +1148,8 @@ static void bch2_btree_node_write_error(struct bch_fs *c,
 {
        struct btree *b         = wbio->wbio.bio.bi_private;
        __BKEY_PADDED(k, BKEY_BTREE_PTR_VAL_U64s_MAX) tmp;
-       struct bkey_i_extent *new_key;
-       struct bkey_s_extent e;
+       struct bkey_i_btree_ptr *new_key;
+       struct bkey_s_btree_ptr bp;
        struct bch_extent_ptr *ptr;
        struct btree_iter iter;
        int ret;
@@ -1160,13 +1173,13 @@ retry:
 
        bkey_copy(&tmp.k, &b->key);
 
-       new_key = bkey_i_to_extent(&tmp.k);
-       e = extent_i_to_s(new_key);
+       new_key = bkey_i_to_btree_ptr(&tmp.k);
+       bp = btree_ptr_i_to_s(new_key);
 
-       bch2_extent_drop_ptrs(e, ptr,
+       bch2_bkey_drop_ptrs(bkey_i_to_s(&tmp.k), ptr,
                bch2_dev_list_has_dev(wbio->wbio.failed, ptr->dev));
 
-       if (!bch2_extent_nr_ptrs(e.c))
+       if (!bch2_bkey_nr_ptrs(bp.s_c))
                goto err;
 
        ret = bch2_btree_node_update_key(c, &iter, b, new_key);
@@ -1269,12 +1282,11 @@ static void btree_node_write_endio(struct bio *bio)
 static int validate_bset_for_write(struct bch_fs *c, struct btree *b,
                                   struct bset *i, unsigned sectors)
 {
-       const struct bch_extent_ptr *ptr;
        unsigned whiteout_u64s = 0;
        int ret;
 
-       extent_for_each_ptr(bkey_i_to_s_c_extent(&b->key), ptr)
-               break;
+       if (bch2_bkey_invalid(c, bkey_i_to_s_c(&b->key), BKEY_TYPE_BTREE))
+               return -1;
 
        ret = validate_bset(c, b, i, sectors, &whiteout_u64s, WRITE, false);
        if (ret)
@@ -1292,7 +1304,6 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b,
        struct btree_node *bn = NULL;
        struct btree_node_entry *bne = NULL;
        BKEY_PADDED(key) k;
-       struct bkey_s_extent e;
        struct bch_extent_ptr *ptr;
        struct sort_iter sort_iter;
        struct nonce nonce;
@@ -1300,6 +1311,7 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b,
        u64 seq = 0;
        bool used_mempool;
        unsigned long old, new;
+       bool validate_before_checksum = false;
        void *data;
 
        if (test_bit(BCH_FS_HOLD_BTREE_WRITES, &c->flags))
@@ -1433,11 +1445,21 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b,
        BUG_ON(BSET_BIG_ENDIAN(i) != CPU_BIG_ENDIAN);
        BUG_ON(i->seq != b->data->keys.seq);
 
-       i->version = cpu_to_le16(BCACHE_BSET_VERSION);
+       i->version = c->sb.version < bcachefs_metadata_version_new_versioning
+               ? cpu_to_le16(BCH_BSET_VERSION_OLD)
+               : cpu_to_le16(c->sb.version);
        SET_BSET_CSUM_TYPE(i, bch2_meta_checksum_type(c));
 
+       if (bch2_csum_type_is_encryption(BSET_CSUM_TYPE(i)))
+               validate_before_checksum = true;
+
+       /* validate_bset will be modifying: */
+       if (le16_to_cpu(i->version) <
+           bcachefs_metadata_version_bkey_renumber)
+               validate_before_checksum = true;
+
        /* if we're going to be encrypting, check metadata validity first: */
-       if (bch2_csum_type_is_encryption(BSET_CSUM_TYPE(i)) &&
+       if (validate_before_checksum &&
            validate_bset_for_write(c, b, i, sectors_to_write))
                goto err;
 
@@ -1451,7 +1473,7 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b,
                bne->csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, bne);
 
        /* if we're not encrypting, check metadata after checksumming: */
-       if (!bch2_csum_type_is_encryption(BSET_CSUM_TYPE(i)) &&
+       if (!validate_before_checksum &&
            validate_bset_for_write(c, b, i, sectors_to_write))
                goto err;
 
@@ -1506,9 +1528,8 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b,
         */
 
        bkey_copy(&k.key, &b->key);
-       e = bkey_i_to_s_extent(&k.key);
 
-       extent_for_each_ptr(e, ptr)
+       bkey_for_each_ptr(bch2_bkey_ptrs(bkey_i_to_s(&k.key)), ptr)
                ptr->offset += b->written;
 
        b->written += sectors_to_write;
index afc43722c1fcd0fc3243b094f7263781defd0572..4720061e9562376621782369c91e1b0ccf0e6600 100644 (file)
@@ -433,7 +433,7 @@ static void __bch2_btree_iter_verify(struct btree_iter *iter,
         * whiteouts)
         */
        k = b->level || iter->flags & BTREE_ITER_IS_EXTENTS
-               ? bch2_btree_node_iter_prev_filter(&tmp, b, KEY_TYPE_DISCARD)
+               ? bch2_btree_node_iter_prev_filter(&tmp, b, KEY_TYPE_discard)
                : bch2_btree_node_iter_prev_all(&tmp, b);
        if (k && btree_iter_pos_cmp(iter, b, k) > 0) {
                char buf[100];
@@ -622,7 +622,7 @@ static inline struct bkey_s_c __btree_iter_unpack(struct btree_iter *iter,
                 * signal to bch2_btree_iter_peek_slot() that we're currently at
                 * a hole
                 */
-               u->type = KEY_TYPE_DELETED;
+               u->type = KEY_TYPE_deleted;
                return bkey_s_c_null;
        }
 
index 7eecaa6cd5a25540e0e6f833c53ed66276f35126..b4a826369a57b3c8d41b87d2a2c0a9255dd681bc 100644 (file)
@@ -405,20 +405,45 @@ static inline unsigned bset_byte_offset(struct btree *b, void *i)
        return i - (void *) b->data;
 }
 
+enum btree_node_type {
+#define x(kwd, val, name) BKEY_TYPE_##kwd = val,
+       BCH_BTREE_IDS()
+#undef x
+       BKEY_TYPE_BTREE,
+};
+
+/* Type of a key in btree @id at level @level: */
+static inline enum btree_node_type __btree_node_type(unsigned level, enum btree_id id)
+{
+       return level ? BKEY_TYPE_BTREE : (enum btree_node_type) id;
+}
+
 /* Type of keys @b contains: */
-static inline enum bkey_type btree_node_type(struct btree *b)
+static inline enum btree_node_type btree_node_type(struct btree *b)
 {
-       return b->level ? BKEY_TYPE_BTREE : b->btree_id;
+       return __btree_node_type(b->level, b->btree_id);
 }
 
-static inline const struct bkey_ops *btree_node_ops(struct btree *b)
+static inline bool btree_node_type_is_extents(enum btree_node_type type)
 {
-       return &bch2_bkey_ops[btree_node_type(b)];
+       return type == BKEY_TYPE_EXTENTS;
 }
 
 static inline bool btree_node_is_extents(struct btree *b)
 {
-       return btree_node_type(b) == BKEY_TYPE_EXTENTS;
+       return btree_node_type_is_extents(btree_node_type(b));
+}
+
+static inline bool btree_node_type_needs_gc(enum btree_node_type type)
+{
+       switch (type) {
+       case BKEY_TYPE_BTREE:
+       case BKEY_TYPE_EXTENTS:
+       case BKEY_TYPE_EC:
+               return true;
+       default:
+               return false;
+       }
 }
 
 struct btree_root {
index f6b0082235af0ef119419abf540bd205c9a23dc5..d1647f6eb476022ea3a20e931716415179121c47 100644 (file)
@@ -120,7 +120,7 @@ int bch2_btree_delete_range(struct bch_fs *, enum btree_id,
 int bch2_btree_node_rewrite(struct bch_fs *c, struct btree_iter *,
                            __le64, unsigned);
 int bch2_btree_node_update_key(struct bch_fs *, struct btree_iter *,
-                              struct btree *, struct bkey_i_extent *);
+                              struct btree *, struct bkey_i_btree_ptr *);
 
 /* new transactional interface: */
 
index 7d7a021416f303209dc1b174e2438ffebc4fff38..22f087098776f477be7073c1e25a2185f6778c64 100644 (file)
@@ -132,13 +132,15 @@ bool bch2_btree_node_format_fits(struct bch_fs *c, struct btree *b,
 /* Btree node freeing/allocation: */
 
 static bool btree_key_matches(struct bch_fs *c,
-                             struct bkey_s_c_extent l,
-                             struct bkey_s_c_extent r)
+                             struct bkey_s_c l,
+                             struct bkey_s_c r)
 {
+       struct bkey_ptrs_c ptrs1 = bch2_bkey_ptrs_c(l);
+       struct bkey_ptrs_c ptrs2 = bch2_bkey_ptrs_c(r);
        const struct bch_extent_ptr *ptr1, *ptr2;
 
-       extent_for_each_ptr(l, ptr1)
-               extent_for_each_ptr(r, ptr2)
+       bkey_for_each_ptr(ptrs1, ptr1)
+               bkey_for_each_ptr(ptrs2, ptr2)
                        if (ptr1->dev == ptr2->dev &&
                            ptr1->gen == ptr2->gen &&
                            ptr1->offset == ptr2->offset)
@@ -164,8 +166,7 @@ static void bch2_btree_node_free_index(struct btree_update *as, struct btree *b,
 
        for (d = as->pending; d < as->pending + as->nr_pending; d++)
                if (!bkey_cmp(k.k->p, d->key.k.p) &&
-                   btree_key_matches(c, bkey_s_c_to_extent(k),
-                                     bkey_i_to_s_c_extent(&d->key)))
+                   btree_key_matches(c, k, bkey_i_to_s_c(&d->key)))
                        goto found;
        BUG();
 found:
@@ -197,7 +198,7 @@ found:
                       ? gc_pos_btree_node(b)
                       : gc_pos_btree_root(as->btree_id)) >= 0 &&
            gc_pos_cmp(c->gc_pos, gc_phase(GC_PHASE_PENDING_DELETE)) < 0)
-               bch2_mark_key_locked(c, BKEY_TYPE_BTREE,
+               bch2_mark_key_locked(c,
                              bkey_i_to_s_c(&d->key),
                              false, 0, pos,
                              NULL, 0, BCH_BUCKET_MARK_GC);
@@ -270,8 +271,7 @@ static void bch2_btree_node_free_ondisk(struct bch_fs *c,
 {
        BUG_ON(!pending->index_update_done);
 
-       bch2_mark_key(c, BKEY_TYPE_BTREE,
-                     bkey_i_to_s_c(&pending->key),
+       bch2_mark_key(c, bkey_i_to_s_c(&pending->key),
                      false, 0,
                      gc_phase(GC_PHASE_PENDING_DELETE),
                      NULL, 0, 0);
@@ -285,7 +285,6 @@ static struct btree *__bch2_btree_node_alloc(struct bch_fs *c,
        struct write_point *wp;
        struct btree *b;
        BKEY_PADDED(k) tmp;
-       struct bkey_i_extent *e;
        struct open_buckets ob = { .nr = 0 };
        struct bch_devs_list devs_have = (struct bch_devs_list) { 0 };
        unsigned nr_reserve;
@@ -336,8 +335,8 @@ retry:
                goto retry;
        }
 
-       e = bkey_extent_init(&tmp.k);
-       bch2_alloc_sectors_append_ptrs(c, wp, e, c->opts.btree_node_size);
+       bkey_btree_ptr_init(&tmp.k);
+       bch2_alloc_sectors_append_ptrs(c, wp, &tmp.k, c->opts.btree_node_size);
 
        bch2_open_bucket_get(c, wp, &ob);
        bch2_alloc_sectors_done(c, wp);
@@ -375,7 +374,7 @@ static struct btree *bch2_btree_node_alloc(struct btree_update *as, unsigned lev
        b->data->flags = 0;
        SET_BTREE_NODE_ID(b->data, as->btree_id);
        SET_BTREE_NODE_LEVEL(b->data, level);
-       b->data->ptr = bkey_i_to_extent(&b->key)->v.start->ptr;
+       b->data->ptr = bkey_i_to_btree_ptr(&b->key)->v.start[0];
 
        bch2_btree_build_aux_trees(b);
 
@@ -528,8 +527,7 @@ static struct btree_reserve *bch2_btree_reserve_get(struct bch_fs *c,
                        goto err_free;
                }
 
-               ret = bch2_mark_bkey_replicas(c, BKEY_TYPE_BTREE,
-                                             bkey_i_to_s_c(&b->key));
+               ret = bch2_mark_bkey_replicas(c, bkey_i_to_s_c(&b->key));
                if (ret)
                        goto err_free;
 
@@ -1072,8 +1070,7 @@ static void bch2_btree_set_root_inmem(struct btree_update *as, struct btree *b)
        mutex_lock(&c->btree_interior_update_lock);
        percpu_down_read(&c->usage_lock);
 
-       bch2_mark_key_locked(c, BKEY_TYPE_BTREE,
-                     bkey_i_to_s_c(&b->key),
+       bch2_mark_key_locked(c, bkey_i_to_s_c(&b->key),
                      true, 0,
                      gc_pos_btree_root(b->btree_id),
                      &stats, 0, 0);
@@ -1166,11 +1163,9 @@ static void bch2_insert_fixup_btree_ptr(struct btree_update *as, struct btree *b
        mutex_lock(&c->btree_interior_update_lock);
        percpu_down_read(&c->usage_lock);
 
-       if (bkey_extent_is_data(&insert->k))
-               bch2_mark_key_locked(c, BKEY_TYPE_BTREE,
-                             bkey_i_to_s_c(insert),
-                             true, 0,
-                             gc_pos_btree_node(b), &stats, 0, 0);
+       bch2_mark_key_locked(c, bkey_i_to_s_c(insert),
+                            true, 0,
+                            gc_pos_btree_node(b), &stats, 0, 0);
 
        while ((k = bch2_btree_node_iter_peek_all(node_iter, b)) &&
               bkey_iter_pos_cmp(b, &insert->k.p, k) > 0)
@@ -1893,7 +1888,7 @@ static void __bch2_btree_node_update_key(struct bch_fs *c,
                                         struct btree_update *as,
                                         struct btree_iter *iter,
                                         struct btree *b, struct btree *new_hash,
-                                        struct bkey_i_extent *new_key)
+                                        struct bkey_i_btree_ptr *new_key)
 {
        struct btree *parent;
        int ret;
@@ -1938,7 +1933,7 @@ static void __bch2_btree_node_update_key(struct bch_fs *c,
         */
        ret = bch2_disk_reservation_add(c, &as->reserve->disk_res,
                        c->opts.btree_node_size *
-                       bch2_extent_nr_ptrs(extent_i_to_s_c(new_key)),
+                       bch2_bkey_nr_ptrs(bkey_i_to_s_c(&new_key->k_i)),
                        BCH_DISK_RESERVATION_NOFAIL|
                        BCH_DISK_RESERVATION_GC_LOCK_HELD);
        BUG_ON(ret);
@@ -1978,8 +1973,7 @@ static void __bch2_btree_node_update_key(struct bch_fs *c,
                mutex_lock(&c->btree_interior_update_lock);
                percpu_down_read(&c->usage_lock);
 
-               bch2_mark_key_locked(c, BKEY_TYPE_BTREE,
-                             bkey_i_to_s_c(&new_key->k_i),
+               bch2_mark_key_locked(c, bkey_i_to_s_c(&new_key->k_i),
                              true, 0,
                              gc_pos_btree_root(b->btree_id),
                              &stats, 0, 0);
@@ -2012,7 +2006,8 @@ static void __bch2_btree_node_update_key(struct bch_fs *c,
 }
 
 int bch2_btree_node_update_key(struct bch_fs *c, struct btree_iter *iter,
-                              struct btree *b, struct bkey_i_extent *new_key)
+                              struct btree *b,
+                              struct bkey_i_btree_ptr *new_key)
 {
        struct btree *parent = btree_node_parent(iter, b);
        struct btree_update *as = NULL;
@@ -2078,8 +2073,7 @@ int bch2_btree_node_update_key(struct bch_fs *c, struct btree_iter *iter,
                        goto err;
        }
 
-       ret = bch2_mark_bkey_replicas(c, BKEY_TYPE_BTREE,
-                                     extent_i_to_s_c(new_key).s_c);
+       ret = bch2_mark_bkey_replicas(c, bkey_i_to_s_c(&new_key->k_i));
        if (ret)
                goto err_free_update;
 
@@ -2137,9 +2131,9 @@ void bch2_btree_root_alloc(struct bch_fs *c, enum btree_id id)
        b->level        = 0;
        b->btree_id     = id;
 
-       bkey_extent_init(&b->key);
+       bkey_btree_ptr_init(&b->key);
        b->key.k.p = POS_MAX;
-       bkey_i_to_extent(&b->key)->v._data[0] = U64_MAX - id;
+       PTR_HASH(&b->key) = U64_MAX - id;
 
        bch2_bset_init_first(b, &b->data->keys);
        bch2_btree_build_aux_trees(b);
index 4b0d674472db4c1daea994df076feec25f0da627..fd27334cf2a46cd2eef7924faa62bd2a59e30cb4 100644 (file)
@@ -71,7 +71,7 @@ bool bch2_btree_bset_insert_key(struct btree_iter *iter,
                        goto overwrite;
                }
 
-               k->type = KEY_TYPE_DELETED;
+               k->type = KEY_TYPE_deleted;
                bch2_btree_node_iter_fix(iter, b, node_iter, k,
                                         k->u64s, k->u64s);
                bch2_btree_iter_verify(iter, b);
@@ -312,7 +312,6 @@ btree_key_can_insert(struct btree_insert *trans,
                return BTREE_INSERT_BTREE_NODE_FULL;
 
        if (!bch2_bkey_replicas_marked(c,
-                       insert->iter->btree_id,
                        bkey_i_to_s_c(insert->k),
                        true))
                return BTREE_INSERT_NEED_MARK_REPLICAS;
@@ -449,8 +448,8 @@ static inline void btree_insert_entry_checks(struct bch_fs *c,
        BUG_ON(bkey_cmp(bkey_start_pos(&i->k->k), i->iter->pos));
        BUG_ON(debug_check_bkeys(c) &&
               !bkey_deleted(&i->k->k) &&
-              bch2_bkey_invalid(c, (enum bkey_type) i->iter->btree_id,
-                                bkey_i_to_s_c(i->k)));
+              bch2_bkey_invalid(c, bkey_i_to_s_c(i->k),
+                                i->iter->btree_id));
 }
 
 /**
@@ -585,8 +584,7 @@ err:
                }
 
                bch2_btree_iter_unlock(trans->entries[0].iter);
-               ret = bch2_mark_bkey_replicas(c, i->iter->btree_id,
-                                             bkey_i_to_s_c(i->k))
+               ret = bch2_mark_bkey_replicas(c, bkey_i_to_s_c(i->k))
                        ?: -EINTR;
                break;
        default:
index 3f4bbf280a78792381a441338da0106d203590d5..d08e95020cef4b756369ca17aed55ec9bb24d998 100644 (file)
@@ -567,7 +567,7 @@ static int __disk_sectors(struct bch_extent_crc_unpacked crc, unsigned sectors)
                                    crc.uncompressed_size));
 }
 
-static s64 ptr_disk_sectors(struct bkey_s_c_extent e,
+static s64 ptr_disk_sectors(const struct bkey *k,
                            struct extent_ptr_decoded p,
                            s64 sectors)
 {
@@ -579,8 +579,8 @@ static s64 ptr_disk_sectors(struct bkey_s_c_extent e,
                        old_sectors = 0;
                        new_sectors = sectors;
                } else {
-                       old_sectors = e.k->size;
-                       new_sectors = e.k->size + sectors;
+                       old_sectors = k->size;
+                       new_sectors = k->size + sectors;
                }
 
                sectors = -__disk_sectors(p.crc, old_sectors)
@@ -596,7 +596,6 @@ static s64 ptr_disk_sectors(struct bkey_s_c_extent e,
  * that with the gc pos seqlock held.
  */
 static void bch2_mark_pointer(struct bch_fs *c,
-                             struct bkey_s_c_extent e,
                              struct extent_ptr_decoded p,
                              s64 sectors, enum bch_data_type data_type,
                              struct bch_fs_usage *fs_usage,
@@ -709,70 +708,54 @@ static int bch2_mark_extent(struct bch_fs *c, struct bkey_s_c k,
                            u64 journal_seq, unsigned flags,
                            bool gc)
 {
+       struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+       const union bch_extent_entry *entry;
+       struct extent_ptr_decoded p;
+       s64 cached_sectors      = 0;
+       s64 dirty_sectors       = 0;
+       s64 ec_sectors          = 0;
+       unsigned replicas       = 0;
+       unsigned ec_redundancy  = 0;
+       unsigned i;
+       int ret;
+
        BUG_ON(!sectors);
 
-       switch (k.k->type) {
-       case BCH_EXTENT:
-       case BCH_EXTENT_CACHED: {
-               struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
-               const union bch_extent_entry *entry;
-               struct extent_ptr_decoded p;
-               s64 cached_sectors      = 0;
-               s64 dirty_sectors       = 0;
-               s64 ec_sectors          = 0;
-               unsigned replicas       = 0;
-               unsigned ec_redundancy  = 0;
-               unsigned i;
-               int ret;
-
-               extent_for_each_ptr_decode(e, p, entry) {
-                       s64 disk_sectors = ptr_disk_sectors(e, p, sectors);
-                       s64 adjusted_disk_sectors = disk_sectors;
-
-                       bch2_mark_pointer(c, e, p, disk_sectors, data_type,
-                                         stats, journal_seq, flags, gc);
-
-                       if (!p.ptr.cached)
-                               for (i = 0; i < p.ec_nr; i++) {
-                                       ret = bch2_mark_stripe_ptr(c, p.ec[i],
-                                                       disk_sectors, flags,
-                                                       &adjusted_disk_sectors,
-                                                       &ec_redundancy, gc);
-                                       if (ret)
-                                               return ret;
-                               }
-                       if (!p.ptr.cached)
-                               replicas++;
-
-                       if (p.ptr.cached)
-                               cached_sectors  += adjusted_disk_sectors;
-                       else if (!p.ec_nr)
-                               dirty_sectors   += adjusted_disk_sectors;
-                       else
-                               ec_sectors      += adjusted_disk_sectors;
-               }
+       bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
+               s64 disk_sectors = ptr_disk_sectors(k.k, p, sectors);
+               s64 adjusted_disk_sectors = disk_sectors;
 
-               replicas        = clamp_t(unsigned,     replicas,
-                                         1, ARRAY_SIZE(stats->replicas));
-               ec_redundancy   = clamp_t(unsigned,     ec_redundancy,
-                                         1, ARRAY_SIZE(stats->replicas));
+               bch2_mark_pointer(c, p, disk_sectors, data_type,
+                                 stats, journal_seq, flags, gc);
 
-               stats->replicas[0].data[BCH_DATA_CACHED]        += cached_sectors;
-               stats->replicas[replicas - 1].data[data_type]   += dirty_sectors;
-               stats->replicas[ec_redundancy - 1].ec_data      += ec_sectors;
-               break;
+               if (!p.ptr.cached)
+                       for (i = 0; i < p.ec_nr; i++) {
+                               ret = bch2_mark_stripe_ptr(c, p.ec[i],
+                                               disk_sectors, flags,
+                                               &adjusted_disk_sectors,
+                                               &ec_redundancy, gc);
+                               if (ret)
+                                       return ret;
+                       }
+               if (!p.ptr.cached)
+                       replicas++;
+
+               if (p.ptr.cached)
+                       cached_sectors  += adjusted_disk_sectors;
+               else if (!p.ec_nr)
+                       dirty_sectors   += adjusted_disk_sectors;
+               else
+                       ec_sectors      += adjusted_disk_sectors;
        }
-       case BCH_RESERVATION: {
-               unsigned replicas = bkey_s_c_to_reservation(k).v->nr_replicas;
 
-               sectors *= replicas;
-               replicas = clamp_t(unsigned, replicas,
-                                  1, ARRAY_SIZE(stats->replicas));
+       replicas        = clamp_t(unsigned,     replicas,
+                                 1, ARRAY_SIZE(stats->replicas));
+       ec_redundancy   = clamp_t(unsigned,     ec_redundancy,
+                                 1, ARRAY_SIZE(stats->replicas));
 
-               stats->replicas[replicas - 1].persistent_reserved += sectors;
-               break;
-       }
-       }
+       stats->replicas[0].data[BCH_DATA_CACHED]        += cached_sectors;
+       stats->replicas[replicas - 1].data[data_type]   += dirty_sectors;
+       stats->replicas[ec_redundancy - 1].ec_data      += ec_sectors;
 
        return 0;
 }
@@ -813,56 +796,49 @@ static int bch2_mark_stripe(struct bch_fs *c, struct bkey_s_c k,
                            u64 journal_seq, unsigned flags,
                            bool gc)
 {
-       switch (k.k->type) {
-       case BCH_STRIPE: {
-               struct bkey_s_c_stripe s = bkey_s_c_to_stripe(k);
-               size_t idx = s.k->p.offset;
-               struct stripe *m = genradix_ptr(&c->stripes[gc], idx);
-               unsigned i;
-
-               if (!m || (!inserting && !m->alive)) {
-                       bch_err_ratelimited(c, "error marking nonexistent stripe %zu",
-                                           idx);
-                       return -1;
-               }
-
-               if (inserting && m->alive) {
-                       bch_err_ratelimited(c, "error marking stripe %zu: already exists",
-                                           idx);
-                       return -1;
-               }
+       struct bkey_s_c_stripe s = bkey_s_c_to_stripe(k);
+       size_t idx = s.k->p.offset;
+       struct stripe *m = genradix_ptr(&c->stripes[gc], idx);
+       unsigned i;
 
-               BUG_ON(atomic_read(&m->blocks_nonempty));
+       if (!m || (!inserting && !m->alive)) {
+               bch_err_ratelimited(c, "error marking nonexistent stripe %zu",
+                                   idx);
+               return -1;
+       }
 
-               for (i = 0; i < EC_STRIPE_MAX; i++)
-                       BUG_ON(atomic_read(&m->block_sectors[i]));
+       if (inserting && m->alive) {
+               bch_err_ratelimited(c, "error marking stripe %zu: already exists",
+                                   idx);
+               return -1;
+       }
 
-               if (inserting) {
-                       m->sectors      = le16_to_cpu(s.v->sectors);
-                       m->algorithm    = s.v->algorithm;
-                       m->nr_blocks    = s.v->nr_blocks;
-                       m->nr_redundant = s.v->nr_redundant;
-               }
+       BUG_ON(atomic_read(&m->blocks_nonempty));
 
-               if (!gc) {
-                       if (inserting)
-                               bch2_stripes_heap_insert(c, m, idx);
-                       else
-                               bch2_stripes_heap_del(c, m, idx);
-               } else {
-                       m->alive = inserting;
-               }
+       for (i = 0; i < EC_STRIPE_MAX; i++)
+               BUG_ON(atomic_read(&m->block_sectors[i]));
 
-               bucket_set_stripe(c, s.v, inserting, fs_usage, 0, gc);
-               break;
+       if (inserting) {
+               m->sectors      = le16_to_cpu(s.v->sectors);
+               m->algorithm    = s.v->algorithm;
+               m->nr_blocks    = s.v->nr_blocks;
+               m->nr_redundant = s.v->nr_redundant;
        }
+
+       if (!gc) {
+               if (inserting)
+                       bch2_stripes_heap_insert(c, m, idx);
+               else
+                       bch2_stripes_heap_del(c, m, idx);
+       } else {
+               m->alive = inserting;
        }
 
+       bucket_set_stripe(c, s.v, inserting, fs_usage, 0, gc);
        return 0;
 }
 
-static int __bch2_mark_key(struct bch_fs *c,
-                          enum bkey_type type, struct bkey_s_c k,
+static int __bch2_mark_key(struct bch_fs *c, struct bkey_s_c k,
                           bool inserting, s64 sectors,
                           struct bch_fs_usage *stats,
                           u64 journal_seq, unsigned flags,
@@ -870,22 +846,32 @@ static int __bch2_mark_key(struct bch_fs *c,
 {
        int ret = 0;
 
-       switch (type) {
-       case BKEY_TYPE_BTREE:
+       switch (k.k->type) {
+       case KEY_TYPE_btree_ptr:
                ret = bch2_mark_extent(c, k, inserting
                                       ?  c->opts.btree_node_size
                                       : -c->opts.btree_node_size,
                                       BCH_DATA_BTREE,
                                       stats, journal_seq, flags, gc);
                break;
-       case BKEY_TYPE_EXTENTS:
+       case KEY_TYPE_extent:
                ret = bch2_mark_extent(c, k, sectors, BCH_DATA_USER,
                                       stats, journal_seq, flags, gc);
                break;
-       case BKEY_TYPE_EC:
+       case KEY_TYPE_stripe:
                ret = bch2_mark_stripe(c, k, inserting,
                                       stats, journal_seq, flags, gc);
                break;
+       case KEY_TYPE_reservation: {
+               unsigned replicas = bkey_s_c_to_reservation(k).v->nr_replicas;
+
+               sectors *= replicas;
+               replicas = clamp_t(unsigned, replicas,
+                                  1, ARRAY_SIZE(stats->replicas));
+
+               stats->replicas[replicas - 1].persistent_reserved += sectors;
+               break;
+       }
        default:
                break;
        }
@@ -894,7 +880,7 @@ static int __bch2_mark_key(struct bch_fs *c,
 }
 
 int bch2_mark_key_locked(struct bch_fs *c,
-                  enum bkey_type type, struct bkey_s_c k,
+                  struct bkey_s_c k,
                   bool inserting, s64 sectors,
                   struct gc_pos pos,
                   struct bch_fs_usage *stats,
@@ -906,7 +892,7 @@ int bch2_mark_key_locked(struct bch_fs *c,
                if (!stats)
                        stats = this_cpu_ptr(c->usage[0]);
 
-               ret = __bch2_mark_key(c, type, k, inserting, sectors,
+               ret = __bch2_mark_key(c, k, inserting, sectors,
                                      stats, journal_seq, flags, false);
                if (ret)
                        return ret;
@@ -914,7 +900,7 @@ int bch2_mark_key_locked(struct bch_fs *c,
 
        if ((flags & BCH_BUCKET_MARK_GC) ||
            gc_visited(c, pos)) {
-               ret = __bch2_mark_key(c, type, k, inserting, sectors,
+               ret = __bch2_mark_key(c, k, inserting, sectors,
                                      this_cpu_ptr(c->usage[1]),
                                      journal_seq, flags, true);
                if (ret)
@@ -924,8 +910,7 @@ int bch2_mark_key_locked(struct bch_fs *c,
        return 0;
 }
 
-int bch2_mark_key(struct bch_fs *c,
-                 enum bkey_type type, struct bkey_s_c k,
+int bch2_mark_key(struct bch_fs *c, struct bkey_s_c k,
                  bool inserting, s64 sectors,
                  struct gc_pos pos,
                  struct bch_fs_usage *stats,
@@ -934,7 +919,7 @@ int bch2_mark_key(struct bch_fs *c,
        int ret;
 
        percpu_down_read(&c->usage_lock);
-       ret = bch2_mark_key_locked(c, type, k, inserting, sectors,
+       ret = bch2_mark_key_locked(c, k, inserting, sectors,
                                   pos, stats, journal_seq, flags);
        percpu_up_read(&c->usage_lock);
 
@@ -952,20 +937,19 @@ void bch2_mark_update(struct btree_insert *trans,
        struct gc_pos           pos = gc_pos_btree_node(b);
        struct bkey_packed      *_k;
 
-       if (!bkey_type_needs_gc(iter->btree_id))
+       if (!btree_node_type_needs_gc(iter->btree_id))
                return;
 
        percpu_down_read(&c->usage_lock);
 
        if (!(trans->flags & BTREE_INSERT_JOURNAL_REPLAY))
-               bch2_mark_key_locked(c, btree_node_type(b),
-                       bkey_i_to_s_c(insert->k), true,
+               bch2_mark_key_locked(c, bkey_i_to_s_c(insert->k), true,
                        bpos_min(insert->k->k.p, b->key.k.p).offset -
                        bkey_start_offset(&insert->k->k),
                        pos, &stats, trans->journal_res.seq, 0);
 
        while ((_k = bch2_btree_node_iter_peek_filter(&node_iter, b,
-                                                     KEY_TYPE_DISCARD))) {
+                                                     KEY_TYPE_discard))) {
                struct bkey             unpacked;
                struct bkey_s_c         k;
                s64                     sectors = 0;
@@ -994,9 +978,8 @@ void bch2_mark_update(struct btree_insert *trans,
                                sectors = k.k->p.offset - insert->k->k.p.offset;
                                BUG_ON(sectors <= 0);
 
-                               bch2_mark_key_locked(c, btree_node_type(b),
-                                       k, true, sectors, pos, &stats,
-                                       trans->journal_res.seq, 0);
+                               bch2_mark_key_locked(c, k, true, sectors,
+                                       pos, &stats, trans->journal_res.seq, 0);
 
                                sectors = bkey_start_offset(&insert->k->k) -
                                        k.k->p.offset;
@@ -1006,9 +989,8 @@ void bch2_mark_update(struct btree_insert *trans,
                        BUG_ON(sectors >= 0);
                }
 
-               bch2_mark_key_locked(c, btree_node_type(b),
-                       k, false, sectors, pos, &stats,
-                       trans->journal_res.seq, 0);
+               bch2_mark_key_locked(c, k, false, sectors,
+                       pos, &stats, trans->journal_res.seq, 0);
 
                bch2_btree_node_iter_advance(&node_iter, b);
        }
index 884041b53eb936eecb7bc129bb66c18f29e394dc..c584ad1b437519358c1dbb4d02c8f5836fcc45ad 100644 (file)
@@ -220,10 +220,10 @@ void bch2_mark_metadata_bucket(struct bch_fs *, struct bch_dev *,
 #define BCH_BUCKET_MARK_NOATOMIC               (1 << 0)
 #define BCH_BUCKET_MARK_GC                     (1 << 1)
 
-int bch2_mark_key_locked(struct bch_fs *, enum bkey_type, struct bkey_s_c,
+int bch2_mark_key_locked(struct bch_fs *, struct bkey_s_c,
                  bool, s64, struct gc_pos,
                  struct bch_fs_usage *, u64, unsigned);
-int bch2_mark_key(struct bch_fs *, enum bkey_type, struct bkey_s_c,
+int bch2_mark_key(struct bch_fs *, struct bkey_s_c,
                  bool, s64, struct gc_pos,
                  struct bch_fs_usage *, u64, unsigned);
 void bch2_mark_update(struct btree_insert *, struct btree_insert_entry *);
index 0a9efe57d5a9c6c52dbcba67d5bb008b22d7b372..f15c29878a9edd9159757eeb66700c22e5590935 100644 (file)
@@ -56,7 +56,8 @@ void __bch2_btree_verify(struct bch_fs *c, struct btree *b)
        v->btree_id     = b->btree_id;
        bch2_btree_keys_init(v, &c->expensive_debug_checks);
 
-       if (bch2_btree_pick_ptr(c, b, NULL, &pick) <= 0)
+       if (bch2_bkey_pick_read_device(c, bkey_i_to_s_c(&b->key),
+                                      NULL, &pick) <= 0)
                return;
 
        ca = bch_dev_bkey_exists(c, pick.ptr.dev);
@@ -223,8 +224,7 @@ static ssize_t bch2_read_btree(struct file *file, char __user *buf,
        k = bch2_btree_iter_peek(&iter);
 
        while (k.k && !(err = btree_iter_err(k))) {
-               bch2_bkey_val_to_text(&PBUF(i->buf), i->c,
-                                     bkey_type(0, i->id), k);
+               bch2_bkey_val_to_text(&PBUF(i->buf), i->c, k);
                i->bytes = strlen(i->buf);
                BUG_ON(i->bytes >= PAGE_SIZE);
                i->buf[i->bytes] = '\n';
index c1a611b4d9ec95b3fe6e76bdb0a2237f0ce644b1..80d37c56827202a7f2a9423a04503c12c98f6b81 100644 (file)
@@ -65,8 +65,7 @@ static bool dirent_cmp_bkey(struct bkey_s_c _l, struct bkey_s_c _r)
 
 const struct bch_hash_desc bch2_dirent_hash_desc = {
        .btree_id       = BTREE_ID_DIRENTS,
-       .key_type       = BCH_DIRENT,
-       .whiteout_type  = BCH_DIRENT_WHITEOUT,
+       .key_type       = KEY_TYPE_dirent,
        .hash_key       = dirent_hash_key,
        .hash_bkey      = dirent_hash_bkey,
        .cmp_key        = dirent_cmp_key,
@@ -75,58 +74,37 @@ const struct bch_hash_desc bch2_dirent_hash_desc = {
 
 const char *bch2_dirent_invalid(const struct bch_fs *c, struct bkey_s_c k)
 {
-       struct bkey_s_c_dirent d;
+       struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k);
        unsigned len;
 
-       switch (k.k->type) {
-       case BCH_DIRENT:
-               if (bkey_val_bytes(k.k) < sizeof(struct bch_dirent))
-                       return "value too small";
-
-               d = bkey_s_c_to_dirent(k);
-               len = bch2_dirent_name_bytes(d);
-
-               if (!len)
-                       return "empty name";
+       if (bkey_val_bytes(k.k) < sizeof(struct bch_dirent))
+               return "value too small";
 
-               /*
-                * older versions of bcachefs were buggy and creating dirent
-                * keys that were bigger than necessary:
-                */
-               if (bkey_val_u64s(k.k) > dirent_val_u64s(len + 7))
-                       return "value too big";
+       len = bch2_dirent_name_bytes(d);
+       if (!len)
+               return "empty name";
 
-               if (len > BCH_NAME_MAX)
-                       return "dirent name too big";
+       /*
+        * older versions of bcachefs were buggy and creating dirent
+        * keys that were bigger than necessary:
+        */
+       if (bkey_val_u64s(k.k) > dirent_val_u64s(len + 7))
+               return "value too big";
 
-               return NULL;
-       case BCH_DIRENT_WHITEOUT:
-               return bkey_val_bytes(k.k) != 0
-                       ? "value size should be zero"
-                       : NULL;
+       if (len > BCH_NAME_MAX)
+               return "dirent name too big";
 
-       default:
-               return "invalid type";
-       }
+       return NULL;
 }
 
 void bch2_dirent_to_text(struct printbuf *out, struct bch_fs *c,
                         struct bkey_s_c k)
 {
-       struct bkey_s_c_dirent d;
-
-       switch (k.k->type) {
-       case BCH_DIRENT:
-               d = bkey_s_c_to_dirent(k);
-
-               bch_scnmemcpy(out, d.v->d_name,
-                             bch2_dirent_name_bytes(d));
-               pr_buf(out, " -> %llu", d.v->d_inum);
-               break;
-       case BCH_DIRENT_WHITEOUT:
-               pr_buf(out, "whiteout");
-               break;
-       }
+       struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k);
+
+       bch_scnmemcpy(out, d.v->d_name,
+                     bch2_dirent_name_bytes(d));
+       pr_buf(out, " -> %llu", d.v->d_inum);
 }
 
 static struct bkey_i_dirent *dirent_create_key(struct btree_trans *trans,
@@ -287,7 +265,7 @@ int bch2_dirent_rename(struct btree_trans *trans,
                                 * overwrite old_dst - just make sure to use a
                                 * whiteout when deleting src:
                                 */
-                               new_src->k.type = BCH_DIRENT_WHITEOUT;
+                               new_src->k.type = KEY_TYPE_whiteout;
                        }
                } else {
                        /* Check if we need a whiteout to delete src: */
@@ -298,7 +276,7 @@ int bch2_dirent_rename(struct btree_trans *trans,
                                return ret;
 
                        if (ret)
-                               new_src->k.type = BCH_DIRENT_WHITEOUT;
+                               new_src->k.type = KEY_TYPE_whiteout;
                }
        }
 
@@ -361,7 +339,7 @@ int bch2_empty_dir(struct bch_fs *c, u64 dir_inum)
                if (k.k->p.inode > dir_inum)
                        break;
 
-               if (k.k->type == BCH_DIRENT) {
+               if (k.k->type == KEY_TYPE_dirent) {
                        ret = -ENOTEMPTY;
                        break;
                }
@@ -385,7 +363,7 @@ int bch2_readdir(struct bch_fs *c, struct file *file,
 
        for_each_btree_key(&iter, c, BTREE_ID_DIRENTS,
                           POS(inode->v.i_ino, ctx->pos), 0, k) {
-               if (k.k->type != BCH_DIRENT)
+               if (k.k->type != KEY_TYPE_dirent)
                        continue;
 
                dirent = bkey_s_c_to_dirent(k);
index 2afb0baed11a17ed64d0a266a59c94d29763a674..7b47573dcc464faf790dfa8a5fb3e5da21e29d7f 100644 (file)
@@ -9,7 +9,7 @@ extern const struct bch_hash_desc bch2_dirent_hash_desc;
 const char *bch2_dirent_invalid(const struct bch_fs *, struct bkey_s_c);
 void bch2_dirent_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
 
-#define bch2_bkey_dirent_ops (struct bkey_ops) {       \
+#define bch2_bkey_ops_dirent (struct bkey_ops) {       \
        .key_invalid    = bch2_dirent_invalid,          \
        .val_to_text    = bch2_dirent_to_text,          \
 }
index 091a1f0a043272095c7b6088919ca31fbc1b8d65..010b9b90f2fcfdd107615931b0770486a2439e07 100644 (file)
@@ -123,49 +123,39 @@ static void *stripe_csum(struct bch_stripe *s, unsigned dev, unsigned csum_idx)
        return csums + (dev * stripe_csums_per_device(s) + csum_idx) * csum_bytes;
 }
 
-const char *bch2_ec_key_invalid(const struct bch_fs *c, struct bkey_s_c k)
+const char *bch2_stripe_invalid(const struct bch_fs *c, struct bkey_s_c k)
 {
+       const struct bch_stripe *s = bkey_s_c_to_stripe(k).v;
+
        if (k.k->p.inode)
                return "invalid stripe key";
 
-       switch (k.k->type) {
-       case BCH_STRIPE: {
-               const struct bch_stripe *s = bkey_s_c_to_stripe(k).v;
-
-               if (bkey_val_bytes(k.k) < sizeof(*s))
-                       return "incorrect value size";
+       if (bkey_val_bytes(k.k) < sizeof(*s))
+               return "incorrect value size";
 
-               if (bkey_val_u64s(k.k) != stripe_val_u64s(s))
-                       return "incorrect value size";
+       if (bkey_val_u64s(k.k) != stripe_val_u64s(s))
+               return "incorrect value size";
 
-               return NULL;
-       }
-       default:
-               return "invalid type";
-       }
+       return NULL;
 }
 
-void bch2_ec_key_to_text(struct printbuf *out, struct bch_fs *c,
+void bch2_stripe_to_text(struct printbuf *out, struct bch_fs *c,
                         struct bkey_s_c k)
 {
-       switch (k.k->type) {
-       case BCH_STRIPE: {
-               const struct bch_stripe *s = bkey_s_c_to_stripe(k).v;
-               unsigned i;
-
-               pr_buf(out, "algo %u sectors %u blocks %u:%u csum %u gran %u",
-                      s->algorithm,
-                      le16_to_cpu(s->sectors),
-                      s->nr_blocks - s->nr_redundant,
-                      s->nr_redundant,
-                      s->csum_type,
-                      1U << s->csum_granularity_bits);
-
-               for (i = 0; i < s->nr_blocks; i++)
-                       pr_buf(out, " %u:%llu", s->ptrs[i].dev,
-                              (u64) s->ptrs[i].offset);
-       }
-       }
+       const struct bch_stripe *s = bkey_s_c_to_stripe(k).v;
+       unsigned i;
+
+       pr_buf(out, "algo %u sectors %u blocks %u:%u csum %u gran %u",
+              s->algorithm,
+              le16_to_cpu(s->sectors),
+              s->nr_blocks - s->nr_redundant,
+              s->nr_redundant,
+              s->csum_type,
+              1U << s->csum_granularity_bits);
+
+       for (i = 0; i < s->nr_blocks; i++)
+               pr_buf(out, " %u:%llu", s->ptrs[i].dev,
+                      (u64) s->ptrs[i].offset);
 }
 
 static int ptr_matches_stripe(struct bch_fs *c,
@@ -454,7 +444,7 @@ int bch2_ec_read_extent(struct bch_fs *c, struct bch_read_bio *rbio)
                             POS(0, stripe_idx),
                             BTREE_ITER_SLOTS);
        k = bch2_btree_iter_peek_slot(&iter);
-       if (btree_iter_err(k) || k.k->type != BCH_STRIPE) {
+       if (btree_iter_err(k) || k.k->type != KEY_TYPE_stripe) {
                __bcache_io_error(c,
                        "error doing reconstruct read: stripe not found");
                kfree(buf);
@@ -695,7 +685,7 @@ static void ec_stripe_delete(struct bch_fs *c, size_t idx)
                             POS(0, idx),
                             BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
        k = bch2_btree_iter_peek_slot(&iter);
-       if (btree_iter_err(k) || k.k->type != BCH_STRIPE)
+       if (btree_iter_err(k) || k.k->type != KEY_TYPE_stripe)
                goto out;
 
        v = kmalloc(bkey_val_bytes(k.k), GFP_KERNEL);
index c35de8b1ef64d272384debe52d5f4314a6db2ee8..4a8cade37c7ac7fa4f3afa8ff53c3e9732b4a87a 100644 (file)
@@ -5,13 +5,13 @@
 #include "ec_types.h"
 #include "keylist_types.h"
 
-const char *bch2_ec_key_invalid(const struct bch_fs *, struct bkey_s_c);
-void bch2_ec_key_to_text(struct printbuf *, struct bch_fs *,
+const char *bch2_stripe_invalid(const struct bch_fs *, struct bkey_s_c);
+void bch2_stripe_to_text(struct printbuf *, struct bch_fs *,
                         struct bkey_s_c);
 
-#define bch2_bkey_ec_ops (struct bkey_ops) {           \
-       .key_invalid    = bch2_ec_key_invalid,          \
-       .val_to_text    = bch2_ec_key_to_text,          \
+#define bch2_bkey_ops_stripe (struct bkey_ops) {       \
+       .key_invalid    = bch2_stripe_invalid,          \
+       .val_to_text    = bch2_stripe_to_text,          \
 }
 
 struct bch_read_bio;
index 582499b08f31aa72a90df6d8c429742f49424eca..c9a6f6e4a16593b309d402f434071c3b5e073973 100644 (file)
 #include "util.h"
 #include "xattr.h"
 
-/* Common among btree and extent ptrs */
-
-const struct bch_extent_ptr *
-bch2_extent_has_device(struct bkey_s_c_extent e, unsigned dev)
-{
-       const struct bch_extent_ptr *ptr;
-
-       extent_for_each_ptr(e, ptr)
-               if (ptr->dev == dev)
-                       return ptr;
-
-       return NULL;
-}
-
-void bch2_extent_drop_device(struct bkey_s_extent e, unsigned dev)
-{
-       struct bch_extent_ptr *ptr;
-
-       bch2_extent_drop_ptrs(e, ptr, ptr->dev == dev);
-}
-
-const struct bch_extent_ptr *
-bch2_extent_has_group(struct bch_fs *c, struct bkey_s_c_extent e, unsigned group)
-{
-       const struct bch_extent_ptr *ptr;
-
-       extent_for_each_ptr(e, ptr) {
-               struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
-
-               if (ca->mi.group &&
-                   ca->mi.group - 1 == group)
-                       return ptr;
-       }
-
-       return NULL;
-}
-
-const struct bch_extent_ptr *
-bch2_extent_has_target(struct bch_fs *c, struct bkey_s_c_extent e, unsigned target)
-{
-       const struct bch_extent_ptr *ptr;
-
-       extent_for_each_ptr(e, ptr)
-               if (bch2_dev_in_target(c, ptr->dev, target) &&
-                   (!ptr->cached ||
-                    !ptr_stale(bch_dev_bkey_exists(c, ptr->dev), ptr)))
-                       return ptr;
-
-       return NULL;
-}
-
-unsigned bch2_extent_nr_ptrs(struct bkey_s_c_extent e)
+unsigned bch2_bkey_nr_ptrs(struct bkey_s_c k)
 {
+       struct bkey_ptrs_c p = bch2_bkey_ptrs_c(k);
        const struct bch_extent_ptr *ptr;
        unsigned nr_ptrs = 0;
 
-       extent_for_each_ptr(e, ptr)
+       bkey_for_each_ptr(p, ptr)
                nr_ptrs++;
 
        return nr_ptrs;
 }
 
-unsigned bch2_extent_nr_dirty_ptrs(struct bkey_s_c k)
+unsigned bch2_bkey_nr_dirty_ptrs(struct bkey_s_c k)
 {
-       struct bkey_s_c_extent e;
-       const struct bch_extent_ptr *ptr;
        unsigned nr_ptrs = 0;
 
        switch (k.k->type) {
-       case BCH_EXTENT:
-       case BCH_EXTENT_CACHED:
-               e = bkey_s_c_to_extent(k);
+       case KEY_TYPE_btree_ptr:
+       case KEY_TYPE_extent: {
+               struct bkey_ptrs_c p = bch2_bkey_ptrs_c(k);
+               const struct bch_extent_ptr *ptr;
 
-               extent_for_each_ptr(e, ptr)
+               bkey_for_each_ptr(p, ptr)
                        nr_ptrs += !ptr->cached;
+               BUG_ON(!nr_ptrs);
                break;
-
-       case BCH_RESERVATION:
+       }
+       case KEY_TYPE_reservation:
                nr_ptrs = bkey_s_c_to_reservation(k).v->nr_replicas;
                break;
        }
@@ -139,25 +89,216 @@ static unsigned bch2_extent_ptr_durability(struct bch_fs *c,
        return durability;
 }
 
-unsigned bch2_extent_durability(struct bch_fs *c, struct bkey_s_c_extent e)
+unsigned bch2_bkey_durability(struct bch_fs *c, struct bkey_s_c k)
 {
+       struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
        const union bch_extent_entry *entry;
        struct extent_ptr_decoded p;
        unsigned durability = 0;
 
-       extent_for_each_ptr_decode(e, p, entry)
+       bkey_for_each_ptr_decode(k.k, ptrs, p, entry)
                durability += bch2_extent_ptr_durability(c, p);
 
        return durability;
 }
 
+static struct bch_dev_io_failures *dev_io_failures(struct bch_io_failures *f,
+                                                  unsigned dev)
+{
+       struct bch_dev_io_failures *i;
+
+       for (i = f->devs; i < f->devs + f->nr; i++)
+               if (i->dev == dev)
+                       return i;
+
+       return NULL;
+}
+
+void bch2_mark_io_failure(struct bch_io_failures *failed,
+                         struct extent_ptr_decoded *p)
+{
+       struct bch_dev_io_failures *f = dev_io_failures(failed, p->ptr.dev);
+
+       if (!f) {
+               BUG_ON(failed->nr >= ARRAY_SIZE(failed->devs));
+
+               f = &failed->devs[failed->nr++];
+               f->dev          = p->ptr.dev;
+               f->idx          = p->idx;
+               f->nr_failed    = 1;
+               f->nr_retries   = 0;
+       } else if (p->idx != f->idx) {
+               f->idx          = p->idx;
+               f->nr_failed    = 1;
+               f->nr_retries   = 0;
+       } else {
+               f->nr_failed++;
+       }
+}
+
+/*
+ * returns true if p1 is better than p2:
+ */
+static inline bool ptr_better(struct bch_fs *c,
+                             const struct extent_ptr_decoded p1,
+                             const struct extent_ptr_decoded p2)
+{
+       if (likely(!p1.idx && !p2.idx)) {
+               struct bch_dev *dev1 = bch_dev_bkey_exists(c, p1.ptr.dev);
+               struct bch_dev *dev2 = bch_dev_bkey_exists(c, p2.ptr.dev);
+
+               u64 l1 = atomic64_read(&dev1->cur_latency[READ]);
+               u64 l2 = atomic64_read(&dev2->cur_latency[READ]);
+
+               /* Pick at random, biased in favor of the faster device: */
+
+               return bch2_rand_range(l1 + l2) > l1;
+       }
+
+       if (force_reconstruct_read(c))
+               return p1.idx > p2.idx;
+
+       return p1.idx < p2.idx;
+}
+
+/*
+ * This picks a non-stale pointer, preferably from a device other than @avoid.
+ * Avoid can be NULL, meaning pick any. If there are no non-stale pointers to
+ * other devices, it will still pick a pointer from avoid.
+ */
+int bch2_bkey_pick_read_device(struct bch_fs *c, struct bkey_s_c k,
+                              struct bch_io_failures *failed,
+                              struct extent_ptr_decoded *pick)
+{
+       struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+       const union bch_extent_entry *entry;
+       struct extent_ptr_decoded p;
+       struct bch_dev_io_failures *f;
+       struct bch_dev *ca;
+       int ret = 0;
+
+       if (k.k->type == KEY_TYPE_error)
+               return -EIO;
+
+       bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
+               ca = bch_dev_bkey_exists(c, p.ptr.dev);
+
+               /*
+                * If there are any dirty pointers it's an error if we can't
+                * read:
+                */
+               if (!ret && !p.ptr.cached)
+                       ret = -EIO;
+
+               if (p.ptr.cached && ptr_stale(ca, &p.ptr))
+                       continue;
+
+               f = failed ? dev_io_failures(failed, p.ptr.dev) : NULL;
+               if (f)
+                       p.idx = f->nr_failed < f->nr_retries
+                               ? f->idx
+                               : f->idx + 1;
+
+               if (!p.idx &&
+                   !bch2_dev_is_readable(ca))
+                       p.idx++;
+
+               if (force_reconstruct_read(c) &&
+                   !p.idx && p.ec_nr)
+                       p.idx++;
+
+               if (p.idx >= p.ec_nr + 1)
+                       continue;
+
+               if (ret > 0 && !ptr_better(c, p, *pick))
+                       continue;
+
+               *pick = p;
+               ret = 1;
+       }
+
+       return ret;
+}
+
+void bch2_bkey_append_ptr(struct bkey_i *k,
+                         struct bch_extent_ptr ptr)
+{
+       EBUG_ON(bch2_bkey_has_device(bkey_i_to_s_c(k), ptr.dev));
+
+       switch (k->k.type) {
+       case KEY_TYPE_btree_ptr:
+       case KEY_TYPE_extent:
+               EBUG_ON(bkey_val_u64s(&k->k) >= BKEY_EXTENT_VAL_U64s_MAX);
+
+               ptr.type = 1 << BCH_EXTENT_ENTRY_ptr;
+
+               memcpy((void *) &k->v + bkey_val_bytes(&k->k),
+                      &ptr,
+                      sizeof(ptr));
+               k->u64s++;
+               break;
+       default:
+               BUG();
+       }
+}
+
+void bch2_bkey_drop_device(struct bkey_s k, unsigned dev)
+{
+       struct bch_extent_ptr *ptr;
+
+       bch2_bkey_drop_ptrs(k, ptr, ptr->dev == dev);
+}
+
+/* extent specific utility code */
+
+const struct bch_extent_ptr *
+bch2_extent_has_device(struct bkey_s_c_extent e, unsigned dev)
+{
+       const struct bch_extent_ptr *ptr;
+
+       extent_for_each_ptr(e, ptr)
+               if (ptr->dev == dev)
+                       return ptr;
+
+       return NULL;
+}
+
+const struct bch_extent_ptr *
+bch2_extent_has_group(struct bch_fs *c, struct bkey_s_c_extent e, unsigned group)
+{
+       const struct bch_extent_ptr *ptr;
+
+       extent_for_each_ptr(e, ptr) {
+               struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
+
+               if (ca->mi.group &&
+                   ca->mi.group - 1 == group)
+                       return ptr;
+       }
+
+       return NULL;
+}
+
+const struct bch_extent_ptr *
+bch2_extent_has_target(struct bch_fs *c, struct bkey_s_c_extent e, unsigned target)
+{
+       const struct bch_extent_ptr *ptr;
+
+       extent_for_each_ptr(e, ptr)
+               if (bch2_dev_in_target(c, ptr->dev, target) &&
+                   (!ptr->cached ||
+                    !ptr_stale(bch_dev_bkey_exists(c, ptr->dev), ptr)))
+                       return ptr;
+
+       return NULL;
+}
+
 unsigned bch2_extent_is_compressed(struct bkey_s_c k)
 {
        unsigned ret = 0;
 
        switch (k.k->type) {
-       case BCH_EXTENT:
-       case BCH_EXTENT_CACHED: {
+       case KEY_TYPE_extent: {
                struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
                const union bch_extent_entry *entry;
                struct extent_ptr_decoded p;
@@ -189,10 +330,10 @@ bool bch2_extent_matches_ptr(struct bch_fs *c, struct bkey_s_c_extent e,
        return false;
 }
 
-static union bch_extent_entry *extent_entry_prev(struct bkey_s_extent e,
+static union bch_extent_entry *extent_entry_prev(struct bkey_ptrs ptrs,
                                          union bch_extent_entry *entry)
 {
-       union bch_extent_entry *i = e.v->start;
+       union bch_extent_entry *i = ptrs.start;
 
        if (i == entry)
                return NULL;
@@ -202,23 +343,24 @@ static union bch_extent_entry *extent_entry_prev(struct bkey_s_extent e,
        return i;
 }
 
-union bch_extent_entry *bch2_extent_drop_ptr(struct bkey_s_extent e,
-                                            struct bch_extent_ptr *ptr)
+union bch_extent_entry *bch2_bkey_drop_ptr(struct bkey_s k,
+                                          struct bch_extent_ptr *ptr)
 {
+       struct bkey_ptrs ptrs = bch2_bkey_ptrs(k);
        union bch_extent_entry *dst, *src, *prev;
        bool drop_crc = true;
 
-       EBUG_ON(ptr < &e.v->start->ptr ||
-               ptr >= &extent_entry_last(e)->ptr);
+       EBUG_ON(ptr < &ptrs.start->ptr ||
+               ptr >= &ptrs.end->ptr);
        EBUG_ON(ptr->type != 1 << BCH_EXTENT_ENTRY_ptr);
 
        src = extent_entry_next(to_entry(ptr));
-       if (src != extent_entry_last(e) &&
+       if (src != ptrs.end &&
            !extent_entry_is_crc(src))
                drop_crc = false;
 
        dst = to_entry(ptr);
-       while ((prev = extent_entry_prev(e, dst))) {
+       while ((prev = extent_entry_prev(ptrs, dst))) {
                if (extent_entry_is_ptr(prev))
                        break;
 
@@ -232,8 +374,8 @@ union bch_extent_entry *bch2_extent_drop_ptr(struct bkey_s_extent e,
        }
 
        memmove_u64s_down(dst, src,
-                         (u64 *) extent_entry_last(e) - (u64 *) src);
-       e.k->u64s -= (u64 *) src - (u64 *) dst;
+                         (u64 *) ptrs.end - (u64 *) src);
+       k.k->u64s -= (u64 *) src - (u64 *) dst;
 
        return dst;
 }
@@ -300,7 +442,7 @@ found:
 restart_narrow_pointers:
        extent_for_each_ptr_decode(extent_i_to_s(e), p, i)
                if (can_narrow_crc(p.crc, n)) {
-                       bch2_extent_drop_ptr(extent_i_to_s(e), &i->ptr);
+                       bch2_bkey_drop_ptr(extent_i_to_s(e).s, &i->ptr);
                        p.ptr.offset += p.crc.offset;
                        p.crc = n;
                        bch2_extent_ptr_decoded_append(e, &p);
@@ -325,302 +467,165 @@ static inline bool bch2_crc_unpacked_cmp(struct bch_extent_crc_unpacked l,
                bch2_crc_cmp(l.csum, r.csum));
 }
 
-static void bch2_extent_drop_stale(struct bch_fs *c, struct bkey_s_extent e)
-{
-       struct bch_extent_ptr *ptr;
-
-       bch2_extent_drop_ptrs(e, ptr,
-               ptr->cached &&
-               ptr_stale(bch_dev_bkey_exists(c, ptr->dev), ptr));
-}
-
-bool bch2_ptr_normalize(struct bch_fs *c, struct btree *b, struct bkey_s k)
-{
-       return bch2_extent_normalize(c, k);
-}
-
 void bch2_ptr_swab(const struct bkey_format *f, struct bkey_packed *k)
 {
-       switch (k->type) {
-       case BCH_EXTENT:
-       case BCH_EXTENT_CACHED: {
-               union bch_extent_entry *entry;
-               u64 *d = (u64 *) bkeyp_val(f, k);
-               unsigned i;
-
-               for (i = 0; i < bkeyp_val_u64s(f, k); i++)
-                       d[i] = swab64(d[i]);
-
-               for (entry = (union bch_extent_entry *) d;
-                    entry < (union bch_extent_entry *) (d + bkeyp_val_u64s(f, k));
-                    entry = extent_entry_next(entry)) {
-                       switch (extent_entry_type(entry)) {
-                       case BCH_EXTENT_ENTRY_ptr:
-                               break;
-                       case BCH_EXTENT_ENTRY_crc32:
-                               entry->crc32.csum = swab32(entry->crc32.csum);
-                               break;
-                       case BCH_EXTENT_ENTRY_crc64:
-                               entry->crc64.csum_hi = swab16(entry->crc64.csum_hi);
-                               entry->crc64.csum_lo = swab64(entry->crc64.csum_lo);
-                               break;
-                       case BCH_EXTENT_ENTRY_crc128:
-                               entry->crc128.csum.hi = (__force __le64)
-                                       swab64((__force u64) entry->crc128.csum.hi);
-                               entry->crc128.csum.lo = (__force __le64)
-                                       swab64((__force u64) entry->crc128.csum.lo);
-                               break;
-                       case BCH_EXTENT_ENTRY_stripe_ptr:
-                               break;
-                       }
-               }
-               break;
-       }
-       }
-}
-
-static const char *extent_ptr_invalid(const struct bch_fs *c,
-                                     struct bkey_s_c_extent e,
-                                     const struct bch_extent_ptr *ptr,
-                                     unsigned size_ondisk,
-                                     bool metadata)
-{
-       const struct bch_extent_ptr *ptr2;
-       struct bch_dev *ca;
-
-       if (ptr->dev >= c->sb.nr_devices ||
-           !c->devs[ptr->dev])
-               return "pointer to invalid device";
-
-       ca = bch_dev_bkey_exists(c, ptr->dev);
-       if (!ca)
-               return "pointer to invalid device";
-
-       extent_for_each_ptr(e, ptr2)
-               if (ptr != ptr2 && ptr->dev == ptr2->dev)
-                       return "multiple pointers to same device";
-
-       if (ptr->offset + size_ondisk > bucket_to_sector(ca, ca->mi.nbuckets))
-               return "offset past end of device";
-
-       if (ptr->offset < bucket_to_sector(ca, ca->mi.first_bucket))
-               return "offset before first bucket";
-
-       if (bucket_remainder(ca, ptr->offset) +
-           size_ondisk > ca->mi.bucket_size)
-               return "spans multiple buckets";
-
-       return NULL;
-}
-
-static void extent_print_ptrs(struct printbuf *out, struct bch_fs *c,
-                             struct bkey_s_c_extent e)
-{
-       const union bch_extent_entry *entry;
-       struct bch_extent_crc_unpacked crc;
-       const struct bch_extent_ptr *ptr;
-       const struct bch_extent_stripe_ptr *ec;
-       struct bch_dev *ca;
-       bool first = true;
+       union bch_extent_entry *entry;
+       u64 *d = (u64 *) bkeyp_val(f, k);
+       unsigned i;
 
-       extent_for_each_entry(e, entry) {
-               if (!first)
-                       pr_buf(out, " ");
+       for (i = 0; i < bkeyp_val_u64s(f, k); i++)
+               d[i] = swab64(d[i]);
 
-               switch (__extent_entry_type(entry)) {
+       for (entry = (union bch_extent_entry *) d;
+            entry < (union bch_extent_entry *) (d + bkeyp_val_u64s(f, k));
+            entry = extent_entry_next(entry)) {
+               switch (extent_entry_type(entry)) {
                case BCH_EXTENT_ENTRY_ptr:
-                       ptr = entry_to_ptr(entry);
-                       ca = ptr->dev < c->sb.nr_devices && c->devs[ptr->dev]
-                               ? bch_dev_bkey_exists(c, ptr->dev)
-                               : NULL;
-
-                       pr_buf(out, "ptr: %u:%llu gen %u%s%s", ptr->dev,
-                              (u64) ptr->offset, ptr->gen,
-                              ptr->cached ? " cached" : "",
-                              ca && ptr_stale(ca, ptr)
-                              ? " stale" : "");
                        break;
                case BCH_EXTENT_ENTRY_crc32:
+                       entry->crc32.csum = swab32(entry->crc32.csum);
+                       break;
                case BCH_EXTENT_ENTRY_crc64:
+                       entry->crc64.csum_hi = swab16(entry->crc64.csum_hi);
+                       entry->crc64.csum_lo = swab64(entry->crc64.csum_lo);
+                       break;
                case BCH_EXTENT_ENTRY_crc128:
-                       crc = bch2_extent_crc_unpack(e.k, entry_to_crc(entry));
-
-                       pr_buf(out, "crc: c_size %u size %u offset %u nonce %u csum %u compress %u",
-                              crc.compressed_size,
-                              crc.uncompressed_size,
-                              crc.offset, crc.nonce,
-                              crc.csum_type,
-                              crc.compression_type);
+                       entry->crc128.csum.hi = (__force __le64)
+                               swab64((__force u64) entry->crc128.csum.hi);
+                       entry->crc128.csum.lo = (__force __le64)
+                               swab64((__force u64) entry->crc128.csum.lo);
                        break;
                case BCH_EXTENT_ENTRY_stripe_ptr:
-                       ec = &entry->stripe_ptr;
-
-                       pr_buf(out, "ec: idx %llu block %u",
-                              (u64) ec->idx, ec->block);
                        break;
-               default:
-                       pr_buf(out, "(invalid extent entry %.16llx)", *((u64 *) entry));
-                       goto out;
                }
-
-               first = false;
        }
-out:
-       if (bkey_extent_is_cached(e.k))
-               pr_buf(out, " cached");
 }
 
-static struct bch_dev_io_failures *dev_io_failures(struct bch_io_failures *f,
-                                                  unsigned dev)
-{
-       struct bch_dev_io_failures *i;
-
-       for (i = f->devs; i < f->devs + f->nr; i++)
-               if (i->dev == dev)
-                       return i;
-
-       return NULL;
-}
-
-void bch2_mark_io_failure(struct bch_io_failures *failed,
-                         struct extent_ptr_decoded *p)
-{
-       struct bch_dev_io_failures *f = dev_io_failures(failed, p->ptr.dev);
-
-       if (!f) {
-               BUG_ON(failed->nr >= ARRAY_SIZE(failed->devs));
-
-               f = &failed->devs[failed->nr++];
-               f->dev          = p->ptr.dev;
-               f->idx          = p->idx;
-               f->nr_failed    = 1;
-               f->nr_retries   = 0;
-       } else if (p->idx != f->idx) {
-               f->idx          = p->idx;
-               f->nr_failed    = 1;
-               f->nr_retries   = 0;
-       } else {
-               f->nr_failed++;
-       }
-}
-
-/*
- * returns true if p1 is better than p2:
- */
-static inline bool ptr_better(struct bch_fs *c,
-                             const struct extent_ptr_decoded p1,
-                             const struct extent_ptr_decoded p2)
+static const char *extent_ptr_invalid(const struct bch_fs *c,
+                                     struct bkey_s_c k,
+                                     const struct bch_extent_ptr *ptr,
+                                     unsigned size_ondisk,
+                                     bool metadata)
 {
-       if (likely(!p1.idx && !p2.idx)) {
-               struct bch_dev *dev1 = bch_dev_bkey_exists(c, p1.ptr.dev);
-               struct bch_dev *dev2 = bch_dev_bkey_exists(c, p2.ptr.dev);
+       struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+       const struct bch_extent_ptr *ptr2;
+       struct bch_dev *ca;
 
-               u64 l1 = atomic64_read(&dev1->cur_latency[READ]);
-               u64 l2 = atomic64_read(&dev2->cur_latency[READ]);
+       if (ptr->dev >= c->sb.nr_devices ||
+           !c->devs[ptr->dev])
+               return "pointer to invalid device";
 
-               /* Pick at random, biased in favor of the faster device: */
+       ca = bch_dev_bkey_exists(c, ptr->dev);
+       if (!ca)
+               return "pointer to invalid device";
 
-               return bch2_rand_range(l1 + l2) > l1;
-       }
+       bkey_for_each_ptr(ptrs, ptr2)
+               if (ptr != ptr2 && ptr->dev == ptr2->dev)
+                       return "multiple pointers to same device";
 
-       if (force_reconstruct_read(c))
-               return p1.idx > p2.idx;
+       if (ptr->offset + size_ondisk > bucket_to_sector(ca, ca->mi.nbuckets))
+               return "offset past end of device";
 
-       return p1.idx < p2.idx;
+       if (ptr->offset < bucket_to_sector(ca, ca->mi.first_bucket))
+               return "offset before first bucket";
+
+       if (bucket_remainder(ca, ptr->offset) +
+           size_ondisk > ca->mi.bucket_size)
+               return "spans multiple buckets";
+
+       return NULL;
 }
 
-static int extent_pick_read_device(struct bch_fs *c,
-                                  struct bkey_s_c_extent e,
-                                  struct bch_io_failures *failed,
-                                  struct extent_ptr_decoded *pick)
+static void bkey_ptrs_to_text(struct printbuf *out, struct bch_fs *c,
+                             struct bkey_s_c k)
 {
+       struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
        const union bch_extent_entry *entry;
-       struct extent_ptr_decoded p;
-       struct bch_dev_io_failures *f;
+       struct bch_extent_crc_unpacked crc;
+       const struct bch_extent_ptr *ptr;
+       const struct bch_extent_stripe_ptr *ec;
        struct bch_dev *ca;
-       int ret = 0;
-
-       extent_for_each_ptr_decode(e, p, entry) {
-               ca = bch_dev_bkey_exists(c, p.ptr.dev);
-
-               if (p.ptr.cached && ptr_stale(ca, &p.ptr))
-                       continue;
+       bool first = true;
 
-               f = failed ? dev_io_failures(failed, p.ptr.dev) : NULL;
-               if (f)
-                       p.idx = f->nr_failed < f->nr_retries
-                               ? f->idx
-                               : f->idx + 1;
+       bkey_extent_entry_for_each(ptrs, entry) {
+               if (!first)
+                       pr_buf(out, " ");
 
-               if (!p.idx &&
-                   !bch2_dev_is_readable(ca))
-                       p.idx++;
+               switch (__extent_entry_type(entry)) {
+               case BCH_EXTENT_ENTRY_ptr:
+                       ptr = entry_to_ptr(entry);
+                       ca = ptr->dev < c->sb.nr_devices && c->devs[ptr->dev]
+                               ? bch_dev_bkey_exists(c, ptr->dev)
+                               : NULL;
 
-               if (force_reconstruct_read(c) &&
-                   !p.idx && p.ec_nr)
-                       p.idx++;
+                       pr_buf(out, "ptr: %u:%llu gen %u%s%s", ptr->dev,
+                              (u64) ptr->offset, ptr->gen,
+                              ptr->cached ? " cached" : "",
+                              ca && ptr_stale(ca, ptr)
+                              ? " stale" : "");
+                       break;
+               case BCH_EXTENT_ENTRY_crc32:
+               case BCH_EXTENT_ENTRY_crc64:
+               case BCH_EXTENT_ENTRY_crc128:
+                       crc = bch2_extent_crc_unpack(k.k, entry_to_crc(entry));
 
-               if (p.idx >= p.ec_nr + 1)
-                       continue;
+                       pr_buf(out, "crc: c_size %u size %u offset %u nonce %u csum %u compress %u",
+                              crc.compressed_size,
+                              crc.uncompressed_size,
+                              crc.offset, crc.nonce,
+                              crc.csum_type,
+                              crc.compression_type);
+                       break;
+               case BCH_EXTENT_ENTRY_stripe_ptr:
+                       ec = &entry->stripe_ptr;
 
-               if (ret && !ptr_better(c, p, *pick))
-                       continue;
+                       pr_buf(out, "ec: idx %llu block %u",
+                              (u64) ec->idx, ec->block);
+                       break;
+               default:
+                       pr_buf(out, "(invalid extent entry %.16llx)", *((u64 *) entry));
+                       return;
+               }
 
-               *pick = p;
-               ret = 1;
+               first = false;
        }
-
-       return ret;
 }
 
 /* Btree ptrs */
 
 const char *bch2_btree_ptr_invalid(const struct bch_fs *c, struct bkey_s_c k)
 {
-       if (bkey_extent_is_cached(k.k))
-               return "cached";
-
-       if (k.k->size)
-               return "nonzero key size";
+       struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+       const union bch_extent_entry *entry;
+       const struct bch_extent_ptr *ptr;
+       const char *reason;
 
        if (bkey_val_u64s(k.k) > BKEY_BTREE_PTR_VAL_U64s_MAX)
                return "value too big";
 
-       switch (k.k->type) {
-       case BCH_EXTENT: {
-               struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
-               const union bch_extent_entry *entry;
-               const struct bch_extent_ptr *ptr;
-               const char *reason;
-
-               extent_for_each_entry(e, entry) {
-                       if (__extent_entry_type(entry) >= BCH_EXTENT_ENTRY_MAX)
-                               return "invalid extent entry type";
-
-                       if (!extent_entry_is_ptr(entry))
-                               return "has non ptr field";
-               }
-
-               extent_for_each_ptr(e, ptr) {
-                       reason = extent_ptr_invalid(c, e, ptr,
-                                                   c->opts.btree_node_size,
-                                                   true);
-                       if (reason)
-                               return reason;
-               }
+       bkey_extent_entry_for_each(ptrs, entry) {
+               if (__extent_entry_type(entry) >= BCH_EXTENT_ENTRY_MAX)
+                       return "invalid extent entry type";
 
-               return NULL;
+               if (!extent_entry_is_ptr(entry))
+                       return "has non ptr field";
        }
 
-       default:
-               return "invalid value type";
+       bkey_for_each_ptr(ptrs, ptr) {
+               reason = extent_ptr_invalid(c, k, ptr,
+                                           c->opts.btree_node_size,
+                                           true);
+               if (reason)
+                       return reason;
        }
+
+       return NULL;
 }
 
 void bch2_btree_ptr_debugcheck(struct bch_fs *c, struct btree *b,
                               struct bkey_s_c k)
 {
-       struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
+       struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
        const struct bch_extent_ptr *ptr;
        unsigned seq;
        const char *err;
@@ -630,7 +635,7 @@ void bch2_btree_ptr_debugcheck(struct bch_fs *c, struct btree *b,
        unsigned replicas = 0;
        bool bad;
 
-       extent_for_each_ptr(e, ptr) {
+       bkey_for_each_ptr(ptrs, ptr) {
                ca = bch_dev_bkey_exists(c, ptr->dev);
                replicas++;
 
@@ -656,9 +661,8 @@ void bch2_btree_ptr_debugcheck(struct bch_fs *c, struct btree *b,
        }
 
        if (!test_bit(BCH_FS_REBUILD_REPLICAS, &c->flags) &&
-           !bch2_bkey_replicas_marked(c, btree_node_type(b),
-                                      e.s_c, false)) {
-               bch2_bkey_val_to_text(&PBUF(buf), c, btree_node_type(b), k);
+           !bch2_bkey_replicas_marked(c, k, false)) {
+               bch2_bkey_val_to_text(&PBUF(buf), c, k);
                bch2_fs_bug(c,
                        "btree key bad (replicas not marked in superblock):\n%s",
                        buf);
@@ -667,7 +671,7 @@ void bch2_btree_ptr_debugcheck(struct bch_fs *c, struct btree *b,
 
        return;
 err:
-       bch2_bkey_val_to_text(&PBUF(buf), c, btree_node_type(b), k);
+       bch2_bkey_val_to_text(&PBUF(buf), c, k);
        bch2_fs_bug(c, "%s btree pointer %s: bucket %zi gen %i mark %08x",
                    err, buf, PTR_BUCKET_NR(ca, ptr),
                    mark.gen, (unsigned) mark.v.counter);
@@ -678,22 +682,13 @@ void bch2_btree_ptr_to_text(struct printbuf *out, struct bch_fs *c,
 {
        const char *invalid;
 
-       if (bkey_extent_is_data(k.k))
-               extent_print_ptrs(out, c, bkey_s_c_to_extent(k));
+       bkey_ptrs_to_text(out, c, k);
 
        invalid = bch2_btree_ptr_invalid(c, k);
        if (invalid)
                pr_buf(out, " invalid: %s", invalid);
 }
 
-int bch2_btree_pick_ptr(struct bch_fs *c, const struct btree *b,
-                       struct bch_io_failures *failed,
-                       struct extent_ptr_decoded *pick)
-{
-       return extent_pick_read_device(c, bkey_i_to_s_c_extent(&b->key),
-                                      failed, pick);
-}
-
 /* Extents */
 
 bool __bch2_cut_front(struct bpos where, struct bkey_s k)
@@ -714,7 +709,7 @@ bool __bch2_cut_front(struct bpos where, struct bkey_s k)
         * cause offset to point to the next bucket:
         */
        if (!len)
-               k.k->type = KEY_TYPE_DELETED;
+               k.k->type = KEY_TYPE_deleted;
        else if (bkey_extent_is_data(k.k)) {
                struct bkey_s_extent e = bkey_s_to_extent(k);
                union bch_extent_entry *entry;
@@ -766,7 +761,7 @@ bool bch2_cut_back(struct bpos where, struct bkey *k)
        k->size = len;
 
        if (!len)
-               k->type = KEY_TYPE_DELETED;
+               k->type = KEY_TYPE_deleted;
 
        return true;
 }
@@ -830,13 +825,13 @@ static void verify_extent_nonoverlapping(struct btree *b,
        struct bkey uk;
 
        iter = *_iter;
-       k = bch2_btree_node_iter_prev_filter(&iter, b, KEY_TYPE_DISCARD);
+       k = bch2_btree_node_iter_prev_filter(&iter, b, KEY_TYPE_discard);
        BUG_ON(k &&
               (uk = bkey_unpack_key(b, k),
                bkey_cmp(uk.p, bkey_start_pos(&insert->k)) > 0));
 
        iter = *_iter;
-       k = bch2_btree_node_iter_peek_filter(&iter, b, KEY_TYPE_DISCARD);
+       k = bch2_btree_node_iter_peek_filter(&iter, b, KEY_TYPE_discard);
 #if 0
        BUG_ON(k &&
               (uk = bkey_unpack_key(b, k),
@@ -882,13 +877,13 @@ static void extent_bset_insert(struct bch_fs *c, struct btree_iter *iter,
        verify_extent_nonoverlapping(l->b, &l->iter, insert);
 
        node_iter = l->iter;
-       k = bch2_btree_node_iter_prev_filter(&node_iter, l->b, KEY_TYPE_DISCARD);
+       k = bch2_btree_node_iter_prev_filter(&node_iter, l->b, KEY_TYPE_discard);
        if (k && !bkey_written(l->b, k) &&
            bch2_extent_merge_inline(c, iter, k, bkey_to_packed(insert), true))
                return;
 
        node_iter = l->iter;
-       k = bch2_btree_node_iter_peek_filter(&node_iter, l->b, KEY_TYPE_DISCARD);
+       k = bch2_btree_node_iter_peek_filter(&node_iter, l->b, KEY_TYPE_discard);
        if (k && !bkey_written(l->b, k) &&
            bch2_extent_merge_inline(c, iter, bkey_to_packed(insert), k, false))
                return;
@@ -912,7 +907,7 @@ static void extent_insert_committed(struct extent_insert_state *s)
 
        bkey_copy(&split.k, insert);
        if (s->deleting)
-               split.k.k.type = KEY_TYPE_DISCARD;
+               split.k.k.type = KEY_TYPE_discard;
 
        bch2_cut_back(s->committed, &split.k.k);
 
@@ -934,7 +929,7 @@ static void extent_insert_committed(struct extent_insert_state *s)
        if (s->update_journal) {
                bkey_copy(&split.k, !s->deleting ? insert : &s->whiteout);
                if (s->deleting)
-                       split.k.k.type = KEY_TYPE_DISCARD;
+                       split.k.k.type = KEY_TYPE_discard;
 
                bch2_cut_back(s->committed, &split.k.k);
 
@@ -985,7 +980,7 @@ bch2_extent_can_insert(struct btree_insert *trans,
                *u64s += BKEY_U64s;
 
        _k = bch2_btree_node_iter_peek_filter(&node_iter, l->b,
-                                             KEY_TYPE_DISCARD);
+                                             KEY_TYPE_discard);
        if (!_k)
                return BTREE_INSERT_OK;
 
@@ -1062,7 +1057,7 @@ extent_squash(struct extent_insert_state *s, struct bkey_i *insert,
                        btree_account_key_drop(l->b, _k);
 
                k.k->size = 0;
-               k.k->type = KEY_TYPE_DELETED;
+               k.k->type = KEY_TYPE_deleted;
 
                if (_k >= btree_bset_last(l->b)->start) {
                        unsigned u64s = _k->u64s;
@@ -1123,7 +1118,7 @@ static void __bch2_insert_fixup_extent(struct extent_insert_state *s)
 
        while (bkey_cmp(s->committed, insert->k.p) < 0 &&
               (_k = bch2_btree_node_iter_peek_filter(&l->iter, l->b,
-                                                     KEY_TYPE_DISCARD))) {
+                                                     KEY_TYPE_discard))) {
                struct bkey_s k = __bkey_disassemble(l->b, _k, &unpacked);
                enum bch_extent_overlap overlap = bch2_extent_overlap(&insert->k, k.k);
 
@@ -1155,7 +1150,7 @@ static void __bch2_insert_fixup_extent(struct extent_insert_state *s)
                    !bkey_cmp(bkey_start_pos(&insert->k), bkey_start_pos(k.k))) {
                        if (!bkey_whiteout(k.k)) {
                                btree_account_key_drop(l->b, _k);
-                               _k->type = KEY_TYPE_DISCARD;
+                               _k->type = KEY_TYPE_discard;
                                reserve_whiteout(l->b, _k);
                        }
                        break;
@@ -1286,88 +1281,66 @@ bch2_insert_fixup_extent(struct btree_insert *trans,
 
 const char *bch2_extent_invalid(const struct bch_fs *c, struct bkey_s_c k)
 {
-       if (bkey_val_u64s(k.k) > BKEY_EXTENT_VAL_U64s_MAX)
-               return "value too big";
-
-       if (!k.k->size)
-               return "zero key size";
+       struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
+       const union bch_extent_entry *entry;
+       struct bch_extent_crc_unpacked crc;
+       const struct bch_extent_ptr *ptr;
+       unsigned size_ondisk = e.k->size;
+       const char *reason;
+       unsigned nonce = UINT_MAX;
 
-       switch (k.k->type) {
-       case BCH_EXTENT:
-       case BCH_EXTENT_CACHED: {
-               struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
-               const union bch_extent_entry *entry;
-               struct bch_extent_crc_unpacked crc;
-               const struct bch_extent_ptr *ptr;
-               unsigned size_ondisk = e.k->size;
-               const char *reason;
-               unsigned nonce = UINT_MAX;
+       if (bkey_val_u64s(e.k) > BKEY_EXTENT_VAL_U64s_MAX)
+               return "value too big";
 
-               extent_for_each_entry(e, entry) {
-                       if (__extent_entry_type(entry) >= BCH_EXTENT_ENTRY_MAX)
-                               return "invalid extent entry type";
+       extent_for_each_entry(e, entry) {
+               if (__extent_entry_type(entry) >= BCH_EXTENT_ENTRY_MAX)
+                       return "invalid extent entry type";
 
-                       switch (extent_entry_type(entry)) {
-                       case BCH_EXTENT_ENTRY_ptr:
-                               ptr = entry_to_ptr(entry);
+               switch (extent_entry_type(entry)) {
+               case BCH_EXTENT_ENTRY_ptr:
+                       ptr = entry_to_ptr(entry);
 
-                               reason = extent_ptr_invalid(c, e, &entry->ptr,
-                                                           size_ondisk, false);
-                               if (reason)
-                                       return reason;
-                               break;
-                       case BCH_EXTENT_ENTRY_crc32:
-                       case BCH_EXTENT_ENTRY_crc64:
-                       case BCH_EXTENT_ENTRY_crc128:
-                               crc = bch2_extent_crc_unpack(e.k, entry_to_crc(entry));
+                       reason = extent_ptr_invalid(c, e.s_c, &entry->ptr,
+                                                   size_ondisk, false);
+                       if (reason)
+                               return reason;
+                       break;
+               case BCH_EXTENT_ENTRY_crc32:
+               case BCH_EXTENT_ENTRY_crc64:
+               case BCH_EXTENT_ENTRY_crc128:
+                       crc = bch2_extent_crc_unpack(e.k, entry_to_crc(entry));
 
-                               if (crc.offset + e.k->size >
-                                   crc.uncompressed_size)
-                                       return "checksum offset + key size > uncompressed size";
+                       if (crc.offset + e.k->size >
+                           crc.uncompressed_size)
+                               return "checksum offset + key size > uncompressed size";
 
-                               size_ondisk = crc.compressed_size;
+                       size_ondisk = crc.compressed_size;
 
-                               if (!bch2_checksum_type_valid(c, crc.csum_type))
-                                       return "invalid checksum type";
+                       if (!bch2_checksum_type_valid(c, crc.csum_type))
+                               return "invalid checksum type";
 
-                               if (crc.compression_type >= BCH_COMPRESSION_NR)
-                                       return "invalid compression type";
+                       if (crc.compression_type >= BCH_COMPRESSION_NR)
+                               return "invalid compression type";
 
-                               if (bch2_csum_type_is_encryption(crc.csum_type)) {
-                                       if (nonce == UINT_MAX)
-                                               nonce = crc.offset + crc.nonce;
-                                       else if (nonce != crc.offset + crc.nonce)
-                                               return "incorrect nonce";
-                               }
-                               break;
-                       case BCH_EXTENT_ENTRY_stripe_ptr:
-                               break;
+                       if (bch2_csum_type_is_encryption(crc.csum_type)) {
+                               if (nonce == UINT_MAX)
+                                       nonce = crc.offset + crc.nonce;
+                               else if (nonce != crc.offset + crc.nonce)
+                                       return "incorrect nonce";
                        }
+                       break;
+               case BCH_EXTENT_ENTRY_stripe_ptr:
+                       break;
                }
-
-               return NULL;
-       }
-
-       case BCH_RESERVATION: {
-               struct bkey_s_c_reservation r = bkey_s_c_to_reservation(k);
-
-               if (bkey_val_bytes(k.k) != sizeof(struct bch_reservation))
-                       return "incorrect value size";
-
-               if (!r.v->nr_replicas || r.v->nr_replicas > BCH_REPLICAS_MAX)
-                       return "invalid nr_replicas";
-
-               return NULL;
        }
 
-       default:
-               return "invalid value type";
-       }
+       return NULL;
 }
 
-static void bch2_extent_debugcheck_extent(struct bch_fs *c, struct btree *b,
-                                         struct bkey_s_c_extent e)
+void bch2_extent_debugcheck(struct bch_fs *c, struct btree *b,
+                           struct bkey_s_c k)
 {
+       struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
        const struct bch_extent_ptr *ptr;
        struct bch_dev *ca;
        struct bucket_mark mark;
@@ -1429,8 +1402,7 @@ static void bch2_extent_debugcheck_extent(struct bch_fs *c, struct btree *b,
        }
 
        if (replicas > BCH_REPLICAS_MAX) {
-               bch2_bkey_val_to_text(&PBUF(buf), c, btree_node_type(b),
-                                     e.s_c);
+               bch2_bkey_val_to_text(&PBUF(buf), c, e.s_c);
                bch2_fs_bug(c,
                        "extent key bad (too many replicas: %u): %s",
                        replicas, buf);
@@ -1438,10 +1410,8 @@ static void bch2_extent_debugcheck_extent(struct bch_fs *c, struct btree *b,
        }
 
        if (!test_bit(BCH_FS_REBUILD_REPLICAS, &c->flags) &&
-           !bch2_bkey_replicas_marked(c, btree_node_type(b),
-                                      e.s_c, false)) {
-               bch2_bkey_val_to_text(&PBUF(buf), c, btree_node_type(b),
-                                     e.s_c);
+           !bch2_bkey_replicas_marked(c, e.s_c, false)) {
+               bch2_bkey_val_to_text(&PBUF(buf), c, e.s_c);
                bch2_fs_bug(c,
                        "extent key bad (replicas not marked in superblock):\n%s",
                        buf);
@@ -1451,34 +1421,18 @@ static void bch2_extent_debugcheck_extent(struct bch_fs *c, struct btree *b,
        return;
 
 bad_ptr:
-       bch2_bkey_val_to_text(&PBUF(buf), c, btree_node_type(b),
-                             e.s_c);
+       bch2_bkey_val_to_text(&PBUF(buf), c, e.s_c);
        bch2_fs_bug(c, "extent pointer bad gc mark: %s:\nbucket %zu "
                   "gen %i type %u", buf,
                   PTR_BUCKET_NR(ca, ptr), mark.gen, mark.data_type);
 }
 
-void bch2_extent_debugcheck(struct bch_fs *c, struct btree *b, struct bkey_s_c k)
-{
-       switch (k.k->type) {
-       case BCH_EXTENT:
-       case BCH_EXTENT_CACHED:
-               bch2_extent_debugcheck_extent(c, b, bkey_s_c_to_extent(k));
-               break;
-       case BCH_RESERVATION:
-               break;
-       default:
-               BUG();
-       }
-}
-
 void bch2_extent_to_text(struct printbuf *out, struct bch_fs *c,
                         struct bkey_s_c k)
 {
        const char *invalid;
 
-       if (bkey_extent_is_data(k.k))
-               extent_print_ptrs(out, c, bkey_s_c_to_extent(k));
+       bkey_ptrs_to_text(out, c, k);
 
        invalid = bch2_extent_invalid(c, k);
        if (invalid)
@@ -1593,41 +1547,17 @@ found:
  */
 bool bch2_extent_normalize(struct bch_fs *c, struct bkey_s k)
 {
-       struct bkey_s_extent e;
-
-       switch (k.k->type) {
-       case KEY_TYPE_ERROR:
-               return false;
-
-       case KEY_TYPE_DELETED:
-               return true;
-       case KEY_TYPE_DISCARD:
-               return bversion_zero(k.k->version);
-       case KEY_TYPE_COOKIE:
-               return false;
-
-       case BCH_EXTENT:
-       case BCH_EXTENT_CACHED:
-               e = bkey_s_to_extent(k);
+       struct bch_extent_ptr *ptr;
 
-               bch2_extent_drop_stale(c, e);
+       bch2_bkey_drop_ptrs(k, ptr,
+               ptr->cached &&
+               ptr_stale(bch_dev_bkey_exists(c, ptr->dev), ptr));
 
-               if (!bkey_val_u64s(e.k)) {
-                       if (bkey_extent_is_cached(e.k)) {
-                               k.k->type = KEY_TYPE_DISCARD;
-                               if (bversion_zero(k.k->version))
-                                       return true;
-                       } else {
-                               k.k->type = KEY_TYPE_ERROR;
-                       }
-               }
+       /* will only happen if all pointers were cached: */
+       if (!bkey_val_u64s(k.k))
+               k.k->type = KEY_TYPE_deleted;
 
-               return false;
-       case BCH_RESERVATION:
-               return false;
-       default:
-               BUG();
-       }
+       return false;
 }
 
 void bch2_extent_mark_replicas_cached(struct bch_fs *c,
@@ -1637,7 +1567,7 @@ void bch2_extent_mark_replicas_cached(struct bch_fs *c,
 {
        union bch_extent_entry *entry;
        struct extent_ptr_decoded p;
-       int extra = bch2_extent_durability(c, e.c) - nr_desired_replicas;
+       int extra = bch2_bkey_durability(c, e.s_c) - nr_desired_replicas;
 
        if (target && extra > 0)
                extent_for_each_ptr_decode(e, p, entry) {
@@ -1661,106 +1591,40 @@ void bch2_extent_mark_replicas_cached(struct bch_fs *c,
                }
 }
 
-/*
- * This picks a non-stale pointer, preferably from a device other than @avoid.
- * Avoid can be NULL, meaning pick any. If there are no non-stale pointers to
- * other devices, it will still pick a pointer from avoid.
- */
-int bch2_extent_pick_ptr(struct bch_fs *c, struct bkey_s_c k,
-                        struct bch_io_failures *failed,
-                        struct extent_ptr_decoded *pick)
-{
-       int ret;
-
-       switch (k.k->type) {
-       case KEY_TYPE_ERROR:
-               return -EIO;
-
-       case BCH_EXTENT:
-       case BCH_EXTENT_CACHED:
-               ret = extent_pick_read_device(c, bkey_s_c_to_extent(k),
-                                             failed, pick);
-
-               if (!ret && !bkey_extent_is_cached(k.k))
-                       ret = -EIO;
-
-               return ret;
-
-       default:
-               return 0;
-       }
-}
-
-enum merge_result bch2_extent_merge(struct bch_fs *c, struct btree *b,
+enum merge_result bch2_extent_merge(struct bch_fs *c,
                                    struct bkey_i *l, struct bkey_i *r)
 {
-       struct bkey_s_extent el, er;
+       struct bkey_s_extent el = bkey_i_to_s_extent(l);
+       struct bkey_s_extent er = bkey_i_to_s_extent(r);
        union bch_extent_entry *en_l, *en_r;
 
-       if (key_merging_disabled(c))
-               return BCH_MERGE_NOMERGE;
-
-       /*
-        * Generic header checks
-        * Assumes left and right are in order
-        * Left and right must be exactly aligned
-        */
-
-       if (l->k.u64s           != r->k.u64s ||
-           l->k.type           != r->k.type ||
-           bversion_cmp(l->k.version, r->k.version) ||
-           bkey_cmp(l->k.p, bkey_start_pos(&r->k)))
+       if (bkey_val_u64s(&l->k) != bkey_val_u64s(&r->k))
                return BCH_MERGE_NOMERGE;
 
-       switch (l->k.type) {
-       case KEY_TYPE_DISCARD:
-       case KEY_TYPE_ERROR:
-               /* These types are mergeable, and no val to check */
-               break;
-
-       case BCH_EXTENT:
-       case BCH_EXTENT_CACHED:
-               el = bkey_i_to_s_extent(l);
-               er = bkey_i_to_s_extent(r);
-
-               extent_for_each_entry(el, en_l) {
-                       struct bch_extent_ptr *lp, *rp;
-                       struct bch_dev *ca;
-
-                       en_r = vstruct_idx(er.v, (u64 *) en_l - el.v->_data);
-
-                       if ((extent_entry_type(en_l) !=
-                            extent_entry_type(en_r)) ||
-                           !extent_entry_is_ptr(en_l))
-                               return BCH_MERGE_NOMERGE;
+       extent_for_each_entry(el, en_l) {
+               struct bch_extent_ptr *lp, *rp;
+               struct bch_dev *ca;
 
-                       lp = &en_l->ptr;
-                       rp = &en_r->ptr;
+               en_r = vstruct_idx(er.v, (u64 *) en_l - el.v->_data);
 
-                       if (lp->offset + el.k->size     != rp->offset ||
-                           lp->dev                     != rp->dev ||
-                           lp->gen                     != rp->gen)
-                               return BCH_MERGE_NOMERGE;
+               if ((extent_entry_type(en_l) !=
+                    extent_entry_type(en_r)) ||
+                   !extent_entry_is_ptr(en_l))
+                       return BCH_MERGE_NOMERGE;
 
-                       /* We don't allow extents to straddle buckets: */
-                       ca = bch_dev_bkey_exists(c, lp->dev);
+               lp = &en_l->ptr;
+               rp = &en_r->ptr;
 
-                       if (PTR_BUCKET_NR(ca, lp) != PTR_BUCKET_NR(ca, rp))
-                               return BCH_MERGE_NOMERGE;
-               }
+               if (lp->offset + el.k->size     != rp->offset ||
+                   lp->dev                     != rp->dev ||
+                   lp->gen                     != rp->gen)
+                       return BCH_MERGE_NOMERGE;
 
-               break;
-       case BCH_RESERVATION: {
-               struct bkey_i_reservation *li = bkey_i_to_reservation(l);
-               struct bkey_i_reservation *ri = bkey_i_to_reservation(r);
+               /* We don't allow extents to straddle buckets: */
+               ca = bch_dev_bkey_exists(c, lp->dev);
 
-               if (li->v.generation != ri->v.generation ||
-                   li->v.nr_replicas != ri->v.nr_replicas)
+               if (PTR_BUCKET_NR(ca, lp) != PTR_BUCKET_NR(ca, rp))
                        return BCH_MERGE_NOMERGE;
-               break;
-       }
-       default:
-               return BCH_MERGE_NOMERGE;
        }
 
        l->k.needs_whiteout |= r->k.needs_whiteout;
@@ -1810,7 +1674,7 @@ static bool bch2_extent_merge_inline(struct bch_fs *c,
        bch2_bkey_unpack(b, &li.k, l);
        bch2_bkey_unpack(b, &ri.k, r);
 
-       ret = bch2_extent_merge(c, b, &li.k, &ri.k);
+       ret = bch2_bkey_merge(c, &li.k, &ri.k);
        if (ret == BCH_MERGE_NOMERGE)
                return false;
 
@@ -1878,3 +1742,54 @@ int bch2_check_range_allocated(struct bch_fs *c, struct bpos pos, u64 size)
 
        return ret;
 }
+
+/* KEY_TYPE_reservation: */
+
+const char *bch2_reservation_invalid(const struct bch_fs *c, struct bkey_s_c k)
+{
+       struct bkey_s_c_reservation r = bkey_s_c_to_reservation(k);
+
+       if (bkey_val_bytes(k.k) != sizeof(struct bch_reservation))
+               return "incorrect value size";
+
+       if (!r.v->nr_replicas || r.v->nr_replicas > BCH_REPLICAS_MAX)
+               return "invalid nr_replicas";
+
+       return NULL;
+}
+
+void bch2_reservation_to_text(struct printbuf *out, struct bch_fs *c,
+                             struct bkey_s_c k)
+{
+       struct bkey_s_c_reservation r = bkey_s_c_to_reservation(k);
+
+       pr_buf(out, "generation %u replicas %u",
+              le32_to_cpu(r.v->generation),
+              r.v->nr_replicas);
+}
+
+enum merge_result bch2_reservation_merge(struct bch_fs *c,
+                                        struct bkey_i *l, struct bkey_i *r)
+{
+       struct bkey_i_reservation *li = bkey_i_to_reservation(l);
+       struct bkey_i_reservation *ri = bkey_i_to_reservation(r);
+
+       if (li->v.generation != ri->v.generation ||
+           li->v.nr_replicas != ri->v.nr_replicas)
+               return BCH_MERGE_NOMERGE;
+
+       l->k.needs_whiteout |= r->k.needs_whiteout;
+
+       /* Keys with no pointers aren't restricted to one bucket and could
+        * overflow KEY_SIZE
+        */
+       if ((u64) l->k.size + r->k.size > KEY_SIZE_MAX) {
+               bch2_key_resize(&l->k, KEY_SIZE_MAX);
+               bch2_cut_front(l->k.p, r);
+               return BCH_MERGE_PARTIAL;
+       }
+
+       bch2_key_resize(&l->k, l->k.size + r->k.size);
+
+       return BCH_MERGE_MERGE;
+}
index 389604f25630428eeaecb257283f53b3c54791df..57eb356995450ba0bacc7b02d68f3952f7973fec 100644 (file)
@@ -10,125 +10,34 @@ struct bch_fs;
 struct btree_insert;
 struct btree_insert_entry;
 
-const char *bch2_btree_ptr_invalid(const struct bch_fs *, struct bkey_s_c);
-void bch2_btree_ptr_debugcheck(struct bch_fs *, struct btree *,
-                              struct bkey_s_c);
-void bch2_btree_ptr_to_text(struct printbuf *, struct bch_fs *,
-                           struct bkey_s_c);
-void bch2_ptr_swab(const struct bkey_format *, struct bkey_packed *);
-
-#define bch2_bkey_btree_ops (struct bkey_ops) {                        \
-       .key_invalid    = bch2_btree_ptr_invalid,               \
-       .key_debugcheck = bch2_btree_ptr_debugcheck,            \
-       .val_to_text    = bch2_btree_ptr_to_text,               \
-       .swab           = bch2_ptr_swab,                        \
-}
-
-const char *bch2_extent_invalid(const struct bch_fs *, struct bkey_s_c);
-void bch2_extent_debugcheck(struct bch_fs *, struct btree *, struct bkey_s_c);
-void bch2_extent_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
-bool bch2_ptr_normalize(struct bch_fs *, struct btree *, struct bkey_s);
-enum merge_result bch2_extent_merge(struct bch_fs *, struct btree *,
-                                   struct bkey_i *, struct bkey_i *);
-
-#define bch2_bkey_extent_ops (struct bkey_ops) {               \
-       .key_invalid    = bch2_extent_invalid,                  \
-       .key_debugcheck = bch2_extent_debugcheck,               \
-       .val_to_text    = bch2_extent_to_text,                  \
-       .swab           = bch2_ptr_swab,                        \
-       .key_normalize  = bch2_ptr_normalize,                   \
-       .key_merge      = bch2_extent_merge,                    \
-       .is_extents     = true,                                 \
-}
-
-void bch2_mark_io_failure(struct bch_io_failures *,
-                         struct extent_ptr_decoded *);
-int bch2_btree_pick_ptr(struct bch_fs *, const struct btree *,
-                       struct bch_io_failures *,
-                       struct extent_ptr_decoded *);
-int bch2_extent_pick_ptr(struct bch_fs *, struct bkey_s_c,
-                        struct bch_io_failures *,
-                        struct extent_ptr_decoded *);
-
-void bch2_extent_trim_atomic(struct bkey_i *, struct btree_iter *);
-
-static inline bool bch2_extent_is_atomic(struct bkey *k,
-                                        struct btree_iter *iter)
-{
-       struct btree *b = iter->l[0].b;
-
-       return bkey_cmp(k->p, b->key.k.p) <= 0 &&
-               bkey_cmp(bkey_start_pos(k), b->data->min_key) >= 0;
-}
-
-enum btree_insert_ret
-bch2_extent_can_insert(struct btree_insert *, struct btree_insert_entry *,
-                      unsigned *);
-enum btree_insert_ret
-bch2_insert_fixup_extent(struct btree_insert *, struct btree_insert_entry *);
-
-bool bch2_extent_normalize(struct bch_fs *, struct bkey_s);
-void bch2_extent_mark_replicas_cached(struct bch_fs *, struct bkey_s_extent,
-                                     unsigned, unsigned);
-
-const struct bch_extent_ptr *
-bch2_extent_has_device(struct bkey_s_c_extent, unsigned);
-void bch2_extent_drop_device(struct bkey_s_extent, unsigned);
-const struct bch_extent_ptr *
-bch2_extent_has_group(struct bch_fs *, struct bkey_s_c_extent, unsigned);
-const struct bch_extent_ptr *
-bch2_extent_has_target(struct bch_fs *, struct bkey_s_c_extent, unsigned);
-
-unsigned bch2_extent_nr_ptrs(struct bkey_s_c_extent);
-unsigned bch2_extent_nr_dirty_ptrs(struct bkey_s_c);
-unsigned bch2_extent_is_compressed(struct bkey_s_c);
-
-unsigned bch2_extent_durability(struct bch_fs *, struct bkey_s_c_extent);
-
-bool bch2_extent_matches_ptr(struct bch_fs *, struct bkey_s_c_extent,
-                            struct bch_extent_ptr, u64);
-
-static inline bool bkey_extent_is_data(const struct bkey *k)
-{
-       switch (k->type) {
-       case BCH_EXTENT:
-       case BCH_EXTENT_CACHED:
-               return true;
-       default:
-               return false;
-       }
-}
-
-static inline bool bkey_extent_is_allocation(const struct bkey *k)
-{
-       switch (k->type) {
-       case BCH_EXTENT:
-       case BCH_EXTENT_CACHED:
-       case BCH_RESERVATION:
-               return true;
-       default:
-               return false;
-       }
-}
-
-static inline bool bch2_extent_is_fully_allocated(struct bkey_s_c k)
-{
-       return bkey_extent_is_allocation(k.k) &&
-               !bch2_extent_is_compressed(k);
-}
+/* extent entries: */
 
-static inline bool bkey_extent_is_cached(const struct bkey *k)
-{
-       return k->type == BCH_EXTENT_CACHED;
-}
+#define extent_entry_last(_e)          bkey_val_end(_e)
 
-static inline void bkey_extent_set_cached(struct bkey *k, bool cached)
-{
-       EBUG_ON(k->type != BCH_EXTENT &&
-               k->type != BCH_EXTENT_CACHED);
+#define entry_to_ptr(_entry)                                           \
+({                                                                     \
+       EBUG_ON((_entry) && !extent_entry_is_ptr(_entry));              \
+                                                                       \
+       __builtin_choose_expr(                                          \
+               type_is_exact(_entry, const union bch_extent_entry *),  \
+               (const struct bch_extent_ptr *) (_entry),               \
+               (struct bch_extent_ptr *) (_entry));                    \
+})
 
-       k->type = cached ? BCH_EXTENT_CACHED : BCH_EXTENT;
-}
+/* downcast, preserves const */
+#define to_entry(_entry)                                               \
+({                                                                     \
+       BUILD_BUG_ON(!type_is(_entry, union bch_extent_crc *) &&        \
+                    !type_is(_entry, struct bch_extent_ptr *) &&       \
+                    !type_is(_entry, struct bch_extent_stripe_ptr *)); \
+                                                                       \
+       __builtin_choose_expr(                                          \
+               (type_is_exact(_entry, const union bch_extent_crc *) || \
+                type_is_exact(_entry, const struct bch_extent_ptr *) ||\
+                type_is_exact(_entry, const struct bch_extent_stripe_ptr *)),\
+               (const union bch_extent_entry *) (_entry),              \
+               (union bch_extent_entry *) (_entry));                   \
+})
 
 static inline unsigned
 __extent_entry_type(const union bch_extent_entry *e)
@@ -193,21 +102,6 @@ union bch_extent_crc {
        struct bch_extent_crc128        crc128;
 };
 
-/* downcast, preserves const */
-#define to_entry(_entry)                                               \
-({                                                                     \
-       BUILD_BUG_ON(!type_is(_entry, union bch_extent_crc *) &&        \
-                    !type_is(_entry, struct bch_extent_ptr *) &&       \
-                    !type_is(_entry, struct bch_extent_stripe_ptr *)); \
-                                                                       \
-       __builtin_choose_expr(                                          \
-               (type_is_exact(_entry, const union bch_extent_crc *) || \
-                type_is_exact(_entry, const struct bch_extent_ptr *) ||\
-                type_is_exact(_entry, const struct bch_extent_stripe_ptr *)),\
-               (const union bch_extent_entry *) (_entry),              \
-               (union bch_extent_entry *) (_entry));                   \
-})
-
 #define __entry_to_crc(_entry)                                         \
        __builtin_choose_expr(                                          \
                type_is_exact(_entry, const union bch_extent_entry *),  \
@@ -221,18 +115,6 @@ union bch_extent_crc {
        __entry_to_crc(_entry);                                         \
 })
 
-#define entry_to_ptr(_entry)                                           \
-({                                                                     \
-       EBUG_ON((_entry) && !extent_entry_is_ptr(_entry));              \
-                                                                       \
-       __builtin_choose_expr(                                          \
-               type_is_exact(_entry, const union bch_extent_entry *),  \
-               (const struct bch_extent_ptr *) (_entry),               \
-               (struct bch_extent_ptr *) (_entry));                    \
-})
-
-/* checksum entries: */
-
 static inline struct bch_extent_crc_unpacked
 bch2_extent_crc_unpack(const struct bkey *k, const union bch_extent_crc *crc)
 {
@@ -290,71 +172,64 @@ bch2_extent_crc_unpack(const struct bkey *k, const union bch_extent_crc *crc)
 #undef common_fields
 }
 
-/* Extent entry iteration: */
-
-#define extent_entry_next(_entry)                                      \
-       ((typeof(_entry)) ((void *) (_entry) + extent_entry_bytes(_entry)))
+/* bkey_ptrs: generically over any key type that has ptrs */
 
-#define extent_entry_last(_e)                                          \
-       vstruct_idx((_e).v, bkey_val_u64s((_e).k))
+struct bkey_ptrs_c {
+       const union bch_extent_entry    *start;
+       const union bch_extent_entry    *end;
+};
 
-/* Iterate over all entries: */
+struct bkey_ptrs {
+       union bch_extent_entry  *start;
+       union bch_extent_entry  *end;
+};
 
-#define extent_for_each_entry_from(_e, _entry, _start)                 \
-       for ((_entry) = _start;                                         \
-            (_entry) < extent_entry_last(_e);                          \
-            (_entry) = extent_entry_next(_entry))
+/* iterate over bkey ptrs */
 
-#define extent_for_each_entry(_e, _entry)                              \
-       extent_for_each_entry_from(_e, _entry, (_e).v->start)
+#define extent_entry_next(_entry)                                      \
+       ((typeof(_entry)) ((void *) (_entry) + extent_entry_bytes(_entry)))
 
-/* Iterate over pointers only: */
+#define __bkey_extent_entry_for_each_from(_start, _end, _entry)                \
+       for ((_entry) = (_start);                                       \
+            (_entry) < (_end);                                         \
+            (_entry) = extent_entry_next(_entry))
 
-#define extent_ptr_next(_e, _ptr)                                      \
+#define __bkey_ptr_next(_ptr, _end)                                    \
 ({                                                                     \
-       typeof(&(_e).v->start[0]) _entry;                               \
+       typeof(_end) _entry;                                            \
                                                                        \
-       extent_for_each_entry_from(_e, _entry, to_entry(_ptr))          \
+       __bkey_extent_entry_for_each_from(to_entry(_ptr), _end, _entry) \
                if (extent_entry_is_ptr(_entry))                        \
                        break;                                          \
                                                                        \
-       _entry < extent_entry_last(_e) ? entry_to_ptr(_entry) : NULL;   \
+       _entry < (_end) ? entry_to_ptr(_entry) : NULL;                  \
 })
 
-#define extent_for_each_ptr(_e, _ptr)                                  \
-       for ((_ptr) = &(_e).v->start->ptr;                              \
-            ((_ptr) = extent_ptr_next(_e, _ptr));                      \
-            (_ptr)++)
+#define bkey_extent_entry_for_each_from(_p, _entry, _start)            \
+       __bkey_extent_entry_for_each_from(_start, (_p).end, _entry)
 
-/* Iterate over crcs only: */
+#define bkey_extent_entry_for_each(_p, _entry)                         \
+       bkey_extent_entry_for_each_from(_p, _entry, _p.start)
 
-#define extent_crc_next(_e, _crc, _iter)                               \
-({                                                                     \
-       extent_for_each_entry_from(_e, _iter, _iter)                    \
-               if (extent_entry_is_crc(_iter)) {                       \
-                       (_crc) = bch2_extent_crc_unpack((_e).k, entry_to_crc(_iter));\
-                       break;                                          \
-               }                                                       \
-                                                                       \
-       (_iter) < extent_entry_last(_e);                                \
-})
+#define __bkey_for_each_ptr(_start, _end, _ptr)                                \
+       for ((_ptr) = (_start);                                         \
+            ((_ptr) = __bkey_ptr_next(_ptr, _end));                    \
+            (_ptr)++)
 
-#define extent_for_each_crc(_e, _crc, _iter)                           \
-       for ((_crc) = bch2_extent_crc_unpack((_e).k, NULL),             \
-            (_iter) = (_e).v->start;                                   \
-            extent_crc_next(_e, _crc, _iter);                          \
-            (_iter) = extent_entry_next(_iter))
+#define bkey_ptr_next(_p, _ptr)                                                \
+       __bkey_ptr_next(_ptr, (_p).end)
 
-/* Iterate over pointers, with crcs: */
+#define bkey_for_each_ptr(_p, _ptr)                                    \
+       __bkey_for_each_ptr(&(_p).start->ptr, (_p).end, _ptr)
 
-#define __extent_ptr_next_decode(_e, _ptr, _entry)                     \
+#define __bkey_ptr_next_decode(_k, _end, _ptr, _entry)                 \
 ({                                                                     \
        __label__ out;                                                  \
                                                                        \
        (_ptr).idx      = 0;                                            \
        (_ptr).ec_nr    = 0;                                            \
                                                                        \
-       extent_for_each_entry_from(_e, _entry, _entry)                  \
+       __bkey_extent_entry_for_each_from(_entry, _end, _entry)         \
                switch (extent_entry_type(_entry)) {                    \
                case BCH_EXTENT_ENTRY_ptr:                              \
                        (_ptr).ptr              = _entry->ptr;          \
@@ -362,7 +237,7 @@ bch2_extent_crc_unpack(const struct bkey *k, const union bch_extent_crc *crc)
                case BCH_EXTENT_ENTRY_crc32:                            \
                case BCH_EXTENT_ENTRY_crc64:                            \
                case BCH_EXTENT_ENTRY_crc128:                           \
-                       (_ptr).crc = bch2_extent_crc_unpack((_e).k,     \
+                       (_ptr).crc = bch2_extent_crc_unpack(_k,         \
                                        entry_to_crc(_entry));          \
                        break;                                          \
                case BCH_EXTENT_ENTRY_stripe_ptr:                       \
@@ -370,122 +245,298 @@ bch2_extent_crc_unpack(const struct bkey *k, const union bch_extent_crc *crc)
                        break;                                          \
                }                                                       \
 out:                                                                   \
-       _entry < extent_entry_last(_e);                                 \
+       _entry < (_end);                                                \
 })
 
-#define extent_for_each_ptr_decode(_e, _ptr, _entry)                   \
-       for ((_ptr).crc = bch2_extent_crc_unpack((_e).k, NULL),         \
-            (_entry) = (_e).v->start;                                  \
-            __extent_ptr_next_decode(_e, _ptr, _entry);                \
+#define __bkey_for_each_ptr_decode(_k, _start, _end, _ptr, _entry)     \
+       for ((_ptr).crc = bch2_extent_crc_unpack(_k, NULL),             \
+            (_entry) = _start;                                         \
+            __bkey_ptr_next_decode(_k, _end, _ptr, _entry);            \
             (_entry) = extent_entry_next(_entry))
 
-/* Iterate over pointers backwards: */
+#define bkey_for_each_ptr_decode(_k, _p, _ptr, _entry)                 \
+       __bkey_for_each_ptr_decode(_k, (_p).start, (_p).end,            \
+                                  _ptr, _entry)
 
-void bch2_extent_crc_append(struct bkey_i_extent *,
-                           struct bch_extent_crc_unpacked);
-void bch2_extent_ptr_decoded_append(struct bkey_i_extent *,
-                                   struct extent_ptr_decoded *);
+/* utility code common to all keys with pointers: */
 
-static inline void __extent_entry_push(struct bkey_i_extent *e)
+static inline struct bkey_ptrs_c bch2_bkey_ptrs_c(struct bkey_s_c k)
 {
-       union bch_extent_entry *entry = extent_entry_last(extent_i_to_s(e));
-
-       EBUG_ON(bkey_val_u64s(&e->k) + extent_entry_u64s(entry) >
-               BKEY_EXTENT_VAL_U64s_MAX);
-
-       e->k.u64s += extent_entry_u64s(entry);
+       switch (k.k->type) {
+       case KEY_TYPE_btree_ptr: {
+               struct bkey_s_c_btree_ptr e = bkey_s_c_to_btree_ptr(k);
+               return (struct bkey_ptrs_c) {
+                       to_entry(&e.v->start[0]),
+                       to_entry(bkey_val_end(e))
+               };
+       }
+       case KEY_TYPE_extent: {
+               struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
+               return (struct bkey_ptrs_c) {
+                       e.v->start,
+                       extent_entry_last(e)
+               };
+       }
+       case KEY_TYPE_stripe: {
+               struct bkey_s_c_stripe s = bkey_s_c_to_stripe(k);
+               return (struct bkey_ptrs_c) {
+                       to_entry(&s.v->ptrs[0]),
+                       to_entry(&s.v->ptrs[s.v->nr_blocks]),
+               };
+       }
+       default:
+               return (struct bkey_ptrs_c) { NULL, NULL };
+       }
 }
 
-static inline void extent_ptr_append(struct bkey_i_extent *e,
-                                    struct bch_extent_ptr ptr)
+static inline struct bkey_ptrs bch2_bkey_ptrs(struct bkey_s k)
 {
-       ptr.type = 1 << BCH_EXTENT_ENTRY_ptr;
-       extent_entry_last(extent_i_to_s(e))->ptr = ptr;
-       __extent_entry_push(e);
+       struct bkey_ptrs_c p = bch2_bkey_ptrs_c(k.s_c);
+
+       return (struct bkey_ptrs) {
+               (void *) p.start,
+               (void *) p.end
+       };
 }
 
-static inline struct bch_devs_list bch2_extent_devs(struct bkey_s_c_extent e)
+static inline struct bch_devs_list bch2_bkey_devs(struct bkey_s_c k)
 {
        struct bch_devs_list ret = (struct bch_devs_list) { 0 };
+       struct bkey_ptrs_c p = bch2_bkey_ptrs_c(k);
        const struct bch_extent_ptr *ptr;
 
-       extent_for_each_ptr(e, ptr)
+       bkey_for_each_ptr(p, ptr)
                ret.devs[ret.nr++] = ptr->dev;
 
        return ret;
 }
 
-static inline struct bch_devs_list bch2_extent_dirty_devs(struct bkey_s_c_extent e)
+static inline struct bch_devs_list bch2_bkey_dirty_devs(struct bkey_s_c k)
 {
        struct bch_devs_list ret = (struct bch_devs_list) { 0 };
+       struct bkey_ptrs_c p = bch2_bkey_ptrs_c(k);
        const struct bch_extent_ptr *ptr;
 
-       extent_for_each_ptr(e, ptr)
+       bkey_for_each_ptr(p, ptr)
                if (!ptr->cached)
                        ret.devs[ret.nr++] = ptr->dev;
 
        return ret;
 }
 
-static inline struct bch_devs_list bch2_extent_cached_devs(struct bkey_s_c_extent e)
+static inline struct bch_devs_list bch2_bkey_cached_devs(struct bkey_s_c k)
 {
        struct bch_devs_list ret = (struct bch_devs_list) { 0 };
+       struct bkey_ptrs_c p = bch2_bkey_ptrs_c(k);
        const struct bch_extent_ptr *ptr;
 
-       extent_for_each_ptr(e, ptr)
+       bkey_for_each_ptr(p, ptr)
                if (ptr->cached)
                        ret.devs[ret.nr++] = ptr->dev;
 
        return ret;
 }
 
-static inline struct bch_devs_list bch2_bkey_devs(struct bkey_s_c k)
+static inline bool bch2_bkey_has_device(struct bkey_s_c k, unsigned dev)
 {
-       switch (k.k->type) {
-       case BCH_EXTENT:
-       case BCH_EXTENT_CACHED:
-               return bch2_extent_devs(bkey_s_c_to_extent(k));
-       default:
-               return (struct bch_devs_list) { .nr = 0 };
-       }
+       struct bkey_ptrs_c p = bch2_bkey_ptrs_c(k);
+       const struct bch_extent_ptr *ptr;
+
+       bkey_for_each_ptr(p, ptr)
+               if (ptr->dev == dev)
+                       return ptr;
+
+       return NULL;
 }
 
-static inline struct bch_devs_list bch2_bkey_dirty_devs(struct bkey_s_c k)
+unsigned bch2_bkey_nr_ptrs(struct bkey_s_c);
+unsigned bch2_bkey_nr_dirty_ptrs(struct bkey_s_c);
+unsigned bch2_bkey_durability(struct bch_fs *, struct bkey_s_c);
+
+void bch2_mark_io_failure(struct bch_io_failures *,
+                         struct extent_ptr_decoded *);
+int bch2_bkey_pick_read_device(struct bch_fs *, struct bkey_s_c,
+                              struct bch_io_failures *,
+                              struct extent_ptr_decoded *);
+
+/* bch_btree_ptr: */
+
+const char *bch2_btree_ptr_invalid(const struct bch_fs *, struct bkey_s_c);
+void bch2_btree_ptr_debugcheck(struct bch_fs *, struct btree *,
+                              struct bkey_s_c);
+void bch2_btree_ptr_to_text(struct printbuf *, struct bch_fs *,
+                           struct bkey_s_c);
+void bch2_ptr_swab(const struct bkey_format *, struct bkey_packed *);
+
+#define bch2_bkey_ops_btree_ptr (struct bkey_ops) {            \
+       .key_invalid    = bch2_btree_ptr_invalid,               \
+       .key_debugcheck = bch2_btree_ptr_debugcheck,            \
+       .val_to_text    = bch2_btree_ptr_to_text,               \
+       .swab           = bch2_ptr_swab,                        \
+}
+
+/* bch_extent: */
+
+const char *bch2_extent_invalid(const struct bch_fs *, struct bkey_s_c);
+void bch2_extent_debugcheck(struct bch_fs *, struct btree *, struct bkey_s_c);
+void bch2_extent_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
+bool bch2_extent_normalize(struct bch_fs *, struct bkey_s);
+enum merge_result bch2_extent_merge(struct bch_fs *,
+                                   struct bkey_i *, struct bkey_i *);
+
+#define bch2_bkey_ops_extent (struct bkey_ops) {               \
+       .key_invalid    = bch2_extent_invalid,                  \
+       .key_debugcheck = bch2_extent_debugcheck,               \
+       .val_to_text    = bch2_extent_to_text,                  \
+       .swab           = bch2_ptr_swab,                        \
+       .key_normalize  = bch2_extent_normalize,                \
+       .key_merge      = bch2_extent_merge,                    \
+}
+
+/* bch_reservation: */
+
+const char *bch2_reservation_invalid(const struct bch_fs *, struct bkey_s_c);
+void bch2_reservation_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
+enum merge_result bch2_reservation_merge(struct bch_fs *,
+                                        struct bkey_i *, struct bkey_i *);
+
+#define bch2_bkey_ops_reservation (struct bkey_ops) {          \
+       .key_invalid    = bch2_reservation_invalid,             \
+       .val_to_text    = bch2_reservation_to_text,             \
+       .key_merge      = bch2_reservation_merge,               \
+}
+
+void bch2_extent_trim_atomic(struct bkey_i *, struct btree_iter *);
+
+static inline bool bch2_extent_is_atomic(struct bkey *k,
+                                        struct btree_iter *iter)
 {
-       switch (k.k->type) {
-       case BCH_EXTENT:
-       case BCH_EXTENT_CACHED:
-               return bch2_extent_dirty_devs(bkey_s_c_to_extent(k));
+       struct btree *b = iter->l[0].b;
+
+       return bkey_cmp(k->p, b->key.k.p) <= 0 &&
+               bkey_cmp(bkey_start_pos(k), b->data->min_key) >= 0;
+}
+
+enum btree_insert_ret
+bch2_extent_can_insert(struct btree_insert *, struct btree_insert_entry *,
+                      unsigned *);
+enum btree_insert_ret
+bch2_insert_fixup_extent(struct btree_insert *, struct btree_insert_entry *);
+
+void bch2_extent_mark_replicas_cached(struct bch_fs *, struct bkey_s_extent,
+                                     unsigned, unsigned);
+
+const struct bch_extent_ptr *
+bch2_extent_has_device(struct bkey_s_c_extent, unsigned);
+const struct bch_extent_ptr *
+bch2_extent_has_group(struct bch_fs *, struct bkey_s_c_extent, unsigned);
+const struct bch_extent_ptr *
+bch2_extent_has_target(struct bch_fs *, struct bkey_s_c_extent, unsigned);
+
+unsigned bch2_extent_is_compressed(struct bkey_s_c);
+
+bool bch2_extent_matches_ptr(struct bch_fs *, struct bkey_s_c_extent,
+                            struct bch_extent_ptr, u64);
+
+static inline bool bkey_extent_is_data(const struct bkey *k)
+{
+       switch (k->type) {
+       case KEY_TYPE_btree_ptr:
+       case KEY_TYPE_extent:
+               return true;
        default:
-               return (struct bch_devs_list) { .nr = 0 };
+               return false;
        }
 }
 
-static inline struct bch_devs_list bch2_bkey_cached_devs(struct bkey_s_c k)
+static inline bool bkey_extent_is_allocation(const struct bkey *k)
 {
-       switch (k.k->type) {
-       case BCH_EXTENT:
-       case BCH_EXTENT_CACHED:
-               return bch2_extent_cached_devs(bkey_s_c_to_extent(k));
+       switch (k->type) {
+       case KEY_TYPE_extent:
+       case KEY_TYPE_reservation:
+               return true;
        default:
-               return (struct bch_devs_list) { .nr = 0 };
+               return false;
        }
 }
 
+static inline bool bch2_extent_is_fully_allocated(struct bkey_s_c k)
+{
+       return bkey_extent_is_allocation(k.k) &&
+               !bch2_extent_is_compressed(k);
+}
+
+void bch2_bkey_append_ptr(struct bkey_i *, struct bch_extent_ptr);
+void bch2_bkey_drop_device(struct bkey_s, unsigned);
+
+/* Extent entry iteration: */
+
+#define extent_for_each_entry_from(_e, _entry, _start)                 \
+       __bkey_extent_entry_for_each_from(_start,                       \
+                               extent_entry_last(_e),_entry)
+
+#define extent_for_each_entry(_e, _entry)                              \
+       extent_for_each_entry_from(_e, _entry, (_e).v->start)
+
+#define extent_ptr_next(_e, _ptr)                                      \
+       __bkey_ptr_next(_ptr, extent_entry_last(_e))
+
+#define extent_for_each_ptr(_e, _ptr)                                  \
+       __bkey_for_each_ptr(&(_e).v->start->ptr, extent_entry_last(_e), _ptr)
+
+#define extent_crc_next(_e, _crc, _iter)                               \
+({                                                                     \
+       extent_for_each_entry_from(_e, _iter, _iter)                    \
+               if (extent_entry_is_crc(_iter)) {                       \
+                       (_crc) = bch2_extent_crc_unpack((_e).k, entry_to_crc(_iter));\
+                       break;                                          \
+               }                                                       \
+                                                                       \
+       (_iter) < extent_entry_last(_e);                                \
+})
+
+#define extent_for_each_crc(_e, _crc, _iter)                           \
+       for ((_crc) = bch2_extent_crc_unpack((_e).k, NULL),             \
+            (_iter) = (_e).v->start;                                   \
+            extent_crc_next(_e, _crc, _iter);                          \
+            (_iter) = extent_entry_next(_iter))
+
+#define extent_for_each_ptr_decode(_e, _ptr, _entry)                   \
+       __bkey_for_each_ptr_decode((_e).k, (_e).v->start,               \
+                                  extent_entry_last(_e), _ptr, _entry)
+
+void bch2_extent_crc_append(struct bkey_i_extent *,
+                           struct bch_extent_crc_unpacked);
+void bch2_extent_ptr_decoded_append(struct bkey_i_extent *,
+                                   struct extent_ptr_decoded *);
+
+static inline void __extent_entry_push(struct bkey_i_extent *e)
+{
+       union bch_extent_entry *entry = extent_entry_last(extent_i_to_s(e));
+
+       EBUG_ON(bkey_val_u64s(&e->k) + extent_entry_u64s(entry) >
+               BKEY_EXTENT_VAL_U64s_MAX);
+
+       e->k.u64s += extent_entry_u64s(entry);
+}
+
 bool bch2_can_narrow_extent_crcs(struct bkey_s_c_extent,
                                 struct bch_extent_crc_unpacked);
 bool bch2_extent_narrow_crcs(struct bkey_i_extent *, struct bch_extent_crc_unpacked);
 
-union bch_extent_entry *bch2_extent_drop_ptr(struct bkey_s_extent ,
-                                            struct bch_extent_ptr *);
+union bch_extent_entry *bch2_bkey_drop_ptr(struct bkey_s,
+                                          struct bch_extent_ptr *);
 
-#define bch2_extent_drop_ptrs(_e, _ptr, _cond)                         \
+#define bch2_bkey_drop_ptrs(_k, _ptr, _cond)                           \
 do {                                                                   \
-       _ptr = &(_e).v->start->ptr;                                     \
+       struct bkey_ptrs _ptrs = bch2_bkey_ptrs(_k);                    \
+                                                                       \
+       _ptr = &_ptrs.start->ptr;                                       \
                                                                        \
-       while ((_ptr = extent_ptr_next(e, _ptr))) {                     \
+       while ((_ptr = bkey_ptr_next(_ptrs, _ptr))) {                   \
                if (_cond) {                                            \
-                       _ptr = (void *) bch2_extent_drop_ptr(_e, _ptr); \
+                       _ptr = (void *) bch2_bkey_drop_ptr(_k, _ptr);   \
+                       _ptrs = bch2_bkey_ptrs(_k);                     \
                        continue;                                       \
                }                                                       \
                                                                        \
index e7d7c5fe6db7ff920a82dfd68873d1a09449eaa6..ad06db069fcf33e9535f26b71365a568701f1c09 100644 (file)
@@ -121,7 +121,7 @@ static void bch2_quota_reservation_put(struct bch_fs *c,
        BUG_ON(res->sectors > inode->ei_quota_reserved);
 
        bch2_quota_acct(c, inode->ei_qid, Q_SPC,
-                       -((s64) res->sectors), BCH_QUOTA_PREALLOC);
+                       -((s64) res->sectors), KEY_TYPE_QUOTA_PREALLOC);
        inode->ei_quota_reserved -= res->sectors;
        mutex_unlock(&inode->ei_quota_lock);
 
@@ -138,7 +138,7 @@ static int bch2_quota_reservation_add(struct bch_fs *c,
 
        mutex_lock(&inode->ei_quota_lock);
        ret = bch2_quota_acct(c, inode->ei_qid, Q_SPC, sectors,
-                             check_enospc ? BCH_QUOTA_PREALLOC : BCH_QUOTA_NOCHECK);
+                             check_enospc ? KEY_TYPE_QUOTA_PREALLOC : KEY_TYPE_QUOTA_NOCHECK);
        if (likely(!ret)) {
                inode->ei_quota_reserved += sectors;
                res->sectors += sectors;
@@ -220,7 +220,7 @@ static void i_sectors_acct(struct bch_fs *c, struct bch_inode_info *inode,
                quota_res->sectors -= sectors;
                inode->ei_quota_reserved -= sectors;
        } else {
-               bch2_quota_acct(c, inode->ei_qid, Q_SPC, sectors, BCH_QUOTA_WARN);
+               bch2_quota_acct(c, inode->ei_qid, Q_SPC, sectors, KEY_TYPE_QUOTA_WARN);
        }
 #endif
        inode->v.i_blocks += sectors;
@@ -813,7 +813,7 @@ static void bch2_add_page_sectors(struct bio *bio, struct bkey_s_c k)
        struct bvec_iter iter;
        struct bio_vec bv;
        unsigned nr_ptrs = !bch2_extent_is_compressed(k)
-               ? bch2_extent_nr_dirty_ptrs(k)
+               ? bch2_bkey_nr_dirty_ptrs(k)
                : 0;
 
        bio_for_each_segment(bv, bio, iter) {
@@ -2397,7 +2397,7 @@ static long bch2_fcollapse(struct bch_inode_info *inode,
                BUG_ON(bkey_cmp(dst->pos, bkey_start_pos(&copy.k.k)));
 
                ret = bch2_disk_reservation_get(c, &disk_res, copy.k.k.size,
-                               bch2_extent_nr_dirty_ptrs(bkey_i_to_s_c(&copy.k)),
+                               bch2_bkey_nr_dirty_ptrs(bkey_i_to_s_c(&copy.k)),
                                BCH_DISK_RESERVATION_NOFAIL);
                BUG_ON(ret);
 
@@ -2504,7 +2504,7 @@ static long bch2_fallocate(struct bch_inode_info *inode, int mode,
                        goto btree_iter_err;
 
                /* already reserved */
-               if (k.k->type == BCH_RESERVATION &&
+               if (k.k->type == KEY_TYPE_reservation &&
                    bkey_s_c_to_reservation(k).v->nr_replicas >= replicas) {
                        bch2_btree_iter_next_slot(iter);
                        continue;
@@ -2517,7 +2517,7 @@ static long bch2_fallocate(struct bch_inode_info *inode, int mode,
                }
 
                bkey_reservation_init(&reservation.k_i);
-               reservation.k.type      = BCH_RESERVATION;
+               reservation.k.type      = KEY_TYPE_reservation;
                reservation.k.p         = k.k->p;
                reservation.k.size      = k.k->size;
 
@@ -2525,7 +2525,7 @@ static long bch2_fallocate(struct bch_inode_info *inode, int mode,
                bch2_cut_back(end_pos, &reservation.k);
 
                sectors = reservation.k.size;
-               reservation.v.nr_replicas = bch2_extent_nr_dirty_ptrs(k);
+               reservation.v.nr_replicas = bch2_bkey_nr_dirty_ptrs(k);
 
                if (!bkey_extent_is_allocation(k.k)) {
                        ret = bch2_quota_reservation_add(c, inode,
index b6fe2059fe5fab11572ed42405d864a7c14894a7..93e1f3aaacd42d782713877fc4fe2f25595b7979 100644 (file)
@@ -340,7 +340,7 @@ __bch2_create(struct mnt_idmap *idmap,
        if (tmpfile)
                inode_u.bi_flags |= BCH_INODE_UNLINKED;
 
-       ret = bch2_quota_acct(c, bch_qid(&inode_u), Q_INO, 1, BCH_QUOTA_PREALLOC);
+       ret = bch2_quota_acct(c, bch_qid(&inode_u), Q_INO, 1, KEY_TYPE_QUOTA_PREALLOC);
        if (ret)
                return ERR_PTR(ret);
 
@@ -457,7 +457,7 @@ err_trans:
        make_bad_inode(&inode->v);
        iput(&inode->v);
 err:
-       bch2_quota_acct(c, bch_qid(&inode_u), Q_INO, -1, BCH_QUOTA_WARN);
+       bch2_quota_acct(c, bch_qid(&inode_u), Q_INO, -1, KEY_TYPE_QUOTA_WARN);
        inode = ERR_PTR(ret);
        goto out;
 }
@@ -1079,7 +1079,7 @@ static int bch2_fill_extent(struct fiemap_extent_info *info,
                }
 
                return 0;
-       } else if (k->k.type == BCH_RESERVATION) {
+       } else if (k->k.type == KEY_TYPE_reservation) {
                return fiemap_fill_next_extent(info,
                                               bkey_start_offset(&k->k) << 9,
                                               0, k->k.size << 9,
@@ -1112,7 +1112,7 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info,
        for_each_btree_key(&iter, c, BTREE_ID_EXTENTS,
                           POS(ei->v.i_ino, start >> 9), 0, k)
                if (bkey_extent_is_data(k.k) ||
-                   k.k->type == BCH_RESERVATION) {
+                   k.k->type == KEY_TYPE_reservation) {
                        if (bkey_cmp(bkey_start_pos(k.k),
                                     POS(ei->v.i_ino, (start + len) >> 9)) >= 0)
                                break;
@@ -1414,9 +1414,9 @@ static void bch2_evict_inode(struct inode *vinode)
 
        if (!inode->v.i_nlink && !is_bad_inode(&inode->v)) {
                bch2_quota_acct(c, inode->ei_qid, Q_SPC, -((s64) inode->v.i_blocks),
-                               BCH_QUOTA_WARN);
+                               KEY_TYPE_QUOTA_WARN);
                bch2_quota_acct(c, inode->ei_qid, Q_INO, -1,
-                               BCH_QUOTA_WARN);
+                               KEY_TYPE_QUOTA_WARN);
                bch2_inode_rm(c, inode->v.i_ino);
 
                WARN_ONCE(atomic_long_dec_return(&c->nr_inodes) < 0,
index 74b83201c21343dd3cc9826a2ef84105d82bd8b8..57ab8f088415484ab691ff0295218bed4c779368 100644 (file)
@@ -235,7 +235,6 @@ static int hash_check_duplicates(const struct bch_hash_desc desc,
                                !desc.cmp_bkey(k, k2), c,
                                "duplicate hash table keys:\n%s",
                                (bch2_bkey_val_to_text(&PBUF(buf), c,
-                                                      bkey_type(0, desc.btree_id),
                                                       k), buf))) {
                        ret = fsck_hash_delete_at(desc, &h->info, k_iter);
                        if (ret)
@@ -255,7 +254,7 @@ static bool key_has_correct_hash(const struct bch_hash_desc desc,
 {
        u64 hash;
 
-       if (k.k->type != desc.whiteout_type &&
+       if (k.k->type != KEY_TYPE_whiteout &&
            k.k->type != desc.key_type)
                return true;
 
@@ -280,7 +279,7 @@ static int hash_check_key(const struct bch_hash_desc desc,
        u64 hashed;
        int ret = 0;
 
-       if (k.k->type != desc.whiteout_type &&
+       if (k.k->type != KEY_TYPE_whiteout &&
            k.k->type != desc.key_type)
                return 0;
 
@@ -300,7 +299,6 @@ static int hash_check_key(const struct bch_hash_desc desc,
                        desc.btree_id, k.k->p.offset,
                        hashed, h->chain->pos.offset,
                        (bch2_bkey_val_to_text(&PBUF(buf), c,
-                                              bkey_type(0, desc.btree_id),
                                               k), buf))) {
                ret = hash_redo_key(desc, h, c, k_iter, k, hashed);
                if (ret) {
@@ -370,7 +368,7 @@ static int check_dirent_hash(struct hash_check *h, struct bch_fs *c,
 
                *k = bch2_btree_iter_peek(iter);
 
-               BUG_ON(k->k->type != BCH_DIRENT);
+               BUG_ON(k->k->type != KEY_TYPE_dirent);
        }
 err:
 fsck_err:
@@ -385,7 +383,6 @@ err_redo:
                     buf, strlen(buf), BTREE_ID_DIRENTS,
                     k->k->p.offset, hash, h->chain->pos.offset,
                     (bch2_bkey_val_to_text(&PBUF(buf), c,
-                                           bkey_type(0, BTREE_ID_DIRENTS),
                                            *k), buf))) {
                ret = hash_redo_key(bch2_dirent_hash_desc,
                                    h, c, iter, *k, hash);
@@ -471,7 +468,7 @@ static int check_extents(struct bch_fs *c)
 
                if (fsck_err_on(w.have_inode &&
                        !(w.inode.bi_flags & BCH_INODE_I_SIZE_DIRTY) &&
-                       k.k->type != BCH_RESERVATION &&
+                       k.k->type != KEY_TYPE_reservation &&
                        k.k->p.offset > round_up(w.inode.bi_size, PAGE_SIZE) >> 9, c,
                        "extent type %u offset %llu past end of inode %llu, i_size %llu",
                        k.k->type, k.k->p.offset, k.k->p.inode, w.inode.bi_size)) {
@@ -529,13 +526,11 @@ static int check_dirents(struct bch_fs *c)
                if (fsck_err_on(!w.have_inode, c,
                                "dirent in nonexisting directory:\n%s",
                                (bch2_bkey_val_to_text(&PBUF(buf), c,
-                                                      (enum bkey_type) BTREE_ID_DIRENTS,
                                                       k), buf)) ||
                    fsck_err_on(!S_ISDIR(w.inode.bi_mode), c,
                                "dirent in non directory inode type %u:\n%s",
                                mode_to_type(w.inode.bi_mode),
                                (bch2_bkey_val_to_text(&PBUF(buf), c,
-                                                      (enum bkey_type) BTREE_ID_DIRENTS,
                                                       k), buf))) {
                        ret = bch2_btree_delete_at(iter, 0);
                        if (ret)
@@ -557,7 +552,7 @@ static int check_dirents(struct bch_fs *c)
                if (ret)
                        goto fsck_err;
 
-               if (k.k->type != BCH_DIRENT)
+               if (k.k->type != KEY_TYPE_dirent)
                        continue;
 
                d = bkey_s_c_to_dirent(k);
@@ -586,7 +581,6 @@ static int check_dirents(struct bch_fs *c)
                if (fsck_err_on(d_inum == d.k->p.inode, c,
                                "dirent points to own directory:\n%s",
                                (bch2_bkey_val_to_text(&PBUF(buf), c,
-                                                      (enum bkey_type) BTREE_ID_DIRENTS,
                                                       k), buf))) {
                        ret = remove_dirent(c, iter, d);
                        if (ret)
@@ -604,7 +598,6 @@ static int check_dirents(struct bch_fs *c)
                if (fsck_err_on(!have_target, c,
                                "dirent points to missing inode:\n%s",
                                (bch2_bkey_val_to_text(&PBUF(buf), c,
-                                                      (enum bkey_type) BTREE_ID_DIRENTS,
                                                       k), buf))) {
                        ret = remove_dirent(c, iter, d);
                        if (ret)
@@ -618,7 +611,6 @@ static int check_dirents(struct bch_fs *c)
                                "incorrect d_type: should be %u:\n%s",
                                mode_to_type(target.bi_mode),
                                (bch2_bkey_val_to_text(&PBUF(buf), c,
-                                                      (enum bkey_type) BTREE_ID_DIRENTS,
                                                       k), buf))) {
                        struct bkey_i_dirent *n;
 
@@ -899,7 +891,7 @@ next:
 
                        e->offset = k.k->p.offset;
 
-                       if (k.k->type != BCH_DIRENT)
+                       if (k.k->type != KEY_TYPE_dirent)
                                continue;
 
                        dirent = bkey_s_c_to_dirent(k);
@@ -942,7 +934,7 @@ up:
        }
 
        for_each_btree_key(&iter, c, BTREE_ID_INODES, POS_MIN, 0, k) {
-               if (k.k->type != BCH_INODE_FS)
+               if (k.k->type != KEY_TYPE_inode)
                        continue;
 
                if (!S_ISDIR(le16_to_cpu(bkey_s_c_to_inode(k).v->bi_mode)))
@@ -1030,7 +1022,7 @@ static int bch2_gc_walk_dirents(struct bch_fs *c, nlink_table *links,
 
        for_each_btree_key(&iter, c, BTREE_ID_DIRENTS, POS_MIN, 0, k) {
                switch (k.k->type) {
-               case BCH_DIRENT:
+               case KEY_TYPE_dirent:
                        d = bkey_s_c_to_dirent(k);
                        d_inum = le64_to_cpu(d.v->d_inum);
 
@@ -1310,7 +1302,7 @@ peek_nlinks:      link = genradix_iter_peek(&nlinks_iter, links);
                if (iter.pos.inode < nlinks_pos || !link)
                        link = &zero_links;
 
-               if (k.k && k.k->type == BCH_INODE_FS) {
+               if (k.k && k.k->type == KEY_TYPE_inode) {
                        /*
                         * Avoid potential deadlocks with iter for
                         * truncate/rm/etc.:
@@ -1392,7 +1384,7 @@ static int check_inodes_fast(struct bch_fs *c)
        int ret = 0;
 
        for_each_btree_key(&iter, c, BTREE_ID_INODES, POS_MIN, 0, k) {
-               if (k.k->type != BCH_INODE_FS)
+               if (k.k->type != KEY_TYPE_inode)
                        continue;
 
                inode = bkey_s_c_to_inode(k);
index 0a350c6d0932d336a5d01fe35b22f65f6d6c677b..30f93fbe280d4cd9c3304d58f7c46cb15c578259 100644 (file)
@@ -178,76 +178,69 @@ int bch2_inode_unpack(struct bkey_s_c_inode inode,
 
 const char *bch2_inode_invalid(const struct bch_fs *c, struct bkey_s_c k)
 {
-       if (k.k->p.offset)
-               return "nonzero offset";
-
-       switch (k.k->type) {
-       case BCH_INODE_FS: {
                struct bkey_s_c_inode inode = bkey_s_c_to_inode(k);
                struct bch_inode_unpacked unpacked;
 
-               if (bkey_val_bytes(k.k) < sizeof(struct bch_inode))
-                       return "incorrect value size";
-
-               if (k.k->p.inode < BLOCKDEV_INODE_MAX)
-                       return "fs inode in blockdev range";
+       if (k.k->p.offset)
+               return "nonzero offset";
 
-               if (INODE_STR_HASH(inode.v) >= BCH_STR_HASH_NR)
-                       return "invalid str hash type";
+       if (bkey_val_bytes(k.k) < sizeof(struct bch_inode))
+               return "incorrect value size";
 
-               if (bch2_inode_unpack(inode, &unpacked))
-                       return "invalid variable length fields";
+       if (k.k->p.inode < BLOCKDEV_INODE_MAX)
+               return "fs inode in blockdev range";
 
-               if (unpacked.bi_data_checksum >= BCH_CSUM_OPT_NR + 1)
-                       return "invalid data checksum type";
+       if (INODE_STR_HASH(inode.v) >= BCH_STR_HASH_NR)
+               return "invalid str hash type";
 
-               if (unpacked.bi_compression >= BCH_COMPRESSION_OPT_NR + 1)
-                       return "invalid data checksum type";
+       if (bch2_inode_unpack(inode, &unpacked))
+               return "invalid variable length fields";
 
-               if ((unpacked.bi_flags & BCH_INODE_UNLINKED) &&
-                   unpacked.bi_nlink != 0)
-                       return "flagged as unlinked but bi_nlink != 0";
+       if (unpacked.bi_data_checksum >= BCH_CSUM_OPT_NR + 1)
+               return "invalid data checksum type";
 
-               return NULL;
-       }
-       case BCH_INODE_BLOCKDEV:
-               if (bkey_val_bytes(k.k) != sizeof(struct bch_inode_blockdev))
-                       return "incorrect value size";
+       if (unpacked.bi_compression >= BCH_COMPRESSION_OPT_NR + 1)
+               return "invalid data checksum type";
 
-               if (k.k->p.inode >= BLOCKDEV_INODE_MAX)
-                       return "blockdev inode in fs range";
+       if ((unpacked.bi_flags & BCH_INODE_UNLINKED) &&
+           unpacked.bi_nlink != 0)
+               return "flagged as unlinked but bi_nlink != 0";
 
-               return NULL;
-       case BCH_INODE_GENERATION:
-               if (bkey_val_bytes(k.k) != sizeof(struct bch_inode_generation))
-                       return "incorrect value size";
-
-               return NULL;
-       default:
-               return "invalid type";
-       }
+       return NULL;
 }
 
 void bch2_inode_to_text(struct printbuf *out, struct bch_fs *c,
                       struct bkey_s_c k)
 {
-       struct bkey_s_c_inode inode;
+       struct bkey_s_c_inode inode = bkey_s_c_to_inode(k);
        struct bch_inode_unpacked unpacked;
 
-       switch (k.k->type) {
-       case BCH_INODE_FS:
-               inode = bkey_s_c_to_inode(k);
-               if (bch2_inode_unpack(inode, &unpacked)) {
-                       pr_buf(out, "(unpack error)");
-                       break;
-               }
+       if (bch2_inode_unpack(inode, &unpacked)) {
+               pr_buf(out, "(unpack error)");
+               return;
+       }
 
 #define BCH_INODE_FIELD(_name, _bits)                                          \
-               pr_buf(out, #_name ": %llu ", (u64) unpacked._name);
-               BCH_INODE_FIELDS()
+       pr_buf(out, #_name ": %llu ", (u64) unpacked._name);
+       BCH_INODE_FIELDS()
 #undef  BCH_INODE_FIELD
-               break;
-       }
+}
+
+const char *bch2_inode_generation_invalid(const struct bch_fs *c,
+                                         struct bkey_s_c k)
+{
+       if (k.k->p.offset)
+               return "nonzero offset";
+
+       if (bkey_val_bytes(k.k) != sizeof(struct bch_inode_generation))
+               return "incorrect value size";
+
+       return NULL;
+}
+
+void bch2_inode_generation_to_text(struct printbuf *out, struct bch_fs *c,
+                                  struct bkey_s_c k)
+{
 }
 
 void bch2_inode_init(struct bch_fs *c, struct bch_inode_unpacked *inode_u,
@@ -281,10 +274,9 @@ void bch2_inode_init(struct bch_fs *c, struct bch_inode_unpacked *inode_u,
 static inline u32 bkey_generation(struct bkey_s_c k)
 {
        switch (k.k->type) {
-       case BCH_INODE_BLOCKDEV:
-       case BCH_INODE_FS:
+       case KEY_TYPE_inode:
                BUG();
-       case BCH_INODE_GENERATION:
+       case KEY_TYPE_inode_generation:
                return le32_to_cpu(bkey_s_c_to_inode_generation(k).v->bi_generation);
        default:
                return 0;
@@ -330,8 +322,7 @@ again:
                        return ret;
 
                switch (k.k->type) {
-               case BCH_INODE_BLOCKDEV:
-               case BCH_INODE_FS:
+               case KEY_TYPE_inode:
                        /* slot used */
                        if (iter->pos.inode >= max)
                                goto out;
@@ -405,19 +396,19 @@ int bch2_inode_rm(struct bch_fs *c, u64 inode_nr)
                        return ret;
                }
 
-               bch2_fs_inconsistent_on(k.k->type != BCH_INODE_FS, c,
+               bch2_fs_inconsistent_on(k.k->type != KEY_TYPE_inode, c,
                                        "inode %llu not found when deleting",
                                        inode_nr);
 
                switch (k.k->type) {
-               case BCH_INODE_FS: {
+               case KEY_TYPE_inode: {
                        struct bch_inode_unpacked inode_u;
 
                        if (!bch2_inode_unpack(bkey_s_c_to_inode(k), &inode_u))
                                bi_generation = inode_u.bi_generation + 1;
                        break;
                }
-               case BCH_INODE_GENERATION: {
+               case KEY_TYPE_inode_generation: {
                        struct bkey_s_c_inode_generation g =
                                bkey_s_c_to_inode_generation(k);
                        bi_generation = le32_to_cpu(g.v->bi_generation);
@@ -455,7 +446,7 @@ int bch2_inode_find_by_inum(struct bch_fs *c, u64 inode_nr,
                           POS(inode_nr, 0),
                           BTREE_ITER_SLOTS, k) {
                switch (k.k->type) {
-               case BCH_INODE_FS:
+               case KEY_TYPE_inode:
                        ret = bch2_inode_unpack(bkey_s_c_to_inode(k), inode);
                        break;
                default:
@@ -464,7 +455,6 @@ int bch2_inode_find_by_inum(struct bch_fs *c, u64 inode_nr,
                }
 
                break;
-
        }
 
        return bch2_btree_iter_unlock(&iter) ?: ret;
index 897ff65d01cbf1607cd960132e04814c5ce54ba6..0bc852e693550ed587f8b86650bc6fb8a833eb53 100644 (file)
@@ -9,11 +9,21 @@
 const char *bch2_inode_invalid(const struct bch_fs *, struct bkey_s_c);
 void bch2_inode_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
 
-#define bch2_bkey_inode_ops (struct bkey_ops) {                \
+#define bch2_bkey_ops_inode (struct bkey_ops) {                \
        .key_invalid    = bch2_inode_invalid,           \
        .val_to_text    = bch2_inode_to_text,           \
 }
 
+const char *bch2_inode_generation_invalid(const struct bch_fs *,
+                                         struct bkey_s_c);
+void bch2_inode_generation_to_text(struct printbuf *, struct bch_fs *,
+                                  struct bkey_s_c);
+
+#define bch2_bkey_ops_inode_generation (struct bkey_ops) {     \
+       .key_invalid    = bch2_inode_generation_invalid,        \
+       .val_to_text    = bch2_inode_generation_to_text,        \
+}
+
 struct bch_inode_unpacked {
        u64                     bi_inum;
        __le64                  bi_hash_seed;
index 2fee2f2efd386e705bb983c992c76548ad55a3b6..3e990709fedb610292bebe67be3a6b0da1a1c299 100644 (file)
@@ -213,20 +213,20 @@ void bch2_submit_wbio_replicas(struct bch_write_bio *wbio, struct bch_fs *c,
                               enum bch_data_type type,
                               const struct bkey_i *k)
 {
-       struct bkey_s_c_extent e = bkey_i_to_s_c_extent(k);
+       struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(bkey_i_to_s_c(k));
        const struct bch_extent_ptr *ptr;
        struct bch_write_bio *n;
        struct bch_dev *ca;
 
        BUG_ON(c->opts.nochanges);
 
-       extent_for_each_ptr(e, ptr) {
+       bkey_for_each_ptr(ptrs, ptr) {
                BUG_ON(ptr->dev >= BCH_SB_MEMBERS_MAX ||
                       !c->devs[ptr->dev]);
 
                ca = bch_dev_bkey_exists(c, ptr->dev);
 
-               if (ptr + 1 < &extent_entry_last(e)->ptr) {
+               if (to_entry(ptr + 1) < ptrs.end) {
                        n = to_wbio(bio_alloc_clone(NULL, &wbio->bio,
                                                GFP_NOIO, &ca->replica_set));
 
@@ -317,7 +317,6 @@ static void __bch2_write_index(struct bch_write_op *op)
 {
        struct bch_fs *c = op->c;
        struct keylist *keys = &op->insert_keys;
-       struct bkey_s_extent e;
        struct bch_extent_ptr *ptr;
        struct bkey_i *src, *dst = keys->keys, *n, *k;
        unsigned dev;
@@ -327,12 +326,10 @@ static void __bch2_write_index(struct bch_write_op *op)
                n = bkey_next(src);
                bkey_copy(dst, src);
 
-               e = bkey_i_to_s_extent(dst);
-
-               bch2_extent_drop_ptrs(e, ptr,
+               bch2_bkey_drop_ptrs(bkey_i_to_s(dst), ptr,
                        test_bit(ptr->dev, op->failed.d));
 
-               if (!bch2_extent_nr_ptrs(e.c)) {
+               if (!bch2_bkey_nr_ptrs(bkey_i_to_s_c(dst))) {
                        ret = -EIO;
                        goto err;
                }
@@ -433,14 +430,13 @@ static void init_append_extent(struct bch_write_op *op,
        e->k.p          = op->pos;
        e->k.size       = crc.uncompressed_size;
        e->k.version    = version;
-       bkey_extent_set_cached(&e->k, op->flags & BCH_WRITE_CACHED);
 
        if (crc.csum_type ||
            crc.compression_type ||
            crc.nonce)
                bch2_extent_crc_append(e, crc);
 
-       bch2_alloc_sectors_append_ptrs(op->c, wp, e, crc.compressed_size);
+       bch2_alloc_sectors_append_ptrs(op->c, wp, &e->k_i, crc.compressed_size);
 
        bch2_keylist_push(&op->insert_keys);
 }
@@ -1608,7 +1604,7 @@ int __bch2_read_extent(struct bch_fs *c, struct bch_read_bio *orig,
        struct bpos pos = bkey_start_pos(k.k);
        int pick_ret;
 
-       pick_ret = bch2_extent_pick_ptr(c, k, failed, &pick);
+       pick_ret = bch2_bkey_pick_read_device(c, k, failed, &pick);
 
        /* hole or reservation - just zero fill: */
        if (!pick_ret)
index a74566764630b95ad94193ee9232827b388d61c7..f3bb28f32c6e56f0576f166ab7a7bfa36aab8f32 100644 (file)
@@ -141,11 +141,12 @@ static void journal_entry_null_range(void *start, void *end)
 
 static int journal_validate_key(struct bch_fs *c, struct jset *jset,
                                struct jset_entry *entry,
-                               struct bkey_i *k, enum bkey_type key_type,
+                               struct bkey_i *k, enum btree_node_type key_type,
                                const char *type, int write)
 {
        void *next = vstruct_next(entry);
        const char *invalid;
+       unsigned version = le32_to_cpu(jset->version);
        int ret = 0;
 
        if (journal_entry_err_on(!k->k.u64s, c,
@@ -174,14 +175,17 @@ static int journal_validate_key(struct bch_fs *c, struct jset *jset,
        }
 
        if (JSET_BIG_ENDIAN(jset) != CPU_BIG_ENDIAN)
-               bch2_bkey_swab(key_type, NULL, bkey_to_packed(k));
+               bch2_bkey_swab(NULL, bkey_to_packed(k));
 
-       invalid = bch2_bkey_invalid(c, key_type, bkey_i_to_s_c(k));
+       if (!write &&
+           version < bcachefs_metadata_version_bkey_renumber)
+               bch2_bkey_renumber(key_type, bkey_to_packed(k), write);
+
+       invalid = bch2_bkey_invalid(c, bkey_i_to_s_c(k), key_type);
        if (invalid) {
                char buf[160];
 
-               bch2_bkey_val_to_text(&PBUF(buf), c, key_type,
-                                     bkey_i_to_s_c(k));
+               bch2_bkey_val_to_text(&PBUF(buf), c, bkey_i_to_s_c(k));
                mustfix_fsck_err(c, "invalid %s in journal: %s\n%s",
                                 type, invalid, buf);
 
@@ -190,6 +194,10 @@ static int journal_validate_key(struct bch_fs *c, struct jset *jset,
                journal_entry_null_range(vstruct_next(entry), next);
                return 0;
        }
+
+       if (write &&
+           version < bcachefs_metadata_version_bkey_renumber)
+               bch2_bkey_renumber(key_type, bkey_to_packed(k), write);
 fsck_err:
        return ret;
 }
@@ -203,8 +211,8 @@ static int journal_entry_validate_btree_keys(struct bch_fs *c,
 
        vstruct_for_each(entry, k) {
                int ret = journal_validate_key(c, jset, entry, k,
-                               bkey_type(entry->level,
-                                         entry->btree_id),
+                               __btree_node_type(entry->level,
+                                                 entry->btree_id),
                                "key", write);
                if (ret)
                        return ret;
@@ -351,14 +359,17 @@ static int jset_validate(struct bch_fs *c,
 {
        size_t bytes = vstruct_bytes(jset);
        struct bch_csum csum;
+       unsigned version;
        int ret = 0;
 
        if (le64_to_cpu(jset->magic) != jset_magic(c))
                return JOURNAL_ENTRY_NONE;
 
-       if (le32_to_cpu(jset->version) != BCACHE_JSET_VERSION) {
-               bch_err(c, "unknown journal entry version %u",
-                       le32_to_cpu(jset->version));
+       version = le32_to_cpu(jset->version);
+       if ((version != BCH_JSET_VERSION_OLD &&
+            version < bcachefs_metadata_version_min) ||
+           version >= bcachefs_metadata_version_max) {
+               bch_err(c, "unknown journal entry version %u", version);
                return BCH_FSCK_UNKNOWN_VERSION;
        }
 
@@ -929,7 +940,6 @@ static void __journal_write_alloc(struct journal *j,
                                  unsigned replicas_want)
 {
        struct bch_fs *c = container_of(j, struct bch_fs, journal);
-       struct bkey_i_extent *e = bkey_i_to_extent(&w->key);
        struct journal_device *ja;
        struct bch_dev *ca;
        unsigned i;
@@ -951,13 +961,14 @@ static void __journal_write_alloc(struct journal *j,
                if (!ca->mi.durability ||
                    ca->mi.state != BCH_MEMBER_STATE_RW ||
                    !ja->nr ||
-                   bch2_extent_has_device(extent_i_to_s_c(e), ca->dev_idx) ||
+                   bch2_bkey_has_device(bkey_i_to_s_c(&w->key),
+                                        ca->dev_idx) ||
                    sectors > ja->sectors_free)
                        continue;
 
                bch2_dev_stripe_increment(c, ca, &j->wp.stripe);
 
-               extent_ptr_append(e,
+               bch2_bkey_append_ptr(&w->key,
                        (struct bch_extent_ptr) {
                                  .offset = bucket_to_sector(ca,
                                        ja->buckets[ja->cur_idx]) +
@@ -1096,7 +1107,7 @@ static void journal_write_done(struct closure *cl)
        struct bch_fs *c = container_of(j, struct bch_fs, journal);
        struct journal_buf *w = journal_prev_buf(j);
        struct bch_devs_list devs =
-               bch2_extent_devs(bkey_i_to_s_c_extent(&w->key));
+               bch2_bkey_devs(bkey_i_to_s_c(&w->key));
        u64 seq = le64_to_cpu(w->data->seq);
        u64 last_seq = le64_to_cpu(w->data->last_seq);
 
@@ -1158,7 +1169,7 @@ static void journal_write_endio(struct bio *bio)
                unsigned long flags;
 
                spin_lock_irqsave(&j->err_lock, flags);
-               bch2_extent_drop_device(bkey_i_to_s_extent(&w->key), ca->dev_idx);
+               bch2_bkey_drop_device(bkey_i_to_s(&w->key), ca->dev_idx);
                spin_unlock_irqrestore(&j->err_lock, flags);
        }
 
@@ -1175,6 +1186,7 @@ void bch2_journal_write(struct closure *cl)
        struct jset *jset;
        struct bio *bio;
        struct bch_extent_ptr *ptr;
+       bool validate_before_checksum = false;
        unsigned i, sectors, bytes;
 
        journal_buf_realloc(j, w);
@@ -1196,12 +1208,22 @@ void bch2_journal_write(struct closure *cl)
        jset->read_clock        = cpu_to_le16(c->bucket_clock[READ].hand);
        jset->write_clock       = cpu_to_le16(c->bucket_clock[WRITE].hand);
        jset->magic             = cpu_to_le64(jset_magic(c));
-       jset->version           = cpu_to_le32(BCACHE_JSET_VERSION);
+
+       jset->version           = c->sb.version < bcachefs_metadata_version_new_versioning
+               ? cpu_to_le32(BCH_JSET_VERSION_OLD)
+               : cpu_to_le32(c->sb.version);
 
        SET_JSET_BIG_ENDIAN(jset, CPU_BIG_ENDIAN);
        SET_JSET_CSUM_TYPE(jset, bch2_meta_checksum_type(c));
 
-       if (bch2_csum_type_is_encryption(JSET_CSUM_TYPE(jset)) &&
+       if (bch2_csum_type_is_encryption(JSET_CSUM_TYPE(jset)))
+               validate_before_checksum = true;
+
+       if (le32_to_cpu(jset->version) <
+           bcachefs_metadata_version_bkey_renumber)
+               validate_before_checksum = true;
+
+       if (validate_before_checksum &&
            jset_validate_entries(c, jset, WRITE))
                goto err;
 
@@ -1212,7 +1234,7 @@ void bch2_journal_write(struct closure *cl)
        jset->csum = csum_vstruct(c, JSET_CSUM_TYPE(jset),
                                  journal_nonce(jset), jset);
 
-       if (!bch2_csum_type_is_encryption(JSET_CSUM_TYPE(jset)) &&
+       if (!validate_before_checksum &&
            jset_validate_entries(c, jset, WRITE))
                goto err;
 
index 46878590327d5aeecfea5fdea3fec723230639b9..63fe8cbb0564e902e8bfce9d2d82adc6bc22051d 100644 (file)
@@ -15,7 +15,7 @@
 #include "replicas.h"
 #include "super-io.h"
 
-static int drop_dev_ptrs(struct bch_fs *c, struct bkey_s_extent e,
+static int drop_dev_ptrs(struct bch_fs *c, struct bkey_s k,
                         unsigned dev_idx, int flags, bool metadata)
 {
        unsigned replicas = metadata ? c->opts.metadata_replicas : c->opts.data_replicas;
@@ -23,9 +23,9 @@ static int drop_dev_ptrs(struct bch_fs *c, struct bkey_s_extent e,
        unsigned degraded = metadata ? BCH_FORCE_IF_METADATA_DEGRADED : BCH_FORCE_IF_DATA_DEGRADED;
        unsigned nr_good;
 
-       bch2_extent_drop_device(e, dev_idx);
+       bch2_bkey_drop_device(k, dev_idx);
 
-       nr_good = bch2_extent_durability(c, e.c);
+       nr_good = bch2_bkey_durability(c, k.s_c);
        if ((!nr_good && !(flags & lost)) ||
            (nr_good < replicas && !(flags & degraded)))
                return -EINVAL;
@@ -36,7 +36,6 @@ static int drop_dev_ptrs(struct bch_fs *c, struct bkey_s_extent e,
 static int bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags)
 {
        struct bkey_s_c k;
-       struct bkey_s_extent e;
        BKEY_PADDED(key) tmp;
        struct btree_iter iter;
        int ret = 0;
@@ -51,7 +50,7 @@ static int bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags)
               !(ret = btree_iter_err(k))) {
                if (!bkey_extent_is_data(k.k) ||
                    !bch2_extent_has_device(bkey_s_c_to_extent(k), dev_idx)) {
-                       ret = bch2_mark_bkey_replicas(c, BKEY_TYPE_EXTENTS, k);
+                       ret = bch2_mark_bkey_replicas(c, k);
                        if (ret)
                                break;
                        bch2_btree_iter_next(&iter);
@@ -59,18 +58,18 @@ static int bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags)
                }
 
                bkey_reassemble(&tmp.key, k);
-               e = bkey_i_to_s_extent(&tmp.key);
 
-               ret = drop_dev_ptrs(c, e, dev_idx, flags, false);
+               ret = drop_dev_ptrs(c, bkey_i_to_s(&tmp.key),
+                                   dev_idx, flags, false);
                if (ret)
                        break;
 
                /*
                 * If the new extent no longer has any pointers, bch2_extent_normalize()
                 * will do the appropriate thing with it (turning it into a
-                * KEY_TYPE_ERROR key, or just a discard if it was a cached extent)
+                * KEY_TYPE_error key, or just a discard if it was a cached extent)
                 */
-               bch2_extent_normalize(c, e.s);
+               bch2_extent_normalize(c, bkey_i_to_s(&tmp.key));
 
                iter.pos = bkey_start_pos(&tmp.key.k);
 
@@ -118,10 +117,10 @@ static int bch2_dev_metadata_drop(struct bch_fs *c, unsigned dev_idx, int flags)
        for (id = 0; id < BTREE_ID_NR; id++) {
                for_each_btree_node(&iter, c, id, POS_MIN, BTREE_ITER_PREFETCH, b) {
                        __BKEY_PADDED(k, BKEY_BTREE_PTR_VAL_U64s_MAX) tmp;
-                       struct bkey_i_extent *new_key;
+                       struct bkey_i_btree_ptr *new_key;
 retry:
-                       if (!bch2_extent_has_device(bkey_i_to_s_c_extent(&b->key),
-                                                   dev_idx)) {
+                       if (!bch2_bkey_has_device(bkey_i_to_s_c(&b->key),
+                                                 dev_idx)) {
                                /*
                                 * we might have found a btree node key we
                                 * needed to update, and then tried to update it
@@ -130,15 +129,14 @@ retry:
                                 */
                                bch2_btree_iter_downgrade(&iter);
 
-                               ret = bch2_mark_bkey_replicas(c, BKEY_TYPE_BTREE,
-                                                             bkey_i_to_s_c(&b->key));
+                               ret = bch2_mark_bkey_replicas(c, bkey_i_to_s_c(&b->key));
                                if (ret)
                                        goto err;
                        } else {
                                bkey_copy(&tmp.k, &b->key);
-                               new_key = bkey_i_to_extent(&tmp.k);
+                               new_key = bkey_i_to_btree_ptr(&tmp.k);
 
-                               ret = drop_dev_ptrs(c, extent_i_to_s(new_key),
+                               ret = drop_dev_ptrs(c, bkey_i_to_s(&new_key->k_i),
                                                    dev_idx, flags, true);
                                if (ret)
                                        goto err;
index 7de3c6c475beb5e7551ed2f77755155a23ed7936..aff611c908ef124c0e77d396969fe417c7031ca0 100644 (file)
@@ -100,8 +100,8 @@ static int bch2_migrate_index_update(struct bch_write_op *op)
                bch2_cut_back(insert->k.p, &new->k);
 
                if (m->data_cmd == DATA_REWRITE)
-                       bch2_extent_drop_device(extent_i_to_s(insert),
-                                               m->data_opts.rewrite_dev);
+                       bch2_bkey_drop_device(extent_i_to_s(insert).s,
+                                             m->data_opts.rewrite_dev);
 
                extent_for_each_ptr_decode(extent_i_to_s(new), p, entry) {
                        if (bch2_extent_has_device(extent_i_to_s_c(insert), p.ptr.dev)) {
@@ -132,8 +132,8 @@ static int bch2_migrate_index_update(struct bch_write_op *op)
                 * has fewer replicas than when we last looked at it - meaning
                 * we need to get a disk reservation here:
                 */
-               nr = bch2_extent_nr_dirty_ptrs(bkey_i_to_s_c(&insert->k_i)) -
-                       (bch2_extent_nr_dirty_ptrs(k) + m->nr_ptrs_reserved);
+               nr = bch2_bkey_nr_dirty_ptrs(bkey_i_to_s_c(&insert->k_i)) -
+                       (bch2_bkey_nr_dirty_ptrs(k) + m->nr_ptrs_reserved);
                if (nr > 0) {
                        /*
                         * can't call bch2_disk_reservation_add() with btree
@@ -243,7 +243,7 @@ int bch2_migrate_write_init(struct bch_fs *c, struct migrate_write *m,
        switch (data_cmd) {
        case DATA_ADD_REPLICAS: {
                int nr = (int) io_opts.data_replicas -
-                       bch2_extent_nr_dirty_ptrs(k);
+                       bch2_bkey_nr_dirty_ptrs(k);
 
                if (nr > 0) {
                        m->op.nr_replicas = m->nr_ptrs_reserved = nr;
@@ -477,7 +477,6 @@ int bch2_move_data(struct bch_fs *c,
        struct bch_io_opts io_opts = bch2_opts_to_inode_opts(c->opts);
        BKEY_PADDED(k) tmp;
        struct bkey_s_c k;
-       struct bkey_s_c_extent e;
        struct data_opts data_opts;
        enum data_cmd data_cmd;
        u64 delay, cur_inum = U64_MAX;
@@ -530,8 +529,6 @@ peek:
                if (!bkey_extent_is_data(k.k))
                        goto next_nondata;
 
-               e = bkey_s_c_to_extent(k);
-
                if (cur_inum != k.k->p.inode) {
                        struct bch_inode_unpacked inode;
 
@@ -545,8 +542,7 @@ peek:
                        goto peek;
                }
 
-               switch ((data_cmd = pred(c, arg, BKEY_TYPE_EXTENTS, e,
-                                        &io_opts, &data_opts))) {
+               switch ((data_cmd = pred(c, arg, k, &io_opts, &data_opts))) {
                case DATA_SKIP:
                        goto next;
                case DATA_SCRUB:
@@ -581,7 +577,7 @@ peek:
                if (rate)
                        bch2_ratelimit_increment(rate, k.k->size);
 next:
-               atomic64_add(k.k->size * bch2_extent_nr_dirty_ptrs(k),
+               atomic64_add(k.k->size * bch2_bkey_nr_dirty_ptrs(k),
                             &stats->sectors_seen);
 next_nondata:
                bch2_btree_iter_next(&stats->iter);
@@ -613,7 +609,7 @@ static int bch2_gc_data_replicas(struct bch_fs *c)
 
        for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, POS_MIN,
                           BTREE_ITER_PREFETCH, k) {
-               ret = bch2_mark_bkey_replicas(c, BKEY_TYPE_EXTENTS, k);
+               ret = bch2_mark_bkey_replicas(c, k);
                if (ret)
                        break;
        }
@@ -637,8 +633,7 @@ static int bch2_gc_btree_replicas(struct bch_fs *c)
 
        for (id = 0; id < BTREE_ID_NR; id++) {
                for_each_btree_node(&iter, c, id, POS_MIN, BTREE_ITER_PREFETCH, b) {
-                       ret = bch2_mark_bkey_replicas(c, BKEY_TYPE_BTREE,
-                                                     bkey_i_to_s_c(&b->key));
+                       ret = bch2_mark_bkey_replicas(c, bkey_i_to_s_c(&b->key));
 
                        bch2_btree_iter_cond_resched(&iter);
                }
@@ -668,10 +663,9 @@ static int bch2_move_btree(struct bch_fs *c,
 
        for (id = 0; id < BTREE_ID_NR; id++) {
                for_each_btree_node(&stats->iter, c, id, POS_MIN, BTREE_ITER_PREFETCH, b) {
-                       switch ((cmd = pred(c, arg, BKEY_TYPE_BTREE,
-                                           bkey_i_to_s_c_extent(&b->key),
-                                           &io_opts,
-                                           &data_opts))) {
+                       switch ((cmd = pred(c, arg,
+                                           bkey_i_to_s_c(&b->key),
+                                           &io_opts, &data_opts))) {
                        case DATA_SKIP:
                                goto next;
                        case DATA_SCRUB:
@@ -697,8 +691,7 @@ next:
 
 #if 0
 static enum data_cmd scrub_pred(struct bch_fs *c, void *arg,
-                               enum bkey_type type,
-                               struct bkey_s_c_extent e,
+                               struct bkey_s_c k,
                                struct bch_io_opts *io_opts,
                                struct data_opts *data_opts)
 {
@@ -707,33 +700,38 @@ static enum data_cmd scrub_pred(struct bch_fs *c, void *arg,
 #endif
 
 static enum data_cmd rereplicate_pred(struct bch_fs *c, void *arg,
-                                     enum bkey_type type,
-                                     struct bkey_s_c_extent e,
+                                     struct bkey_s_c k,
                                      struct bch_io_opts *io_opts,
                                      struct data_opts *data_opts)
 {
-       unsigned nr_good = bch2_extent_durability(c, e);
-       unsigned replicas = type == BKEY_TYPE_BTREE
-               ? c->opts.metadata_replicas
-               : io_opts->data_replicas;
+       unsigned nr_good = bch2_bkey_durability(c, k);
+       unsigned replicas = 0;
+
+       switch (k.k->type) {
+       case KEY_TYPE_btree_ptr:
+               replicas = c->opts.metadata_replicas;
+               break;
+       case KEY_TYPE_extent:
+               replicas = io_opts->data_replicas;
+               break;
+       }
 
        if (!nr_good || nr_good >= replicas)
                return DATA_SKIP;
 
        data_opts->target               = 0;
-       data_opts->btree_insert_flags = 0;
+       data_opts->btree_insert_flags   = 0;
        return DATA_ADD_REPLICAS;
 }
 
 static enum data_cmd migrate_pred(struct bch_fs *c, void *arg,
-                                 enum bkey_type type,
-                                 struct bkey_s_c_extent e,
+                                 struct bkey_s_c k,
                                  struct bch_io_opts *io_opts,
                                  struct data_opts *data_opts)
 {
        struct bch_ioctl_data *op = arg;
 
-       if (!bch2_extent_has_device(e, op->migrate.dev))
+       if (!bch2_bkey_has_device(k, op->migrate.dev))
                return DATA_SKIP;
 
        data_opts->target               = 0;
index 3f7e31cc8f6e3b8f485090ad08c04ab7d1454bbb..71b3d2b2ddb6ddbcc1ef744a5e00676578563336 100644 (file)
@@ -47,7 +47,7 @@ int bch2_migrate_write_init(struct bch_fs *, struct migrate_write *,
                            struct bkey_s_c);
 
 typedef enum data_cmd (*move_pred_fn)(struct bch_fs *, void *,
-                               enum bkey_type, struct bkey_s_c_extent,
+                               struct bkey_s_c,
                                struct bch_io_opts *, struct data_opts *);
 
 int bch2_move_data(struct bch_fs *, struct bch_ratelimit *,
index 80577661e0085c2241780e2869acd92f5d18a18a..4bf4cc33dbb1e55c506e000df06954276842cb26 100644 (file)
@@ -66,36 +66,42 @@ static int bucket_offset_cmp(const void *_l, const void *_r, size_t size)
 }
 
 static bool __copygc_pred(struct bch_dev *ca,
-                         struct bkey_s_c_extent e)
+                         struct bkey_s_c k)
 {
        copygc_heap *h = &ca->copygc_heap;
-       const struct bch_extent_ptr *ptr =
-               bch2_extent_has_device(e, ca->dev_idx);
 
-       if (ptr) {
-               struct copygc_heap_entry search = { .offset = ptr->offset };
+       switch (k.k->type) {
+       case KEY_TYPE_extent: {
+               struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
+               const struct bch_extent_ptr *ptr =
+                       bch2_extent_has_device(e, ca->dev_idx);
 
-               ssize_t i = eytzinger0_find_le(h->data, h->used,
-                                              sizeof(h->data[0]),
-                                              bucket_offset_cmp, &search);
+               if (ptr) {
+                       struct copygc_heap_entry search = { .offset = ptr->offset };
 
-               return (i >= 0 &&
-                       ptr->offset < h->data[i].offset + ca->mi.bucket_size &&
-                       ptr->gen == h->data[i].gen);
+                       ssize_t i = eytzinger0_find_le(h->data, h->used,
+                                                      sizeof(h->data[0]),
+                                                      bucket_offset_cmp, &search);
+
+                       return (i >= 0 &&
+                               ptr->offset < h->data[i].offset + ca->mi.bucket_size &&
+                               ptr->gen == h->data[i].gen);
+               }
+               break;
+       }
        }
 
        return false;
 }
 
 static enum data_cmd copygc_pred(struct bch_fs *c, void *arg,
-                                enum bkey_type type,
-                                struct bkey_s_c_extent e,
+                                struct bkey_s_c k,
                                 struct bch_io_opts *io_opts,
                                 struct data_opts *data_opts)
 {
        struct bch_dev *ca = arg;
 
-       if (!__copygc_pred(ca, e))
+       if (!__copygc_pred(ca, k))
                return DATA_SKIP;
 
        data_opts->target               = dev_to_target(ca->dev_idx);
index 80869e34e3b6d8aacb5359f763d6759a904fa5ea..acdc952c48becfd18f67fc563a682d4408fd11b9 100644 (file)
@@ -184,6 +184,9 @@ enum opt_type {
                OPT_BOOL(),                                             \
                NO_SB_OPT,                      false)                  \
        BCH_OPT(no_data_io,             u8,     OPT_MOUNT,              \
+               OPT_BOOL(),                                             \
+               NO_SB_OPT,                      false)                  \
+       BCH_OPT(version_upgrade,        u8,     OPT_MOUNT,              \
                OPT_BOOL(),                                             \
                NO_SB_OPT,                      false)
 
index cc20742d542b973e824828ca92e998280a01662a..7c38daac1cac2dde60ad20666b29cf340a3a2922 100644 (file)
@@ -22,23 +22,13 @@ const struct bch_sb_field_ops bch_sb_field_ops_quota = {
 
 const char *bch2_quota_invalid(const struct bch_fs *c, struct bkey_s_c k)
 {
-       struct bkey_s_c_quota dq;
-
        if (k.k->p.inode >= QTYP_NR)
                return "invalid quota type";
 
-       switch (k.k->type) {
-       case BCH_QUOTA: {
-               dq = bkey_s_c_to_quota(k);
+       if (bkey_val_bytes(k.k) != sizeof(struct bch_quota))
+               return "incorrect value size";
 
-               if (bkey_val_bytes(k.k) != sizeof(struct bch_quota))
-                       return "incorrect value size";
-
-               return NULL;
-       }
-       default:
-               return "invalid type";
-       }
+       return NULL;
 }
 
 static const char * const bch2_quota_counters[] = {
@@ -49,20 +39,14 @@ static const char * const bch2_quota_counters[] = {
 void bch2_quota_to_text(struct printbuf *out, struct bch_fs *c,
                        struct bkey_s_c k)
 {
-       struct bkey_s_c_quota dq;
+       struct bkey_s_c_quota dq = bkey_s_c_to_quota(k);
        unsigned i;
 
-       switch (k.k->type) {
-       case BCH_QUOTA:
-               dq = bkey_s_c_to_quota(k);
-
-               for (i = 0; i < Q_COUNTERS; i++)
-                       pr_buf(out, "%s hardlimit %llu softlimit %llu",
-                              bch2_quota_counters[i],
-                              le64_to_cpu(dq.v->c[i].hardlimit),
-                              le64_to_cpu(dq.v->c[i].softlimit));
-               break;
-       }
+       for (i = 0; i < Q_COUNTERS; i++)
+               pr_buf(out, "%s hardlimit %llu softlimit %llu",
+                      bch2_quota_counters[i],
+                      le64_to_cpu(dq.v->c[i].hardlimit),
+                      le64_to_cpu(dq.v->c[i].softlimit));
 }
 
 #ifdef CONFIG_BCACHEFS_QUOTA
@@ -178,7 +162,7 @@ static int bch2_quota_check_limit(struct bch_fs *c,
 
        BUG_ON((s64) n < 0);
 
-       if (mode == BCH_QUOTA_NOCHECK)
+       if (mode == KEY_TYPE_QUOTA_NOCHECK)
                return 0;
 
        if (v <= 0) {
@@ -201,7 +185,7 @@ static int bch2_quota_check_limit(struct bch_fs *c,
        if (qc->hardlimit &&
            qc->hardlimit < n &&
            !ignore_hardlimit(q)) {
-               if (mode == BCH_QUOTA_PREALLOC)
+               if (mode == KEY_TYPE_QUOTA_PREALLOC)
                        return -EDQUOT;
 
                prepare_warning(qc, qtype, counter, msgs, HARDWARN);
@@ -212,7 +196,7 @@ static int bch2_quota_check_limit(struct bch_fs *c,
            qc->timer &&
            ktime_get_real_seconds() >= qc->timer &&
            !ignore_hardlimit(q)) {
-               if (mode == BCH_QUOTA_PREALLOC)
+               if (mode == KEY_TYPE_QUOTA_PREALLOC)
                        return -EDQUOT;
 
                prepare_warning(qc, qtype, counter, msgs, SOFTLONGWARN);
@@ -221,7 +205,7 @@ static int bch2_quota_check_limit(struct bch_fs *c,
        if (qc->softlimit &&
            qc->softlimit < n &&
            qc->timer == 0) {
-               if (mode == BCH_QUOTA_PREALLOC)
+               if (mode == KEY_TYPE_QUOTA_PREALLOC)
                        return -EDQUOT;
 
                prepare_warning(qc, qtype, counter, msgs, SOFTWARN);
@@ -312,13 +296,13 @@ int bch2_quota_transfer(struct bch_fs *c, unsigned qtypes,
 
                ret = bch2_quota_check_limit(c, i, dst_q[i], &msgs, Q_SPC,
                                             dst_q[i]->c[Q_SPC].v + space,
-                                            BCH_QUOTA_PREALLOC);
+                                            KEY_TYPE_QUOTA_PREALLOC);
                if (ret)
                        goto err;
 
                ret = bch2_quota_check_limit(c, i, dst_q[i], &msgs, Q_INO,
                                             dst_q[i]->c[Q_INO].v + 1,
-                                            BCH_QUOTA_PREALLOC);
+                                            KEY_TYPE_QUOTA_PREALLOC);
                if (ret)
                        goto err;
        }
@@ -347,7 +331,7 @@ static int __bch2_quota_set(struct bch_fs *c, struct bkey_s_c k)
        BUG_ON(k.k->p.inode >= QTYP_NR);
 
        switch (k.k->type) {
-       case BCH_QUOTA:
+       case KEY_TYPE_quota:
                dq = bkey_s_c_to_quota(k);
                q = &c->quotas[k.k->p.inode];
 
@@ -447,15 +431,15 @@ int bch2_fs_quota_read(struct bch_fs *c)
        for_each_btree_key(&iter, c, BTREE_ID_INODES, POS_MIN,
                           BTREE_ITER_PREFETCH, k) {
                switch (k.k->type) {
-               case BCH_INODE_FS:
+               case KEY_TYPE_inode:
                        ret = bch2_inode_unpack(bkey_s_c_to_inode(k), &u);
                        if (ret)
                                return ret;
 
                        bch2_quota_acct(c, bch_qid(&u), Q_SPC, u.bi_sectors,
-                                       BCH_QUOTA_NOCHECK);
+                                       KEY_TYPE_QUOTA_NOCHECK);
                        bch2_quota_acct(c, bch_qid(&u), Q_INO, 1,
-                                       BCH_QUOTA_NOCHECK);
+                                       KEY_TYPE_QUOTA_NOCHECK);
                }
        }
        return bch2_btree_iter_unlock(&iter) ?: ret;
@@ -743,7 +727,7 @@ static int bch2_set_quota(struct super_block *sb, struct kqid qid,
                return ret;
 
        switch (k.k->type) {
-       case BCH_QUOTA:
+       case KEY_TYPE_quota:
                new_quota.v = *bkey_s_c_to_quota(k).v;
                break;
        }
index 9c06eb07bccb943e3c404b08c6371fe75790e467..294a04db84bf4e52395af855f377370b79a8f8c3 100644 (file)
@@ -10,15 +10,15 @@ extern const struct bch_sb_field_ops bch_sb_field_ops_quota;
 const char *bch2_quota_invalid(const struct bch_fs *, struct bkey_s_c);
 void bch2_quota_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
 
-#define bch2_bkey_quota_ops (struct bkey_ops) {                \
+#define bch2_bkey_ops_quota (struct bkey_ops) {                \
        .key_invalid    = bch2_quota_invalid,           \
        .val_to_text    = bch2_quota_to_text,           \
 }
 
 enum quota_acct_mode {
-       BCH_QUOTA_PREALLOC,
-       BCH_QUOTA_WARN,
-       BCH_QUOTA_NOCHECK,
+       KEY_TYPE_QUOTA_PREALLOC,
+       KEY_TYPE_QUOTA_WARN,
+       KEY_TYPE_QUOTA_NOCHECK,
 };
 
 static inline struct bch_qid bch_qid(struct bch_inode_unpacked *u)
index 5d246c5b818601273ca388a6db5031f0dbdfc7f4..eec74d4a5712686c463a6dc51f734bd3222ac5ba 100644 (file)
@@ -70,28 +70,34 @@ void bch2_rebalance_add_work(struct bch_fs *c, u64 sectors)
 }
 
 static enum data_cmd rebalance_pred(struct bch_fs *c, void *arg,
-                                   enum bkey_type type,
-                                   struct bkey_s_c_extent e,
+                                   struct bkey_s_c k,
                                    struct bch_io_opts *io_opts,
                                    struct data_opts *data_opts)
 {
-       const union bch_extent_entry *entry;
-       struct extent_ptr_decoded p;
+       switch (k.k->type) {
+       case KEY_TYPE_extent: {
+               struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
+               const union bch_extent_entry *entry;
+               struct extent_ptr_decoded p;
 
-       /* Make sure we have room to add a new pointer: */
-       if (bkey_val_u64s(e.k) + BKEY_EXTENT_PTR_U64s_MAX >
-           BKEY_EXTENT_VAL_U64s_MAX)
-               return DATA_SKIP;
+               /* Make sure we have room to add a new pointer: */
+               if (bkey_val_u64s(e.k) + BKEY_EXTENT_PTR_U64s_MAX >
+                   BKEY_EXTENT_VAL_U64s_MAX)
+                       return DATA_SKIP;
 
-       extent_for_each_ptr_decode(e, p, entry)
-               if (rebalance_ptr_pred(c, p, io_opts))
-                       goto found;
+               extent_for_each_ptr_decode(e, p, entry)
+                       if (rebalance_ptr_pred(c, p, io_opts))
+                               goto found;
 
-       return DATA_SKIP;
+               return DATA_SKIP;
 found:
-       data_opts->target               = io_opts->background_target;
-       data_opts->btree_insert_flags   = 0;
-       return DATA_ADD_REPLICAS;
+               data_opts->target               = io_opts->background_target;
+               data_opts->btree_insert_flags   = 0;
+               return DATA_ADD_REPLICAS;
+       }
+       default:
+               return DATA_SKIP;
+       }
 }
 
 struct rebalance_work {
index 1cb0c9940ec1867cc26d6b027594c3a8459330c1..17277060629447af936f07c7bc17dcc597c0695f 100644 (file)
@@ -147,6 +147,10 @@ int bch2_fs_recovery(struct bch_fs *c)
                        mutex_unlock(&c->sb_lock);
                        goto err;
                }
+
+               if (le16_to_cpu(c->disk_sb.sb->version) <
+                   bcachefs_metadata_version_bkey_renumber)
+                       bch2_sb_clean_renumber(clean, READ);
        }
        mutex_unlock(&c->sb_lock);
 
@@ -265,12 +269,18 @@ int bch2_fs_recovery(struct bch_fs *c)
        if (ret)
                goto err;
 
-       if (!test_bit(BCH_FS_FSCK_UNFIXED_ERRORS, &c->flags)) {
-               mutex_lock(&c->sb_lock);
-               c->disk_sb.sb->features[0] |= 1ULL << BCH_FEATURE_ATOMIC_NLINK;
-               mutex_unlock(&c->sb_lock);
+       mutex_lock(&c->sb_lock);
+       if (c->opts.version_upgrade) {
+               if (c->sb.version < bcachefs_metadata_version_new_versioning)
+                       c->disk_sb.sb->version_min =
+                               le16_to_cpu(bcachefs_metadata_version_min);
+               c->disk_sb.sb->version = le16_to_cpu(bcachefs_metadata_version_current);
        }
 
+       if (!test_bit(BCH_FS_FSCK_UNFIXED_ERRORS, &c->flags))
+               c->disk_sb.sb->features[0] |= 1ULL << BCH_FEATURE_ATOMIC_NLINK;
+       mutex_unlock(&c->sb_lock);
+
        if (enabled_qtypes(c)) {
                bch_verbose(c, "reading quotas:");
                ret = bch2_fs_quota_read(c);
@@ -379,9 +389,12 @@ int bch2_fs_initialize(struct bch_fs *c)
                goto err;
 
        mutex_lock(&c->sb_lock);
+       c->disk_sb.sb->version = c->disk_sb.sb->version_min =
+               cpu_to_le16(bcachefs_metadata_version_current);
+       c->disk_sb.sb->features[0] |= 1ULL << BCH_FEATURE_ATOMIC_NLINK;
+
        SET_BCH_SB_INITIALIZED(c->disk_sb.sb, true);
        SET_BCH_SB_CLEAN(c->disk_sb.sb, false);
-       c->disk_sb.sb->features[0] |= 1ULL << BCH_FEATURE_ATOMIC_NLINK;
 
        bch2_write_super(c);
        mutex_unlock(&c->sb_lock);
index 0296931b6b8c68686f23dbbf7955b2afabe9cc8c..77d175f34b2b2eddd0a515990128ed86f5911ab0 100644 (file)
@@ -73,64 +73,57 @@ void bch2_cpu_replicas_to_text(struct printbuf *out,
 static void extent_to_replicas(struct bkey_s_c k,
                               struct bch_replicas_entry *r)
 {
-       if (bkey_extent_is_data(k.k)) {
-               struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
-               const union bch_extent_entry *entry;
-               struct extent_ptr_decoded p;
+       struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+       const union bch_extent_entry *entry;
+       struct extent_ptr_decoded p;
 
-               r->nr_required  = 1;
+       r->nr_required  = 1;
 
-               extent_for_each_ptr_decode(e, p, entry) {
-                       if (p.ptr.cached)
-                               continue;
+       bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
+               if (p.ptr.cached)
+                       continue;
 
-                       if (p.ec_nr) {
-                               r->nr_devs = 0;
-                               break;
-                       }
-
-                       r->devs[r->nr_devs++] = p.ptr.dev;
+               if (p.ec_nr) {
+                       r->nr_devs = 0;
+                       break;
                }
+
+               r->devs[r->nr_devs++] = p.ptr.dev;
        }
 }
 
 static void stripe_to_replicas(struct bkey_s_c k,
                               struct bch_replicas_entry *r)
 {
-       if (k.k->type == BCH_STRIPE) {
-               struct bkey_s_c_stripe s = bkey_s_c_to_stripe(k);
-               const struct bch_extent_ptr *ptr;
+       struct bkey_s_c_stripe s = bkey_s_c_to_stripe(k);
+       const struct bch_extent_ptr *ptr;
 
-               r->nr_required  = s.v->nr_blocks - s.v->nr_redundant;
+       r->nr_required  = s.v->nr_blocks - s.v->nr_redundant;
 
-               for (ptr = s.v->ptrs;
-                    ptr < s.v->ptrs + s.v->nr_blocks;
-                    ptr++)
-                       r->devs[r->nr_devs++] = ptr->dev;
-       }
+       for (ptr = s.v->ptrs;
+            ptr < s.v->ptrs + s.v->nr_blocks;
+            ptr++)
+               r->devs[r->nr_devs++] = ptr->dev;
 }
 
-static void bkey_to_replicas(enum bkey_type type,
-                            struct bkey_s_c k,
+static void bkey_to_replicas(struct bkey_s_c k,
                             struct bch_replicas_entry *e)
 {
        e->nr_devs = 0;
 
-       switch (type) {
-       case BKEY_TYPE_BTREE:
+       switch (k.k->type) {
+       case KEY_TYPE_btree_ptr:
                e->data_type = BCH_DATA_BTREE;
                extent_to_replicas(k, e);
                break;
-       case BKEY_TYPE_EXTENTS:
+       case KEY_TYPE_extent:
                e->data_type = BCH_DATA_USER;
                extent_to_replicas(k, e);
                break;
-       case BKEY_TYPE_EC:
+       case KEY_TYPE_stripe:
                e->data_type = BCH_DATA_USER;
                stripe_to_replicas(k, e);
                break;
-       default:
-               break;
        }
 
        replicas_entry_sort(e);
@@ -296,26 +289,21 @@ int bch2_mark_replicas(struct bch_fs *c,
        return __bch2_mark_replicas(c, &search.e);
 }
 
-int bch2_mark_bkey_replicas(struct bch_fs *c,
-                           enum bkey_type type,
-                           struct bkey_s_c k)
+int bch2_mark_bkey_replicas(struct bch_fs *c, struct bkey_s_c k)
 {
        struct bch_replicas_entry_padded search;
+       struct bch_devs_list cached = bch2_bkey_cached_devs(k);
+       unsigned i;
        int ret;
 
        memset(&search, 0, sizeof(search));
 
-       if (type == BKEY_TYPE_EXTENTS) {
-               struct bch_devs_list cached = bch2_bkey_cached_devs(k);
-               unsigned i;
+       for (i = 0; i < cached.nr; i++)
+               if ((ret = bch2_mark_replicas(c, BCH_DATA_CACHED,
+                                             bch2_dev_list_single(cached.devs[i]))))
+                       return ret;
 
-               for (i = 0; i < cached.nr; i++)
-                       if ((ret = bch2_mark_replicas(c, BCH_DATA_CACHED,
-                                               bch2_dev_list_single(cached.devs[i]))))
-                               return ret;
-       }
-
-       bkey_to_replicas(type, k, &search.e);
+       bkey_to_replicas(k, &search.e);
 
        return search.e.nr_devs
                ? __bch2_mark_replicas(c, &search.e)
@@ -719,26 +707,22 @@ bool bch2_replicas_marked(struct bch_fs *c,
 }
 
 bool bch2_bkey_replicas_marked(struct bch_fs *c,
-                              enum bkey_type type,
                               struct bkey_s_c k,
                               bool check_gc_replicas)
 {
        struct bch_replicas_entry_padded search;
+       struct bch_devs_list cached = bch2_bkey_cached_devs(k);
+       unsigned i;
 
        memset(&search, 0, sizeof(search));
 
-       if (type == BKEY_TYPE_EXTENTS) {
-               struct bch_devs_list cached = bch2_bkey_cached_devs(k);
-               unsigned i;
-
-               for (i = 0; i < cached.nr; i++)
-                       if (!bch2_replicas_marked(c, BCH_DATA_CACHED,
-                                       bch2_dev_list_single(cached.devs[i]),
-                                       check_gc_replicas))
-                               return false;
-       }
+       for (i = 0; i < cached.nr; i++)
+               if (!bch2_replicas_marked(c, BCH_DATA_CACHED,
+                                         bch2_dev_list_single(cached.devs[i]),
+                                         check_gc_replicas))
+                       return false;
 
-       bkey_to_replicas(type, k, &search.e);
+       bkey_to_replicas(k, &search.e);
 
        return search.e.nr_devs
                ? replicas_has_entry(c, &search.e, check_gc_replicas)
index e22d2d7cd08a5dfd4221245808856a3d958328ce..03aaafdc7c170990bcdf0b7e10a2cb6d3133d5ac 100644 (file)
@@ -6,12 +6,11 @@
 
 bool bch2_replicas_marked(struct bch_fs *, enum bch_data_type,
                          struct bch_devs_list, bool);
-bool bch2_bkey_replicas_marked(struct bch_fs *, enum bkey_type,
+bool bch2_bkey_replicas_marked(struct bch_fs *,
                               struct bkey_s_c, bool);
 int bch2_mark_replicas(struct bch_fs *, enum bch_data_type,
                       struct bch_devs_list);
-int bch2_mark_bkey_replicas(struct bch_fs *, enum bkey_type,
-                           struct bkey_s_c);
+int bch2_mark_bkey_replicas(struct bch_fs *, struct bkey_s_c);
 
 void bch2_cpu_replicas_to_text(struct printbuf *, struct bch_replicas_cpu *);
 
index fbd6c3372677655a04cc8c82ccc70b8a1a01e749..6f30fbe44eb8a74af977ecc6a37150b6de0aeae9 100644 (file)
@@ -118,7 +118,6 @@ static inline u64 bch2_str_hash_end(struct bch_str_hash_ctx *ctx,
 struct bch_hash_desc {
        enum btree_id   btree_id;
        u8              key_type;
-       u8              whiteout_type;
 
        u64             (*hash_key)(const struct bch_hash_info *, const void *);
        u64             (*hash_bkey)(const struct bch_hash_info *, struct bkey_s_c);
@@ -149,7 +148,7 @@ bch2_hash_lookup(struct btree_trans *trans,
                if (k.k->type == desc.key_type) {
                        if (!desc.cmp_key(k, key))
                                return iter;
-               } else if (k.k->type == desc.whiteout_type) {
+               } else if (k.k->type == KEY_TYPE_whiteout) {
                        ;
                } else {
                        /* hole, not found */
@@ -202,7 +201,7 @@ static inline int bch2_hash_needs_whiteout(struct btree_trans *trans,
 
        for_each_btree_key_continue(iter, BTREE_ITER_SLOTS, k) {
                if (k.k->type != desc.key_type &&
-                   k.k->type != desc.whiteout_type)
+                   k.k->type != KEY_TYPE_whiteout)
                        return false;
 
                if (k.k->type == desc.key_type &&
@@ -245,7 +244,7 @@ static inline int __bch2_hash_set(struct btree_trans *trans,
                                return PTR_ERR(slot);
                }
 
-               if (k.k->type != desc.whiteout_type)
+               if (k.k->type != KEY_TYPE_whiteout)
                        goto not_found;
        }
 
@@ -295,7 +294,7 @@ static inline int bch2_hash_delete_at(struct btree_trans *trans,
 
        bkey_init(&delete->k);
        delete->k.p = iter->pos;
-       delete->k.type = ret ? desc.whiteout_type : KEY_TYPE_DELETED;
+       delete->k.type = ret ? KEY_TYPE_whiteout : KEY_TYPE_deleted;
 
        bch2_trans_update(trans, BTREE_INSERT_ENTRY(iter, delete));
        return 0;
index 3dbcb6d7d261c2cd5c32a6dede39dfb1aca5843a..dafdc45b442cc7ac40f74e4447ab2a97dafecdbc 100644 (file)
@@ -240,21 +240,25 @@ const char *bch2_sb_validate(struct bch_sb_handle *disk_sb)
        struct bch_sb_field *f;
        struct bch_sb_field_members *mi;
        const char *err;
+       u32 version, version_min;
        u16 block_size;
 
-       if (le16_to_cpu(sb->version) < BCH_SB_VERSION_MIN ||
-           le16_to_cpu(sb->version) > BCH_SB_VERSION_MAX)
+       version         = le16_to_cpu(sb->version);
+       version_min     = version >= bcachefs_metadata_version_new_versioning
+               ? le16_to_cpu(sb->version_min)
+               : version;
+
+       if (version    >= bcachefs_metadata_version_max ||
+           version_min < bcachefs_metadata_version_min)
                return "Unsupported superblock version";
 
+       if (version_min > version)
+               return "Bad minimum version";
+
        if (sb->features[1] ||
            (le64_to_cpu(sb->features[0]) & (~0ULL << BCH_FEATURE_NR)))
                return "Filesystem has incompatible features";
 
-       if (le16_to_cpu(sb->version) < BCH_SB_VERSION_EXTENT_MAX) {
-               SET_BCH_SB_ENCODED_EXTENT_MAX_BITS(sb, 7);
-               SET_BCH_SB_POSIX_ACL(sb, 1);
-       }
-
        block_size = le16_to_cpu(sb->block_size);
 
        if (!is_power_of_2(block_size) ||
@@ -341,13 +345,6 @@ const char *bch2_sb_validate(struct bch_sb_handle *disk_sb)
                        return err;
        }
 
-       if (le16_to_cpu(sb->version) < BCH_SB_VERSION_EXTENT_NONCE_V1 &&
-           bch2_sb_get_crypt(sb) &&
-           BCH_SB_INITIALIZED(sb))
-               return "Incompatible extent nonces";
-
-       sb->version = cpu_to_le16(BCH_SB_VERSION_MAX);
-
        return NULL;
 }
 
@@ -364,6 +361,7 @@ static void bch2_sb_update(struct bch_fs *c)
 
        c->sb.uuid              = src->uuid;
        c->sb.user_uuid         = src->user_uuid;
+       c->sb.version           = le16_to_cpu(src->version);
        c->sb.nr_devices        = src->nr_devices;
        c->sb.clean             = BCH_SB_CLEAN(src);
        c->sb.encryption_type   = BCH_SB_ENCRYPTION_TYPE(src);
@@ -385,6 +383,7 @@ static void __copy_super(struct bch_sb_handle *dst_handle, struct bch_sb *src)
        unsigned i;
 
        dst->version            = src->version;
+       dst->version_min        = src->version_min;
        dst->seq                = src->seq;
        dst->uuid               = src->uuid;
        dst->user_uuid          = src->user_uuid;
@@ -483,8 +482,8 @@ reread:
            !uuid_equal(&sb->sb->magic, &BCHFS_MAGIC))
                return "Not a bcachefs superblock";
 
-       if (le16_to_cpu(sb->sb->version) < BCH_SB_VERSION_MIN ||
-           le16_to_cpu(sb->sb->version) > BCH_SB_VERSION_MAX)
+       if (le16_to_cpu(sb->sb->version) <  bcachefs_metadata_version_min ||
+           le16_to_cpu(sb->sb->version) >= bcachefs_metadata_version_max)
                return "Unsupported superblock version";
 
        bytes = vstruct_bytes(sb->sb);
@@ -846,12 +845,6 @@ static const char *bch2_sb_validate_members(struct bch_sb *sb,
                        return "bucket size smaller than btree node size";
        }
 
-       if (le16_to_cpu(sb->version) < BCH_SB_VERSION_EXTENT_MAX)
-               for (m = mi->members;
-                    m < mi->members + sb->nr_devices;
-                    m++)
-                       SET_BCH_MEMBER_DATA_ALLOWED(m, ~0);
-
        return NULL;
 }
 
@@ -881,6 +874,16 @@ static const struct bch_sb_field_ops bch_sb_field_ops_crypt = {
 
 /* BCH_SB_FIELD_clean: */
 
+void bch2_sb_clean_renumber(struct bch_sb_field_clean *clean, int write)
+{
+       struct jset_entry *entry;
+
+       for (entry = clean->start;
+            entry < (struct jset_entry *) vstruct_end(&clean->field);
+            entry = vstruct_next(entry))
+               bch2_bkey_renumber(BKEY_TYPE_BTREE, bkey_to_packed(entry->start), write);
+}
+
 void bch2_fs_mark_clean(struct bch_fs *c, bool clean)
 {
        struct bch_sb_field_clean *sb_clean;
@@ -935,6 +938,10 @@ void bch2_fs_mark_clean(struct bch_fs *c, bool clean)
 
        BUG_ON(entry != vstruct_end(&sb_clean->field));
 
+       if (le16_to_cpu(c->disk_sb.sb->version) <
+           bcachefs_metadata_version_bkey_renumber)
+               bch2_sb_clean_renumber(sb_clean, WRITE);
+
        mutex_unlock(&c->btree_root_lock);
 write_super:
        bch2_write_super(c);
index aa618fe9cd22013b83adc27bb0b4df361ed5d9c5..ac3b704f0540636e22511690ccb71224dc54a908 100644 (file)
@@ -135,6 +135,8 @@ static inline struct bch_member_cpu bch2_mi_to_cpu(struct bch_member *mi)
 
 /* BCH_SB_FIELD_clean: */
 
+void bch2_sb_clean_renumber(struct bch_sb_field_clean *, int);
+
 void bch2_fs_mark_clean(struct bch_fs *, bool);
 
 void bch2_sb_field_to_text(struct printbuf *, struct bch_sb *,
index cadbc5481bcb3bdb6abd2a04cc56be9960b537d3..7405b5cdd1bfe94b49961461f29dcb3af7019cd9 100644 (file)
@@ -1799,7 +1799,7 @@ err:
 BCH_DEBUG_PARAMS()
 #undef BCH_DEBUG_PARAM
 
-unsigned bch2_metadata_version = BCH_SB_VERSION_MAX;
+unsigned bch2_metadata_version = bcachefs_metadata_version_current;
 module_param_named(version, bch2_metadata_version, uint, 0400);
 
 module_exit(bcachefs_exit);
index 8eacc0d2550b1f1c56704cd2a8f350f831d07494..7e46b254da38ad3395b185dc0c2b7e921504a0b1 100644 (file)
@@ -277,7 +277,7 @@ static ssize_t bch2_compression_stats(struct bch_fs *c, char *buf)
                return -EPERM;
 
        for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, POS_MIN, 0, k)
-               if (k.k->type == BCH_EXTENT) {
+               if (k.k->type == KEY_TYPE_extent) {
                        struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
                        const union bch_extent_entry *entry;
                        struct extent_ptr_decoded p;
index 9730540f7375b03582148053a96cf95eed2feaa0..1aa6ac05d50eabc5563288ca2f4338e1a4848962 100644 (file)
@@ -165,7 +165,7 @@ TRACE_EVENT(btree_write,
        TP_ARGS(b, bytes, sectors),
 
        TP_STRUCT__entry(
-               __field(enum bkey_type, type)
+               __field(enum btree_node_type,   type)
                __field(unsigned,       bytes                   )
                __field(unsigned,       sectors                 )
        ),
index ab358c43475388dbc6eeaa3be100ea71bc5fe1c8..ff2d59ee1658d217f9489d5c8e421161a641e87f 100644 (file)
@@ -62,8 +62,7 @@ static bool xattr_cmp_bkey(struct bkey_s_c _l, struct bkey_s_c _r)
 
 const struct bch_hash_desc bch2_xattr_hash_desc = {
        .btree_id       = BTREE_ID_XATTRS,
-       .key_type       = BCH_XATTR,
-       .whiteout_type  = BCH_XATTR_WHITEOUT,
+       .key_type       = KEY_TYPE_xattr,
        .hash_key       = xattr_hash_key,
        .hash_bkey      = xattr_hash_bkey,
        .cmp_key        = xattr_cmp_key,
@@ -73,71 +72,50 @@ const struct bch_hash_desc bch2_xattr_hash_desc = {
 const char *bch2_xattr_invalid(const struct bch_fs *c, struct bkey_s_c k)
 {
        const struct xattr_handler *handler;
-       struct bkey_s_c_xattr xattr;
-
-       switch (k.k->type) {
-       case BCH_XATTR:
-               if (bkey_val_bytes(k.k) < sizeof(struct bch_xattr))
-                       return "value too small";
-
-               xattr = bkey_s_c_to_xattr(k);
+       struct bkey_s_c_xattr xattr = bkey_s_c_to_xattr(k);
 
-               if (bkey_val_u64s(k.k) <
-                       xattr_val_u64s(xattr.v->x_name_len,
-                                      le16_to_cpu(xattr.v->x_val_len)))
-                       return "value too small";
+       if (bkey_val_bytes(k.k) < sizeof(struct bch_xattr))
+               return "value too small";
 
-               if (bkey_val_u64s(k.k) >
-                       xattr_val_u64s(xattr.v->x_name_len,
-                                      le16_to_cpu(xattr.v->x_val_len) + 4))
-                       return "value too big";
+       if (bkey_val_u64s(k.k) <
+           xattr_val_u64s(xattr.v->x_name_len,
+                          le16_to_cpu(xattr.v->x_val_len)))
+               return "value too small";
 
-               handler = bch2_xattr_type_to_handler(xattr.v->x_type);
-               if (!handler)
-                       return "invalid type";
+       if (bkey_val_u64s(k.k) >
+           xattr_val_u64s(xattr.v->x_name_len,
+                          le16_to_cpu(xattr.v->x_val_len) + 4))
+               return "value too big";
 
-               if (memchr(xattr.v->x_name, '\0', xattr.v->x_name_len))
-                       return "xattr name has invalid characters";
+       handler = bch2_xattr_type_to_handler(xattr.v->x_type);
+       if (!handler)
+               return "invalid type";
 
-               return NULL;
-       case BCH_XATTR_WHITEOUT:
-               return bkey_val_bytes(k.k) != 0
-                       ? "value size should be zero"
-                       : NULL;
+       if (memchr(xattr.v->x_name, '\0', xattr.v->x_name_len))
+               return "xattr name has invalid characters";
 
-       default:
-               return "invalid type";
-       }
+       return NULL;
 }
 
 void bch2_xattr_to_text(struct printbuf *out, struct bch_fs *c,
                        struct bkey_s_c k)
 {
        const struct xattr_handler *handler;
-       struct bkey_s_c_xattr xattr;
+       struct bkey_s_c_xattr xattr = bkey_s_c_to_xattr(k);
 
-       switch (k.k->type) {
-       case BCH_XATTR:
-               xattr = bkey_s_c_to_xattr(k);
+       handler = bch2_xattr_type_to_handler(xattr.v->x_type);
+       if (handler && handler->prefix)
+               pr_buf(out, "%s", handler->prefix);
+       else if (handler)
+               pr_buf(out, "(type %u)", xattr.v->x_type);
+       else
+               pr_buf(out, "(unknown type %u)", xattr.v->x_type);
 
-               handler = bch2_xattr_type_to_handler(xattr.v->x_type);
-               if (handler && handler->prefix)
-                       pr_buf(out, "%s", handler->prefix);
-               else if (handler)
-                       pr_buf(out, "(type %u)", xattr.v->x_type);
-               else
-                       pr_buf(out, "(unknown type %u)", xattr.v->x_type);
-
-               bch_scnmemcpy(out, xattr.v->x_name,
-                             xattr.v->x_name_len);
-               pr_buf(out, ":");
-               bch_scnmemcpy(out, xattr_val(xattr.v),
-                             le16_to_cpu(xattr.v->x_val_len));
-               break;
-       case BCH_XATTR_WHITEOUT:
-               pr_buf(out, "whiteout");
-               break;
-       }
+       bch_scnmemcpy(out, xattr.v->x_name,
+                     xattr.v->x_name_len);
+       pr_buf(out, ":");
+       bch_scnmemcpy(out, xattr_val(xattr.v),
+                     le16_to_cpu(xattr.v->x_val_len));
 }
 
 int bch2_xattr_get(struct bch_fs *c, struct bch_inode_info *inode,
@@ -261,7 +239,7 @@ ssize_t bch2_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size)
                if (k.k->p.inode > inum)
                        break;
 
-               if (k.k->type != BCH_XATTR)
+               if (k.k->type != KEY_TYPE_xattr)
                        continue;
 
                xattr = bkey_s_c_to_xattr(k).v;
@@ -315,7 +293,7 @@ static const struct xattr_handler bch_xattr_user_handler = {
        .prefix = XATTR_USER_PREFIX,
        .get    = bch2_xattr_get_handler,
        .set    = bch2_xattr_set_handler,
-       .flags  = BCH_XATTR_INDEX_USER,
+       .flags  = KEY_TYPE_XATTR_INDEX_USER,
 };
 
 static bool bch2_xattr_trusted_list(struct dentry *dentry)
@@ -328,14 +306,14 @@ static const struct xattr_handler bch_xattr_trusted_handler = {
        .list   = bch2_xattr_trusted_list,
        .get    = bch2_xattr_get_handler,
        .set    = bch2_xattr_set_handler,
-       .flags  = BCH_XATTR_INDEX_TRUSTED,
+       .flags  = KEY_TYPE_XATTR_INDEX_TRUSTED,
 };
 
 static const struct xattr_handler bch_xattr_security_handler = {
        .prefix = XATTR_SECURITY_PREFIX,
        .get    = bch2_xattr_get_handler,
        .set    = bch2_xattr_set_handler,
-       .flags  = BCH_XATTR_INDEX_SECURITY,
+       .flags  = KEY_TYPE_XATTR_INDEX_SECURITY,
 };
 
 #ifndef NO_BCACHEFS_FS
@@ -474,13 +452,13 @@ const struct xattr_handler *bch2_xattr_handlers[] = {
 };
 
 static const struct xattr_handler *bch_xattr_handler_map[] = {
-       [BCH_XATTR_INDEX_USER]                  = &bch_xattr_user_handler,
-       [BCH_XATTR_INDEX_POSIX_ACL_ACCESS]      =
+       [KEY_TYPE_XATTR_INDEX_USER]                     = &bch_xattr_user_handler,
+       [KEY_TYPE_XATTR_INDEX_POSIX_ACL_ACCESS] =
                &nop_posix_acl_access,
-       [BCH_XATTR_INDEX_POSIX_ACL_DEFAULT]     =
+       [KEY_TYPE_XATTR_INDEX_POSIX_ACL_DEFAULT]        =
                &nop_posix_acl_default,
-       [BCH_XATTR_INDEX_TRUSTED]               = &bch_xattr_trusted_handler,
-       [BCH_XATTR_INDEX_SECURITY]              = &bch_xattr_security_handler,
+       [KEY_TYPE_XATTR_INDEX_TRUSTED]          = &bch_xattr_trusted_handler,
+       [KEY_TYPE_XATTR_INDEX_SECURITY]         = &bch_xattr_security_handler,
 };
 
 static const struct xattr_handler *bch2_xattr_type_to_handler(unsigned type)
index 63be44b02a2ba772435ac20d15cf1cb9cf6c7d45..4151065ab853546c3f071a831cfba10d9af03010 100644 (file)
@@ -9,7 +9,7 @@ extern const struct bch_hash_desc bch2_xattr_hash_desc;
 const char *bch2_xattr_invalid(const struct bch_fs *, struct bkey_s_c);
 void bch2_xattr_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
 
-#define bch2_bkey_xattr_ops (struct bkey_ops) {                \
+#define bch2_bkey_ops_xattr (struct bkey_ops) {                \
        .key_invalid    = bch2_xattr_invalid,           \
        .val_to_text    = bch2_xattr_to_text,           \
 }