bcachefs: bch_sb_field_downgrade
author Kent Overstreet <kent.overstreet@linux.dev>
Fri, 29 Dec 2023 20:25:07 +0000 (15:25 -0500)
committer Kent Overstreet <kent.overstreet@linux.dev>
Mon, 1 Jan 2024 16:47:07 +0000 (11:47 -0500)
Add a new superblock section that contains a list of
  { minor version, recovery passes, errors_to_fix }

that is - a list of recovery passes that must be run when downgrading
past a given version, and a list of errors to silently fix.

The upcoming disk accounting rewrite is not going to be fully
compatible: we're going to have to regenerate accounting both when
upgrading to the new version, and also when downgrading from the new
version, since the new method of doing disk space accounting is a
completely different architecture based on deltas, and synchronizing
them for every journal entry write to maintain compatibility is going to
be too expensive and impractical.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
fs/bcachefs/Makefile
fs/bcachefs/bcachefs_format.h
fs/bcachefs/errcode.h
fs/bcachefs/recovery.c
fs/bcachefs/sb-clean.c
fs/bcachefs/sb-downgrade.c [new file with mode: 0644]
fs/bcachefs/sb-downgrade.h [new file with mode: 0644]
fs/bcachefs/sb-errors.c
fs/bcachefs/super-io.c
fs/bcachefs/super-io.h

index ed550a40463e2568b27186f40238c0deb8e6d6d0..b81268418174489c5728b6f92a3b830a31a50f40 100644 (file)
@@ -71,6 +71,7 @@ bcachefs-y            :=      \
        reflink.o               \
        replicas.o              \
        sb-clean.o              \
+       sb-downgrade.o          \
        sb-errors.o             \
        sb-members.o            \
        siphash.o               \
index bd5af516994a5c3b227b3ac2503072168fd00b81..fe78e87603fcf3bf2cec9114f4ecaf8744cba7a8 100644 (file)
@@ -1220,7 +1220,8 @@ struct bch_sb_field {
        x(counters,                     10)     \
        x(members_v2,                   11)     \
        x(errors,                       12)     \
-       x(ext,                          13)
+       x(ext,                          13)     \
+       x(downgrade,                    14)
 
 enum bch_sb_field_type {
 #define x(f, nr)       BCH_SB_FIELD_##f = nr,
@@ -1638,6 +1639,18 @@ struct bch_sb_field_ext {
        __le64                  errors_silent[8];
 };
 
+struct bch_sb_field_downgrade_entry {  /* one variable-length entry of the downgrade section */
+       __le16                  version;        /* metadata version this entry is keyed on */
+       __le64                  recovery_passes[2]; /* pass bitmask, OR'd into ext recovery_passes_required[] */
+       __le16                  nr_errors;      /* number of elements in errors[] */
+       __le16                  errors[] __counted_by(nr_errors); /* error IDs to flag in errors_silent */
+} __packed __aligned(2);
+
+struct bch_sb_field_downgrade {        /* superblock section: list of downgrade entries */
+       struct bch_sb_field     field;  /* common section header */
+       struct bch_sb_field_downgrade_entry entries[]; /* variable-length entries: walk them, do not index */
+};
+
 /* Superblock: */
 
 /*
@@ -1651,6 +1664,11 @@ struct bch_sb_field_ext {
 
 #define RECOVERY_PASS_ALL_FSCK         (1ULL << 63)
 
+/*
+ * field 1:            version name
+ * field 2:            BCH_VERSION(major, minor)
+ * field 3:            recovery passes required on upgrade
+ */
 #define BCH_METADATA_VERSIONS()                                                \
        x(bkey_renumber,                BCH_VERSION(0, 10),             \
          RECOVERY_PASS_ALL_FSCK)                                       \
index 79327b5cec5063be019539e0debd28b43bb425af..9ce29681eec9631a9745576f3613155e8a1dfd11 100644 (file)
@@ -95,6 +95,7 @@
        x(ENOSPC,                       ENOSPC_sb_members)                      \
        x(ENOSPC,                       ENOSPC_sb_members_v2)                   \
        x(ENOSPC,                       ENOSPC_sb_crypt)                        \
+       x(ENOSPC,                       ENOSPC_sb_downgrade)                    \
        x(ENOSPC,                       ENOSPC_btree_slot)                      \
        x(ENOSPC,                       ENOSPC_snapshot_tree)                   \
        x(ENOENT,                       ENOENT_bkey_type_mismatch)              \
        x(BCH_ERR_invalid_sb,           invalid_sb_errors)                      \
        x(BCH_ERR_invalid_sb,           invalid_sb_opt_compression)             \
        x(BCH_ERR_invalid_sb,           invalid_sb_ext)                         \
+       x(BCH_ERR_invalid_sb,           invalid_sb_downgrade)                   \
        x(BCH_ERR_invalid,              invalid_bkey)                           \
        x(BCH_ERR_operation_blocked,    nocow_lock_blocked)                     \
        x(EIO,                          btree_node_read_err)                    \
index b9c84e8cf3fd6a0cb524c95f7d77566770394d07..5cf7d053200279f536675e15317b1c2b2878f3d4 100644 (file)
@@ -27,6 +27,7 @@
 #include "recovery.h"
 #include "replicas.h"
 #include "sb-clean.h"
+#include "sb-downgrade.h"
 #include "snapshot.h"
 #include "subvolume.h"
 #include "super-io.h"
@@ -744,6 +745,27 @@ int bch2_fs_recovery(struct bch_fs *c)
                        printbuf_exit(&buf);
                }
 
+               if (bch2_check_version_downgrade(c)) {
+                       struct printbuf buf = PRINTBUF;
+
+                       prt_str(&buf, "Version downgrade required:\n");
+
+                       __le64 passes = ext->recovery_passes_required[0];
+                       bch2_sb_set_downgrade(c,
+                                       BCH_VERSION_MINOR(bcachefs_metadata_version_current),
+                                       BCH_VERSION_MINOR(c->sb.version));
+                       passes = ext->recovery_passes_required[0] & ~passes;
+                       if (passes) {
+                               prt_str(&buf, "  running recovery passes: ");
+                               prt_bitflags(&buf, bch2_recovery_passes,
+                                            bch2_recovery_passes_from_stable(le64_to_cpu(passes)));
+                       }
+
+                       bch_info(c, "%s", buf.buf);
+                       printbuf_exit(&buf);
+                       write_sb = true;
+               }
+
                if (check_version_upgrade(c))
                        write_sb = true;
 
@@ -1022,7 +1044,7 @@ int bch2_fs_initialize(struct bch_fs *c)
        c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_extents_above_btree_updates_done);
        c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_bformat_overflow_done);
 
-       bch2_sb_maybe_downgrade(c);
+       bch2_check_version_downgrade(c);
 
        if (c->opts.version_upgrade != BCH_VERSION_UPGRADE_none) {
                bch2_sb_upgrade(c, bcachefs_metadata_version_current);
index e151ada1c8bd2db23e31bc1f6f027815585e8ab2..c76ad8ea5e4a51c1fb82ea7ac5daf0a1e80a73e6 100644 (file)
@@ -332,8 +332,6 @@ int bch2_fs_mark_dirty(struct bch_fs *c)
 
        mutex_lock(&c->sb_lock);
        SET_BCH_SB_CLEAN(c->disk_sb.sb, false);
-
-       bch2_sb_maybe_downgrade(c);
        c->disk_sb.sb->features[0] |= cpu_to_le64(BCH_SB_FEATURES_ALWAYS);
 
        ret = bch2_write_super(c);
diff --git a/fs/bcachefs/sb-downgrade.c b/fs/bcachefs/sb-downgrade.c
new file mode 100644 (file)
index 0000000..4919237
--- /dev/null
@@ -0,0 +1,188 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Superblock section that contains a list of recovery passes to run when
+ * downgrading past a given version
+ */
+
+#include "bcachefs.h"
+#include "darray.h"
+#include "recovery.h"
+#include "sb-downgrade.h"
+#include "sb-errors.h"
+#include "super-io.h"
+
+/*
+ * Downgrade table:
+ * When downgrading past certain versions, we need to run certain recovery passes
+ * and fix certain errors:
+ *
+ * x(version, recovery_passes, errors...)
+ */
+
+#define DOWNGRADE_TABLE()
+
+struct downgrade_entry {       /* CPU-endian form of one DOWNGRADE_TABLE() row */
+       u64             recovery_passes;        /* written to the on-disk recovery_passes[0] */
+       u16             version;                /* a bcachefs_metadata_version_* value */
+       u16             nr_errors;              /* number of elements in errors[] */
+       const u16       *errors;                /* this row's array of error IDs */
+};
+
+/*
+ * DOWNGRADE_TABLE() is expanded twice: first into one array of error IDs per
+ * row, then into downgrade_table[], whose rows point at those arrays.
+ *
+ * The per-row array name has to paste the macro *parameter* (##ver##). Writing
+ * "ver_##errors" pastes the literal token "ver_" instead, which names every
+ * array "ver_errors" and fails to compile as soon as the table has two rows.
+ */
+#define x(ver, passes, ...) static const u16 downgrade_##ver##_errors[] = { __VA_ARGS__ };
+DOWNGRADE_TABLE()
+#undef x
+
+static const struct downgrade_entry downgrade_table[] = {
+#define x(ver, passes, ...) {                                  \
+       .recovery_passes        = passes,                       \
+       .version                = bcachefs_metadata_version_##ver,\
+       .nr_errors              = ARRAY_SIZE(downgrade_##ver##_errors), \
+       .errors                 = downgrade_##ver##_errors,     \
+},
+DOWNGRADE_TABLE()
+#undef x
+};
+
+/*
+ * Entries are variable length: the next entry starts immediately after this
+ * entry's errors[] array. No bounds checking is done here.
+ */
+static inline const struct bch_sb_field_downgrade_entry *
+downgrade_entry_next_c(const struct bch_sb_field_downgrade_entry *e)
+{
+       unsigned nr_errors = le16_to_cpu(e->nr_errors);
+
+       return (void *) (e->errors + nr_errors);
+}
+
+/*
+ * Iterate over the entries of downgrade section @_d; @_i is declared by the
+ * macro as the cursor.
+ *
+ * An entry is only visited if it lies entirely inside the section:
+ *  - its fixed header must end at or before vstruct_end(). This has to be <=,
+ *    not <: an entry with no errors that ends exactly at the end of the
+ *    section is valid and must not be skipped.
+ *  - its errors[] array must end at or before vstruct_end() too, so a bogus
+ *    nr_errors cannot make users of the cursor read past the section.
+ *
+ * The header check also terminates the walk on the zero padding that
+ * bch2_sb_downgrade_update() leaves up to the next u64, since that padding is
+ * smaller than an entry header.
+ */
+#define for_each_downgrade_entry(_d, _i)                                               \
+       for (const struct bch_sb_field_downgrade_entry *_i = (_d)->entries;             \
+            (void *) _i        < vstruct_end(&(_d)->field) &&                          \
+            (void *) &_i->errors[0] <= vstruct_end(&(_d)->field) &&                    \
+            (void *) downgrade_entry_next_c(_i) <= vstruct_end(&(_d)->field);          \
+            _i = downgrade_entry_next_c(_i))
+
+/*
+ * Validation hook for the downgrade section: every entry must carry the same
+ * major version as the superblock itself.
+ *
+ * Returns 0 if the section is acceptable; otherwise prints a message to @err
+ * and returns -BCH_ERR_invalid_sb_downgrade.
+ */
+static int bch2_sb_downgrade_validate(struct bch_sb *sb, struct bch_sb_field *f,
+                                     struct printbuf *err)
+{
+       struct bch_sb_field_downgrade *d = field_to_type(f, downgrade);
+       unsigned sb_major = BCH_VERSION_MAJOR(le16_to_cpu(sb->version));
+
+       for_each_downgrade_entry(d, entry) {
+               unsigned entry_major = BCH_VERSION_MAJOR(le16_to_cpu(entry->version));
+
+               if (entry_major == sb_major)
+                       continue;
+
+               prt_printf(err, "downgrade entry with mismatched major version (%u != %u)",
+                          entry_major, sb_major);
+               return -BCH_ERR_invalid_sb_downgrade;
+       }
+
+       return 0;
+}
+
+/*
+ * Print the downgrade section to @out: for each entry, the version it is keyed
+ * on, the recovery passes from recovery_passes[0] (the second word is not
+ * printed) and the comma-separated names of its errors. Error IDs that are not
+ * below BCH_SB_ERR_MAX are printed as "(unknown)".
+ */
+static void bch2_sb_downgrade_to_text(struct printbuf *out, struct bch_sb *sb,
+                                     struct bch_sb_field *f)
+{
+       struct bch_sb_field_downgrade *d = field_to_type(f, downgrade);
+
+       if (out->nr_tabstops <= 1)
+               printbuf_tabstop_push(out, 16);
+
+       for_each_downgrade_entry(d, entry) {
+               u64 passes = le64_to_cpu(entry->recovery_passes[0]);
+               unsigned nr_errors = le16_to_cpu(entry->nr_errors);
+
+               prt_str(out, "version:");
+               prt_tab(out);
+               bch2_version_to_text(out, le16_to_cpu(entry->version));
+               prt_newline(out);
+
+               prt_str(out, "recovery passes:");
+               prt_tab(out);
+               prt_bitflags(out, bch2_recovery_passes,
+                            bch2_recovery_passes_from_stable(passes));
+               prt_newline(out);
+
+               prt_str(out, "errors:");
+               prt_tab(out);
+               for (unsigned idx = 0; idx < nr_errors; idx++) {
+                       unsigned err_id = le16_to_cpu(entry->errors[idx]);
+
+                       /* Separator goes before every name except the first */
+                       if (idx)
+                               prt_char(out, ',');
+                       prt_str(out, err_id < BCH_SB_ERR_MAX
+                               ? bch2_sb_error_strs[err_id]
+                               : "(unknown)");
+               }
+               prt_newline(out);
+       }
+}
+
+const struct bch_sb_field_ops bch_sb_field_ops_downgrade = {   /* hooks for the downgrade section */
+       .validate       = bch2_sb_downgrade_validate,   /* rejects entries from another major version */
+       .to_text        = bch2_sb_downgrade_to_text,    /* prints version, passes and errors per entry */
+};
+
+/*
+ * Regenerate the superblock's downgrade section from the compiled-in
+ * downgrade_table[].
+ *
+ * Only rows whose major version matches the superblock's are emitted. If the
+ * superblock already carries a downgrade section larger than the one that
+ * would be written, it is left untouched.
+ *
+ * NOTE(review): recovery_passes is stored without conversion, whereas the
+ * readers of this section decode it with bch2_recovery_passes_from_stable() -
+ * confirm that DOWNGRADE_TABLE() rows are meant to use the stable numbering.
+ *
+ * Returns 0 on success, the darray_make_room() error if the scratch buffer
+ * cannot grow, or -BCH_ERR_ENOSPC_sb_downgrade if the section cannot be
+ * resized.
+ */
+int bch2_sb_downgrade_update(struct bch_fs *c)
+{
+       const struct downgrade_entry *table_end =
+               downgrade_table + ARRAY_SIZE(downgrade_table);
+       darray_char buf = {};
+       int ret = 0;
+
+       for (const struct downgrade_entry *row = downgrade_table;
+            row != table_end;
+            row++) {
+               if (BCH_VERSION_MAJOR(row->version) != BCH_VERSION_MAJOR(le16_to_cpu(c->disk_sb.sb->version)))
+                       continue;
+
+               struct bch_sb_field_downgrade_entry *entry;
+               unsigned entry_bytes = sizeof(*entry) + row->nr_errors * sizeof(entry->errors[0]);
+
+               ret = darray_make_room(&buf, entry_bytes);
+               if (ret)
+                       goto out;
+
+               /* Build the on-disk entry in place at the end of the buffer */
+               entry = (void *) &darray_top(buf);
+               entry->version                  = cpu_to_le16(row->version);
+               entry->recovery_passes[0]       = cpu_to_le64(row->recovery_passes);
+               entry->recovery_passes[1]       = 0;
+               entry->nr_errors                = cpu_to_le16(row->nr_errors);
+               for (unsigned idx = 0; idx < row->nr_errors; idx++)
+                       entry->errors[idx] = cpu_to_le16(row->errors[idx]);
+
+               buf.nr += entry_bytes;
+       }
+
+       struct bch_sb_field_downgrade *d = bch2_sb_field_get(c->disk_sb.sb, downgrade);
+
+       /* Section size in u64s: header plus entries, rounded up */
+       unsigned sb_u64s = DIV_ROUND_UP(sizeof(*d) + buf.nr, sizeof(u64));
+
+       /* An existing, larger section is kept as it is */
+       if (d && le32_to_cpu(d->field.u64s) > sb_u64s)
+               goto out;
+
+       d = bch2_sb_field_resize(&c->disk_sb, downgrade, sb_u64s);
+       if (!d) {
+               ret = -BCH_ERR_ENOSPC_sb_downgrade;
+               goto out;
+       }
+
+       memcpy(d->entries, buf.data, buf.nr);
+       /* presumably zeroes the padding up to the next u64 boundary - confirm */
+       memset_u64s_tail(d->entries, 0, buf.nr);
+out:
+       darray_exit(&buf);
+       return ret;
+}
+
+/*
+ * Apply the downgrade section for a downgrade from minor version @old_minor to
+ * @new_minor: every entry whose minor version lies in (new_minor, old_minor]
+ * has its recovery passes OR'd into the ext section's
+ * recovery_passes_required[], and its errors flagged as silent both in
+ * c->sb.errors_silent and in the ext section's errors_silent bitmap.
+ *
+ * Does nothing if the superblock has no downgrade section.
+ *
+ * NOTE(review): ext is not NULL-checked here; the caller visible in
+ * recovery.c dereferences ext before calling - confirm for other callers.
+ */
+void bch2_sb_set_downgrade(struct bch_fs *c, unsigned new_minor, unsigned old_minor)
+{
+       struct bch_sb_field_downgrade *d = bch2_sb_field_get(c->disk_sb.sb, downgrade);
+       if (!d)
+               return;
+
+       struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext);
+
+       for_each_downgrade_entry(d, entry) {
+               unsigned entry_minor = BCH_VERSION_MINOR(le16_to_cpu(entry->version));
+
+               /* Skip entries for versions we are not stepping back past */
+               if (entry_minor <= new_minor || entry_minor > old_minor)
+                       continue;
+
+               ext->recovery_passes_required[0] |= entry->recovery_passes[0];
+               ext->recovery_passes_required[1] |= entry->recovery_passes[1];
+
+               unsigned nr_errors = le16_to_cpu(entry->nr_errors);
+
+               for (unsigned idx = 0; idx < nr_errors; idx++) {
+                       unsigned err_id = le16_to_cpu(entry->errors[idx]);
+
+                       /* Each bitmap has its own bound, so check them separately */
+                       if (err_id < BCH_SB_ERR_MAX)
+                               __set_bit(err_id, c->sb.errors_silent);
+                       if (err_id < sizeof(ext->errors_silent) * 8)
+                               ext->errors_silent[err_id / 64] |= cpu_to_le64(BIT_ULL(err_id % 64));
+               }
+       }
+}
diff --git a/fs/bcachefs/sb-downgrade.h b/fs/bcachefs/sb-downgrade.h
new file mode 100644 (file)
index 0000000..bc48fd2
--- /dev/null
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_SB_DOWNGRADE_H
+#define _BCACHEFS_SB_DOWNGRADE_H
+
+extern const struct bch_sb_field_ops bch_sb_field_ops_downgrade;
+
+int bch2_sb_downgrade_update(struct bch_fs *);
+void bch2_sb_set_downgrade(struct bch_fs *, unsigned, unsigned);
+
+#endif /* _BCACHEFS_SB_DOWNGRADE_H */
index caf7669db6a1766bf0a0ef2c769a5e70188f77c6..5f5bcae391fb9fcde0d306870adbb6779e06cda1 100644 (file)
@@ -20,9 +20,7 @@ static void bch2_sb_error_id_to_text(struct printbuf *out, enum bch_sb_error_id
 
 static inline unsigned bch2_sb_field_errors_nr_entries(struct bch_sb_field_errors *e)
 {
-       return e
-               ? (bch2_sb_field_bytes(&e->field) - sizeof(*e)) / sizeof(e->entries[0])
-               : 0;
+       return bch2_sb_field_nr_entries(e);
 }
 
 static inline unsigned bch2_sb_field_errors_u64s(unsigned nr)
index e085d3b021e847173f438cc95a179377d5f061df..4c98d8cc2a7976cd933c2656a2a1ad07979e8244 100644 (file)
@@ -13,6 +13,7 @@
 #include "replicas.h"
 #include "quota.h"
 #include "sb-clean.h"
+#include "sb-downgrade.h"
 #include "sb-errors.h"
 #include "sb-members.h"
 #include "super-io.h"
@@ -939,6 +940,7 @@ int bch2_write_super(struct bch_fs *c)
        bch2_sb_members_from_cpu(c);
        bch2_sb_members_cpy_v2_v1(&c->disk_sb);
        bch2_sb_errors_from_cpu(c);
+       bch2_sb_downgrade_update(c);
 
        for_each_online_member(ca, c, i)
                bch2_sb_from_fs(c, ca);
@@ -1062,8 +1064,10 @@ void __bch2_check_set_feature(struct bch_fs *c, unsigned feat)
 }
 
 /* Downgrade if superblock is at a higher version than currently supported: */
-void bch2_sb_maybe_downgrade(struct bch_fs *c)
+bool bch2_check_version_downgrade(struct bch_fs *c)
 {
+       bool ret = bcachefs_metadata_version_current < c->sb.version;
+
        lockdep_assert_held(&c->sb_lock);
 
        /*
@@ -1077,12 +1081,17 @@ void bch2_sb_maybe_downgrade(struct bch_fs *c)
        if (c->sb.version_min > bcachefs_metadata_version_current)
                c->disk_sb.sb->version_min = cpu_to_le16(bcachefs_metadata_version_current);
        c->disk_sb.sb->compat[0] &= cpu_to_le64((1ULL << BCH_COMPAT_NR) - 1);
+       return ret;
 }
 
 void bch2_sb_upgrade(struct bch_fs *c, unsigned new_version)
 {
        lockdep_assert_held(&c->sb_lock);
 
+       if (BCH_VERSION_MAJOR(new_version) >
+           BCH_VERSION_MAJOR(le16_to_cpu(c->disk_sb.sb->version)))
+               bch2_sb_field_resize(&c->disk_sb, downgrade, 0);
+
        c->disk_sb.sb->version = cpu_to_le16(new_version);
        c->disk_sb.sb->features[0] |= cpu_to_le64(BCH_SB_FEATURES_ALL);
 }
index 589509ebe9969f257c6b4f193fec0c3ad1e39299..e41e5de531a0a254d7e9d2ee20f050c5415d19d2 100644 (file)
@@ -93,7 +93,7 @@ static inline void bch2_check_set_feature(struct bch_fs *c, unsigned feat)
                __bch2_check_set_feature(c, feat);
 }
 
-void bch2_sb_maybe_downgrade(struct bch_fs *);
+bool bch2_check_version_downgrade(struct bch_fs *);
 void bch2_sb_upgrade(struct bch_fs *, unsigned);
 
 void bch2_sb_field_to_text(struct printbuf *, struct bch_sb *,