bcachefs: Redo checks for sufficient devices
author Kent Overstreet <kent.overstreet@gmail.com>
Sun, 7 Feb 2021 04:17:26 +0000 (23:17 -0500)
committer Kent Overstreet <kent.overstreet@linux.dev>
Sun, 22 Oct 2023 21:08:53 +0000 (17:08 -0400)
When the replicas mechanism was added to track which drives each piece of
data is replicated on, the check for whether we have sufficient devices was
never updated to make use of it. This patch finally does that.

Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
fs/bcachefs/bcachefs_ioctl.h
fs/bcachefs/opts.h
fs/bcachefs/replicas.c
fs/bcachefs/replicas.h
fs/bcachefs/super-io.c
fs/bcachefs/super.c
fs/bcachefs/sysfs.c

index 1bf834e317753948f3b3d86f9c79a46ec5b5fdbb..38c6ac96e12fbcd32ec99eac02e5c9a942fa2f97 100644 (file)
@@ -14,6 +14,9 @@
 #define BCH_FORCE_IF_DATA_DEGRADED     (1 << 2)
 #define BCH_FORCE_IF_METADATA_DEGRADED (1 << 3)
 
+#define BCH_FORCE_IF_LOST                      \
+       (BCH_FORCE_IF_DATA_LOST|                \
+        BCH_FORCE_IF_METADATA_LOST)
 #define BCH_FORCE_IF_DEGRADED                  \
        (BCH_FORCE_IF_DATA_DEGRADED|            \
         BCH_FORCE_IF_METADATA_DEGRADED)
index 01b93e7eb0277e0185cd3c25eb90459c068e8c90..01282314bacb79ea132a0a5ab26202b259c1b4af 100644 (file)
@@ -222,6 +222,11 @@ enum opt_type {
          OPT_BOOL(),                                                   \
          NO_SB_OPT,                    false,                          \
          NULL,         "Allow mounting in degraded mode")              \
+       x(very_degraded,                u8,                             \
+         OPT_MOUNT,                                                    \
+         OPT_BOOL(),                                                   \
+         NO_SB_OPT,                    false,                          \
+         NULL,         "Allow mounting in when data will be missing")  \
        x(discard,                      u8,                             \
          OPT_MOUNT|OPT_DEVICE,                                         \
          OPT_BOOL(),                                                   \
index bf1804c10bfbf8d620a795e9ece4344f75efc310..8003973b0400abf42b09241f3a07a7779467ffe6 100644 (file)
@@ -967,94 +967,48 @@ const struct bch_sb_field_ops bch_sb_field_ops_replicas_v0 = {
 
 /* Query replicas: */
 
-struct replicas_status __bch2_replicas_status(struct bch_fs *c,
-                                             struct bch_devs_mask online_devs)
+bool bch2_have_enough_devs(struct bch_fs *c, struct bch_devs_mask devs,
+                          unsigned flags, bool print)
 {
-       struct bch_sb_field_members *mi;
        struct bch_replicas_entry *e;
-       unsigned i, nr_online, nr_offline;
-       struct replicas_status ret;
-
-       memset(&ret, 0, sizeof(ret));
-
-       for (i = 0; i < ARRAY_SIZE(ret.replicas); i++)
-               ret.replicas[i].redundancy = INT_MAX;
-
-       mi = bch2_sb_get_members(c->disk_sb.sb);
+       bool ret = true;
 
        percpu_down_read(&c->mark_lock);
-
        for_each_cpu_replicas_entry(&c->replicas, e) {
-               if (e->data_type >= ARRAY_SIZE(ret.replicas))
-                       panic("e %p data_type %u\n", e, e->data_type);
+               unsigned i, nr_online = 0, dflags = 0;
+               bool metadata = e->data_type < BCH_DATA_user;
 
-               nr_online = nr_offline = 0;
+               for (i = 0; i < e->nr_devs; i++)
+                       nr_online += test_bit(e->devs[i], devs.d);
 
-               for (i = 0; i < e->nr_devs; i++) {
-                       BUG_ON(!bch2_dev_exists(c->disk_sb.sb, mi,
-                                               e->devs[i]));
+               if (nr_online < e->nr_required)
+                       dflags |= metadata
+                               ? BCH_FORCE_IF_METADATA_LOST
+                               : BCH_FORCE_IF_DATA_LOST;
 
-                       if (test_bit(e->devs[i], online_devs.d))
-                               nr_online++;
-                       else
-                               nr_offline++;
-               }
+               if (nr_online < e->nr_devs)
+                       dflags |= metadata
+                               ? BCH_FORCE_IF_METADATA_DEGRADED
+                               : BCH_FORCE_IF_DATA_DEGRADED;
 
-               ret.replicas[e->data_type].redundancy =
-                       min(ret.replicas[e->data_type].redundancy,
-                           (int) nr_online - (int) e->nr_required);
+               if (dflags & ~flags) {
+                       if (print) {
+                               char buf[100];
 
-               ret.replicas[e->data_type].nr_offline =
-                       max(ret.replicas[e->data_type].nr_offline,
-                           nr_offline);
-       }
+                               bch2_replicas_entry_to_text(&PBUF(buf), e);
+                               bch_err(c, "insufficient devices online (%u) for replicas entry %s",
+                                       nr_online, buf);
+                       }
+                       ret = false;
+                       break;
+               }
 
+       }
        percpu_up_read(&c->mark_lock);
 
-       for (i = 0; i < ARRAY_SIZE(ret.replicas); i++)
-               if (ret.replicas[i].redundancy == INT_MAX)
-                       ret.replicas[i].redundancy = 0;
-
        return ret;
 }
 
-struct replicas_status bch2_replicas_status(struct bch_fs *c)
-{
-       return __bch2_replicas_status(c, bch2_online_devs(c));
-}
-
-static bool have_enough_devs(struct replicas_status s,
-                            enum bch_data_type type,
-                            bool force_if_degraded,
-                            bool force_if_lost)
-{
-       return (!s.replicas[type].nr_offline || force_if_degraded) &&
-               (s.replicas[type].redundancy >= 0 || force_if_lost);
-}
-
-bool bch2_have_enough_devs(struct replicas_status s, unsigned flags)
-{
-       return (have_enough_devs(s, BCH_DATA_journal,
-                                flags & BCH_FORCE_IF_METADATA_DEGRADED,
-                                flags & BCH_FORCE_IF_METADATA_LOST) &&
-               have_enough_devs(s, BCH_DATA_btree,
-                                flags & BCH_FORCE_IF_METADATA_DEGRADED,
-                                flags & BCH_FORCE_IF_METADATA_LOST) &&
-               have_enough_devs(s, BCH_DATA_user,
-                                flags & BCH_FORCE_IF_DATA_DEGRADED,
-                                flags & BCH_FORCE_IF_DATA_LOST));
-}
-
-int bch2_replicas_online(struct bch_fs *c, bool meta)
-{
-       struct replicas_status s = bch2_replicas_status(c);
-
-       return (meta
-               ? min(s.replicas[BCH_DATA_journal].redundancy,
-                     s.replicas[BCH_DATA_btree].redundancy)
-               : s.replicas[BCH_DATA_user].redundancy) + 1;
-}
-
 unsigned bch2_dev_has_data(struct bch_fs *c, struct bch_dev *ca)
 {
        struct bch_replicas_entry *e;
index a16ef23bde8af4fdf7d9113601c504a04c5b3853..9c8fd3d9824767448f3dcdfe321ff4fd2be1f523 100644 (file)
@@ -39,19 +39,9 @@ static inline void bch2_replicas_entry_cached(struct bch_replicas_entry *e,
        e->devs[0]      = dev;
 }
 
-struct replicas_status {
-       struct {
-               int             redundancy;
-               unsigned        nr_offline;
-       }                       replicas[BCH_DATA_NR];
-};
-
-struct replicas_status __bch2_replicas_status(struct bch_fs *,
-                                             struct bch_devs_mask);
-struct replicas_status bch2_replicas_status(struct bch_fs *);
-bool bch2_have_enough_devs(struct replicas_status, unsigned);
-
-int bch2_replicas_online(struct bch_fs *, bool);
+bool bch2_have_enough_devs(struct bch_fs *, struct bch_devs_mask,
+                          unsigned, bool);
+
 unsigned bch2_dev_has_data(struct bch_fs *, struct bch_dev *);
 
 int bch2_replicas_gc_end(struct bch_fs *, int);
index 0356541c00e2e507b4bd57382e348555d0d8182b..767baab18807b68c297e6989cfdd11756f211d91 100644 (file)
@@ -770,15 +770,13 @@ int bch2_write_super(struct bch_fs *c)
        nr_wrote = dev_mask_nr(&sb_written);
 
        can_mount_with_written =
-               bch2_have_enough_devs(__bch2_replicas_status(c, sb_written),
-                                     BCH_FORCE_IF_DEGRADED);
+               bch2_have_enough_devs(c, sb_written, BCH_FORCE_IF_DEGRADED, false);
 
        for (i = 0; i < ARRAY_SIZE(sb_written.d); i++)
                sb_written.d[i] = ~sb_written.d[i];
 
        can_mount_without_written =
-               bch2_have_enough_devs(__bch2_replicas_status(c, sb_written),
-                                     BCH_FORCE_IF_DEGRADED);
+               bch2_have_enough_devs(c, sb_written, BCH_FORCE_IF_DEGRADED, false);
 
        /*
         * If we would be able to mount _without_ the devices we successfully
@@ -789,6 +787,7 @@ int bch2_write_super(struct bch_fs *c)
         * mount with the devices we did successfully write to:
         */
        if (bch2_fs_fatal_err_on(!nr_wrote ||
+                                !can_mount_with_written ||
                                 (can_mount_without_written &&
                                  !can_mount_with_written), c,
                "Unable to write superblock to sufficient devices"))
index e242b72155483ac65d5167532a4d5de1bad8f25f..e10e7e0c04540c18fadbf056a008081d5e225016 100644 (file)
@@ -1265,7 +1265,6 @@ bool bch2_dev_state_allowed(struct bch_fs *c, struct bch_dev *ca,
                            enum bch_member_state new_state, int flags)
 {
        struct bch_devs_mask new_online_devs;
-       struct replicas_status s;
        struct bch_dev *ca2;
        int i, nr_rw = 0, required;
 
@@ -1301,9 +1300,7 @@ bool bch2_dev_state_allowed(struct bch_fs *c, struct bch_dev *ca,
                new_online_devs = bch2_online_devs(c);
                __clear_bit(ca->dev_idx, new_online_devs.d);
 
-               s = __bch2_replicas_status(c, new_online_devs);
-
-               return bch2_have_enough_devs(s, flags);
+               return bch2_have_enough_devs(c, new_online_devs, flags, false);
        default:
                BUG();
        }
@@ -1311,14 +1308,18 @@ bool bch2_dev_state_allowed(struct bch_fs *c, struct bch_dev *ca,
 
 static bool bch2_fs_may_start(struct bch_fs *c)
 {
-       struct replicas_status s;
        struct bch_sb_field_members *mi;
        struct bch_dev *ca;
-       unsigned i, flags = c->opts.degraded
-               ? BCH_FORCE_IF_DEGRADED
-               : 0;
+       unsigned i, flags = 0;
+
+       if (c->opts.very_degraded)
+               flags |= BCH_FORCE_IF_DEGRADED|BCH_FORCE_IF_LOST;
 
-       if (!c->opts.degraded) {
+       if (c->opts.degraded)
+               flags |= BCH_FORCE_IF_DEGRADED;
+
+       if (!c->opts.degraded &&
+           !c->opts.very_degraded) {
                mutex_lock(&c->sb_lock);
                mi = bch2_sb_get_members(c->disk_sb.sb);
 
@@ -1338,9 +1339,7 @@ static bool bch2_fs_may_start(struct bch_fs *c)
                mutex_unlock(&c->sb_lock);
        }
 
-       s = bch2_replicas_status(c);
-
-       return bch2_have_enough_devs(s, flags);
+       return bch2_have_enough_devs(c, bch2_online_devs(c), flags, true);
 }
 
 static void __bch2_dev_read_only(struct bch_fs *c, struct bch_dev *ca)
index 8fdbeaf9df321229e0ef0579e872924b3505907a..49c19873ad6f0529196940a34318bdf0c81d49a1 100644 (file)
@@ -199,9 +199,6 @@ read_attribute(new_stripes);
 
 rw_attribute(pd_controllers_update_seconds);
 
-read_attribute(meta_replicas_have);
-read_attribute(data_replicas_have);
-
 read_attribute(io_timers_read);
 read_attribute(io_timers_write);
 
@@ -347,9 +344,6 @@ SHOW(bch2_fs)
 
        sysfs_print(promote_whole_extents,      c->promote_whole_extents);
 
-       sysfs_printf(meta_replicas_have, "%i",  bch2_replicas_online(c, true));
-       sysfs_printf(data_replicas_have, "%i",  bch2_replicas_online(c, false));
-
        /* Debugging: */
 
        if (attr == &sysfs_alloc_debug)
@@ -520,9 +514,6 @@ struct attribute *bch2_fs_files[] = {
        &sysfs_btree_node_size,
        &sysfs_btree_cache_size,
 
-       &sysfs_meta_replicas_have,
-       &sysfs_data_replicas_have,
-
        &sysfs_journal_write_delay_ms,
        &sysfs_journal_reclaim_delay_ms,