bcachefs: Go RW before check_alloc_info()
author Kent Overstreet <kent.overstreet@linux.dev>
Mon, 12 Dec 2022 00:14:30 +0000 (19:14 -0500)
committer Kent Overstreet <kent.overstreet@linux.dev>
Sun, 22 Oct 2023 21:09:50 +0000 (17:09 -0400)
It's possible to do btree updates before going RW by adding them to the
list of updates for journal replay to do, but this is limited by what
fits in RAM. This patch switches the second alloc info phase
(bch2_check_alloc_info(), which checks the need_discard and freespace
btrees) to run after going RW - btree_gc has already ensured the alloc
btree itself is correct - and tweaks the allocation path to deal with
the small inconsistencies those btrees may have until the check is done.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
fs/bcachefs/alloc_background.c
fs/bcachefs/alloc_foreground.c
fs/bcachefs/bcachefs.h
fs/bcachefs/recovery.c

index 1db0b6253661ce24deeae4947b846dfd4b9203f9..f75d05beaf3146b85f0a7b0e839578a341d6028f 100644 (file)
@@ -583,6 +583,7 @@ static int bch2_bucket_do_index(struct btree_trans *trans,
                goto err;
 
        if (ca->mi.freespace_initialized &&
+           test_bit(BCH_FS_CHECK_ALLOC_DONE, &c->flags) &&
            bch2_trans_inconsistent_on(old.k->type != old_type, trans,
                        "incorrect key when %s %s btree (got %s should be %s)\n"
                        "  for %s",
@@ -1028,21 +1029,28 @@ static int bch2_discard_one_bucket(struct btree_trans *trans,
                goto write;
        }
 
-       if (bch2_trans_inconsistent_on(a->v.journal_seq > c->journal.flushed_seq_ondisk, trans,
-                       "clearing need_discard but journal_seq %llu > flushed_seq %llu\n"
-                       "%s",
-                       a->v.journal_seq,
-                       c->journal.flushed_seq_ondisk,
-                       (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
-               ret = -EIO;
+       if (a->v.journal_seq > c->journal.flushed_seq_ondisk) {
+               if (test_bit(BCH_FS_CHECK_ALLOC_DONE, &c->flags)) {
+                       bch2_trans_inconsistent(trans,
+                               "clearing need_discard but journal_seq %llu > flushed_seq %llu\n"
+                               "%s",
+                               a->v.journal_seq,
+                               c->journal.flushed_seq_ondisk,
+                               (bch2_bkey_val_to_text(&buf, c, k), buf.buf));
+                       ret = -EIO;
+               }
                goto out;
        }
 
-       if (bch2_trans_inconsistent_on(a->v.data_type != BCH_DATA_need_discard, trans,
-                       "bucket incorrectly set in need_discard btree\n"
-                       "%s",
-                       (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
-               ret = -EIO;
+       if (a->v.data_type != BCH_DATA_need_discard) {
+               if (test_bit(BCH_FS_CHECK_ALLOC_DONE, &c->flags)) {
+                       bch2_trans_inconsistent(trans,
+                               "bucket incorrectly set in need_discard btree\n"
+                               "%s",
+                               (bch2_bkey_val_to_text(&buf, c, k), buf.buf));
+                       ret = -EIO;
+               }
+
                goto out;
        }
 
index 534dbf197d585a4f46105101aff74f2f64412e95..ba14cfe06515ca1af116ebe22b4256cfa6ee2ac2 100644 (file)
@@ -316,28 +316,34 @@ static struct open_bucket *try_alloc_bucket(struct btree_trans *trans, struct bc
 
        a = bch2_alloc_to_v4(k, &a_convert);
 
-       if (genbits != (alloc_freespace_genbits(*a) >> 56)) {
-               prt_printf(&buf, "bucket in freespace btree with wrong genbits (got %u should be %llu)\n"
-                      "  freespace key ",
-                      genbits, alloc_freespace_genbits(*a) >> 56);
+       if (a->data_type != BCH_DATA_free) {
+               if (!test_bit(BCH_FS_CHECK_ALLOC_DONE, &c->flags)) {
+                       ob = NULL;
+                       goto err;
+               }
+
+               prt_printf(&buf, "non free bucket in freespace btree\n"
+                      "  freespace key ");
                bch2_bkey_val_to_text(&buf, c, freespace_k);
                prt_printf(&buf, "\n  ");
                bch2_bkey_val_to_text(&buf, c, k);
                bch2_trans_inconsistent(trans, "%s", buf.buf);
                ob = ERR_PTR(-EIO);
                goto err;
-
        }
 
-       if (a->data_type != BCH_DATA_free) {
-               prt_printf(&buf, "non free bucket in freespace btree\n"
-                      "  freespace key ");
+       if (genbits != (alloc_freespace_genbits(*a) >> 56) &&
+           test_bit(BCH_FS_CHECK_ALLOC_DONE, &c->flags)) {
+               prt_printf(&buf, "bucket in freespace btree with wrong genbits (got %u should be %llu)\n"
+                      "  freespace key ",
+                      genbits, alloc_freespace_genbits(*a) >> 56);
                bch2_bkey_val_to_text(&buf, c, freespace_k);
                prt_printf(&buf, "\n  ");
                bch2_bkey_val_to_text(&buf, c, k);
                bch2_trans_inconsistent(trans, "%s", buf.buf);
                ob = ERR_PTR(-EIO);
                goto err;
+
        }
 
        ob = __try_alloc_bucket(c, ca, b, reserve, a, s, cl);
@@ -505,6 +511,7 @@ static struct open_bucket *bch2_bucket_alloc_trans(struct btree_trans *trans,
 {
        struct bch_fs *c = trans->c;
        struct open_bucket *ob = NULL;
+       bool freespace = READ_ONCE(ca->mi.freespace_initialized);
        u64 avail;
        struct bucket_alloc_state s = { 0 };
        bool waiting = false;
@@ -543,13 +550,18 @@ again:
                if (ob)
                        return ob;
        }
-
-       ob = likely(ca->mi.freespace_initialized)
+alloc:
+       ob = likely(freespace)
                ? bch2_bucket_alloc_freelist(trans, ca, reserve, &s, cl)
                : bch2_bucket_alloc_early(trans, ca, reserve, &s, cl);
 
        if (s.skipped_need_journal_commit * 2 > avail)
                bch2_journal_flush_async(&c->journal, NULL);
+
+       if (!ob && freespace && !test_bit(BCH_FS_CHECK_ALLOC_DONE, &c->flags)) {
+               freespace = false;
+               goto alloc;
+       }
 err:
        if (!ob)
                ob = ERR_PTR(-BCH_ERR_no_buckets_found);
index 56bc58a7bfcf6ef3ce738d260c30b0ec6174f8bd..ad3bf019487ec7fa5d36e22267a8a3ea2bdde428 100644 (file)
@@ -549,6 +549,7 @@ enum {
        /* fsck passes: */
        BCH_FS_TOPOLOGY_REPAIR_DONE,
        BCH_FS_INITIAL_GC_DONE,         /* kill when we enumerate fsck passes */
+       BCH_FS_CHECK_ALLOC_DONE,
        BCH_FS_CHECK_LRUS_DONE,
        BCH_FS_CHECK_ALLOC_TO_LRU_REFS_DONE,
        BCH_FS_FSCK_DONE,
index ebdf9f754e081b7039bc74f981a2603529c0d4a3..61890755d33517de71a639c779fb9cf40bca16a9 100644 (file)
@@ -1260,13 +1260,6 @@ use_clean:
 
                set_bit(BCH_FS_INITIAL_GC_DONE, &c->flags);
 
-               bch_info(c, "checking need_discard and freespace btrees");
-               err = "error checking need_discard and freespace btrees";
-               ret = bch2_check_alloc_info(c);
-               if (ret)
-                       goto err;
-               bch_verbose(c, "done checking need_discard and freespace btrees");
-
                if (c->sb.version < bcachefs_metadata_version_snapshot_2) {
                        err = "error creating root snapshot node";
                        ret = bch2_fs_initialize_subvolumes(c);
@@ -1291,6 +1284,15 @@ use_clean:
                if (c->opts.verbose || !c->sb.clean)
                        bch_info(c, "journal replay done");
 
+               bch_info(c, "checking need_discard and freespace btrees");
+               err = "error checking need_discard and freespace btrees";
+               ret = bch2_check_alloc_info(c);
+               if (ret)
+                       goto err;
+               bch_verbose(c, "done checking need_discard and freespace btrees");
+
+               set_bit(BCH_FS_CHECK_ALLOC_DONE, &c->flags);
+
                bch_info(c, "checking lrus");
                err = "error checking lrus";
                ret = bch2_check_lrus(c);
@@ -1308,6 +1310,7 @@ use_clean:
                set_bit(BCH_FS_CHECK_ALLOC_TO_LRU_REFS_DONE, &c->flags);
        } else {
                set_bit(BCH_FS_INITIAL_GC_DONE, &c->flags);
+               set_bit(BCH_FS_CHECK_ALLOC_DONE, &c->flags);
                set_bit(BCH_FS_CHECK_LRUS_DONE, &c->flags);
                set_bit(BCH_FS_CHECK_ALLOC_TO_LRU_REFS_DONE, &c->flags);
                set_bit(BCH_FS_FSCK_DONE, &c->flags);