bcachefs: Fsck for need_discard & freespace btrees
author Kent Overstreet <kent.overstreet@gmail.com>
Thu, 17 Feb 2022 08:11:39 +0000 (03:11 -0500)
committer Kent Overstreet <kent.overstreet@linux.dev>
Sun, 22 Oct 2023 21:09:29 +0000 (17:09 -0400)
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
fs/bcachefs/alloc_background.c
fs/bcachefs/alloc_background.h
fs/bcachefs/lru.c
fs/bcachefs/lru.h
fs/bcachefs/recovery.c

index fac9337dc5434df2bfd17bc98a3ae7d09b95f228..e8de96e4adf31f81846c5d1737fb89c9707c28a5 100644 (file)
@@ -580,6 +580,332 @@ int bch2_trans_mark_alloc(struct btree_trans *trans,
        return 0;
 }
 
+/*
+ * Cross-check a single alloc key against the need_discard and freespace
+ * btrees.
+ *
+ * For the key that @alloc_iter peeks, the need_discard btree slot at the same
+ * position must hold a KEY_TYPE_set key when the bucket state is
+ * BUCKET_need_discard and key type 0 otherwise; likewise the freespace btree
+ * slot at alloc_freespace_pos() must hold KEY_TYPE_set when the state is
+ * BUCKET_free and key type 0 otherwise.  A mismatch is reported through
+ * fsck_err_on(); when that evaluates true a replacement key of the expected
+ * type is queued with bch2_trans_update().
+ *
+ * No commit is issued here; the caller is expected to commit @trans.
+ *
+ * Returns 0 on success (also when the iterator yields no key), nonzero on
+ * error.
+ */
+static int bch2_check_alloc_key(struct btree_trans *trans,
+                               struct btree_iter *alloc_iter)
+{
+       struct bch_fs *c = trans->c;
+       struct btree_iter discard_iter, freespace_iter;
+       struct bch_alloc_v4 a;
+       unsigned discard_key_type, freespace_key_type;
+       struct bkey_s_c alloc_k, k;
+       struct printbuf buf = PRINTBUF;
+       int ret;
+
+       alloc_k = bch2_btree_iter_peek(alloc_iter);
+       if (!alloc_k.k)
+               return 0;
+
+       ret = bkey_err(alloc_k);
+       if (ret)
+               return ret;
+
+       /* Work out which key type each index btree should hold. */
+       bch2_alloc_to_v4(alloc_k, &a);
+       discard_key_type = bucket_state(a) == BUCKET_need_discard
+               ? KEY_TYPE_set : 0;
+       freespace_key_type = bucket_state(a) == BUCKET_free
+               ? KEY_TYPE_set : 0;
+
+       /*
+        * Both iterators are initialized before the first possible jump to
+        * the cleanup labels, so the exit path may always release both.
+        */
+       bch2_trans_iter_init(trans, &discard_iter, BTREE_ID_need_discard,
+                            alloc_k.k->p, 0);
+       bch2_trans_iter_init(trans, &freespace_iter, BTREE_ID_freespace,
+                            alloc_freespace_pos(alloc_k.k->p, a), 0);
+
+       /* need_discard btree */
+       k = bch2_btree_iter_peek_slot(&discard_iter);
+       ret = bkey_err(k);
+       if (ret)
+               goto err;
+
+       if (fsck_err_on(k.k->type != discard_key_type, c,
+                       "incorrect key in need_discard btree (got %s should be %s)\n"
+                       "  %s",
+                       bch2_bkey_types[k.k->type],
+                       bch2_bkey_types[discard_key_type],
+                       (bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf))) {
+               struct bkey_i *update =
+                       bch2_trans_kmalloc(trans, sizeof(*update));
+
+               ret = PTR_ERR_OR_ZERO(update);
+               if (ret)
+                       goto err;
+
+               bkey_init(&update->k);
+               update->k.type  = discard_key_type;
+               update->k.p     = discard_iter.pos;
+
+               ret = bch2_trans_update(trans, &discard_iter, update, 0);
+               if (ret)
+                       goto err;
+       }
+
+       /* freespace btree */
+       k = bch2_btree_iter_peek_slot(&freespace_iter);
+       ret = bkey_err(k);
+       if (ret)
+               goto err;
+
+       if (fsck_err_on(k.k->type != freespace_key_type, c,
+                       "incorrect key in freespace btree (got %s should be %s)\n"
+                       "  %s",
+                       bch2_bkey_types[k.k->type],
+                       bch2_bkey_types[freespace_key_type],
+                       (printbuf_reset(&buf),
+                        bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf))) {
+               struct bkey_i *update =
+                       bch2_trans_kmalloc(trans, sizeof(*update));
+
+               ret = PTR_ERR_OR_ZERO(update);
+               if (ret)
+                       goto err;
+
+               bkey_init(&update->k);
+               update->k.type  = freespace_key_type;
+               update->k.p     = freespace_iter.pos;
+               /* Unlike the need_discard update, this key is given size 1. */
+               bch2_key_resize(&update->k, 1);
+
+               ret = bch2_trans_update(trans, &freespace_iter, update, 0);
+               if (ret)
+                       goto err;
+       }
+err:
+       /* presumably the label fsck_err_on() jumps to on failure - confirm */
+fsck_err:
+       bch2_trans_iter_exit(trans, &freespace_iter);
+       bch2_trans_iter_exit(trans, &discard_iter);
+       printbuf_exit(&buf);
+       return ret;
+}
+
+/*
+ * Return true if @pos refers to a bucket that exists in @c: pos.inode must
+ * index a populated entry of c->devs, and pos.offset must lie in that
+ * device's [first_bucket, nbuckets) range.
+ */
+static inline bool bch2_dev_bucket_exists(struct bch_fs *c, struct bpos pos)
+{
+       struct bch_dev *dev;
+
+       if (pos.inode >= c->sb.nr_devices)
+               return false;
+
+       if (!c->devs[pos.inode])
+               return false;
+
+       dev = bch_dev_bkey_exists(c, pos.inode);
+
+       if (pos.offset < dev->mi.first_bucket)
+               return false;
+
+       return pos.offset < dev->mi.nbuckets;
+}
+
+/*
+ * Validate one freespace btree key against the alloc btree.
+ *
+ * The key position's offset is split into a bucket number (low 56 bits) and
+ * generation bits (high 8 bits).  The key is deleted - after fsck_err_on()
+ * evaluates true - when the device/bucket does not exist, when the
+ * corresponding alloc key is not in state BUCKET_free, or when the generation
+ * bits differ from alloc_freespace_genbits().  Deletion is done by queueing
+ * an empty key of size 1 at the iterator position and committing @trans.
+ *
+ * @initial is not used by this function.
+ *
+ * Returns 1 when @freespace_iter has no further keys, 0 when a key was
+ * processed, or the error from a failed step.
+ */
+static int bch2_check_freespace_key(struct btree_trans *trans,
+                                   struct btree_iter *freespace_iter,
+                                   bool initial)
+{
+       struct bch_fs *c = trans->c;
+       struct printbuf buf = PRINTBUF;
+       struct btree_iter alloc_iter;
+       struct bkey_s_c fs_k, alloc_k;
+       struct bch_alloc_v4 a;
+       struct bkey_i *del;
+       struct bpos pos;
+       u64 genbits;
+       int ret;
+
+       fs_k = bch2_btree_iter_peek(freespace_iter);
+       if (!fs_k.k)
+               return 1;
+
+       ret = bkey_err(fs_k);
+       if (ret)
+               return ret;
+
+       /* Split the offset: top 8 bits are genbits, the rest the bucket. */
+       pos = freespace_iter->pos;
+       genbits = pos.offset & (~0ULL << 56);
+       pos.offset &= ~(~0ULL << 56);
+
+       bch2_trans_iter_init(trans, &alloc_iter, BTREE_ID_alloc, pos, 0);
+
+       if (fsck_err_on(!bch2_dev_bucket_exists(c, pos), c,
+                       "%llu:%llu set in freespace btree but device or bucket does not exist",
+                       pos.inode, pos.offset))
+               goto delete;
+
+       alloc_k = bch2_btree_iter_peek_slot(&alloc_iter);
+       ret = bkey_err(alloc_k);
+       if (ret)
+               goto out;
+
+       bch2_alloc_to_v4(alloc_k, &a);
+
+       if (fsck_err_on(bucket_state(a) != BUCKET_free ||
+                       genbits != alloc_freespace_genbits(a), c,
+                       "%s\n  incorrectly set in freespace index (free %u, genbits %llu should be %llu)",
+                       (bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf),
+                       bucket_state(a) == BUCKET_free,
+                       genbits >> 56, alloc_freespace_genbits(a) >> 56))
+               goto delete;
+
+       /* Key is consistent (or the error was not to be fixed). */
+       goto out;
+delete:
+       del = bch2_trans_kmalloc(trans, sizeof(*del));
+       ret = PTR_ERR_OR_ZERO(del);
+       if (ret)
+               goto out;
+
+       bkey_init(&del->k);
+       del->k.p = freespace_iter->pos;
+       bch2_key_resize(&del->k, 1);
+
+       ret   = bch2_trans_update(trans, freespace_iter, del, 0) ?:
+               bch2_trans_commit(trans, NULL, NULL, 0);
+out:
+fsck_err:
+       bch2_trans_iter_exit(trans, &alloc_iter);
+       printbuf_exit(&buf);
+       return ret;
+}
+
+/*
+ * Fsck pass over the alloc, need_discard and freespace btrees.
+ *
+ * First walks every alloc key and runs bch2_check_alloc_key() on it, skipping
+ * whole devices whose member info does not have freespace_initialized set.
+ * If that succeeds, walks the freespace btree running
+ * bch2_check_freespace_key() on each position until it returns nonzero.
+ *
+ * @initial is forwarded to bch2_check_freespace_key().
+ *
+ * Returns a negative error code, or 0 (positive results are folded to 0).
+ */
+int bch2_check_alloc_info(struct bch_fs *c, bool initial)
+{
+       struct btree_trans trans;
+       struct btree_iter iter;
+       struct bkey_s_c k;
+       int last_dev = -1;
+       int ret = 0;
+
+       bch2_trans_init(&trans, c, 0, 0);
+
+       /* Pass 1: alloc keys -> need_discard/freespace entries. */
+       for_each_btree_key(&trans, iter, BTREE_ID_alloc, POS_MIN,
+                          BTREE_ITER_PREFETCH, k, ret) {
+               if (k.k->p.inode != last_dev) {
+                       /* First key seen for this device: check its flag once. */
+                       if (!bch_dev_bkey_exists(c, k.k->p.inode)->mi.freespace_initialized) {
+                               /*
+                                * NOTE(review): if for_each_btree_key()
+                                * advances the iterator on continue, the key
+                                * at exactly POS(inode + 1, 0) is skipped -
+                                * confirm against the macro definition.
+                                */
+                               bch2_btree_iter_set_pos(&iter, POS(k.k->p.inode + 1, 0));
+                               continue;
+                       }
+
+                       last_dev = k.k->p.inode;
+               }
+
+               ret = __bch2_trans_do(&trans, NULL, NULL, 0,
+                       bch2_check_alloc_key(&trans, &iter));
+               if (ret)
+                       break;
+       }
+       bch2_trans_iter_exit(&trans, &iter);
+
+       if (!ret) {
+               /* Pass 2: freespace entries -> alloc keys. */
+               bch2_trans_iter_init(&trans, &iter, BTREE_ID_freespace, POS_MIN,
+                                    BTREE_ITER_PREFETCH);
+               for (;;) {
+                       ret = __bch2_trans_do(&trans, NULL, NULL, 0,
+                               bch2_check_freespace_key(&trans, &iter, initial));
+                       if (ret)
+                               break;
+
+                       bch2_btree_iter_set_pos(&iter, bpos_nosnap_successor(iter.pos));
+               }
+               bch2_trans_iter_exit(&trans, &iter);
+       }
+
+       bch2_trans_exit(&trans);
+
+       if (ret < 0)
+               return ret;
+       return 0;
+}
+
+/*
+ * Check that a cached bucket has a matching entry in the lru btree.
+ *
+ * Alloc keys whose bucket state is not BUCKET_cached are ignored.  For a
+ * cached bucket, the lru slot at POS(device, io_time[READ]) must be a
+ * KEY_TYPE_lru key whose idx equals the bucket offset, and io_time[READ] must
+ * be nonzero.  On a reported-and-to-be-fixed mismatch the lru entry is
+ * (re)created via bch2_lru_change(); if that changes the read time, the alloc
+ * key is rewritten with the new time using BTREE_TRIGGER_NORUN.
+ *
+ * Returns 0 on success (including "nothing to check"), nonzero on error.
+ */
+static int bch2_check_alloc_to_lru_ref(struct btree_trans *trans,
+                                      struct btree_iter *alloc_iter)
+{
+       struct bch_fs *c = trans->c;
+       struct printbuf buf = PRINTBUF;
+       struct printbuf buf2 = PRINTBUF;
+       struct btree_iter lru_iter;
+       struct bkey_i_alloc_v4 *new_a;
+       struct bkey_s_c alloc_k, lru_k;
+       struct bch_alloc_v4 a;
+       u64 old_read_time;
+       int ret;
+
+       alloc_k = bch2_btree_iter_peek(alloc_iter);
+       if (!alloc_k.k)
+               return 0;
+
+       ret = bkey_err(alloc_k);
+       if (ret)
+               return ret;
+
+       bch2_alloc_to_v4(alloc_k, &a);
+
+       /* Only cached buckets are expected to be on the lru. */
+       if (bucket_state(a) != BUCKET_cached)
+               return 0;
+
+       bch2_trans_iter_init(trans, &lru_iter, BTREE_ID_lru,
+                            POS(alloc_k.k->p.inode, a.io_time[READ]), 0);
+
+       lru_k = bch2_btree_iter_peek_slot(&lru_iter);
+       ret = bkey_err(lru_k);
+       if (ret)
+               goto out;
+
+       if (fsck_err_on(!a.io_time[READ], c,
+                       "cached bucket with read_time 0\n"
+                       "  %s",
+                       (printbuf_reset(&buf),
+                        bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf)))
+               goto repair;
+
+       if (fsck_err_on(lru_k.k->type != KEY_TYPE_lru ||
+                       le64_to_cpu(bkey_s_c_to_lru(lru_k).v->idx) != alloc_k.k->p.offset, c,
+                       "incorrect/missing lru entry\n"
+                       "  %s\n"
+                       "  %s",
+                       (printbuf_reset(&buf),
+                        bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf),
+                       (bch2_bkey_val_to_text(&buf2, c, lru_k), buf2.buf)))
+               goto repair;
+
+       goto out;
+repair:
+       old_read_time = a.io_time[READ];
+
+       /* A zero read time is replaced with the current read clock. */
+       if (!a.io_time[READ])
+               a.io_time[READ] = atomic64_read(&c->io_clock[READ].now);
+
+       ret = bch2_lru_change(trans,
+                             alloc_k.k->p.inode,
+                             alloc_k.k->p.offset,
+                             0, &a.io_time[READ]);
+       if (ret)
+               goto out;
+
+       /* Read time unchanged: the alloc key needs no update. */
+       if (a.io_time[READ] == old_read_time)
+               goto out;
+
+       new_a = bch2_alloc_to_v4_mut(trans, alloc_k);
+       ret = PTR_ERR_OR_ZERO(new_a);
+       if (ret)
+               goto out;
+
+       new_a->v.io_time[READ] = a.io_time[READ];
+       ret = bch2_trans_update(trans, alloc_iter,
+                               &new_a->k_i, BTREE_TRIGGER_NORUN);
+out:
+fsck_err:
+       bch2_trans_iter_exit(trans, &lru_iter);
+       printbuf_exit(&buf2);
+       printbuf_exit(&buf);
+       return ret;
+}
+
+/*
+ * Run bch2_check_alloc_to_lru_ref() on every key in the alloc btree, each in
+ * its own __bch2_trans_do() with BTREE_INSERT_NOFAIL|BTREE_INSERT_LAZY_RW.
+ * Stops at the first nonzero result.
+ *
+ * Returns a negative error code, or 0 (positive results are folded to 0).
+ */
+int bch2_check_alloc_to_lru_refs(struct bch_fs *c)
+{
+       struct btree_trans trans;
+       struct btree_iter iter;
+       struct bkey_s_c k;
+       int ret = 0;
+
+       bch2_trans_init(&trans, c, 0, 0);
+
+       for_each_btree_key(&trans, iter, BTREE_ID_alloc, POS_MIN,
+                          BTREE_ITER_PREFETCH, k, ret) {
+               ret = __bch2_trans_do(&trans, NULL, NULL,
+                                     BTREE_INSERT_NOFAIL|
+                                     BTREE_INSERT_LAZY_RW,
+                       bch2_check_alloc_to_lru_ref(&trans, &iter));
+               if (ret)
+                       break;
+       }
+
+       bch2_trans_iter_exit(&trans, &iter);
+       bch2_trans_exit(&trans);
+
+       if (ret < 0)
+               return ret;
+       return 0;
+}
+
 static int bch2_clear_need_discard(struct btree_trans *trans, struct bpos pos,
                                   struct bch_dev *ca, bool *discard_done)
 {
index d4883d3cd642dc823c992e1df8446c98cba4c6cf..d82e80218b8e4c47f8db0b1c8bf6f17355db520d 100644 (file)
@@ -113,6 +113,8 @@ int bch2_alloc_read(struct bch_fs *, bool, bool);
 
 int bch2_trans_mark_alloc(struct btree_trans *, struct bkey_s_c,
                          struct bkey_i *, unsigned);
+int bch2_check_alloc_info(struct bch_fs *, bool);
+int bch2_check_alloc_to_lru_refs(struct bch_fs *);
 void bch2_do_discards(struct bch_fs *);
 
 static inline bool should_invalidate_buckets(struct bch_dev *ca)
index 2ababca5efe5e699f6488b63a7a421d85e76ae25..4f0e6960e5977a33ccf763039db198b0e3967549 100644 (file)
@@ -1,10 +1,12 @@
 // SPDX-License-Identifier: GPL-2.0
 
 #include "bcachefs.h"
+#include "alloc_background.h"
 #include "btree_iter.h"
 #include "btree_update.h"
 #include "error.h"
 #include "lru.h"
+#include "recovery.h"
 
 const char *bch2_lru_invalid(const struct bch_fs *c, struct bkey_s_c k)
 {
@@ -117,3 +119,85 @@ int bch2_lru_change(struct btree_trans *trans, u64 id, u64 idx,
        return  lru_delete(trans, id, idx, old_time) ?:
                lru_set(trans, id, idx, new_time);
 }
+
+/*
+ * Validate one lru btree key against the alloc btree.
+ *
+ * The lru key's value holds a bucket index; the alloc key at
+ * POS(lru inode, idx) must be in state BUCKET_cached with io_time[READ] equal
+ * to the lru key's offset.  Otherwise, once fsck_err_on() evaluates true, an
+ * empty key is queued at the lru iterator position to delete the entry.
+ *
+ * @initial is not used by this function.
+ *
+ * Returns 0 on success (also when the iterator yields no key), nonzero on
+ * error.
+ */
+static int bch2_check_lru_key(struct btree_trans *trans,
+                             struct btree_iter *lru_iter, bool initial)
+{
+       struct bch_fs *c = trans->c;
+       struct printbuf lru_buf = PRINTBUF;
+       struct printbuf alloc_buf = PRINTBUF;
+       struct btree_iter alloc_iter;
+       struct bkey_s_c lru_k, alloc_k;
+       struct bch_alloc_v4 a;
+       u64 bucket;
+       int ret;
+
+       lru_k = bch2_btree_iter_peek(lru_iter);
+       if (!lru_k.k)
+               return 0;
+
+       ret = bkey_err(lru_k);
+       if (ret)
+               return ret;
+
+       bucket = le64_to_cpu(bkey_s_c_to_lru(lru_k).v->idx);
+
+       bch2_trans_iter_init(trans, &alloc_iter, BTREE_ID_alloc,
+                            POS(lru_k.k->p.inode, bucket), 0);
+
+       alloc_k = bch2_btree_iter_peek_slot(&alloc_iter);
+       ret = bkey_err(alloc_k);
+       if (ret)
+               goto out;
+
+       bch2_alloc_to_v4(alloc_k, &a);
+
+       if (fsck_err_on(bucket_state(a) != BUCKET_cached ||
+                       a.io_time[READ] != lru_k.k->p.offset, c,
+                       "incorrect lru entry %s\n"
+                       "  for %s",
+                       (bch2_bkey_val_to_text(&lru_buf, c, lru_k), lru_buf.buf),
+                       (bch2_bkey_val_to_text(&alloc_buf, c, alloc_k), alloc_buf.buf))) {
+               struct bkey_i *del = bch2_trans_kmalloc(trans, sizeof(*del));
+
+               ret = PTR_ERR_OR_ZERO(del);
+               if (ret)
+                       goto out;
+
+               /* A freshly initialized key at this position replaces the entry. */
+               bkey_init(&del->k);
+               del->k.p = lru_iter->pos;
+
+               ret = bch2_trans_update(trans, lru_iter, del, 0);
+       }
+out:
+fsck_err:
+       bch2_trans_iter_exit(trans, &alloc_iter);
+       printbuf_exit(&alloc_buf);
+       printbuf_exit(&lru_buf);
+       return ret;
+}
+
+/*
+ * Run bch2_check_lru_key() on every key in the lru btree, each inside its own
+ * __bch2_trans_do(), stopping at the first nonzero result.
+ *
+ * @initial is forwarded to bch2_check_lru_key().
+ *
+ * Returns the last result unmodified: 0 on success, nonzero on error.
+ */
+int bch2_check_lrus(struct bch_fs *c, bool initial)
+{
+       struct btree_trans trans;
+       struct btree_iter iter;
+       struct bkey_s_c k;
+       int ret = 0;
+
+       bch2_trans_init(&trans, c, 0, 0);
+
+       for_each_btree_key(&trans, iter, BTREE_ID_lru, POS_MIN,
+                          BTREE_ITER_PREFETCH, k, ret) {
+               ret = __bch2_trans_do(&trans, NULL, NULL, 0,
+                       bch2_check_lru_key(&trans, &iter, initial));
+               if (ret)
+                       break;
+       }
+
+       bch2_trans_iter_exit(&trans, &iter);
+       bch2_trans_exit(&trans);
+
+       return ret;
+}
index c3121cfee2859c6d41f8a3041cddcb068b18c911..4db6a8399332d3c3181255f8c5086bb8f4a61230 100644 (file)
@@ -12,4 +12,6 @@ void bch2_lru_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
 
 int bch2_lru_change(struct btree_trans *, u64, u64, u64, u64 *);
 
+int bch2_check_lrus(struct bch_fs *, bool);
+
 #endif /* _BCACHEFS_LRU_H */
index 50e5c5e852f76616c6cb492759e9ac26833bba88..14edc0bf51122b64eebb3b4b6be99e11936c8656 100644 (file)
@@ -16,6 +16,7 @@
 #include "journal_io.h"
 #include "journal_reclaim.h"
 #include "journal_seq_blacklist.h"
+#include "lru.h"
 #include "move.h"
 #include "quota.h"
 #include "recovery.h"
@@ -1166,13 +1167,26 @@ use_clean:
                bool metadata_only = c->opts.norecovery;
 
                bch_info(c, "checking allocations");
-               err = "error in mark and sweep";
+               err = "error checking allocations";
                ret = bch2_gc(c, true, metadata_only);
                if (ret)
                        goto err;
                bch_verbose(c, "done checking allocations");
        }
 
+       if (c->opts.fsck) {
+               bch_info(c, "checking need_discard and freespace btrees");
+               err = "error checking need_discard and freespace btrees";
+               ret = bch2_check_alloc_info(c, true);
+               if (ret)
+                       goto err;
+
+               ret = bch2_check_lrus(c, true);
+               if (ret)
+                       goto err;
+               bch_verbose(c, "done checking need_discard and freespace btrees");
+       }
+
        bch2_stripes_heap_start(c);
 
        clear_bit(BCH_FS_REBUILD_REPLICAS, &c->flags);
@@ -1202,6 +1216,19 @@ use_clean:
        if (ret)
                goto err;
 
+       if (c->opts.fsck) {
+               bch_info(c, "checking alloc to lru refs");
+               err = "error checking alloc to lru refs";
+               ret = bch2_check_alloc_to_lru_refs(c);
+               if (ret)
+                       goto err;
+
+               ret = bch2_check_lrus(c, true);
+               if (ret)
+                       goto err;
+               bch_verbose(c, "done checking alloc to lru refs");
+       }
+
        if (c->sb.version < bcachefs_metadata_version_snapshot_2) {
                bch2_fs_lazy_rw(c);