bcachefs: btree gc refactoring
author Kent Overstreet <kent.overstreet@gmail.com>
Sun, 21 Oct 2018 14:56:11 +0000 (10:56 -0400)
committer Kent Overstreet <kent.overstreet@linux.dev>
Sun, 22 Oct 2023 21:08:10 +0000 (17:08 -0400)
prep work for erasure coding: fold the runtime and initial (startup) GC
passes into a single btree walk. btree_type_has_ptrs() and
btree_node_has_ptrs() are replaced by bkey_type_needs_gc();
bch2_gc_mark_key() absorbs the initial-pass checks behind a new initial
argument; and a new bch2_gc_btrees() drives both bch2_gc() and
bch2_initial_gc(), taking over the journal-key marking that
bch2_journal_mark() used to do.
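
The resulting call flow, as a minimal sketch reconstructed from the hunks
below (neither caller is quoted verbatim):

	/* runtime mark and sweep, from bch2_gc(): no journal list */
	ret = bch2_gc_btrees(c, NULL, false);

	/* startup, from bch2_initial_gc(): additionally mark keys still
	 * sitting in the journal, replacing bch2_journal_mark() */
	ret = bch2_gc_btrees(c, journal, true);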

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
fs/bcachefs/bkey_methods.h
fs/bcachefs/btree_gc.c
fs/bcachefs/btree_gc.h
fs/bcachefs/btree_types.h
fs/bcachefs/journal.h
fs/bcachefs/journal_io.c
fs/bcachefs/journal_io.h

diff --git a/fs/bcachefs/bkey_methods.h b/fs/bcachefs/bkey_methods.h
index 989b577da928dc5035ad9b4c17eb987a789c8969..6ee774ba3d7a48c9fb1e1ea0ab0ea02a136ae14f 100644
@@ -19,17 +19,6 @@ static inline enum bkey_type bkey_type(unsigned level, enum btree_id id)
        return level ? BKEY_TYPE_BTREE : (enum bkey_type) id;
 }
 
-static inline bool btree_type_has_ptrs(enum bkey_type type)
-{
-       switch (type) {
-       case BKEY_TYPE_BTREE:
-       case BKEY_TYPE_EXTENTS:
-               return true;
-       default:
-               return false;
-       }
-}
-
 struct bch_fs;
 struct btree;
 struct bkey;
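
The helper deleted above is not lost: it reappears in btree_gc.c, below,
as a file-local predicate named for what it is actually used to decide.
Before and after, for reference:

	/* before: shared, in bkey_methods.h */
	static inline bool btree_type_has_ptrs(enum bkey_type type);

	/* after: private to btree_gc.c, same body */
	static bool bkey_type_needs_gc(enum bkey_type type);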
diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c
index d07a6b297078625a90a8098ce86bbe9c388a14af..757a170e75083eec5818d5fd8b2a76dab11a9e0d 100644
@@ -18,6 +18,7 @@
 #include "error.h"
 #include "extents.h"
 #include "journal.h"
+#include "journal_io.h"
 #include "keylist.h"
 #include "move.h"
 #include "replicas.h"
 #include <linux/rcupdate.h>
 #include <linux/sched/task.h>
 
+static inline void __gc_pos_set(struct bch_fs *c, struct gc_pos new_pos)
+{
+       preempt_disable();
+       write_seqcount_begin(&c->gc_pos_lock);
+       c->gc_pos = new_pos;
+       write_seqcount_end(&c->gc_pos_lock);
+       preempt_enable();
+}
+
+static inline void gc_pos_set(struct bch_fs *c, struct gc_pos new_pos)
+{
+       BUG_ON(gc_pos_cmp(new_pos, c->gc_pos) <= 0);
+       __gc_pos_set(c, new_pos);
+}
+
+/* range_checks - for validating min/max pos of each btree node: */
+
 struct range_checks {
        struct range_level {
                struct bpos     min;
@@ -91,6 +109,19 @@ static void btree_node_range_checks(struct bch_fs *c, struct btree *b,
        }
 }
 
+/* marking of btree keys/nodes: */
+
+static bool bkey_type_needs_gc(enum bkey_type type)
+{
+       switch (type) {
+       case BKEY_TYPE_BTREE:
+       case BKEY_TYPE_EXTENTS:
+               return true;
+       default:
+               return false;
+       }
+}
+
 u8 bch2_btree_key_recalc_oldest_gen(struct bch_fs *c, struct bkey_s_c k)
 {
        const struct bch_extent_ptr *ptr;
@@ -113,39 +144,8 @@ u8 bch2_btree_key_recalc_oldest_gen(struct bch_fs *c, struct bkey_s_c k)
        return max_stale;
 }
 
-/*
- * For runtime mark and sweep:
- */
-static u8 bch2_gc_mark_key(struct bch_fs *c, enum bkey_type type,
-                          struct bkey_s_c k, unsigned flags)
-{
-       struct gc_pos pos = { 0 };
-       u8 ret = 0;
-
-       switch (type) {
-       case BKEY_TYPE_BTREE:
-               bch2_mark_key(c, k, c->opts.btree_node_size,
-                             BCH_DATA_BTREE, pos, NULL,
-                             0, flags|
-                             BCH_BUCKET_MARK_MAY_MAKE_UNAVAILABLE|
-                             BCH_BUCKET_MARK_GC_LOCK_HELD);
-               break;
-       case BKEY_TYPE_EXTENTS:
-               bch2_mark_key(c, k, k.k->size, BCH_DATA_USER, pos, NULL,
-                             0, flags|
-                             BCH_BUCKET_MARK_MAY_MAKE_UNAVAILABLE|
-                             BCH_BUCKET_MARK_GC_LOCK_HELD);
-               ret = bch2_btree_key_recalc_oldest_gen(c, k);
-               break;
-       default:
-               BUG();
-       }
-
-       return ret;
-}
-
-int bch2_btree_mark_key_initial(struct bch_fs *c, enum bkey_type type,
-                               struct bkey_s_c k)
+static int bch2_btree_mark_ptrs_initial(struct bch_fs *c, enum bkey_type type,
+                                       struct bkey_s_c k)
 {
        enum bch_data_type data_type = type == BKEY_TYPE_BTREE
                ? BCH_DATA_BTREE : BCH_DATA_USER;
@@ -199,54 +199,90 @@ int bch2_btree_mark_key_initial(struct bch_fs *c, enum bkey_type type,
        }
        }
 
-       atomic64_set(&c->key_version,
-                    max_t(u64, k.k->version.lo,
-                          atomic64_read(&c->key_version)));
-
-       bch2_gc_mark_key(c, type, k, BCH_BUCKET_MARK_NOATOMIC);
+       if (k.k->version.lo > atomic64_read(&c->key_version))
+               atomic64_set(&c->key_version, k.k->version.lo);
 fsck_err:
        return ret;
 }
 
-static unsigned btree_gc_mark_node(struct bch_fs *c, struct btree *b)
+/*
+ * For runtime mark and sweep:
+ */
+static int bch2_gc_mark_key(struct bch_fs *c, enum bkey_type type,
+                           struct bkey_s_c k, bool initial)
+{
+       struct gc_pos pos = { 0 };
+       unsigned flags = initial ? BCH_BUCKET_MARK_NOATOMIC : 0;
+       int ret = 0;
+
+       switch (type) {
+       case BKEY_TYPE_BTREE:
+               if (initial) {
+                       ret = bch2_btree_mark_ptrs_initial(c, type, k);
+                       if (ret < 0)
+                               return ret;
+               }
+
+               bch2_mark_key(c, k, c->opts.btree_node_size,
+                             BCH_DATA_BTREE, pos, NULL,
+                             0, flags|
+                             BCH_BUCKET_MARK_MAY_MAKE_UNAVAILABLE|
+                             BCH_BUCKET_MARK_GC_LOCK_HELD);
+               break;
+       case BKEY_TYPE_EXTENTS:
+               if (initial) {
+                       ret = bch2_btree_mark_ptrs_initial(c, type, k);
+                       if (ret < 0)
+                               return ret;
+               }
+
+               bch2_mark_key(c, k, k.k->size, BCH_DATA_USER, pos, NULL,
+                             0, flags|
+                             BCH_BUCKET_MARK_MAY_MAKE_UNAVAILABLE|
+                             BCH_BUCKET_MARK_GC_LOCK_HELD);
+               ret = bch2_btree_key_recalc_oldest_gen(c, k);
+               break;
+       default:
+               break;
+       }
+
+       return ret;
+}
+
+static int btree_gc_mark_node(struct bch_fs *c, struct btree *b,
+                             bool initial)
 {
        enum bkey_type type = btree_node_type(b);
        struct btree_node_iter iter;
        struct bkey unpacked;
        struct bkey_s_c k;
        u8 stale = 0;
+       int ret;
 
-       if (btree_node_has_ptrs(b))
-               for_each_btree_node_key_unpack(b, k, &iter,
-                                              &unpacked) {
-                       bch2_bkey_debugcheck(c, b, k);
-                       stale = max(stale, bch2_gc_mark_key(c, type, k, 0));
-               }
+       if (!bkey_type_needs_gc(type))
+               return 0;
 
-       return stale;
-}
+       for_each_btree_node_key_unpack(b, k, &iter,
+                                      &unpacked) {
+               bch2_bkey_debugcheck(c, b, k);
 
-static inline void __gc_pos_set(struct bch_fs *c, struct gc_pos new_pos)
-{
-       preempt_disable();
-       write_seqcount_begin(&c->gc_pos_lock);
-       c->gc_pos = new_pos;
-       write_seqcount_end(&c->gc_pos_lock);
-       preempt_enable();
-}
+               ret = bch2_gc_mark_key(c, type, k, initial);
+               if (ret < 0)
+                       return ret;
 
-static inline void gc_pos_set(struct bch_fs *c, struct gc_pos new_pos)
-{
-       BUG_ON(gc_pos_cmp(new_pos, c->gc_pos) <= 0);
-       __gc_pos_set(c, new_pos);
+               stale = max_t(u8, stale, ret);
+       }
+
+       return stale;
 }
 
-static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id)
+static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id,
+                        bool initial)
 {
        struct btree_iter iter;
        struct btree *b;
        struct range_checks r;
-       unsigned depth = btree_id == BTREE_ID_EXTENTS ? 0 : 1;
+       unsigned depth = bkey_type_needs_gc(btree_id) ? 0 : 1;
        unsigned max_stale;
        int ret = 0;
 
@@ -257,8 +293,11 @@ static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id)
 
        /*
         * if expensive_debug_checks is on, run range_checks on all leaf nodes:
+        *
+        * and on startup, we have to read every btree node (XXX: only if it was
+        * an unclean shutdown)
         */
-       if (expensive_debug_checks(c))
+       if (initial || expensive_debug_checks(c))
                depth = 0;
 
        btree_node_range_checks_init(&r, depth);
@@ -269,22 +308,24 @@ static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id)
 
                bch2_verify_btree_nr_keys(b);
 
-               max_stale = btree_gc_mark_node(c, b);
+               max_stale = btree_gc_mark_node(c, b, initial);
 
                gc_pos_set(c, gc_pos_btree_node(b));
 
-               if (max_stale > 64)
-                       bch2_btree_node_rewrite(c, &iter,
-                                       b->data->keys.seq,
-                                       BTREE_INSERT_USE_RESERVE|
-                                       BTREE_INSERT_NOWAIT|
-                                       BTREE_INSERT_GC_LOCK_HELD);
-               else if (!btree_gc_rewrite_disabled(c) &&
-                        (btree_gc_always_rewrite(c) || max_stale > 16))
-                       bch2_btree_node_rewrite(c, &iter,
-                                       b->data->keys.seq,
-                                       BTREE_INSERT_NOWAIT|
-                                       BTREE_INSERT_GC_LOCK_HELD);
+               if (!initial) {
+                       if (max_stale > 64)
+                               bch2_btree_node_rewrite(c, &iter,
+                                               b->data->keys.seq,
+                                               BTREE_INSERT_USE_RESERVE|
+                                               BTREE_INSERT_NOWAIT|
+                                               BTREE_INSERT_GC_LOCK_HELD);
+                       else if (!btree_gc_rewrite_disabled(c) &&
+                                (btree_gc_always_rewrite(c) || max_stale > 16))
+                               bch2_btree_node_rewrite(c, &iter,
+                                               b->data->keys.seq,
+                                               BTREE_INSERT_NOWAIT|
+                                               BTREE_INSERT_GC_LOCK_HELD);
+               }
 
                bch2_btree_iter_cond_resched(&iter);
        }
@@ -296,13 +337,47 @@ static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id)
 
        b = c->btree_roots[btree_id].b;
        if (!btree_node_fake(b))
-               bch2_gc_mark_key(c, BKEY_TYPE_BTREE, bkey_i_to_s_c(&b->key), 0);
+               bch2_gc_mark_key(c, BKEY_TYPE_BTREE,
+                                bkey_i_to_s_c(&b->key), initial);
        gc_pos_set(c, gc_pos_btree_root(b->btree_id));
 
        mutex_unlock(&c->btree_root_lock);
        return 0;
 }
 
+static int bch2_gc_btrees(struct bch_fs *c, struct list_head *journal,
+                         bool initial)
+{
+       unsigned i;
+
+       for (i = 0; i < BTREE_ID_NR; i++) {
+               enum bkey_type type = bkey_type(0, i);
+
+               int ret = bch2_gc_btree(c, i, initial);
+               if (ret)
+                       return ret;
+
+               if (journal && bkey_type_needs_gc(type)) {
+                       struct bkey_i *k, *n;
+                       struct jset_entry *j;
+                       struct journal_replay *r;
+                       int ret;
+
+                       list_for_each_entry(r, journal, list)
+                               for_each_jset_key(k, n, j, &r->j) {
+                                       if (type == bkey_type(j->level, j->btree_id)) {
+                                               ret = bch2_gc_mark_key(c, type,
+                                                       bkey_i_to_s_c(k), initial);
+                                               if (ret < 0)
+                                                       return ret;
+                                       }
+                               }
+               }
+       }
+
+       return 0;
+}
+
 static void mark_metadata_sectors(struct bch_fs *c, struct bch_dev *ca,
                                  u64 start, u64 end,
                                  enum bch_data_type type,
@@ -525,6 +600,7 @@ void bch2_gc(struct bch_fs *c)
        struct bch_dev *ca;
        u64 start_time = local_clock();
        unsigned i;
+       int ret;
 
        /*
         * Walk _all_ references to buckets, and recompute them:
@@ -560,14 +636,11 @@ void bch2_gc(struct bch_fs *c)
 
        bch2_mark_superblocks(c);
 
-       /* Walk btree: */
-       for (i = 0; i < BTREE_ID_NR; i++) {
-               int ret = bch2_gc_btree(c, i);
-               if (ret) {
-                       bch_err(c, "btree gc failed: %d", ret);
-                       set_bit(BCH_FS_GC_FAILURE, &c->flags);
-                       goto out;
-               }
+       ret = bch2_gc_btrees(c, NULL, false);
+       if (ret) {
+               bch_err(c, "btree gc failed: %d", ret);
+               set_bit(BCH_FS_GC_FAILURE, &c->flags);
+               goto out;
        }
 
        bch2_mark_pending_btree_node_frees(c);
@@ -1009,58 +1082,9 @@ int bch2_gc_thread_start(struct bch_fs *c)
 
 /* Initial GC computes bucket marks during startup */
 
-static int bch2_initial_gc_btree(struct bch_fs *c, enum btree_id id)
-{
-       struct btree_iter iter;
-       struct btree *b;
-       struct range_checks r;
-       int ret = 0;
-
-       btree_node_range_checks_init(&r, 0);
-
-       gc_pos_set(c, gc_pos_btree(id, POS_MIN, 0));
-
-       if (!c->btree_roots[id].b)
-               return 0;
-
-       b = c->btree_roots[id].b;
-       if (!btree_node_fake(b))
-               ret = bch2_btree_mark_key_initial(c, BKEY_TYPE_BTREE,
-                                                 bkey_i_to_s_c(&b->key));
-       if (ret)
-               return ret;
-
-       /*
-        * We have to hit every btree node before starting journal replay, in
-        * order for the journal seq blacklist machinery to work:
-        */
-       for_each_btree_node(&iter, c, id, POS_MIN, BTREE_ITER_PREFETCH, b) {
-               btree_node_range_checks(c, b, &r);
-
-               if (btree_node_has_ptrs(b)) {
-                       struct btree_node_iter node_iter;
-                       struct bkey unpacked;
-                       struct bkey_s_c k;
-
-                       for_each_btree_node_key_unpack(b, k, &node_iter,
-                                                      &unpacked) {
-                               ret = bch2_btree_mark_key_initial(c,
-                                                       btree_node_type(b), k);
-                               if (ret)
-                                       goto err;
-                       }
-               }
-
-               bch2_btree_iter_cond_resched(&iter);
-       }
-err:
-       return bch2_btree_iter_unlock(&iter) ?: ret;
-}
-
 int bch2_initial_gc(struct bch_fs *c, struct list_head *journal)
 {
        unsigned iter = 0;
-       enum btree_id id;
        int ret = 0;
 
        down_write(&c->gc_lock);
@@ -1069,13 +1093,7 @@ again:
 
        bch2_mark_superblocks(c);
 
-       for (id = 0; id < BTREE_ID_NR; id++) {
-               ret = bch2_initial_gc_btree(c, id);
-               if (ret)
-                       goto err;
-       }
-
-       ret = bch2_journal_mark(c, journal);
+       ret = bch2_gc_btrees(c, journal, true);
        if (ret)
                goto err;
 
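A note on the depth computation in bch2_gc_btree() above: it now passes a
btree_id to bkey_type_needs_gc(), which takes an enum bkey_type. This
leans on the identity in bkey_type() (bkey_methods.h hunk above): at
level 0, bkey_type(0, id) == (enum bkey_type) id. Assuming that enum
layout, the old and new expressions agree for every btree_id:

	/* old */ unsigned depth = btree_id == BTREE_ID_EXTENTS ? 0 : 1;
	/* new */ unsigned depth = bkey_type_needs_gc(btree_id) ? 0 : 1;

	/* of the leaf key types, only BKEY_TYPE_EXTENTS needs GC, and
	 * BKEY_TYPE_BTREE is never a btree_id, so leaves are still walked
	 * (depth 0) only for the extents btree */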
diff --git a/fs/bcachefs/btree_gc.h b/fs/bcachefs/btree_gc.h
index 9d2b9d5953d2f3b26d441c28aac0b9f8d5fb70ce..54c6bc8459306356b2b8c2b6ec63d1fee2b25188 100644
@@ -12,8 +12,6 @@ void bch2_gc_thread_stop(struct bch_fs *);
 int bch2_gc_thread_start(struct bch_fs *);
 int bch2_initial_gc(struct bch_fs *, struct list_head *);
 u8 bch2_btree_key_recalc_oldest_gen(struct bch_fs *, struct bkey_s_c);
-int bch2_btree_mark_key_initial(struct bch_fs *, enum bkey_type,
-                               struct bkey_s_c);
 void bch2_mark_dev_superblock(struct bch_fs *, struct bch_dev *, unsigned);
 
 /*
diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h
index dd9660a9f12bdfbe768c879a0cae044528dcb669..467c619f7f6d5417f9e97f2eaa16e03678f124ba 100644
@@ -415,11 +415,6 @@ static inline const struct bkey_ops *btree_node_ops(struct btree *b)
        return &bch2_bkey_ops[btree_node_type(b)];
 }
 
-static inline bool btree_node_has_ptrs(struct btree *b)
-{
-       return btree_type_has_ptrs(btree_node_type(b));
-}
-
 static inline bool btree_node_is_extents(struct btree *b)
 {
        return btree_node_type(b) == BKEY_TYPE_EXTENTS;
diff --git a/fs/bcachefs/journal.h b/fs/bcachefs/journal.h
index f39b37e6e3d56ed30f430705eb6545c3c9a204ff..77cf39cc64ff57b42a5e152710f6b9083572893b 100644
@@ -355,10 +355,6 @@ static inline bool journal_flushes_device(struct bch_dev *ca)
        return true;
 }
 
-int bch2_journal_mark(struct bch_fs *, struct list_head *);
-void bch2_journal_entries_free(struct list_head *);
-int bch2_journal_replay(struct bch_fs *, struct list_head *);
-
 static inline void bch2_journal_set_replay_done(struct journal *j)
 {
        BUG_ON(!test_bit(JOURNAL_STARTED, &j->flags));
diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c
index 648c4ac58a2c3d613702af62a585a52c10574d52..3dc24b39022f8278733711d4f76f83c08a682759 100644
@@ -852,28 +852,6 @@ fsck_err:
 
 /* journal replay: */
 
-int bch2_journal_mark(struct bch_fs *c, struct list_head *list)
-{
-       struct bkey_i *k, *n;
-       struct jset_entry *j;
-       struct journal_replay *r;
-       int ret;
-
-       list_for_each_entry(r, list, list)
-               for_each_jset_key(k, n, j, &r->j) {
-                       enum bkey_type type = bkey_type(j->level, j->btree_id);
-                       struct bkey_s_c k_s_c = bkey_i_to_s_c(k);
-
-                       if (btree_type_has_ptrs(type)) {
-                               ret = bch2_btree_mark_key_initial(c, type, k_s_c);
-                               if (ret)
-                                       return ret;
-                       }
-               }
-
-       return 0;
-}
-
 int bch2_journal_replay(struct bch_fs *c, struct list_head *list)
 {
        struct journal *j = &c->journal;
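
The body of bch2_journal_mark() deleted here moved, nearly intact, into
bch2_gc_btrees() (btree_gc.c hunk above), with two changes: the predicate
is bkey_type_needs_gc() instead of btree_type_has_ptrs(), and journal
keys are now marked per btree, as each btree is walked, rather than in
one up-front pass after the walks. Schematically (braces compressed):

	/* old: single pass over the journal, after the btree walks */
	ret = bch2_journal_mark(c, journal);

	/* new: inside the per-btree loop of bch2_gc_btrees(), only the
	 * keys destined for the btree currently being walked */
	if (journal && bkey_type_needs_gc(type))
		list_for_each_entry(r, journal, list)
			for_each_jset_key(k, n, j, &r->j)
				if (type == bkey_type(j->level, j->btree_id))
					ret = bch2_gc_mark_key(c, type,
						bkey_i_to_s_c(k), initial);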
diff --git a/fs/bcachefs/journal_io.h b/fs/bcachefs/journal_io.h
index 35f90c96008a3a837af7368c9abbc12405952918..e19e549baf8a7d2c6f3d51d252cbba09f5aa4038 100644
@@ -37,6 +37,8 @@ static inline struct jset_entry *__jset_entry_type_next(struct jset *jset,
 
 int bch2_journal_set_seq(struct bch_fs *c, u64, u64);
 int bch2_journal_read(struct bch_fs *, struct list_head *);
+void bch2_journal_entries_free(struct list_head *);
+int bch2_journal_replay(struct bch_fs *, struct list_head *);
 
 int bch2_journal_entry_sectors(struct journal *);
 void bch2_journal_write(struct closure *);
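
Finally, the declarations for bch2_journal_entries_free() and
bch2_journal_replay() move here from journal.h, next to
bch2_journal_read(); this is also why btree_gc.c gains the journal_io.h
include in its first hunk (presumably for for_each_jset_key() and struct
journal_replay). journal_io.h now owns the whole read side of the
journal lifecycle:

	int bch2_journal_read(struct bch_fs *, struct list_head *);
	int bch2_journal_replay(struct bch_fs *, struct list_head *);
	void bch2_journal_entries_free(struct list_head *);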