bcachefs: add ability to run gc on metadata only
author Kent Overstreet <kent.overstreet@gmail.com>
Sat, 30 Mar 2019 02:22:45 +0000 (22:22 -0400)
committer Kent Overstreet <kent.overstreet@linux.dev>
Sun, 22 Oct 2023 21:08:20 +0000 (17:08 -0400)
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
fs/bcachefs/alloc_background.c
fs/bcachefs/btree_gc.c
fs/bcachefs/btree_gc.h
fs/bcachefs/buckets.c
fs/bcachefs/buckets.h
fs/bcachefs/buckets_types.h
fs/bcachefs/recovery.c
fs/bcachefs/sysfs.c

index b5f5c223e00880626c83446c888bc769868c109e..c6a909bdfc02e8cad6e9a90c6ec6b9d92caaef16 100644 (file)
@@ -292,8 +292,7 @@ int bch2_alloc_read(struct bch_fs *c, struct list_head *journal_replay_list)
        }
 
        percpu_down_write(&c->mark_lock);
-       for_each_member_device(ca, c, i)
-               bch2_dev_usage_from_buckets(c, ca);
+       bch2_dev_usage_from_buckets(c);
        percpu_up_write(&c->mark_lock);
 
        mutex_lock(&c->bucket_clock[READ].lock);
index 4119f48281fbc3de2cb59a8a70309dd1363cdef8..c572391c4dad9bed29ec70cc70ea5c7da452d85e 100644 (file)
@@ -204,7 +204,7 @@ static int btree_gc_mark_node(struct bch_fs *c, struct btree *b,
 }
 
 static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id,
-                        bool initial)
+                        bool initial, bool metadata_only)
 {
        struct btree_trans trans;
        struct btree_iter *iter;
@@ -224,7 +224,9 @@ static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id,
         * and on startup, we have to read every btree node (XXX: only if it was
         * an unclean shutdown)
         */
-       if (initial || expensive_debug_checks(c))
+       if (metadata_only)
+               depth = 1;
+       else if (initial || expensive_debug_checks(c))
                depth = 0;
 
        btree_node_range_checks_init(&r, depth);
@@ -280,7 +282,7 @@ static inline int btree_id_gc_phase_cmp(enum btree_id l, enum btree_id r)
 }
 
 static int bch2_gc_btrees(struct bch_fs *c, struct list_head *journal,
-                         bool initial)
+                         bool initial, bool metadata_only)
 {
        enum btree_id ids[BTREE_ID_NR];
        u8 max_stale;
@@ -294,11 +296,12 @@ static int bch2_gc_btrees(struct bch_fs *c, struct list_head *journal,
                enum btree_id id = ids[i];
                enum btree_node_type type = __btree_node_type(0, id);
 
-               int ret = bch2_gc_btree(c, id, initial);
+               int ret = bch2_gc_btree(c, id, initial, metadata_only);
                if (ret)
                        return ret;
 
-               if (journal && btree_node_type_needs_gc(type)) {
+               if (journal && !metadata_only &&
+                   btree_node_type_needs_gc(type)) {
                        struct bkey_i *k, *n;
                        struct jset_entry *j;
                        struct journal_replay *r;
@@ -476,11 +479,13 @@ static void bch2_gc_free(struct bch_fs *c)
        c->usage[1] = NULL;
 }
 
-static int bch2_gc_done(struct bch_fs *c, bool initial)
+static int bch2_gc_done(struct bch_fs *c,
+                       bool initial, bool metadata_only)
 {
        struct bch_dev *ca;
-       bool verify = !initial ||
-               (c->sb.compat & (1ULL << BCH_COMPAT_FEAT_ALLOC_INFO));
+       bool verify = !metadata_only &&
+               (!initial ||
+                (c->sb.compat & (1ULL << BCH_COMPAT_FEAT_ALLOC_INFO)));
        unsigned i;
        int ret = 0;
 
@@ -515,7 +520,7 @@ static int bch2_gc_done(struct bch_fs *c, bool initial)
 #define copy_fs_field(_f, _msg, ...)                                   \
        copy_field(_f, "fs has wrong " _msg, ##__VA_ARGS__)
 
-       {
+       if (!metadata_only) {
                struct genradix_iter dst_iter = genradix_iter_init(&c->stripes[0], 0);
                struct genradix_iter src_iter = genradix_iter_init(&c->stripes[1], 0);
                struct stripe *dst, *src;
@@ -567,26 +572,7 @@ static int bch2_gc_done(struct bch_fs *c, bool initial)
                }
        };
 
-       for_each_member_device(ca, c, i) {
-               unsigned nr = sizeof(struct bch_dev_usage) / sizeof(u64);
-               struct bch_dev_usage *dst = (void *)
-                       bch2_acc_percpu_u64s((void *) ca->usage[0], nr);
-               struct bch_dev_usage *src = (void *)
-                       bch2_acc_percpu_u64s((void *) ca->usage[1], nr);
-               unsigned b;
-
-               for (b = 0; b < BCH_DATA_NR; b++)
-                       copy_dev_field(buckets[b],      "buckets[%s]",
-                                      bch2_data_types[b]);
-               copy_dev_field(buckets_alloc,           "buckets_alloc");
-               copy_dev_field(buckets_ec,              "buckets_ec");
-               copy_dev_field(buckets_unavailable,     "buckets_unavailable");
-
-               for (b = 0; b < BCH_DATA_NR; b++)
-                       copy_dev_field(sectors[b],      "sectors[%s]",
-                                      bch2_data_types[b]);
-               copy_dev_field(sectors_fragmented,      "sectors_fragmented");
-       }
+       bch2_dev_usage_from_buckets(c);
 
        {
                unsigned nr = fs_usage_u64s(c);
@@ -596,20 +582,29 @@ static int bch2_gc_done(struct bch_fs *c, bool initial)
                        bch2_acc_percpu_u64s((void *) c->usage[1], nr);
 
                copy_fs_field(hidden,           "hidden");
-               copy_fs_field(data,             "data");
-               copy_fs_field(cached,           "cached");
-               copy_fs_field(reserved,         "reserved");
-               copy_fs_field(nr_inodes,        "nr_inodes");
+               copy_fs_field(btree,            "btree");
 
-               for (i = 0; i < BCH_REPLICAS_MAX; i++)
-                       copy_fs_field(persistent_reserved[i],
-                                     "persistent_reserved[%i]", i);
+               if (!metadata_only) {
+                       copy_fs_field(data,     "data");
+                       copy_fs_field(cached,   "cached");
+                       copy_fs_field(reserved, "reserved");
+                       copy_fs_field(nr_inodes,"nr_inodes");
+
+                       for (i = 0; i < BCH_REPLICAS_MAX; i++)
+                               copy_fs_field(persistent_reserved[i],
+                                             "persistent_reserved[%i]", i);
+               }
 
                for (i = 0; i < c->replicas.nr; i++) {
                        struct bch_replicas_entry *e =
                                cpu_replicas_entry(&c->replicas, i);
                        char buf[80];
 
+                       if (metadata_only &&
+                           (e->data_type == BCH_DATA_USER ||
+                            e->data_type == BCH_DATA_CACHED))
+                               continue;
+
                        bch2_replicas_entry_to_text(&PBUF(buf), e);
 
                        copy_fs_field(replicas[i], "%s", buf);
@@ -625,7 +620,8 @@ fsck_err:
        return ret;
 }
 
-static int bch2_gc_start(struct bch_fs *c)
+static int bch2_gc_start(struct bch_fs *c,
+                        bool metadata_only)
 {
        struct bch_dev *ca;
        unsigned i;
@@ -671,10 +667,18 @@ static int bch2_gc_start(struct bch_fs *c)
                dst->nbuckets           = src->nbuckets;
 
                for (b = 0; b < src->nbuckets; b++) {
-                       dst->b[b]._mark.gen =
-                               dst->b[b].oldest_gen =
-                               src->b[b].mark.gen;
-                       dst->b[b].gen_valid = src->b[b].gen_valid;
+                       struct bucket *d = &dst->b[b];
+                       struct bucket *s = &src->b[b];
+
+                       d->_mark.gen = dst->b[b].oldest_gen = s->mark.gen;
+                       d->gen_valid = s->gen_valid;
+
+                       if (metadata_only &&
+                           (s->mark.data_type == BCH_DATA_USER ||
+                            s->mark.data_type == BCH_DATA_CACHED)) {
+                               d->_mark = s->mark;
+                               d->_mark.owned_by_allocator = 0;
+                       }
                }
        };
 
@@ -699,7 +703,8 @@ static int bch2_gc_start(struct bch_fs *c)
  *    move around - if references move backwards in the ordering GC
  *    uses, GC could skip past them
  */
-int bch2_gc(struct bch_fs *c, struct list_head *journal, bool initial)
+int bch2_gc(struct bch_fs *c, struct list_head *journal,
+           bool initial, bool metadata_only)
 {
        struct bch_dev *ca;
        u64 start_time = local_clock();
@@ -711,7 +716,7 @@ int bch2_gc(struct bch_fs *c, struct list_head *journal, bool initial)
        down_write(&c->gc_lock);
 again:
        percpu_down_write(&c->mark_lock);
-       ret = bch2_gc_start(c);
+       ret = bch2_gc_start(c, metadata_only);
        percpu_up_write(&c->mark_lock);
 
        if (ret)
@@ -719,7 +724,7 @@ again:
 
        bch2_mark_superblocks(c);
 
-       ret = bch2_gc_btrees(c, journal, initial);
+       ret = bch2_gc_btrees(c, journal, initial, metadata_only);
        if (ret)
                goto out;
 
@@ -753,7 +758,7 @@ out:
        percpu_down_write(&c->mark_lock);
 
        if (!ret)
-               ret = bch2_gc_done(c, initial);
+               ret = bch2_gc_done(c, initial, metadata_only);
 
        /* Indicates that gc is no longer in progress: */
        __gc_pos_set(c, gc_phase(GC_PHASE_NOT_RUNNING));
@@ -1155,7 +1160,7 @@ static int bch2_gc_thread(void *arg)
                last = atomic_long_read(&clock->now);
                last_kick = atomic_read(&c->kick_gc);
 
-               ret = bch2_gc(c, NULL, false);
+               ret = bch2_gc(c, NULL, false, false);
                if (ret)
                        bch_err(c, "btree gc failed: %i", ret);
 
index 9eb2b0527a9208233d55560b281ee49de8863c09..b7982e64b2352dc3072e678d0f272cd715174d7e 100644 (file)
@@ -5,7 +5,7 @@
 #include "btree_types.h"
 
 void bch2_coalesce(struct bch_fs *);
-int bch2_gc(struct bch_fs *, struct list_head *, bool);
+int bch2_gc(struct bch_fs *, struct list_head *, bool, bool);
 void bch2_gc_thread_stop(struct bch_fs *);
 int bch2_gc_thread_start(struct bch_fs *);
 void bch2_mark_dev_superblock(struct bch_fs *, struct bch_dev *, unsigned);
index 495ef473260295a7f7a33dd1eb48524c42f04d06..4fe66ee1f745d7b73ddbe0c1d614e81e453e0459 100644 (file)
@@ -132,6 +132,8 @@ void bch2_fs_usage_initialize(struct bch_fs *c)
 
                switch (e->data_type) {
                case BCH_DATA_BTREE:
+                       usage->btree    += usage->replicas[i];
+                       break;
                case BCH_DATA_USER:
                        usage->data     += usage->replicas[i];
                        break;
@@ -226,6 +228,7 @@ static u64 avail_factor(u64 r)
 u64 bch2_fs_sectors_used(struct bch_fs *c, struct bch_fs_usage *fs_usage)
 {
        return min(fs_usage->hidden +
+                  fs_usage->btree +
                   fs_usage->data +
                   reserve_factor(fs_usage->reserved +
                                  fs_usage->online_reserved),
@@ -241,7 +244,8 @@ __bch2_fs_usage_read_short(struct bch_fs *c)
        ret.capacity = c->capacity -
                percpu_u64_get(&c->usage[0]->hidden);
 
-       data            = percpu_u64_get(&c->usage[0]->data);
+       data            = percpu_u64_get(&c->usage[0]->data) +
+                         percpu_u64_get(&c->usage[0]->btree);
        reserved        = percpu_u64_get(&c->usage[0]->reserved) +
                percpu_u64_get(&c->usage[0]->online_reserved);
 
@@ -386,12 +390,17 @@ static void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca,
                bch2_wake_allocator(ca);
 }
 
-void bch2_dev_usage_from_buckets(struct bch_fs *c, struct bch_dev *ca)
+void bch2_dev_usage_from_buckets(struct bch_fs *c)
 {
+       struct bch_dev *ca;
        struct bucket_mark old = { .v.counter = 0 };
        struct bch_fs_usage *fs_usage;
        struct bucket_array *buckets;
        struct bucket *g;
+       unsigned i;
+       int cpu;
+
+       percpu_u64_set(&c->usage[0]->hidden, 0);
 
        /*
         * This is only called during startup, before there's any multithreaded
@@ -401,11 +410,17 @@ void bch2_dev_usage_from_buckets(struct bch_fs *c, struct bch_dev *ca)
        fs_usage = this_cpu_ptr(c->usage[0]);
        preempt_enable();
 
-       buckets = bucket_array(ca);
+       for_each_member_device(ca, c, i) {
+               for_each_possible_cpu(cpu)
+                       memset(per_cpu_ptr(ca->usage[0], cpu), 0,
+                              sizeof(*ca->usage[0]));
+
+               buckets = bucket_array(ca);
 
-       for_each_bucket(g, buckets)
-               if (g->mark.data_type)
-                       bch2_dev_usage_update(c, ca, fs_usage, old, g->mark, false);
+               for_each_bucket(g, buckets)
+                       bch2_dev_usage_update(c, ca, fs_usage,
+                                             old, g->mark, false);
+       }
 }
 
 #define bucket_data_cmpxchg(c, ca, fs_usage, g, new, expr)     \
@@ -426,10 +441,17 @@ static inline void update_replicas(struct bch_fs *c,
        BUG_ON(idx < 0);
        BUG_ON(!sectors);
 
-       if (r->data_type == BCH_DATA_CACHED)
-               fs_usage->cached        += sectors;
-       else
+       switch (r->data_type) {
+       case BCH_DATA_BTREE:
+               fs_usage->btree         += sectors;
+               break;
+       case BCH_DATA_USER:
                fs_usage->data          += sectors;
+               break;
+       case BCH_DATA_CACHED:
+               fs_usage->cached        += sectors;
+               break;
+       }
        fs_usage->replicas[idx]         += sectors;
 }
 
index 6af8b418b1e3e5cd833cbac5f23bd24351857faa..095015f17f7682ad1da4ad9acaf3c70d6a863834 100644 (file)
@@ -174,7 +174,7 @@ static inline bool bucket_needs_journal_commit(struct bucket_mark m,
 
 struct bch_dev_usage bch2_dev_usage_read(struct bch_fs *, struct bch_dev *);
 
-void bch2_dev_usage_from_buckets(struct bch_fs *, struct bch_dev *);
+void bch2_dev_usage_from_buckets(struct bch_fs *);
 
 static inline u64 __dev_buckets_available(struct bch_dev *ca,
                                          struct bch_dev_usage stats)
index 348d062dd744dda54e55319b80e2e80160c16263..a98493dd2ba845ce432535f2f1594369a52e8b1d 100644 (file)
@@ -70,6 +70,7 @@ struct bch_fs_usage {
        u64                     gc_start[0];
 
        u64                     hidden;
+       u64                     btree;
        u64                     data;
        u64                     cached;
        u64                     reserved;
index 0fa952fa1053cd580a4fdcef88d58d63818be9bd..67b4dda9cfeb7c692e8a7a85bb5294b0654c4382 100644 (file)
@@ -361,7 +361,7 @@ int bch2_fs_recovery(struct bch_fs *c)
            test_bit(BCH_FS_REBUILD_REPLICAS, &c->flags)) {
                bch_verbose(c, "starting mark and sweep:");
                err = "error in recovery";
-               ret = bch2_gc(c, &journal, true);
+               ret = bch2_gc(c, &journal, true, false);
                if (ret)
                        goto err;
                bch_verbose(c, "mark and sweep done");
index 1354dd33874c21be8652705c830bc61949e1d175..59503ad0006c20b7ef8d85e75ae04b22960f9687 100644 (file)
@@ -497,7 +497,7 @@ STORE(__bch2_fs)
                bch2_coalesce(c);
 
        if (attr == &sysfs_trigger_gc)
-               bch2_gc(c, NULL, false);
+               bch2_gc(c, NULL, false, false);
 
        if (attr == &sysfs_trigger_alloc_write) {
                bool wrote;