bcachefs: Allocator startup improvements
author Kent Overstreet <kent.overstreet@gmail.com>
Mon, 19 Nov 2018 04:20:21 +0000 (23:20 -0500)
committer Kent Overstreet <kent.overstreet@linux.dev>
Sun, 22 Oct 2023 21:08:12 +0000 (17:08 -0400)
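
Add a per-device buckets_written bitmap tracking which buckets have had
their alloc info written to the alloc btree: __bch2_alloc_write_key()
sets the bit after a successful update, and bch2_alloc_replay_key()
checks it (with the btree node locked) so journal replay can skip alloc
keys for buckets that were already written out.

Allocator startup now scans the in-memory bucket array for buckets that
are already invalidated, instead of walking the alloc btree; it discards
invalidated buckets as they are found and stops once the btree reserve
is full.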
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
fs/bcachefs/alloc_background.c
fs/bcachefs/alloc_background.h
fs/bcachefs/bcachefs.h
fs/bcachefs/buckets.c
fs/bcachefs/journal_io.c

fs/bcachefs/alloc_background.c
index b49d0cd84b7880bbf33ce62f679d615c5ea5d494..c17fba1eae963d44482cfb9d3a548483727b0e8e 100644
@@ -237,6 +237,7 @@ static int __bch2_alloc_write_key(struct bch_fs *c, struct bch_dev *ca,
        __BKEY_PADDED(k, DIV_ROUND_UP(sizeof(struct bch_alloc), 8)) alloc_key;
        struct bucket *g;
        struct bkey_i_alloc *a;
+       int ret;
        u8 *d;
 
        percpu_down_read(&c->usage_lock);
@@ -260,32 +261,50 @@ static int __bch2_alloc_write_key(struct bch_fs *c, struct bch_dev *ca,
 
        bch2_btree_iter_set_pos(iter, a->k.p);
 
-       return bch2_btree_insert_at(c, NULL, journal_seq,
-                                   BTREE_INSERT_NOFAIL|
-                                   BTREE_INSERT_USE_RESERVE|
-                                   BTREE_INSERT_USE_ALLOC_RESERVE|
-                                   flags,
-                                   BTREE_INSERT_ENTRY(iter, &a->k_i));
+       ret = bch2_btree_insert_at(c, NULL, journal_seq,
+                                  BTREE_INSERT_NOFAIL|
+                                  BTREE_INSERT_USE_RESERVE|
+                                  BTREE_INSERT_USE_ALLOC_RESERVE|
+                                  flags,
+                                  BTREE_INSERT_ENTRY(iter, &a->k_i));
+
+       if (!ret && ca->buckets_written)
+               set_bit(b, ca->buckets_written);
+
+       return ret;
 }
 
-int bch2_alloc_replay_key(struct bch_fs *c, struct bpos pos)
+int bch2_alloc_replay_key(struct bch_fs *c, struct bkey_i *k)
 {
        struct bch_dev *ca;
        struct btree_iter iter;
        int ret;
 
-       if (pos.inode >= c->sb.nr_devices || !c->devs[pos.inode])
+       if (k->k.p.inode >= c->sb.nr_devices ||
+           !c->devs[k->k.p.inode])
                return 0;
 
-       ca = bch_dev_bkey_exists(c, pos.inode);
+       ca = bch_dev_bkey_exists(c, k->k.p.inode);
 
-       if (pos.offset >= ca->mi.nbuckets)
+       if (k->k.p.offset >= ca->mi.nbuckets)
                return 0;
 
-       bch2_btree_iter_init(&iter, c, BTREE_ID_ALLOC, POS_MIN,
-                            BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
+       bch2_btree_iter_init(&iter, c, BTREE_ID_ALLOC, k->k.p,
+                            BTREE_ITER_INTENT);
 
-       ret = __bch2_alloc_write_key(c, ca, pos.offset, &iter, NULL, 0);
+       ret = bch2_btree_iter_traverse(&iter);
+       if (ret)
+               goto err;
+
+       /* check buckets_written with btree node locked: */
+
+       ret = test_bit(k->k.p.offset, ca->buckets_written)
+               ? 0
+               : bch2_btree_insert_at(c, NULL, NULL,
+                                      BTREE_INSERT_NOFAIL|
+                                      BTREE_INSERT_JOURNAL_REPLAY,
+                                      BTREE_INSERT_ENTRY(&iter, k));
+err:
        bch2_btree_iter_unlock(&iter);
        return ret;
 }
@@ -1284,51 +1303,49 @@ static int __bch2_fs_allocator_start(struct bch_fs *c)
 
        /* Scan for buckets that are already invalidated: */
        for_each_rw_member(ca, c, dev_iter) {
-               struct btree_iter iter;
+               struct bucket_array *buckets;
                struct bucket_mark m;
-               struct bkey_s_c k;
 
-               for_each_btree_key(&iter, c, BTREE_ID_ALLOC, POS(ca->dev_idx, 0), 0, k) {
-                       if (k.k->type != BCH_ALLOC)
-                               continue;
+               down_read(&ca->bucket_lock);
+               percpu_down_read(&c->usage_lock);
+
+               buckets = bucket_array(ca);
 
-                       bu = k.k->p.offset;
-                       m = READ_ONCE(bucket(ca, bu)->mark);
+               for (bu = buckets->first_bucket;
+                    bu < buckets->nbuckets; bu++) {
+                       m = READ_ONCE(buckets->b[bu].mark);
 
-                       if (!is_available_bucket(m) || m.cached_sectors)
+                       if (!m.gen_valid ||
+                           !is_available_bucket(m) ||
+                           m.cached_sectors)
                                continue;
 
-                       percpu_down_read(&c->usage_lock);
                        bch2_mark_alloc_bucket(c, ca, bu, true,
                                        gc_pos_alloc(c, NULL),
                                        BCH_BUCKET_MARK_MAY_MAKE_UNAVAILABLE|
                                        BCH_BUCKET_MARK_GC_LOCK_HELD);
-                       percpu_up_read(&c->usage_lock);
 
                        fifo_push(&ca->free_inc, bu);
 
-                       if (fifo_full(&ca->free_inc))
+                       discard_invalidated_buckets(c, ca);
+
+                       if (fifo_full(&ca->free[RESERVE_BTREE]))
                                break;
                }
-               bch2_btree_iter_unlock(&iter);
+               percpu_up_read(&c->usage_lock);
+               up_read(&ca->bucket_lock);
        }
 
        /* did we find enough buckets? */
        for_each_rw_member(ca, c, dev_iter)
-               if (fifo_used(&ca->free_inc) < ca->free[RESERVE_BTREE].size) {
+               if (!fifo_full(&ca->free[RESERVE_BTREE])) {
                        percpu_ref_put(&ca->io_ref);
                        goto not_enough;
                }
 
        return 0;
 not_enough:
-       pr_debug("did not find enough empty buckets; issuing discards");
-
-       /* clear out free_inc, we'll be using it again below: */
-       for_each_rw_member(ca, c, dev_iter)
-               discard_invalidated_buckets(c, ca);
-
-       pr_debug("scanning for reclaimable buckets");
+       pr_debug("not enough empty buckets; scanning for reclaimable buckets");
 
        for_each_rw_member(ca, c, dev_iter) {
                find_reclaimable_buckets(c, ca);
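
The alloc_background.c changes above come down to two things: a successful
alloc-key write is recorded in a per-device bitmap, and journal replay
consults that bitmap before re-inserting an alloc key. What follows is a
minimal, self-contained userspace sketch of that pattern only; struct dev,
the bitmap helpers and the plain array standing in for the alloc btree are
illustrative stand-ins for ca->buckets_written, set_bit()/test_bit() and
BTREE_ID_ALLOC, not the bcachefs APIs.

/*
 * Sketch: record which buckets had their alloc key written, and have
 * journal replay skip those buckets.
 */
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>

#define BITS_PER_LONG	(sizeof(unsigned long) * CHAR_BIT)
#define BITS_TO_LONGS(n) (((n) + BITS_PER_LONG - 1) / BITS_PER_LONG)

static void bitmap_set(unsigned long *map, size_t bit)
{
	map[bit / BITS_PER_LONG] |= 1UL << (bit % BITS_PER_LONG);
}

static int bitmap_test(const unsigned long *map, size_t bit)
{
	return (map[bit / BITS_PER_LONG] >> (bit % BITS_PER_LONG)) & 1;
}

struct dev {
	size_t		nbuckets;
	unsigned long	*buckets_written;	/* like ca->buckets_written */
	unsigned	*alloc_btree;		/* stand-in for the alloc btree */
};

/* like __bch2_alloc_write_key(): on success, mark the bucket written */
static void alloc_write_key(struct dev *ca, size_t bucket, unsigned gen)
{
	ca->alloc_btree[bucket] = gen;
	bitmap_set(ca->buckets_written, bucket);
}

/* like bch2_alloc_replay_key(): skip buckets that were already written */
static void alloc_replay_key(struct dev *ca, size_t bucket, unsigned gen)
{
	if (bucket >= ca->nbuckets)
		return;

	if (bitmap_test(ca->buckets_written, bucket)) {
		printf("bucket %zu: already written, replay skipped\n", bucket);
		return;
	}

	ca->alloc_btree[bucket] = gen;
	printf("bucket %zu: replayed gen %u\n", bucket, gen);
}

int main(void)
{
	struct dev ca = { .nbuckets = 128 };

	ca.buckets_written = calloc(BITS_TO_LONGS(ca.nbuckets), sizeof(unsigned long));
	ca.alloc_btree     = calloc(ca.nbuckets, sizeof(unsigned));
	if (!ca.buckets_written || !ca.alloc_btree)
		return 1;

	alloc_write_key(&ca, 7, 3);	/* allocator already wrote bucket 7 */
	alloc_replay_key(&ca, 7, 2);	/* journal replay: skipped */
	alloc_replay_key(&ca, 9, 5);	/* never written: replayed */

	free(ca.buckets_written);
	free(ca.alloc_btree);
	return 0;
}

In the actual patch the bit is only trusted while the btree node is
locked, which is why bch2_alloc_replay_key() traverses the iterator
before testing buckets_written.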
fs/bcachefs/alloc_background.h
index 99535fa602144963ed927ec3ce3d8b5c6fca0bcb..59b6a5f2f89045f97880430818665b984299e4a6 100644
@@ -17,7 +17,7 @@ void bch2_alloc_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
 }
 
 int bch2_alloc_read(struct bch_fs *, struct list_head *);
-int bch2_alloc_replay_key(struct bch_fs *, struct bpos);
+int bch2_alloc_replay_key(struct bch_fs *, struct bkey_i *);
 
 static inline void bch2_wake_allocator(struct bch_dev *ca)
 {
fs/bcachefs/bcachefs.h
index b33fbf709705cb16e540b594364bb434bcbb03c7..cdea3a1d9176b58f2eb85740df6c265be47b5a2f 100644
@@ -394,6 +394,7 @@ struct bch_dev {
         */
        struct bucket_array __rcu *buckets;
        unsigned long           *buckets_dirty;
+       unsigned long           *buckets_written;
        /* most out of date gen in the btree */
        u8                      *oldest_gens;
        struct rw_semaphore     bucket_lock;
fs/bcachefs/buckets.c
index 9558129e77ba7d806c43dec3090a39a93efee3e2..201798866242b6949398d3193f85ec2e0915becb 100644
@@ -1096,6 +1096,7 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
 {
        struct bucket_array *buckets = NULL, *old_buckets = NULL;
        unsigned long *buckets_dirty = NULL;
+       unsigned long *buckets_written = NULL;
        u8 *oldest_gens = NULL;
        alloc_fifo      free[RESERVE_NR];
        alloc_fifo      free_inc;
@@ -1127,6 +1128,9 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
            !(buckets_dirty     = kvpmalloc(BITS_TO_LONGS(nbuckets) *
                                            sizeof(unsigned long),
                                            GFP_KERNEL|__GFP_ZERO)) ||
+           !(buckets_written   = kvpmalloc(BITS_TO_LONGS(nbuckets) *
+                                           sizeof(unsigned long),
+                                           GFP_KERNEL|__GFP_ZERO)) ||
            !init_fifo(&free[RESERVE_BTREE], btree_reserve, GFP_KERNEL) ||
            !init_fifo(&free[RESERVE_MOVINGGC],
                       copygc_reserve, GFP_KERNEL) ||
@@ -1161,6 +1165,9 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
                memcpy(buckets_dirty,
                       ca->buckets_dirty,
                       BITS_TO_LONGS(n) * sizeof(unsigned long));
+               memcpy(buckets_written,
+                      ca->buckets_written,
+                      BITS_TO_LONGS(n) * sizeof(unsigned long));
        }
 
        rcu_assign_pointer(ca->buckets, buckets);
@@ -1168,6 +1175,7 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
 
        swap(ca->oldest_gens, oldest_gens);
        swap(ca->buckets_dirty, buckets_dirty);
+       swap(ca->buckets_written, buckets_written);
 
        if (resize)
                percpu_up_write(&c->usage_lock);
@@ -1207,6 +1215,8 @@ err:
                free_fifo(&free[i]);
        kvpfree(buckets_dirty,
                BITS_TO_LONGS(nbuckets) * sizeof(unsigned long));
+       kvpfree(buckets_written,
+               BITS_TO_LONGS(nbuckets) * sizeof(unsigned long));
        kvpfree(oldest_gens,
                nbuckets * sizeof(u8));
        if (buckets)
@@ -1224,6 +1234,8 @@ void bch2_dev_buckets_free(struct bch_dev *ca)
        free_fifo(&ca->free_inc);
        for (i = 0; i < RESERVE_NR; i++)
                free_fifo(&ca->free[i]);
+       kvpfree(ca->buckets_written,
+               BITS_TO_LONGS(ca->mi.nbuckets) * sizeof(unsigned long));
        kvpfree(ca->buckets_dirty,
                BITS_TO_LONGS(ca->mi.nbuckets) * sizeof(unsigned long));
        kvpfree(ca->oldest_gens, ca->mi.nbuckets * sizeof(u8));
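
bch2_dev_buckets_resize() above grows the new bitmap the same way it
already grows buckets_dirty. Below is a rough userspace sketch of that
resize pattern, with calloc()/free() standing in for kvpmalloc()/kvpfree()
and without the kernel's locking; it is not the kernel code.

#include <limits.h>
#include <stdlib.h>
#include <string.h>

#define BITS_PER_LONG	(sizeof(unsigned long) * CHAR_BIT)
#define BITS_TO_LONGS(n) (((n) + BITS_PER_LONG - 1) / BITS_PER_LONG)

/* allocate a zeroed bitmap for the new size, copy the old bits, swap */
static int resize_bucket_bitmap(unsigned long **map, size_t old_nbuckets,
				size_t new_nbuckets)
{
	unsigned long *new_map = calloc(BITS_TO_LONGS(new_nbuckets),
					sizeof(unsigned long));
	size_t copy = old_nbuckets < new_nbuckets ? old_nbuckets : new_nbuckets;

	if (!new_map)
		return -1;

	if (*map)
		memcpy(new_map, *map, BITS_TO_LONGS(copy) * sizeof(unsigned long));

	free(*map);		/* the kernel code swap()s, then frees under err: */
	*map = new_map;
	return 0;
}

int main(void)
{
	unsigned long *buckets_written = NULL;

	if (resize_bucket_bitmap(&buckets_written, 0, 64))	/* initial alloc */
		return 1;
	if (resize_bucket_bitmap(&buckets_written, 64, 256))	/* device grew */
		return 1;

	free(buckets_written);
	return 0;
}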
fs/bcachefs/journal_io.c
index 60fc2eced71aeeaf24ab26578a2e373656f37881..a74566764630b95ad94193ee9232827b388d61c7 100644
@@ -780,7 +780,6 @@ int bch2_journal_replay(struct bch_fs *c, struct list_head *list)
        int ret = 0;
 
        list_for_each_entry_safe(i, n, list, list) {
-
                j->replay_journal_seq = le64_to_cpu(i->j.seq);
 
                for_each_jset_key(k, _n, entry, &i->j) {
@@ -790,7 +789,7 @@ int bch2_journal_replay(struct bch_fs *c, struct list_head *list)
                                 * allocation code handles replay for
                                 * BTREE_ID_ALLOC keys:
                                 */
-                               ret = bch2_alloc_replay_key(c, k->k.p);
+                               ret = bch2_alloc_replay_key(c, k);
                        } else {
                                /*
                                 * We might cause compressed extents to be
@@ -801,9 +800,9 @@ int bch2_journal_replay(struct bch_fs *c, struct list_head *list)
                                        bch2_disk_reservation_init(c, 0);
 
                                ret = bch2_btree_insert(c, entry->btree_id, k,
-                                                       &disk_res, NULL,
-                                                       BTREE_INSERT_NOFAIL|
-                                                       BTREE_INSERT_JOURNAL_REPLAY);
+                                               &disk_res, NULL,
+                                               BTREE_INSERT_NOFAIL|
+                                               BTREE_INSERT_JOURNAL_REPLAY);
                        }
 
                        if (ret) {