bcachefs: Add a single slot percpu buf for btree iters
author Kent Overstreet <kent.overstreet@gmail.com>
Fri, 6 Nov 2020 01:02:01 +0000 (20:02 -0500)
committer Kent Overstreet <kent.overstreet@linux.dev>
Sun, 22 Oct 2023 21:08:46 +0000 (17:08 -0400)
Allocating our array of btree iters is a big enough allocation that it
hits the buddy allocator, and we're seeing lots of lock contention.
Sticking a single element buffer in front of it should help.

Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
fs/bcachefs/bcachefs.h
fs/bcachefs/btree_iter.c
fs/bcachefs/super.c

index c14117227dd78455f92801bcca480de6a9a0a378..8ac96384fddf15ba526d81d32a07affc8ce1b9d6 100644 (file)
@@ -541,6 +541,10 @@ struct journal_keys {
        u64                     journal_seq_base;
 };
 
+struct btree_iter_buf {        /* element type of the per-cpu bch_fs.btree_iters_bufs: a one-slot buffer cache */
+       struct btree_iter       *iter;  /* cached buffer for a transaction's iterator arrays; NULL when the slot is empty */
+};
+
 struct bch_fs {
        struct closure          cl;
 
@@ -636,6 +640,7 @@ struct bch_fs {
        struct mutex            btree_trans_lock;
        struct list_head        btree_trans_list;
        mempool_t               btree_iters_pool;
+       struct btree_iter_buf  __percpu *btree_iters_bufs;
 
        struct btree_key_cache  btree_key_cache;
 
index a4141a5b569ea211ae69946b2aaeef029dc52db6..f62658f1b1dd509e8b0938ac3f36183bbea2e4a8 100644 (file)
@@ -1991,6 +1991,7 @@ int bch2_trans_iter_free(struct btree_trans *trans,
        return bch2_trans_iter_put(trans, iter);
 }
 
+#if 0
 static int bch2_trans_realloc_iters(struct btree_trans *trans,
                                    unsigned new_size)
 {
@@ -2053,6 +2054,7 @@ success:
 
        return 0;
 }
+#endif
 
 static struct btree_iter *btree_trans_iter_alloc(struct btree_trans *trans)
 {
@@ -2062,28 +2064,27 @@ static struct btree_iter *btree_trans_iter_alloc(struct btree_trans *trans)
                goto got_slot;
 
        if (trans->nr_iters == trans->size) {
-               int ret;
-
-               if (trans->nr_iters >= BTREE_ITER_MAX) {
-                       struct btree_iter *iter;
-
-                       trans_for_each_iter(trans, iter) {
-                               pr_err("iter: btree %s pos %llu:%llu%s%s%s %ps",
-                                      bch2_btree_ids[iter->btree_id],
-                                      iter->pos.inode,
-                                      iter->pos.offset,
-                                      (trans->iters_live & (1ULL << iter->idx)) ? " live" : "",
-                                      (trans->iters_touched & (1ULL << iter->idx)) ? " touched" : "",
-                                      iter->flags & BTREE_ITER_KEEP_UNTIL_COMMIT ? " keep" : "",
-                                      (void *) iter->ip_allocated);
-                       }
+               struct btree_iter *iter;
 
-                       panic("trans iter oveflow\n");
+               BUG_ON(trans->size < BTREE_ITER_MAX);
+
+               trans_for_each_iter(trans, iter) {
+                       pr_err("iter: btree %s pos %llu:%llu%s%s%s %ps",
+                              bch2_btree_ids[iter->btree_id],
+                              iter->pos.inode,
+                              iter->pos.offset,
+                              (trans->iters_live & (1ULL << iter->idx)) ? " live" : "",
+                              (trans->iters_touched & (1ULL << iter->idx)) ? " touched" : "",
+                              iter->flags & BTREE_ITER_KEEP_UNTIL_COMMIT ? " keep" : "",
+                              (void *) iter->ip_allocated);
                }
 
+               panic("trans iter oveflow\n");
+#if 0
                ret = bch2_trans_realloc_iters(trans, trans->size * 2);
                if (ret)
                        return ERR_PTR(ret);
+#endif
        }
 
        idx = trans->nr_iters++;
@@ -2326,22 +2327,37 @@ void bch2_trans_reset(struct btree_trans *trans, unsigned flags)
                bch2_btree_iter_traverse_all(trans);
 }
 
+static void bch2_trans_alloc_iters(struct btree_trans *trans, struct bch_fs *c)
+{      /* Give @trans its iters/updates/updates2 arrays, each sized for BTREE_ITER_MAX entries. */
+       unsigned new_size = BTREE_ITER_MAX;
+       size_t iters_bytes      = sizeof(struct btree_iter) * new_size;
+       size_t updates_bytes    = sizeof(struct btree_insert_entry) * new_size;
+       void *p;
+
+       BUG_ON(trans->used_mempool);
+
+       p =     this_cpu_xchg(c->btree_iters_bufs->iter, NULL) ?:       /* take this CPU's cached buffer, leaving the slot empty... */
+               mempool_alloc(&trans->c->btree_iters_pool, GFP_NOFS);   /* ...else fall back to the pool. NOTE(review): result is not NULL-checked - presumably cannot fail with GFP_NOFS; confirm */
+
+       trans->iters            = p; p += iters_bytes;  /* one buffer carved into three consecutive arrays; NOTE(review): assumes pool elements hold iters_bytes + 2 * updates_bytes - confirm at pool init */
+       trans->updates          = p; p += updates_bytes;
+       trans->updates2         = p; p += updates_bytes;
+       trans->size             = new_size;
+}
+
 void bch2_trans_init(struct btree_trans *trans, struct bch_fs *c,
                     unsigned expected_nr_iters,
                     size_t expected_mem_bytes)
 {
-       /*
-        * reallocating iterators currently completely breaks
-        * bch2_trans_iter_put():
-        */
-       expected_nr_iters = BTREE_ITER_MAX;
-
        memset(trans, 0, sizeof(*trans));
        trans->c                = c;
        trans->ip               = _RET_IP_;
 
-       if (expected_nr_iters > trans->size)
-               bch2_trans_realloc_iters(trans, expected_nr_iters);
+       /*
+        * reallocating iterators currently completely breaks
+        * bch2_trans_iter_put(), we always allocate the max:
+        */
+       bch2_trans_alloc_iters(trans, c);
 
        if (expected_mem_bytes)
                bch2_trans_preload_mem(trans, expected_mem_bytes);
@@ -2356,6 +2372,8 @@ void bch2_trans_init(struct btree_trans *trans, struct bch_fs *c,
 
 int bch2_trans_exit(struct btree_trans *trans)
 {
+       struct bch_fs *c = trans->c;
+
        bch2_trans_unlock(trans);
 
 #ifdef CONFIG_BCACHEFS_DEBUG
@@ -2368,10 +2386,11 @@ int bch2_trans_exit(struct btree_trans *trans)
 
        kfree(trans->fs_usage_deltas);
        kfree(trans->mem);
-       if (trans->used_mempool)
+
+       trans->iters = this_cpu_xchg(c->btree_iters_bufs->iter, trans->iters);
+       if (trans->iters)
                mempool_free(trans->iters, &trans->c->btree_iters_pool);
-       else
-               kfree(trans->iters);
+
        trans->mem      = (void *) 0x1;
        trans->iters    = (void *) 0x1;
 
index a2ade0df62b5fd58e2901de4dc17b9e740443f94..e55fcbcbd37fb0223a337ff32c0a78ddeeabe6e9 100644 (file)
@@ -458,6 +458,7 @@ int bch2_fs_read_write_early(struct bch_fs *c)
 static void __bch2_fs_free(struct bch_fs *c)
 {
        unsigned i;
+       int cpu;
 
        for (i = 0; i < BCH_TIME_STAT_NR; i++)
                bch2_time_stats_exit(&c->times[i]);
@@ -483,6 +484,12 @@ static void __bch2_fs_free(struct bch_fs *c)
        free_percpu(c->usage[1]);
        free_percpu(c->usage[0]);
        kfree(c->usage_base);
+
+       if (c->btree_iters_bufs)
+               for_each_possible_cpu(cpu)
+                       kfree(per_cpu_ptr(c->btree_iters_bufs, cpu)->iter);
+
+       free_percpu(c->btree_iters_bufs);
        free_percpu(c->pcpu);
        mempool_exit(&c->large_bkey_pool);
        mempool_exit(&c->btree_bounce_pool);
@@ -762,6 +769,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
                        BIOSET_NEED_BVECS) ||
            !(c->pcpu = alloc_percpu(struct bch_fs_pcpu)) ||
            !(c->online_reserved = alloc_percpu(u64)) ||
+           !(c->btree_iters_bufs = alloc_percpu(struct btree_iter_buf)) ||
            mempool_init_kvpmalloc_pool(&c->btree_bounce_pool, 1,
                                        btree_bytes(c)) ||
            mempool_init_kmalloc_pool(&c->large_bkey_pool, 1, 2048) ||