bcachefs: Allocate fs_usage in do_btree_insert_at()
author Kent Overstreet <kent.overstreet@gmail.com>
Fri, 15 Mar 2019 22:20:46 +0000 (18:20 -0400)
committer Kent Overstreet <kent.overstreet@linux.dev>
Sun, 22 Oct 2023 21:08:18 +0000 (17:08 -0400)
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
fs/bcachefs/bcachefs.h
fs/bcachefs/btree_update_interior.c
fs/bcachefs/btree_update_leaf.c
fs/bcachefs/buckets.c
fs/bcachefs/buckets.h
fs/bcachefs/extents.c
fs/bcachefs/replicas.c
fs/bcachefs/replicas.h
fs/bcachefs/super.c

diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h
index 0b495dd32f6750c8200de66d63b2c460138d107d..27ffecb912a39dfaa6e70d1edda06b9d1ba15ee3 100644 (file)
@@ -635,7 +635,10 @@ struct bch_fs {
        struct percpu_rw_semaphore      mark_lock;
 
        struct bch_fs_usage __percpu    *usage[2];
-       struct bch_fs_usage __percpu    *usage_scratch;
+
+       /* single element mempool: */
+       struct mutex            usage_scratch_lock;
+       struct bch_fs_usage     *usage_scratch;
 
        /*
         * When we invalidate buckets, we use both the priority and the amount
diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c
index 7ccf2f935701cc100eb0701e294f201168e4b2a7..31c1474cd4947e725c13b00d8692ee1500515732 100644 (file)
@@ -1076,8 +1076,7 @@ static void bch2_btree_set_root_inmem(struct btree_update *as, struct btree *b)
 
        mutex_lock(&c->btree_interior_update_lock);
        percpu_down_read(&c->mark_lock);
-       preempt_disable();
-       fs_usage = bch2_fs_usage_get_scratch(c);
+       fs_usage = bch2_fs_usage_scratch_get(c);
 
        bch2_mark_key_locked(c, bkey_i_to_s_c(&b->key),
                      true, 0,
@@ -1090,7 +1089,7 @@ static void bch2_btree_set_root_inmem(struct btree_update *as, struct btree *b)
                                           fs_usage);
        bch2_fs_usage_apply(c, fs_usage, &as->reserve->disk_res);
 
-       preempt_enable();
+       bch2_fs_usage_scratch_put(c, fs_usage);
        percpu_up_read(&c->mark_lock);
        mutex_unlock(&c->btree_interior_update_lock);
 }
@@ -1171,8 +1170,7 @@ static void bch2_insert_fixup_btree_ptr(struct btree_update *as, struct btree *b
 
        mutex_lock(&c->btree_interior_update_lock);
        percpu_down_read(&c->mark_lock);
-       preempt_disable();
-       fs_usage = bch2_fs_usage_get_scratch(c);
+       fs_usage = bch2_fs_usage_scratch_get(c);
 
        bch2_mark_key_locked(c, bkey_i_to_s_c(insert),
                             true, 0,
@@ -1193,7 +1191,7 @@ static void bch2_insert_fixup_btree_ptr(struct btree_update *as, struct btree *b
 
        bch2_fs_usage_apply(c, fs_usage, &as->reserve->disk_res);
 
-       preempt_enable();
+       bch2_fs_usage_scratch_put(c, fs_usage);
        percpu_up_read(&c->mark_lock);
        mutex_unlock(&c->btree_interior_update_lock);
 
@@ -1987,7 +1985,7 @@ static void __bch2_btree_node_update_key(struct bch_fs *c,
 
                mutex_lock(&c->btree_interior_update_lock);
                percpu_down_read(&c->mark_lock);
-               fs_usage = bch2_fs_usage_get_scratch(c);
+               fs_usage = bch2_fs_usage_scratch_get(c);
 
                bch2_mark_key_locked(c, bkey_i_to_s_c(&new_key->k_i),
                              true, 0,
@@ -1998,6 +1996,7 @@ static void __bch2_btree_node_update_key(struct bch_fs *c,
                                           fs_usage);
                bch2_fs_usage_apply(c, fs_usage, &as->reserve->disk_res);
 
+               bch2_fs_usage_scratch_put(c, fs_usage);
                percpu_up_read(&c->mark_lock);
                mutex_unlock(&c->btree_interior_update_lock);
 
diff --git a/fs/bcachefs/btree_update_leaf.c b/fs/bcachefs/btree_update_leaf.c
index 42fdb6c2963aae6cfc674a3707d3da9ec7b57570..5349790547f4c26b775d8e58f93b6135892ea1f1 100644 (file)
@@ -269,8 +269,6 @@ static void btree_insert_key_leaf(struct btree_trans *trans,
        int old_live_u64s = b->nr.live_u64s;
        int live_u64s_added, u64s_added;
 
-       bch2_mark_update(trans, insert);
-
        if (!btree_node_is_extents(b))
                bch2_insert_fixup_key(trans, insert);
        else
@@ -499,11 +497,6 @@ btree_key_can_insert(struct btree_trans *trans,
        if (unlikely(btree_node_fake(b)))
                return BTREE_INSERT_BTREE_NODE_FULL;
 
-       if (!bch2_bkey_replicas_marked(c,
-                       bkey_i_to_s_c(insert->k),
-                       true))
-               return BTREE_INSERT_NEED_MARK_REPLICAS;
-
        ret = !btree_node_is_extents(b)
                ? BTREE_INSERT_OK
                : bch2_extent_can_insert(trans, insert, u64s);
@@ -555,6 +548,7 @@ static inline int do_btree_insert_at(struct btree_trans *trans,
                                     struct btree_insert_entry **stopped_at)
 {
        struct bch_fs *c = trans->c;
+       struct bch_fs_usage *fs_usage = NULL;
        struct btree_insert_entry *i;
        struct btree_iter *linked;
        int ret;
@@ -562,12 +556,29 @@ static inline int do_btree_insert_at(struct btree_trans *trans,
        trans_for_each_update_iter(trans, i)
                BUG_ON(i->iter->uptodate >= BTREE_ITER_NEED_RELOCK);
 
+       trans_for_each_update_iter(trans, i) {
+               if (i->deferred ||
+                   !btree_node_type_needs_gc(i->iter->btree_id))
+                       continue;
+
+               if (!fs_usage) {
+                       percpu_down_read(&c->mark_lock);
+                       fs_usage = bch2_fs_usage_scratch_get(c);
+               }
+
+               if (!bch2_bkey_replicas_marked_locked(c,
+                               bkey_i_to_s_c(i->k), true)) {
+                       ret = BTREE_INSERT_NEED_MARK_REPLICAS;
+                       goto out;
+               }
+       }
+
        btree_trans_lock_write(c, trans);
 
        if (race_fault()) {
                ret = -EINTR;
                trans_restart(" (race)");
-               goto out;
+               goto out_unlock;
        }
 
        /*
@@ -577,7 +588,7 @@ static inline int do_btree_insert_at(struct btree_trans *trans,
         */
        ret = btree_trans_check_can_insert(trans, stopped_at);
        if (ret)
-               goto out;
+               goto out_unlock;
 
        /*
         * Don't get journal reservation until after we know insert will
@@ -585,7 +596,7 @@ static inline int do_btree_insert_at(struct btree_trans *trans,
         */
        ret = bch2_trans_journal_res_get(trans, JOURNAL_RES_GET_NONBLOCK);
        if (ret)
-               goto out;
+               goto out_unlock;
 
        if (!(trans->flags & BTREE_INSERT_JOURNAL_REPLAY)) {
                if (journal_seq_verify(c))
@@ -610,14 +621,25 @@ static inline int do_btree_insert_at(struct btree_trans *trans,
                }
        }
 
+       trans_for_each_update_iter(trans, i)
+               bch2_mark_update(trans, i, fs_usage);
+       if (fs_usage)
+               bch2_trans_fs_usage_apply(trans, fs_usage);
+
        trans_for_each_update(trans, i)
                do_btree_insert_one(trans, i);
-out:
+out_unlock:
        BUG_ON(ret &&
               (trans->flags & BTREE_INSERT_JOURNAL_RESERVED) &&
               trans->journal_res.ref);
 
        btree_trans_unlock_write(trans);
+out:
+       if (fs_usage) {
+               bch2_fs_usage_scratch_put(c, fs_usage);
+               percpu_up_read(&c->mark_lock);
+       }
+
        bch2_journal_res_put(&c->journal, &trans->journal_res);
 
        return ret;
diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c
index 3744d55b8495ca1a154fa0c0b06e21981b061c5a..2fbcd85d9e754df9c1c9a78608e8caefe8237262 100644 (file)
@@ -144,6 +144,37 @@ void bch2_fs_usage_initialize(struct bch_fs *c)
        percpu_up_write(&c->mark_lock);
 }
 
+void bch2_fs_usage_scratch_put(struct bch_fs *c, struct bch_fs_usage *fs_usage)
+{
+       if (fs_usage == c->usage_scratch)
+               mutex_unlock(&c->usage_scratch_lock);
+       else
+               kfree(fs_usage);
+}
+
+struct bch_fs_usage *bch2_fs_usage_scratch_get(struct bch_fs *c)
+{
+       struct bch_fs_usage *ret;
+       unsigned bytes = fs_usage_u64s(c) * sizeof(u64);
+
+       ret = kzalloc(bytes, GFP_NOWAIT);
+       if (ret)
+               return ret;
+
+       if (mutex_trylock(&c->usage_scratch_lock))
+               goto out_pool;
+
+       ret = kzalloc(bytes, GFP_NOFS);
+       if (ret)
+               return ret;
+
+       mutex_lock(&c->usage_scratch_lock);
+out_pool:
+       ret = c->usage_scratch;
+       memset(ret, 0, bytes);
+       return ret;
+}
+
 struct bch_dev_usage bch2_dev_usage_read(struct bch_fs *c, struct bch_dev *ca)
 {
        struct bch_dev_usage ret;
@@ -906,31 +937,39 @@ static int __bch2_mark_key(struct bch_fs *c, struct bkey_s_c k,
                           unsigned journal_seq, unsigned flags,
                           bool gc)
 {
+       int ret = 0;
+
+       preempt_disable();
+
        if (!fs_usage || gc)
                fs_usage = this_cpu_ptr(c->usage[gc]);
 
        switch (k.k->type) {
        case KEY_TYPE_alloc:
-               return bch2_mark_alloc(c, k, inserting,
+               ret = bch2_mark_alloc(c, k, inserting,
                                fs_usage, journal_seq, flags, gc);
+               break;
        case KEY_TYPE_btree_ptr:
-               return bch2_mark_extent(c, k, inserting
+               ret = bch2_mark_extent(c, k, inserting
                                ?  c->opts.btree_node_size
                                : -c->opts.btree_node_size,
                                BCH_DATA_BTREE,
                                fs_usage, journal_seq, flags, gc);
+               break;
        case KEY_TYPE_extent:
-               return bch2_mark_extent(c, k, sectors, BCH_DATA_USER,
+               ret = bch2_mark_extent(c, k, sectors, BCH_DATA_USER,
                                fs_usage, journal_seq, flags, gc);
+               break;
        case KEY_TYPE_stripe:
-               return bch2_mark_stripe(c, k, inserting,
+               ret = bch2_mark_stripe(c, k, inserting,
                                fs_usage, journal_seq, flags, gc);
+               break;
        case KEY_TYPE_inode:
                if (inserting)
                        fs_usage->nr_inodes++;
                else
                        fs_usage->nr_inodes--;
-               return 0;
+               break;
        case KEY_TYPE_reservation: {
                unsigned replicas = bkey_s_c_to_reservation(k).v->nr_replicas;
 
@@ -940,11 +979,13 @@ static int __bch2_mark_key(struct bch_fs *c, struct bkey_s_c k,
 
                fs_usage->reserved                              += sectors;
                fs_usage->persistent_reserved[replicas - 1]     += sectors;
-               return 0;
+               break;
        }
-       default:
-               return 0;
        }
+
+       preempt_enable();
+
+       return ret;
 }
 
 int bch2_mark_key_locked(struct bch_fs *c,
@@ -976,25 +1017,19 @@ int bch2_mark_key(struct bch_fs *c, struct bkey_s_c k,
 }
 
 void bch2_mark_update(struct btree_trans *trans,
-                     struct btree_insert_entry *insert)
+                     struct btree_insert_entry *insert,
+                     struct bch_fs_usage *fs_usage)
 {
        struct bch_fs           *c = trans->c;
        struct btree_iter       *iter = insert->iter;
        struct btree            *b = iter->l[0].b;
        struct btree_node_iter  node_iter = iter->l[0].iter;
-       struct bch_fs_usage     *fs_usage;
        struct gc_pos           pos = gc_pos_btree_node(b);
        struct bkey_packed      *_k;
-       u64 disk_res_sectors = trans->disk_res ? trans->disk_res->sectors : 0;
-       static int warned_disk_usage = 0;
 
        if (!btree_node_type_needs_gc(iter->btree_id))
                return;
 
-       percpu_down_read(&c->mark_lock);
-       preempt_disable();
-       fs_usage = bch2_fs_usage_get_scratch(c);
-
        if (!(trans->flags & BTREE_INSERT_NOMARK))
                bch2_mark_key_locked(c, bkey_i_to_s_c(insert->k), true,
                        bpos_min(insert->k->k.p, b->key.k.p).offset -
@@ -1047,16 +1082,32 @@ void bch2_mark_update(struct btree_trans *trans,
 
                bch2_btree_node_iter_advance(&node_iter, b);
        }
+}
 
-       if (bch2_fs_usage_apply(c, fs_usage, trans->disk_res) &&
-           !warned_disk_usage &&
-           !xchg(&warned_disk_usage, 1)) {
-               char buf[200];
+void bch2_trans_fs_usage_apply(struct btree_trans *trans,
+                              struct bch_fs_usage *fs_usage)
+{
+       struct bch_fs *c = trans->c;
+       struct btree_insert_entry *i;
+       static int warned_disk_usage = 0;
+       u64 disk_res_sectors = trans->disk_res ? trans->disk_res->sectors : 0;
+       char buf[200];
+
+       if (!bch2_fs_usage_apply(c, fs_usage, trans->disk_res) ||
+           warned_disk_usage ||
+           xchg(&warned_disk_usage, 1))
+               return;
 
-               pr_err("disk usage increased more than %llu sectors reserved", disk_res_sectors);
+       pr_err("disk usage increased more than %llu sectors reserved", disk_res_sectors);
+
+       trans_for_each_update_iter(trans, i) {
+               struct btree_iter       *iter = i->iter;
+               struct btree            *b = iter->l[0].b;
+               struct btree_node_iter  node_iter = iter->l[0].iter;
+               struct bkey_packed      *_k;
 
                pr_err("while inserting");
-               bch2_bkey_val_to_text(&PBUF(buf), c, bkey_i_to_s_c(insert->k));
+               bch2_bkey_val_to_text(&PBUF(buf), c, bkey_i_to_s_c(i->k));
                pr_err("%s", buf);
                pr_err("overlapping with");
 
@@ -1069,8 +1120,8 @@ void bch2_mark_update(struct btree_trans *trans,
                        k = bkey_disassemble(b, _k, &unpacked);
 
                        if (btree_node_is_extents(b)
-                           ? bkey_cmp(insert->k->k.p, bkey_start_pos(k.k)) <= 0
-                           : bkey_cmp(insert->k->k.p, k.k->p))
+                           ? bkey_cmp(i->k->k.p, bkey_start_pos(k.k)) <= 0
+                           : bkey_cmp(i->k->k.p, k.k->p))
                                break;
 
                        bch2_bkey_val_to_text(&PBUF(buf), c, k);
@@ -1079,9 +1130,6 @@ void bch2_mark_update(struct btree_trans *trans,
                        bch2_btree_node_iter_advance(&node_iter, b);
                }
        }
-
-       preempt_enable();
-       percpu_up_read(&c->mark_lock);
 }
 
 /* Disk reservations: */
diff --git a/fs/bcachefs/buckets.h b/fs/bcachefs/buckets.h
index fc2c212392b6c61ab4c4999b4476c8b6c8304e46..e34c9d24dc38f10ac9fdff9702daf9840e7710df 100644 (file)
@@ -219,13 +219,8 @@ static inline unsigned fs_usage_u64s(struct bch_fs *c)
                READ_ONCE(c->replicas.nr);
 }
 
-static inline struct bch_fs_usage *bch2_fs_usage_get_scratch(struct bch_fs *c)
-{
-       struct bch_fs_usage *ret = this_cpu_ptr(c->usage_scratch);
-
-       memset(ret, 0, fs_usage_u64s(c) * sizeof(u64));
-       return ret;
-}
+void bch2_fs_usage_scratch_put(struct bch_fs *, struct bch_fs_usage *);
+struct bch_fs_usage *bch2_fs_usage_scratch_get(struct bch_fs *);
 
 struct bch_fs_usage *bch2_fs_usage_read(struct bch_fs *);
 
@@ -256,10 +251,13 @@ int bch2_mark_key_locked(struct bch_fs *, struct bkey_s_c,
 int bch2_mark_key(struct bch_fs *, struct bkey_s_c,
                  bool, s64, struct gc_pos,
                  struct bch_fs_usage *, u64, unsigned);
-void bch2_mark_update(struct btree_trans *, struct btree_insert_entry *);
 int bch2_fs_usage_apply(struct bch_fs *, struct bch_fs_usage *,
                        struct disk_reservation *);
 
+void bch2_mark_update(struct btree_trans *, struct btree_insert_entry *,
+                     struct bch_fs_usage *);
+void bch2_trans_fs_usage_apply(struct btree_trans *, struct bch_fs_usage *);
+
 /* disk reservations: */
 
 void __bch2_disk_reservation_put(struct bch_fs *, struct disk_reservation *);
diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c
index 80531017b237005c18e102f3b56fabbfae1dc0b5..194b8d6da1bba800ac9dc2ab792b46d0d11e5f6a 100644 (file)
@@ -1190,11 +1190,12 @@ void bch2_insert_fixup_extent(struct btree_trans *trans,
 
                if (s.deleting)
                        tmp.k.k.type = KEY_TYPE_discard;
-
+#if 0
+               /* disabled due to lock recursion - mark_lock: */
                if (debug_check_bkeys(c))
                        bch2_bkey_debugcheck(c, iter->l[0].b,
                                             bkey_i_to_s_c(&tmp.k));
-
+#endif
                EBUG_ON(bkey_deleted(&tmp.k.k) || !tmp.k.k.size);
 
                extent_bset_insert(c, iter, &tmp.k);
diff --git a/fs/bcachefs/replicas.c b/fs/bcachefs/replicas.c
index 72592df9afc0d699df807093b4e3e2d35814829f..b66217989b718a006aa5586d05c177271149e87f 100644 (file)
@@ -207,22 +207,29 @@ static bool __replicas_has_entry(struct bch_replicas_cpu *r,
        return __replicas_entry_idx(r, search) >= 0;
 }
 
-bool bch2_replicas_marked(struct bch_fs *c,
+static bool bch2_replicas_marked_locked(struct bch_fs *c,
                          struct bch_replicas_entry *search,
                          bool check_gc_replicas)
 {
-       bool marked;
-
        if (!search->nr_devs)
                return true;
 
        verify_replicas_entry_sorted(search);
 
-       percpu_down_read(&c->mark_lock);
-       marked = __replicas_has_entry(&c->replicas, search) &&
+       return __replicas_has_entry(&c->replicas, search) &&
                (!check_gc_replicas ||
                 likely((!c->replicas_gc.entries)) ||
                 __replicas_has_entry(&c->replicas_gc, search));
+}
+
+bool bch2_replicas_marked(struct bch_fs *c,
+                         struct bch_replicas_entry *search,
+                         bool check_gc_replicas)
+{
+       bool marked;
+
+       percpu_down_read(&c->mark_lock);
+       marked = bch2_replicas_marked_locked(c, search, check_gc_replicas);
        percpu_up_read(&c->mark_lock);
 
        return marked;
@@ -263,7 +270,7 @@ static int replicas_table_update(struct bch_fs *c,
                                 struct bch_replicas_cpu *new_r)
 {
        struct bch_fs_usage __percpu *new_usage[2] = { NULL, NULL };
-       struct bch_fs_usage __percpu *new_scratch = NULL;
+       struct bch_fs_usage *new_scratch = NULL;
        unsigned bytes = sizeof(struct bch_fs_usage) +
                sizeof(u64) * new_r->nr;
        int ret = -ENOMEM;
@@ -273,8 +280,7 @@ static int replicas_table_update(struct bch_fs *c,
            (c->usage[1] &&
             !(new_usage[1] = __alloc_percpu_gfp(bytes, sizeof(u64),
                                                 GFP_NOIO))) ||
-           !(new_scratch  = __alloc_percpu_gfp(bytes, sizeof(u64),
-                                               GFP_NOIO)))
+           !(new_scratch  = kmalloc(bytes, GFP_NOIO)))
                goto err;
 
        if (c->usage[0])
@@ -290,7 +296,7 @@ static int replicas_table_update(struct bch_fs *c,
        swap(c->replicas,       *new_r);
        ret = 0;
 err:
-       free_percpu(new_scratch);
+       kfree(new_scratch);
        free_percpu(new_usage[1]);
        free_percpu(new_usage[0]);
        return ret;
@@ -390,9 +396,9 @@ int bch2_mark_replicas(struct bch_fs *c,
                : bch2_mark_replicas_slowpath(c, r);
 }
 
-bool bch2_bkey_replicas_marked(struct bch_fs *c,
-                              struct bkey_s_c k,
-                              bool check_gc_replicas)
+bool bch2_bkey_replicas_marked_locked(struct bch_fs *c,
+                                     struct bkey_s_c k,
+                                     bool check_gc_replicas)
 {
        struct bch_replicas_padded search;
        struct bch_devs_list cached = bch2_bkey_cached_devs(k);
@@ -401,13 +407,27 @@ bool bch2_bkey_replicas_marked(struct bch_fs *c,
        for (i = 0; i < cached.nr; i++) {
                bch2_replicas_entry_cached(&search.e, cached.devs[i]);
 
-               if (!bch2_replicas_marked(c, &search.e, check_gc_replicas))
+               if (!bch2_replicas_marked_locked(c, &search.e,
+                                                check_gc_replicas))
                        return false;
        }
 
        bkey_to_replicas(&search.e, k);
 
-       return bch2_replicas_marked(c, &search.e, check_gc_replicas);
+       return bch2_replicas_marked_locked(c, &search.e, check_gc_replicas);
+}
+
+bool bch2_bkey_replicas_marked(struct bch_fs *c,
+                              struct bkey_s_c k,
+                              bool check_gc_replicas)
+{
+       bool marked;
+
+       percpu_down_read(&c->mark_lock);
+       marked = bch2_bkey_replicas_marked_locked(c, k, check_gc_replicas);
+       percpu_up_read(&c->mark_lock);
+
+       return marked;
 }
 
 int bch2_mark_bkey_replicas(struct bch_fs *c, struct bkey_s_c k)
diff --git a/fs/bcachefs/replicas.h b/fs/bcachefs/replicas.h
index d1457c786bb5894297d00a99d78771c5c4d0b40f..0777e7056d55dbc3f14e9eba003d940542274c5b 100644 (file)
@@ -26,6 +26,8 @@ bool bch2_replicas_marked(struct bch_fs *,
 int bch2_mark_replicas(struct bch_fs *,
                       struct bch_replicas_entry *);
 
+bool bch2_bkey_replicas_marked_locked(struct bch_fs *,
+                                     struct bkey_s_c, bool);
 bool bch2_bkey_replicas_marked(struct bch_fs *,
                               struct bkey_s_c, bool);
 int bch2_mark_bkey_replicas(struct bch_fs *, struct bkey_s_c);
diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c
index 4f627e91f04162fc3d4a9e3ddb2f7bb6a254cc72..b1eb70556f75f96ddaadad2cebfda3229767b3d7 100644 (file)
@@ -404,7 +404,7 @@ static void bch2_fs_free(struct bch_fs *c)
        bch2_io_clock_exit(&c->io_clock[READ]);
        bch2_fs_compress_exit(c);
        percpu_free_rwsem(&c->mark_lock);
-       free_percpu(c->usage_scratch);
+       kfree(c->usage_scratch);
        free_percpu(c->usage[0]);
        free_percpu(c->pcpu);
        mempool_exit(&c->btree_iters_pool);
@@ -572,6 +572,8 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
        mutex_init(&c->btree_reserve_cache_lock);
        mutex_init(&c->btree_interior_update_lock);
 
+       mutex_init(&c->usage_scratch_lock);
+
        mutex_init(&c->bio_bounce_pages_lock);
 
        bio_list_init(&c->btree_write_error_list);