bcachefs: New locking functions
author: Kent Overstreet <kent.overstreet@gmail.com>
Sun, 21 Aug 2022 18:29:43 +0000 (14:29 -0400)
committer: Kent Overstreet <kent.overstreet@linux.dev>
Sun, 22 Oct 2023 21:09:40 +0000 (17:09 -0400)
In the future, with the new deadlock cycle detector, we won't be using
bare six_lock_* anymore: lock wait entries will all be embedded in
btree_trans, and we will need a btree_trans context whenever locking a
btree node.

This patch plumbs a btree_trans to the few places that need it, and adds
two new locking functions:
 - btree_node_lock_nopath, which may fail returning a transaction
   restart, and
 - btree_node_lock_nopath_nofail, to be used in places where we know we
   cannot deadlock (i.e. because we're holding no other locks).

Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
fs/bcachefs/btree_cache.c
fs/bcachefs/btree_cache.h
fs/bcachefs/btree_gc.c
fs/bcachefs/btree_io.c
fs/bcachefs/btree_key_cache.c
fs/bcachefs/btree_locking.c
fs/bcachefs/btree_locking.h
fs/bcachefs/btree_update_interior.c
fs/bcachefs/btree_update_interior.h
fs/bcachefs/btree_update_leaf.c

index e09fbf36ebc26587501cd43ebd27f59427982385..a0e9e14e3fa50bdb32b4db9948550afd7ea6dcdf 100644 (file)
@@ -959,12 +959,13 @@ lock_node:
        return b;
 }
 
-struct btree *bch2_btree_node_get_noiter(struct bch_fs *c,
+struct btree *bch2_btree_node_get_noiter(struct btree_trans *trans,
                                         const struct bkey_i *k,
                                         enum btree_id btree_id,
                                         unsigned level,
                                         bool nofill)
 {
+       struct bch_fs *c = trans->c;
        struct btree_cache *bc = &c->btree_cache;
        struct btree *b;
        struct bset_tree *t;
@@ -998,9 +999,14 @@ retry:
                        goto out;
        } else {
 lock_node:
-               ret = six_lock_read(&b->c.lock, lock_node_check_fn, (void *) k);
-               if (ret)
-                       goto retry;
+               ret = btree_node_lock_nopath(trans, &b->c, SIX_LOCK_read);
+               if (unlikely(ret)) {
+                       if (bch2_err_matches(ret, BCH_ERR_lock_fail_node_reused))
+                               goto retry;
+                       if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
+                               return ERR_PTR(ret);
+                       BUG();
+               }
 
                if (unlikely(b->hash_val != btree_ptr_hash_val(k) ||
                             b->c.btree_id != btree_id ||
@@ -1062,8 +1068,9 @@ int bch2_btree_node_prefetch(struct bch_fs *c,
        return PTR_ERR_OR_ZERO(b);
 }
 
-void bch2_btree_node_evict(struct bch_fs *c, const struct bkey_i *k)
+void bch2_btree_node_evict(struct btree_trans *trans, const struct bkey_i *k)
 {
+       struct bch_fs *c = trans->c;
        struct btree_cache *bc = &c->btree_cache;
        struct btree *b;
 
@@ -1079,8 +1086,8 @@ wait_on_io:
        __bch2_btree_node_wait_on_read(b);
        __bch2_btree_node_wait_on_write(b);
 
-       six_lock_intent(&b->c.lock, NULL, NULL);
-       six_lock_write(&b->c.lock, NULL, NULL);
+       btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_intent);
+       btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_write);
 
        if (btree_node_dirty(b)) {
                __bch2_btree_node_write(c, b, 0);
index 83723805f12a507f355a6743e952fb5042d7cfe8..a4df3e866bb81a730690c206a344f0a217383cb9 100644 (file)
@@ -26,13 +26,13 @@ struct btree *bch2_btree_node_get(struct btree_trans *, struct btree_path *,
                                  const struct bkey_i *, unsigned,
                                  enum six_lock_type, unsigned long);
 
-struct btree *bch2_btree_node_get_noiter(struct bch_fs *, const struct bkey_i *,
+struct btree *bch2_btree_node_get_noiter(struct btree_trans *, const struct bkey_i *,
                                         enum btree_id, unsigned, bool);
 
 int bch2_btree_node_prefetch(struct bch_fs *, struct btree_trans *, struct btree_path *,
                             const struct bkey_i *, enum btree_id, unsigned);
 
-void bch2_btree_node_evict(struct bch_fs *, const struct bkey_i *);
+void bch2_btree_node_evict(struct btree_trans *, const struct bkey_i *);
 
 void bch2_fs_btree_cache_exit(struct bch_fs *);
 int bch2_fs_btree_cache_init(struct bch_fs *);
index 239eda57bf02680ffb667c684480541b0339b7d8..77a1fe81ac3568300787a6a8f847b9163eb4411a 100644 (file)
@@ -165,10 +165,11 @@ static void btree_ptr_to_v2(struct btree *b, struct bkey_i_btree_ptr_v2 *dst)
        }
 }
 
-static void bch2_btree_node_update_key_early(struct bch_fs *c,
+static void bch2_btree_node_update_key_early(struct btree_trans *trans,
                                             enum btree_id btree, unsigned level,
                                             struct bkey_s_c old, struct bkey_i *new)
 {
+       struct bch_fs *c = trans->c;
        struct btree *b;
        struct bkey_buf tmp;
        int ret;
@@ -176,7 +177,7 @@ static void bch2_btree_node_update_key_early(struct bch_fs *c,
        bch2_bkey_buf_init(&tmp);
        bch2_bkey_buf_reassemble(&tmp, c, old);
 
-       b = bch2_btree_node_get_noiter(c, tmp.k, btree, level, true);
+       b = bch2_btree_node_get_noiter(trans, tmp.k, btree, level, true);
        if (!IS_ERR_OR_NULL(b)) {
                mutex_lock(&c->btree_cache.lock);
 
@@ -352,8 +353,9 @@ fsck_err:
        return ret;
 }
 
-static int bch2_btree_repair_topology_recurse(struct bch_fs *c, struct btree *b)
+static int bch2_btree_repair_topology_recurse(struct btree_trans *trans, struct btree *b)
 {
+       struct bch_fs *c = trans->c;
        struct btree_and_journal_iter iter;
        struct bkey_s_c k;
        struct bkey_buf prev_k, cur_k;
@@ -378,7 +380,7 @@ again:
                bch2_btree_and_journal_iter_advance(&iter);
                bch2_bkey_buf_reassemble(&cur_k, c, k);
 
-               cur = bch2_btree_node_get_noiter(c, cur_k.k,
+               cur = bch2_btree_node_get_noiter(trans, cur_k.k,
                                        b->c.btree_id, b->c.level - 1,
                                        false);
                ret = PTR_ERR_OR_ZERO(cur);
@@ -392,7 +394,7 @@ again:
                                bch2_btree_ids[b->c.btree_id],
                                b->c.level - 1,
                                buf.buf)) {
-                       bch2_btree_node_evict(c, cur_k.k);
+                       bch2_btree_node_evict(trans, cur_k.k);
                        ret = bch2_journal_key_delete(c, b->c.btree_id,
                                                      b->c.level, cur_k.k->k.p);
                        cur = NULL;
@@ -411,7 +413,7 @@ again:
 
                if (ret == DROP_THIS_NODE) {
                        six_unlock_read(&cur->c.lock);
-                       bch2_btree_node_evict(c, cur_k.k);
+                       bch2_btree_node_evict(trans, cur_k.k);
                        ret = bch2_journal_key_delete(c, b->c.btree_id,
                                                      b->c.level, cur_k.k->k.p);
                        cur = NULL;
@@ -425,7 +427,7 @@ again:
                prev = NULL;
 
                if (ret == DROP_PREV_NODE) {
-                       bch2_btree_node_evict(c, prev_k.k);
+                       bch2_btree_node_evict(trans, prev_k.k);
                        ret = bch2_journal_key_delete(c, b->c.btree_id,
                                                      b->c.level, prev_k.k->k.p);
                        if (ret)
@@ -465,7 +467,7 @@ again:
                bch2_bkey_buf_reassemble(&cur_k, c, k);
                bch2_btree_and_journal_iter_advance(&iter);
 
-               cur = bch2_btree_node_get_noiter(c, cur_k.k,
+               cur = bch2_btree_node_get_noiter(trans, cur_k.k,
                                        b->c.btree_id, b->c.level - 1,
                                        false);
                ret = PTR_ERR_OR_ZERO(cur);
@@ -476,12 +478,12 @@ again:
                        goto err;
                }
 
-               ret = bch2_btree_repair_topology_recurse(c, cur);
+               ret = bch2_btree_repair_topology_recurse(trans, cur);
                six_unlock_read(&cur->c.lock);
                cur = NULL;
 
                if (ret == DROP_THIS_NODE) {
-                       bch2_btree_node_evict(c, cur_k.k);
+                       bch2_btree_node_evict(trans, cur_k.k);
                        ret = bch2_journal_key_delete(c, b->c.btree_id,
                                                      b->c.level, cur_k.k->k.p);
                        dropped_children = true;
@@ -522,17 +524,20 @@ fsck_err:
 
 static int bch2_repair_topology(struct bch_fs *c)
 {
+       struct btree_trans trans;
        struct btree *b;
        unsigned i;
        int ret = 0;
 
+       bch2_trans_init(&trans, c, 0, 0);
+
        for (i = 0; i < BTREE_ID_NR && !ret; i++) {
                b = c->btree_roots[i].b;
                if (btree_node_fake(b))
                        continue;
 
-               six_lock_read(&b->c.lock, NULL, NULL);
-               ret = bch2_btree_repair_topology_recurse(c, b);
+               btree_node_lock_nopath_nofail(&trans, &b->c, SIX_LOCK_read);
+               ret = bch2_btree_repair_topology_recurse(&trans, b);
                six_unlock_read(&b->c.lock);
 
                if (ret == DROP_THIS_NODE) {
@@ -541,13 +546,16 @@ static int bch2_repair_topology(struct bch_fs *c)
                }
        }
 
+       bch2_trans_exit(&trans);
+
        return ret;
 }
 
-static int bch2_check_fix_ptrs(struct bch_fs *c, enum btree_id btree_id,
+static int bch2_check_fix_ptrs(struct btree_trans *trans, enum btree_id btree_id,
                               unsigned level, bool is_root,
                               struct bkey_s_c *k)
 {
+       struct bch_fs *c = trans->c;
        struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(*k);
        const union bch_extent_entry *entry;
        struct extent_ptr_decoded p = { 0 };
@@ -747,7 +755,7 @@ found:
                }
 
                if (level)
-                       bch2_btree_node_update_key_early(c, btree_id, level - 1, *k, new);
+                       bch2_btree_node_update_key_early(trans, btree_id, level - 1, *k, new);
 
                if (c->opts.verbose) {
                        printbuf_reset(&buf);
@@ -788,7 +796,7 @@ static int bch2_gc_mark_key(struct btree_trans *trans, enum btree_id btree_id,
                BUG_ON(bch2_journal_seq_verify &&
                       k->k->version.lo > atomic64_read(&c->journal.seq));
 
-               ret = bch2_check_fix_ptrs(c, btree_id, level, is_root, k);
+               ret = bch2_check_fix_ptrs(trans, btree_id, level, is_root, k);
                if (ret)
                        goto err;
 
@@ -941,7 +949,7 @@ static int bch2_gc_btree_init_recurse(struct btree_trans *trans, struct btree *b
                        bch2_bkey_buf_reassemble(&cur, c, k);
                        bch2_btree_and_journal_iter_advance(&iter);
 
-                       child = bch2_btree_node_get_noiter(c, cur.k,
+                       child = bch2_btree_node_get_noiter(trans, cur.k,
                                                b->c.btree_id, b->c.level - 1,
                                                false);
                        ret = PTR_ERR_OR_ZERO(child);
index b3dc8b43298e7c7b01482508467bf92e5a0635c0..c63cb70836cc25db318e6e5bdfdb9e7ceee1c566 100644 (file)
@@ -1652,9 +1652,15 @@ static void __btree_node_write_done(struct bch_fs *c, struct btree *b)
 
 static void btree_node_write_done(struct bch_fs *c, struct btree *b)
 {
-       six_lock_read(&b->c.lock, NULL, NULL);
+       struct btree_trans trans;
+
+       bch2_trans_init(&trans, c, 0, 0);
+
+       btree_node_lock_nopath_nofail(&trans, &b->c, SIX_LOCK_read);
        __btree_node_write_done(c, b);
        six_unlock_read(&b->c.lock);
+
+       bch2_trans_exit(&trans);
 }
 
 static void btree_node_write_work(struct work_struct *work)
index 7349c70f844519f3e2727ade756e788369a4856e..38a66302d6e9f84bcb7d3fe06baa9df0aafac97d 100644 (file)
@@ -95,25 +95,14 @@ static void bkey_cached_free(struct btree_key_cache *bc,
        six_unlock_intent(&ck->c.lock);
 }
 
-static void bkey_cached_free_fast(struct btree_key_cache *bc,
-                                 struct bkey_cached *ck)
+static void bkey_cached_move_to_freelist(struct btree_key_cache *bc,
+                                   struct bkey_cached *ck)
 {
-       struct bch_fs *c = container_of(bc, struct bch_fs, btree_key_cache);
        struct btree_key_cache_freelist *f;
        bool freed = false;
 
        BUG_ON(test_bit(BKEY_CACHED_DIRTY, &ck->flags));
 
-       ck->btree_trans_barrier_seq =
-               start_poll_synchronize_srcu(&c->btree_trans_barrier);
-
-       list_del_init(&ck->list);
-       atomic_long_inc(&bc->nr_freed);
-
-       kfree(ck->k);
-       ck->k           = NULL;
-       ck->u64s        = 0;
-
        preempt_disable();
        f = this_cpu_ptr(bc->pcpu_freed);
 
@@ -138,13 +127,32 @@ static void bkey_cached_free_fast(struct btree_key_cache *bc,
                list_move_tail(&ck->list, &bc->freed);
                mutex_unlock(&bc->lock);
        }
+}
+
+static void bkey_cached_free_fast(struct btree_key_cache *bc,
+                                 struct bkey_cached *ck)
+{
+       struct bch_fs *c = container_of(bc, struct bch_fs, btree_key_cache);
+
+       ck->btree_trans_barrier_seq =
+               start_poll_synchronize_srcu(&c->btree_trans_barrier);
+
+       list_del_init(&ck->list);
+       atomic_long_inc(&bc->nr_freed);
+
+       kfree(ck->k);
+       ck->k           = NULL;
+       ck->u64s        = 0;
+
+       bkey_cached_move_to_freelist(bc, ck);
 
        six_unlock_write(&ck->c.lock);
        six_unlock_intent(&ck->c.lock);
 }
 
 static struct bkey_cached *
-bkey_cached_alloc(struct btree_key_cache *c)
+bkey_cached_alloc(struct btree_trans *trans,
+                 struct btree_key_cache *c)
 {
        struct bkey_cached *ck = NULL;
        struct btree_key_cache_freelist *f;
@@ -173,8 +181,21 @@ bkey_cached_alloc(struct btree_key_cache *c)
        }
 
        if (ck) {
-               six_lock_intent(&ck->c.lock, NULL, NULL);
-               six_lock_write(&ck->c.lock, NULL, NULL);
+               int ret;
+
+               ret = btree_node_lock_nopath(trans, &ck->c, SIX_LOCK_intent);
+               if (unlikely(ret)) {
+                       bkey_cached_move_to_freelist(c, ck);
+                       return ERR_PTR(ret);
+               }
+
+               ret = btree_node_lock_nopath(trans, &ck->c, SIX_LOCK_write);
+               if (unlikely(ret)) {
+                       six_unlock_intent(&ck->c.lock);
+                       bkey_cached_move_to_freelist(c, ck);
+                       return ERR_PTR(ret);
+               }
+
                return ck;
        }
 
@@ -216,15 +237,18 @@ bkey_cached_reuse(struct btree_key_cache *c)
 }
 
 static struct bkey_cached *
-btree_key_cache_create(struct bch_fs *c,
+btree_key_cache_create(struct btree_trans *trans,
                       enum btree_id btree_id,
                       struct bpos pos)
 {
+       struct bch_fs *c = trans->c;
        struct btree_key_cache *bc = &c->btree_key_cache;
        struct bkey_cached *ck;
        bool was_new = true;
 
-       ck = bkey_cached_alloc(bc);
+       ck = bkey_cached_alloc(trans, bc);
+       if (unlikely(IS_ERR(ck)))
+               return ck;
 
        if (unlikely(!ck)) {
                ck = bkey_cached_reuse(bc);
@@ -370,7 +394,7 @@ int bch2_btree_path_traverse_cached(struct btree_trans *trans, struct btree_path
 retry:
        ck = bch2_btree_key_cache_find(c, path->btree_id, path->pos);
        if (!ck) {
-               ck = btree_key_cache_create(c, path->btree_id, path->pos);
+               ck = btree_key_cache_create(trans, path->btree_id, path->pos);
                ret = PTR_ERR_OR_ZERO(ck);
                if (ret)
                        goto err;
@@ -519,10 +543,15 @@ static int btree_key_cache_flush_pos(struct btree_trans *trans,
 evict:
                BUG_ON(!btree_node_intent_locked(c_iter.path, 0));
 
-               mark_btree_node_unlocked(c_iter.path, 0);
-               c_iter.path->l[0].b = NULL;
+               /*
+                * XXX: holding a lock that is not marked in btree_trans, not
+                * ideal:
+                */
+               six_lock_increment(&ck->c.lock, SIX_LOCK_intent);
+               bch2_trans_unlock(trans);
 
-               six_lock_write(&ck->c.lock, NULL, NULL);
+               /* Will not fail because we are holding no other locks: */
+               btree_node_lock_nopath_nofail(trans, &ck->c, SIX_LOCK_write);
 
                if (test_bit(BKEY_CACHED_DIRTY, &ck->flags)) {
                        clear_bit(BKEY_CACHED_DIRTY, &ck->flags);
@@ -546,11 +575,13 @@ int bch2_btree_key_cache_journal_flush(struct journal *j,
        struct bkey_cached *ck =
                container_of(pin, struct bkey_cached, journal);
        struct bkey_cached_key key;
+       struct btree_trans trans;
+       int srcu_idx = srcu_read_lock(&c->btree_trans_barrier);
        int ret = 0;
 
-       int srcu_idx = srcu_read_lock(&c->btree_trans_barrier);
+       bch2_trans_init(&trans, c, 0, 0);
 
-       six_lock_read(&ck->c.lock, NULL, NULL);
+       btree_node_lock_nopath_nofail(&trans, &ck->c, SIX_LOCK_read);
        key = ck->key;
 
        if (ck->journal.seq != seq ||
@@ -567,12 +598,13 @@ int bch2_btree_key_cache_journal_flush(struct journal *j,
        }
        six_unlock_read(&ck->c.lock);
 
-       ret = bch2_trans_do(c, NULL, NULL, 0,
+       ret = commit_do(&trans, NULL, NULL, 0,
                btree_key_cache_flush_pos(&trans, key, seq,
                                BTREE_INSERT_JOURNAL_RECLAIM, false));
 unlock:
        srcu_read_unlock(&c->btree_trans_barrier, srcu_idx);
 
+       bch2_trans_exit(&trans);
        return ret;
 }
 
index 301311763d59142a85498e911172f9e4b55bc4b4..24d0ea903380249bec97193bb2a69573c226c811 100644 (file)
@@ -61,7 +61,7 @@ void __bch2_btree_node_lock_write(struct btree_trans *trans, struct btree *b)
         * locked:
         */
        six_lock_readers_add(&b->c.lock, -readers);
-       six_lock_write(&b->c.lock, NULL, NULL);
+       btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_write);
        six_lock_readers_add(&b->c.lock, readers);
 }
 
index ab3161c1b1f4743a119257f17b336db24177f9eb..32c28c1341e977af42bced5f76cda58c6b438459 100644 (file)
@@ -185,6 +185,24 @@ void bch2_btree_node_unlock_write(struct btree_trans *,
 
 /* lock: */
 
+static inline int __must_check
+btree_node_lock_nopath(struct btree_trans *trans,
+                      struct btree_bkey_cached_common *b,
+                      enum six_lock_type type)
+{
+       six_lock_type(&b->lock, type, NULL, NULL);
+       return 0;
+}
+
+static inline void btree_node_lock_nopath_nofail(struct btree_trans *trans,
+                                        struct btree_bkey_cached_common *b,
+                                        enum six_lock_type type)
+{
+       int ret = btree_node_lock_nopath(trans, b, type);
+
+       BUG_ON(ret);
+}
+
 static inline int btree_node_lock_type(struct btree_trans *trans,
                                       struct btree_path *path,
                                       struct btree_bkey_cached_common *b,
index 1f5b98a3d0a2db4da9502172d8c091db3efc968b..6fe49766c6c8424c0a41f9f68e2f984171ed1fc5 100644 (file)
@@ -169,7 +169,7 @@ static void bch2_btree_node_free_inmem(struct btree_trans *trans,
                BUG_ON(path->l[b->c.level].b == b &&
                       path->l[b->c.level].lock_seq == b->c.lock.state.seq);
 
-       six_lock_write(&b->c.lock, NULL, NULL);
+       btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_write);
 
        bch2_btree_node_hash_remove(&c->btree_cache, b);
        __btree_node_free(c, b);
@@ -259,7 +259,9 @@ mem_alloc:
        return b;
 }
 
-static struct btree *bch2_btree_node_alloc(struct btree_update *as, unsigned level)
+static struct btree *bch2_btree_node_alloc(struct btree_update *as,
+                                          struct btree_trans *trans,
+                                          unsigned level)
 {
        struct bch_fs *c = as->c;
        struct btree *b;
@@ -271,8 +273,8 @@ static struct btree *bch2_btree_node_alloc(struct btree_update *as, unsigned lev
 
        b = p->b[--p->nr];
 
-       six_lock_intent(&b->c.lock, NULL, NULL);
-       six_lock_write(&b->c.lock, NULL, NULL);
+       btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_intent);
+       btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_write);
 
        set_btree_node_accessed(b);
        set_btree_node_dirty_acct(c, b);
@@ -323,12 +325,13 @@ static void btree_set_max(struct btree *b, struct bpos pos)
 }
 
 struct btree *__bch2_btree_node_alloc_replacement(struct btree_update *as,
+                                                 struct btree_trans *trans,
                                                  struct btree *b,
                                                  struct bkey_format format)
 {
        struct btree *n;
 
-       n = bch2_btree_node_alloc(as, b->c.level);
+       n = bch2_btree_node_alloc(as, trans, b->c.level);
 
        SET_BTREE_NODE_SEQ(n->data, BTREE_NODE_SEQ(b->data) + 1);
 
@@ -347,6 +350,7 @@ struct btree *__bch2_btree_node_alloc_replacement(struct btree_update *as,
 }
 
 static struct btree *bch2_btree_node_alloc_replacement(struct btree_update *as,
+                                                      struct btree_trans *trans,
                                                       struct btree *b)
 {
        struct bkey_format new_f = bch2_btree_calc_format(b);
@@ -358,12 +362,13 @@ static struct btree *bch2_btree_node_alloc_replacement(struct btree_update *as,
        if (!bch2_btree_node_format_fits(as->c, b, &new_f))
                new_f = b->format;
 
-       return __bch2_btree_node_alloc_replacement(as, b, new_f);
+       return __bch2_btree_node_alloc_replacement(as, trans, b, new_f);
 }
 
-static struct btree *__btree_root_alloc(struct btree_update *as, unsigned level)
+static struct btree *__btree_root_alloc(struct btree_update *as,
+                               struct btree_trans *trans, unsigned level)
 {
-       struct btree *b = bch2_btree_node_alloc(as, level);
+       struct btree *b = bch2_btree_node_alloc(as, trans, level);
 
        btree_set_min(b, POS_MIN);
        btree_set_max(b, SPOS_MAX);
@@ -378,7 +383,7 @@ static struct btree *__btree_root_alloc(struct btree_update *as, unsigned level)
        return b;
 }
 
-static void bch2_btree_reserve_put(struct btree_update *as)
+static void bch2_btree_reserve_put(struct btree_update *as, struct btree_trans *trans)
 {
        struct bch_fs *c = as->c;
        struct prealloc_nodes *p;
@@ -405,8 +410,8 @@ static void bch2_btree_reserve_put(struct btree_update *as)
 
                        mutex_unlock(&c->btree_reserve_cache_lock);
 
-                       six_lock_intent(&b->c.lock, NULL, NULL);
-                       six_lock_write(&b->c.lock, NULL, NULL);
+                       btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_intent);
+                       btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_write);
                        __btree_node_free(c, b);
                        six_unlock_write(&b->c.lock);
                        six_unlock_intent(&b->c.lock);
@@ -460,7 +465,7 @@ err:
 
 /* Asynchronous interior node update machinery */
 
-static void bch2_btree_update_free(struct btree_update *as)
+static void bch2_btree_update_free(struct btree_update *as, struct btree_trans *trans)
 {
        struct bch_fs *c = as->c;
 
@@ -473,7 +478,7 @@ static void bch2_btree_update_free(struct btree_update *as)
        bch2_journal_pin_drop(&c->journal, &as->journal);
        bch2_journal_pin_flush(&c->journal, &as->journal);
        bch2_disk_reservation_put(c, &as->disk_res);
-       bch2_btree_reserve_put(as);
+       bch2_btree_reserve_put(as, trans);
 
        bch2_time_stats_update(&c->times[BCH_TIME_btree_interior_update_total],
                               as->start_time);
@@ -551,12 +556,13 @@ static int btree_update_nodes_written_trans(struct btree_trans *trans,
 static void btree_update_nodes_written(struct btree_update *as)
 {
        struct bch_fs *c = as->c;
-       struct btree *b = as->b;
+       struct btree *b;
        struct btree_trans trans;
        u64 journal_seq = 0;
        unsigned i;
        int ret;
 
+       bch2_trans_init(&trans, c, 0, 512);
        /*
         * If we're already in an error state, it might be because a btree node
         * was never written, and we might be trying to free that same btree
@@ -573,15 +579,16 @@ static void btree_update_nodes_written(struct btree_update *as)
         * on disk:
         */
        for (i = 0; i < as->nr_old_nodes; i++) {
-               struct btree *old = as->old_nodes[i];
                __le64 seq;
 
-               six_lock_read(&old->c.lock, NULL, NULL);
-               seq = old->data ? old->data->keys.seq : 0;
-               six_unlock_read(&old->c.lock);
+               b = as->old_nodes[i];
+
+               btree_node_lock_nopath_nofail(&trans, &b->c, SIX_LOCK_read);
+               seq = b->data ? b->data->keys.seq : 0;
+               six_unlock_read(&b->c.lock);
 
                if (seq == as->old_nodes_seq[i])
-                       wait_on_bit_io(&old->flags, BTREE_NODE_write_in_flight_inner,
+                       wait_on_bit_io(&b->flags, BTREE_NODE_write_in_flight_inner,
                                       TASK_UNINTERRUPTIBLE);
        }
 
@@ -598,19 +605,19 @@ static void btree_update_nodes_written(struct btree_update *as)
         * journal reclaim does btree updates when flushing bkey_cached entries,
         * which may require allocations as well.
         */
-       bch2_trans_init(&trans, c, 0, 512);
        ret = commit_do(&trans, &as->disk_res, &journal_seq,
-                             BTREE_INSERT_NOFAIL|
-                             BTREE_INSERT_NOCHECK_RW|
-                             BTREE_INSERT_JOURNAL_RECLAIM|
-                             JOURNAL_WATERMARK_reserved,
-                             btree_update_nodes_written_trans(&trans, as));
-       bch2_trans_exit(&trans);
+                       BTREE_INSERT_NOFAIL|
+                       BTREE_INSERT_NOCHECK_RW|
+                       BTREE_INSERT_JOURNAL_RECLAIM|
+                       JOURNAL_WATERMARK_reserved,
+                       btree_update_nodes_written_trans(&trans, as));
+       bch2_trans_unlock(&trans);
 
        bch2_fs_fatal_err_on(ret && !bch2_journal_error(&c->journal), c,
                             "error %i in btree_update_nodes_written()", ret);
 err:
-       if (b) {
+       if (as->b) {
+               b = as->b;
                /*
                 * @b is the node we did the final insert into:
                 *
@@ -623,8 +630,8 @@ err:
                 * we're in journal error state:
                 */
 
-               six_lock_intent(&b->c.lock, NULL, NULL);
-               six_lock_write(&b->c.lock, NULL, NULL);
+               btree_node_lock_nopath_nofail(&trans, &b->c, SIX_LOCK_intent);
+               btree_node_lock_nopath_nofail(&trans, &b->c, SIX_LOCK_write);
                mutex_lock(&c->btree_interior_update_lock);
 
                list_del(&as->write_blocked_list);
@@ -681,7 +688,7 @@ err:
        for (i = 0; i < as->nr_new_nodes; i++) {
                b = as->new_nodes[i];
 
-               six_lock_read(&b->c.lock, NULL, NULL);
+               btree_node_lock_nopath_nofail(&trans, &b->c, SIX_LOCK_read);
                btree_node_write_if_need(c, b, SIX_LOCK_read);
                six_unlock_read(&b->c.lock);
        }
@@ -689,7 +696,8 @@ err:
        for (i = 0; i < as->nr_open_buckets; i++)
                bch2_open_bucket_put(c, c->open_buckets + as->open_buckets[i]);
 
-       bch2_btree_update_free(as);
+       bch2_btree_update_free(as, &trans);
+       bch2_trans_exit(&trans);
 }
 
 static void btree_interior_update_work(struct work_struct *work)
@@ -936,7 +944,7 @@ static void bch2_btree_interior_update_will_free_node(struct btree_update *as,
        as->nr_old_nodes++;
 }
 
-static void bch2_btree_update_done(struct btree_update *as)
+static void bch2_btree_update_done(struct btree_update *as, struct btree_trans *trans)
 {
        struct bch_fs *c = as->c;
        u64 start_time = as->start_time;
@@ -947,7 +955,7 @@ static void bch2_btree_update_done(struct btree_update *as)
                up_read(&as->c->gc_lock);
        as->took_gc_lock = false;
 
-       bch2_btree_reserve_put(as);
+       bch2_btree_reserve_put(as, trans);
 
        continue_at(&as->cl, btree_update_set_nodes_written,
                    as->c->btree_interior_update_worker);
@@ -1102,7 +1110,7 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
        bch2_trans_verify_not_restarted(trans, restart_count);
        return as;
 err:
-       bch2_btree_update_free(as);
+       bch2_btree_update_free(as, trans);
        return ERR_PTR(ret);
 }
 
@@ -1254,6 +1262,7 @@ __bch2_btree_insert_keys_interior(struct btree_update *as,
  * node)
  */
 static struct btree *__btree_split_node(struct btree_update *as,
+                                       struct btree_trans *trans,
                                        struct btree *n1)
 {
        struct bkey_format_state s;
@@ -1263,7 +1272,7 @@ static struct btree *__btree_split_node(struct btree_update *as,
        struct bkey_packed *k, *set2_start, *set2_end, *out, *prev = NULL;
        struct bpos n1_pos;
 
-       n2 = bch2_btree_node_alloc(as, n1->c.level);
+       n2 = bch2_btree_node_alloc(as, trans, n1->c.level);
 
        n2->data->max_key       = n1->data->max_key;
        n2->data->format        = n1->format;
@@ -1427,7 +1436,7 @@ static void btree_split(struct btree_update *as, struct btree_trans *trans,
 
        bch2_btree_interior_update_will_free_node(as, b);
 
-       n1 = bch2_btree_node_alloc_replacement(as, b);
+       n1 = bch2_btree_node_alloc_replacement(as, trans, b);
 
        if (keys)
                btree_split_insert_keys(as, trans, path, n1, keys);
@@ -1435,7 +1444,7 @@ static void btree_split(struct btree_update *as, struct btree_trans *trans,
        if (bset_u64s(&n1->set[0]) > BTREE_SPLIT_THRESHOLD(c)) {
                trace_and_count(c, btree_node_split, c, b);
 
-               n2 = __btree_split_node(as, n1);
+               n2 = __btree_split_node(as, trans, n1);
 
                bch2_btree_build_aux_trees(n2);
                bch2_btree_build_aux_trees(n1);
@@ -1457,7 +1466,7 @@ static void btree_split(struct btree_update *as, struct btree_trans *trans,
 
                if (!parent) {
                        /* Depth increases, make a new root */
-                       n3 = __btree_root_alloc(as, b->c.level + 1);
+                       n3 = __btree_root_alloc(as, trans, b->c.level + 1);
 
                        n3->sib_u64s[0] = U16_MAX;
                        n3->sib_u64s[1] = U16_MAX;
@@ -1622,7 +1631,7 @@ int bch2_btree_split_leaf(struct btree_trans *trans,
                return PTR_ERR(as);
 
        btree_split(as, trans, path, b, NULL, flags);
-       bch2_btree_update_done(as);
+       bch2_btree_update_done(as, trans);
 
        for (l = path->level + 1; btree_path_node(path, l) && !ret; l++)
                ret = bch2_foreground_maybe_merge(trans, path, l, flags);
@@ -1741,7 +1750,7 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans,
        bch2_btree_interior_update_will_free_node(as, b);
        bch2_btree_interior_update_will_free_node(as, m);
 
-       n = bch2_btree_node_alloc(as, b->c.level);
+       n = bch2_btree_node_alloc(as, trans, b->c.level);
 
        SET_BTREE_NODE_SEQ(n->data,
                           max(BTREE_NODE_SEQ(b->data),
@@ -1788,7 +1797,7 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans,
 
        six_unlock_intent(&n->c.lock);
 
-       bch2_btree_update_done(as);
+       bch2_btree_update_done(as, trans);
 
        bch2_time_stats_update(&c->times[BCH_TIME_btree_node_merge], start_time);
 out:
@@ -1822,7 +1831,7 @@ int bch2_btree_node_rewrite(struct btree_trans *trans,
 
        bch2_btree_interior_update_will_free_node(as, b);
 
-       n = bch2_btree_node_alloc_replacement(as, b);
+       n = bch2_btree_node_alloc_replacement(as, trans, b);
        bch2_btree_update_add_new_node(as, n);
 
        bch2_btree_build_aux_trees(n);
@@ -1847,7 +1856,7 @@ int bch2_btree_node_rewrite(struct btree_trans *trans,
        bch2_btree_node_free_inmem(trans, b);
        six_unlock_intent(&n->c.lock);
 
-       bch2_btree_update_done(as);
+       bch2_btree_update_done(as, trans);
 out:
        bch2_btree_path_downgrade(trans, iter->path);
        return ret;
index adfc6c24a7a402f3eeb88db33394909213d08fc2..7af810df8348e0ae9107ec737ef55f15acea80f1 100644 (file)
@@ -117,6 +117,7 @@ struct btree_update {
 };
 
 struct btree *__bch2_btree_node_alloc_replacement(struct btree_update *,
+                                                 struct btree_trans *,
                                                  struct btree *,
                                                  struct bkey_format);
 
index a8306b16956d697c412b89118725ebd8bb415035..d414cbefa3c950e115a043bf35c2478d67e937b3 100644 (file)
@@ -169,10 +169,13 @@ static int __btree_node_flush(struct journal *j, struct journal_entry_pin *pin,
        struct bch_fs *c = container_of(j, struct bch_fs, journal);
        struct btree_write *w = container_of(pin, struct btree_write, journal);
        struct btree *b = container_of(w, struct btree, writes[i]);
+       struct btree_trans trans;
        unsigned long old, new, v;
        unsigned idx = w - b->writes;
 
-       six_lock_read(&b->c.lock, NULL, NULL);
+       bch2_trans_init(&trans, c, 0, 0);
+
+       btree_node_lock_nopath_nofail(&trans, &b->c, SIX_LOCK_read);
        v = READ_ONCE(b->flags);
 
        do {
@@ -188,6 +191,8 @@ static int __btree_node_flush(struct journal *j, struct journal_entry_pin *pin,
 
        btree_node_write_if_need(c, b, SIX_LOCK_read);
        six_unlock_read(&b->c.lock);
+
+       bch2_trans_exit(&trans);
        return 0;
 }