bcachefs: btree_path
author Kent Overstreet <kent.overstreet@gmail.com>
Mon, 30 Aug 2021 19:18:31 +0000 (15:18 -0400)
committer Kent Overstreet <kent.overstreet@linux.dev>
Sun, 22 Oct 2023 21:09:11 +0000 (17:09 -0400)
This splits btree_iter into two components: btree_iter is now the
externally visible component, and it points to a btree_path which is now
reference counted.

This means we no longer have to clone iterators up front if they might
be mutated: a btree_path can be shared by multiple iterators, and is
cloned only if an iterator would mutate a shared btree_path. This helps
us use iterators more efficiently, slims down the main long-lived state
in btree_trans, and significantly cleans up the logic for iterator
lifetimes.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
44 files changed:
fs/bcachefs/acl.c
fs/bcachefs/alloc_background.c
fs/bcachefs/bcachefs.h
fs/bcachefs/bset.c
fs/bcachefs/btree_cache.c
fs/bcachefs/btree_cache.h
fs/bcachefs/btree_gc.c
fs/bcachefs/btree_iter.c
fs/bcachefs/btree_iter.h
fs/bcachefs/btree_key_cache.c
fs/bcachefs/btree_key_cache.h
fs/bcachefs/btree_locking.h
fs/bcachefs/btree_types.h
fs/bcachefs/btree_update.h
fs/bcachefs/btree_update_interior.c
fs/bcachefs/btree_update_interior.h
fs/bcachefs/btree_update_leaf.c
fs/bcachefs/buckets.c
fs/bcachefs/buckets.h
fs/bcachefs/debug.c
fs/bcachefs/dirent.c
fs/bcachefs/dirent.h
fs/bcachefs/ec.c
fs/bcachefs/extent_update.c
fs/bcachefs/extents.c
fs/bcachefs/fs-common.c
fs/bcachefs/fs-io.c
fs/bcachefs/fs.c
fs/bcachefs/fsck.c
fs/bcachefs/inode.c
fs/bcachefs/inode.h
fs/bcachefs/io.c
fs/bcachefs/journal_seq_blacklist.c
fs/bcachefs/migrate.c
fs/bcachefs/move.c
fs/bcachefs/quota.c
fs/bcachefs/recovery.c
fs/bcachefs/reflink.c
fs/bcachefs/str_hash.h
fs/bcachefs/super.c
fs/bcachefs/sysfs.c
fs/bcachefs/tests.c
fs/bcachefs/trace.h
fs/bcachefs/xattr.c

index 39ac6d2e178d86e5d091d7db3d3386c26f45f3e5..93b78e4e6e0dc12933019e31aba78f68b5971b46 100644 (file)
@@ -219,7 +219,7 @@ struct posix_acl *bch2_get_acl(struct mnt_idmap *idmap,
        struct bch_fs *c = inode->v.i_sb->s_fs_info;
        struct bch_hash_info hash = bch2_hash_info_init(c, &inode->ei_inode);
        struct btree_trans trans;
-       struct btree_iter *iter;
+       struct btree_iter iter = { NULL };
        struct bkey_s_c_xattr xattr;
        struct posix_acl *acl = NULL;
        struct bkey_s_c k;
@@ -229,20 +229,19 @@ struct posix_acl *bch2_get_acl(struct mnt_idmap *idmap,
 retry:
        bch2_trans_begin(&trans);
 
-       iter = bch2_hash_lookup(&trans, bch2_xattr_hash_desc,
+       ret = bch2_hash_lookup(&trans, &iter, bch2_xattr_hash_desc,
                        &hash, inode->v.i_ino,
                        &X_SEARCH(acl_to_xattr_type(type), "", 0),
                        0);
-       if (IS_ERR(iter)) {
-               if (PTR_ERR(iter) == -EINTR)
+       if (ret) {
+               if (ret == -EINTR)
                        goto retry;
-
-               if (PTR_ERR(iter) != -ENOENT)
-                       acl = ERR_CAST(iter);
+               if (ret != -ENOENT)
+                       acl = ERR_PTR(ret);
                goto out;
        }
 
-       k = bch2_btree_iter_peek_slot(iter);
+       k = bch2_btree_iter_peek_slot(&iter);
        ret = bkey_err(k);
        if (ret) {
                acl = ERR_PTR(ret);
@@ -255,8 +254,8 @@ retry:
 
        if (!IS_ERR(acl))
                set_cached_acl(&inode->v, type, acl);
-       bch2_trans_iter_put(&trans, iter);
 out:
+       bch2_trans_iter_exit(&trans, &iter);
        bch2_trans_exit(&trans);
        return acl;
 }
@@ -298,7 +297,7 @@ int bch2_set_acl(struct mnt_idmap *idmap,
        struct bch_inode_info *inode = to_bch_ei(dentry->d_inode);
        struct bch_fs *c = inode->v.i_sb->s_fs_info;
        struct btree_trans trans;
-       struct btree_iter *inode_iter;
+       struct btree_iter inode_iter = { NULL };
        struct bch_inode_unpacked inode_u;
        struct bch_hash_info hash_info;
        struct posix_acl *acl;
@@ -311,9 +310,8 @@ retry:
        bch2_trans_begin(&trans);
        acl = _acl;
 
-       inode_iter = bch2_inode_peek(&trans, &inode_u, inode->v.i_ino,
-                                    BTREE_ITER_INTENT);
-       ret = PTR_ERR_OR_ZERO(inode_iter);
+       ret = bch2_inode_peek(&trans, &inode_iter, &inode_u, inode->v.i_ino,
+                             BTREE_ITER_INTENT);
        if (ret)
                goto btree_err;
 
@@ -334,11 +332,11 @@ retry:
        inode_u.bi_ctime        = bch2_current_time(c);
        inode_u.bi_mode         = mode;
 
-       ret =   bch2_inode_write(&trans, inode_iter, &inode_u) ?:
+       ret =   bch2_inode_write(&trans, &inode_iter, &inode_u) ?:
                bch2_trans_commit(&trans, NULL,
                                  &inode->ei_journal_seq, 0);
 btree_err:
-       bch2_trans_iter_put(&trans, inode_iter);
+       bch2_trans_iter_exit(&trans, &inode_iter);
 
        if (ret == -EINTR)
                goto retry;
@@ -362,22 +360,21 @@ int bch2_acl_chmod(struct btree_trans *trans,
                   struct posix_acl **new_acl)
 {
        struct bch_hash_info hash_info = bch2_hash_info_init(trans->c, inode);
-       struct btree_iter *iter;
+       struct btree_iter iter;
        struct bkey_s_c_xattr xattr;
        struct bkey_i_xattr *new;
        struct posix_acl *acl;
        struct bkey_s_c k;
        int ret;
 
-       iter = bch2_hash_lookup(trans, bch2_xattr_hash_desc,
+       ret = bch2_hash_lookup(trans, &iter, bch2_xattr_hash_desc,
                        &hash_info, inode->bi_inum,
                        &X_SEARCH(KEY_TYPE_XATTR_INDEX_POSIX_ACL_ACCESS, "", 0),
                        BTREE_ITER_INTENT);
-       ret = PTR_ERR_OR_ZERO(iter);
        if (ret)
                return ret == -ENOENT ? 0 : ret;
 
-       k = bch2_btree_iter_peek_slot(iter);
+       k = bch2_btree_iter_peek_slot(&iter);
        xattr = bkey_s_c_to_xattr(k);
        if (ret)
                goto err;
@@ -398,12 +395,12 @@ int bch2_acl_chmod(struct btree_trans *trans,
                goto err;
        }
 
-       new->k.p = iter->pos;
-       ret = bch2_trans_update(trans, iter, &new->k_i, 0);
+       new->k.p = iter.pos;
+       ret = bch2_trans_update(trans, &iter, &new->k_i, 0);
        *new_acl = acl;
        acl = NULL;
 err:
-       bch2_trans_iter_put(trans, iter);
+       bch2_trans_iter_exit(trans, &iter);
        if (!IS_ERR_OR_NULL(acl))
                kfree(acl);
        return ret;
index 932a8176dff743fcfee9da89b28ca6a819548e1d..54fbfb22d671b9cb42e9588433665fc0448a6e54 100644 (file)
@@ -353,32 +353,32 @@ err:
 int bch2_alloc_write(struct bch_fs *c, unsigned flags)
 {
        struct btree_trans trans;
-       struct btree_iter *iter;
+       struct btree_iter iter;
        struct bch_dev *ca;
        unsigned i;
        int ret = 0;
 
        bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
-       iter = bch2_trans_get_iter(&trans, BTREE_ID_alloc, POS_MIN,
-                                  BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
+       bch2_trans_iter_init(&trans, &iter, BTREE_ID_alloc, POS_MIN,
+                            BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
 
        for_each_member_device(ca, c, i) {
-               bch2_btree_iter_set_pos(iter,
+               bch2_btree_iter_set_pos(&iter,
                        POS(ca->dev_idx, ca->mi.first_bucket));
 
-               while (iter->pos.offset < ca->mi.nbuckets) {
+               while (iter.pos.offset < ca->mi.nbuckets) {
                        bch2_trans_cond_resched(&trans);
 
-                       ret = bch2_alloc_write_key(&trans, iter, flags);
+                       ret = bch2_alloc_write_key(&trans, &iter, flags);
                        if (ret) {
                                percpu_ref_put(&ca->ref);
                                goto err;
                        }
-                       bch2_btree_iter_advance(iter);
+                       bch2_btree_iter_advance(&iter);
                }
        }
 err:
-       bch2_trans_iter_put(&trans, iter);
+       bch2_trans_iter_exit(&trans, &iter);
        bch2_trans_exit(&trans);
        return ret;
 }
@@ -390,18 +390,18 @@ int bch2_bucket_io_time_reset(struct btree_trans *trans, unsigned dev,
 {
        struct bch_fs *c = trans->c;
        struct bch_dev *ca = bch_dev_bkey_exists(c, dev);
-       struct btree_iter *iter;
+       struct btree_iter iter;
        struct bucket *g;
        struct bkey_alloc_buf *a;
        struct bkey_alloc_unpacked u;
        u64 *time, now;
        int ret = 0;
 
-       iter = bch2_trans_get_iter(trans, BTREE_ID_alloc, POS(dev, bucket_nr),
-                                  BTREE_ITER_CACHED|
-                                  BTREE_ITER_CACHED_NOFILL|
-                                  BTREE_ITER_INTENT);
-       ret = bch2_btree_iter_traverse(iter);
+       bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc, POS(dev, bucket_nr),
+                            BTREE_ITER_CACHED|
+                            BTREE_ITER_CACHED_NOFILL|
+                            BTREE_ITER_INTENT);
+       ret = bch2_btree_iter_traverse(&iter);
        if (ret)
                goto out;
 
@@ -412,7 +412,7 @@ int bch2_bucket_io_time_reset(struct btree_trans *trans, unsigned dev,
 
        percpu_down_read(&c->mark_lock);
        g = bucket(ca, bucket_nr);
-       u = alloc_mem_to_key(iter, g, READ_ONCE(g->mark));
+       u = alloc_mem_to_key(&iter, g, READ_ONCE(g->mark));
        percpu_up_read(&c->mark_lock);
 
        time = rw == READ ? &u.read_time : &u.write_time;
@@ -423,10 +423,10 @@ int bch2_bucket_io_time_reset(struct btree_trans *trans, unsigned dev,
        *time = now;
 
        bch2_alloc_pack(c, a, u);
-       ret   = bch2_trans_update(trans, iter, &a->k, 0) ?:
+       ret   = bch2_trans_update(trans, &iter, &a->k, 0) ?:
                bch2_trans_commit(trans, NULL, NULL, 0);
 out:
-       bch2_trans_iter_put(trans, iter);
+       bch2_trans_iter_exit(trans, &iter);
        return ret;
 }
 
@@ -695,27 +695,28 @@ static int bucket_invalidate_btree(struct btree_trans *trans,
        struct bkey_alloc_unpacked u;
        struct bucket *g;
        struct bucket_mark m;
-       struct btree_iter *iter =
-               bch2_trans_get_iter(trans, BTREE_ID_alloc,
-                                   POS(ca->dev_idx, b),
-                                   BTREE_ITER_CACHED|
-                                   BTREE_ITER_CACHED_NOFILL|
-                                   BTREE_ITER_INTENT);
+       struct btree_iter iter;
        int ret;
 
+       bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc,
+                            POS(ca->dev_idx, b),
+                            BTREE_ITER_CACHED|
+                            BTREE_ITER_CACHED_NOFILL|
+                            BTREE_ITER_INTENT);
+
        a = bch2_trans_kmalloc(trans, sizeof(*a));
        ret = PTR_ERR_OR_ZERO(a);
        if (ret)
                goto err;
 
-       ret = bch2_btree_iter_traverse(iter);
+       ret = bch2_btree_iter_traverse(&iter);
        if (ret)
                goto err;
 
        percpu_down_read(&c->mark_lock);
        g = bucket(ca, b);
        m = READ_ONCE(g->mark);
-       u = alloc_mem_to_key(iter, g, m);
+       u = alloc_mem_to_key(&iter, g, m);
        percpu_up_read(&c->mark_lock);
 
        u.gen++;
@@ -726,10 +727,10 @@ static int bucket_invalidate_btree(struct btree_trans *trans,
        u.write_time    = atomic64_read(&c->io_clock[WRITE].now);
 
        bch2_alloc_pack(c, a, u);
-       ret = bch2_trans_update(trans, iter, &a->k,
+       ret = bch2_trans_update(trans, &iter, &a->k,
                                BTREE_TRIGGER_BUCKET_INVALIDATE);
 err:
-       bch2_trans_iter_put(trans, iter);
+       bch2_trans_iter_exit(trans, &iter);
        return ret;
 }
 
index e2aac1da18ae505340509684f2ae49317526ba53..114ae77a8a02f11c58d499f82915a7b3c7a4bdb2 100644 (file)
@@ -558,8 +558,8 @@ struct journal_keys {
        u64                     journal_seq_base;
 };
 
-struct btree_iter_buf {
-       struct btree_iter       *iter;
+struct btree_path_buf {
+       struct btree_path       *path;
 };
 
 #define REPLICAS_DELTA_LIST_MAX        (1U << 16)
@@ -667,9 +667,9 @@ struct bch_fs {
        /* btree_iter.c: */
        struct mutex            btree_trans_lock;
        struct list_head        btree_trans_list;
-       mempool_t               btree_iters_pool;
+       mempool_t               btree_paths_pool;
        mempool_t               btree_trans_mem_pool;
-       struct btree_iter_buf  __percpu *btree_iters_bufs;
+       struct btree_path_buf  __percpu *btree_paths_bufs;
 
        struct srcu_struct      btree_trans_barrier;
 
index 9484f28be6de6b8fc875c0ee49858554bf6d5b1d..2e0ad3a4fa67dd060b9890b6d48ff997ddb5e5da 100644 (file)
@@ -185,9 +185,11 @@ void bch2_btree_node_iter_verify(struct btree_node_iter *iter,
                return;
 
        /* Verify no duplicates: */
-       btree_node_iter_for_each(iter, set)
+       btree_node_iter_for_each(iter, set) {
+               BUG_ON(set->k > set->end);
                btree_node_iter_for_each(iter, s2)
                        BUG_ON(set != s2 && set->end == s2->end);
+       }
 
        /* Verify that set->end is correct: */
        btree_node_iter_for_each(iter, set) {
index c94ed4da1ca4fe895eac13fcafb8f9c9bc83ab3d..d45218d5fd35f2aa678eb9d323d8613357f682f6 100644 (file)
@@ -641,7 +641,7 @@ err:
 /* Slowpath, don't want it inlined into btree_iter_traverse() */
 static noinline struct btree *bch2_btree_node_fill(struct bch_fs *c,
                                struct btree_trans *trans,
-                               struct btree_iter *iter,
+                               struct btree_path *path,
                                const struct bkey_i *k,
                                enum btree_id btree_id,
                                unsigned level,
@@ -657,7 +657,7 @@ static noinline struct btree *bch2_btree_node_fill(struct bch_fs *c,
         * Parent node must be locked, else we could read in a btree node that's
         * been freed:
         */
-       if (trans && !bch2_btree_node_relock(trans, iter, level + 1)) {
+       if (trans && !bch2_btree_node_relock(trans, path, level + 1)) {
                btree_trans_restart(trans);
                return ERR_PTR(-EINTR);
        }
@@ -699,7 +699,7 @@ static noinline struct btree *bch2_btree_node_fill(struct bch_fs *c,
 
        if (trans &&
            (!bch2_trans_relock(trans) ||
-            !bch2_btree_iter_relock_intent(trans, iter))) {
+            !bch2_btree_path_relock_intent(trans, path))) {
                BUG_ON(!trans->restarted);
                return ERR_PTR(-EINTR);
        }
@@ -763,7 +763,7 @@ static inline void btree_check_header(struct bch_fs *c, struct btree *b)
  * The btree node will have either a read or a write lock held, depending on
  * the @write parameter.
  */
-struct btree *bch2_btree_node_get(struct btree_trans *trans, struct btree_iter *iter,
+struct btree *bch2_btree_node_get(struct btree_trans *trans, struct btree_path *path,
                                  const struct bkey_i *k, unsigned level,
                                  enum six_lock_type lock_type,
                                  unsigned long trace_ip)
@@ -788,7 +788,7 @@ retry:
                 * else we could read in a btree node from disk that's been
                 * freed:
                 */
-               b = bch2_btree_node_fill(c, trans, iter, k, iter->btree_id,
+               b = bch2_btree_node_fill(c, trans, path, k, path->btree_id,
                                         level, lock_type, true);
 
                /* We raced and found the btree node in the cache */
@@ -827,10 +827,10 @@ lock_node:
                 * the parent was modified, when the pointer to the node we want
                 * was removed - and we'll bail out:
                 */
-               if (btree_node_read_locked(iter, level + 1))
-                       btree_node_unlock(iter, level + 1);
+               if (btree_node_read_locked(path, level + 1))
+                       btree_node_unlock(path, level + 1);
 
-               if (!btree_node_lock(trans, iter, b, k->k.p, level, lock_type,
+               if (!btree_node_lock(trans, path, b, k->k.p, level, lock_type,
                                     lock_node_check_fn, (void *) k, trace_ip)) {
                        if (!trans->restarted)
                                goto retry;
@@ -841,13 +841,13 @@ lock_node:
                             b->c.level != level ||
                             race_fault())) {
                        six_unlock_type(&b->c.lock, lock_type);
-                       if (bch2_btree_node_relock(trans, iter, level + 1))
+                       if (bch2_btree_node_relock(trans, path, level + 1))
                                goto retry;
 
                        trace_trans_restart_btree_node_reused(trans->ip,
                                                              trace_ip,
-                                                             iter->btree_id,
-                                                             &iter->real_pos);
+                                                             path->btree_id,
+                                                             &path->pos);
                        btree_trans_restart(trans);
                        return ERR_PTR(-EINTR);
                }
@@ -862,12 +862,12 @@ lock_node:
                bch2_btree_node_wait_on_read(b);
 
                /*
-                * should_be_locked is not set on this iterator yet, so we need
-                * to relock it specifically:
+                * should_be_locked is not set on this path yet, so we need to
+                * relock it specifically:
                 */
                if (trans &&
                    (!bch2_trans_relock(trans) ||
-                    !bch2_btree_iter_relock_intent(trans, iter))) {
+                    !bch2_btree_path_relock_intent(trans, path))) {
                        BUG_ON(!trans->restarted);
                        return ERR_PTR(-EINTR);
                }
@@ -895,7 +895,7 @@ lock_node:
                return ERR_PTR(-EIO);
        }
 
-       EBUG_ON(b->c.btree_id != iter->btree_id);
+       EBUG_ON(b->c.btree_id != path->btree_id);
        EBUG_ON(BTREE_NODE_LEVEL(b->data) != level);
        btree_check_header(c, b);
 
@@ -986,21 +986,21 @@ out:
 
 int bch2_btree_node_prefetch(struct bch_fs *c,
                             struct btree_trans *trans,
-                            struct btree_iter *iter,
+                            struct btree_path *path,
                             const struct bkey_i *k,
                             enum btree_id btree_id, unsigned level)
 {
        struct btree_cache *bc = &c->btree_cache;
        struct btree *b;
 
-       BUG_ON(trans && !btree_node_locked(iter, level + 1));
+       BUG_ON(trans && !btree_node_locked(path, level + 1));
        BUG_ON(level >= BTREE_MAX_DEPTH);
 
        b = btree_cache_find(bc, k);
        if (b)
                return 0;
 
-       b = bch2_btree_node_fill(c, trans, iter, k, btree_id,
+       b = bch2_btree_node_fill(c, trans, path, k, btree_id,
                                 level, SIX_LOCK_read, false);
        return PTR_ERR_OR_ZERO(b);
 }
index 3b671cf0056df700db05725e5314758653b8c169..2f6e0ea87616c0c057358b1a93398ee5ff0500df 100644 (file)
@@ -20,16 +20,15 @@ int bch2_btree_cache_cannibalize_lock(struct bch_fs *, struct closure *);
 struct btree *__bch2_btree_node_mem_alloc(struct bch_fs *);
 struct btree *bch2_btree_node_mem_alloc(struct bch_fs *);
 
-struct btree *bch2_btree_node_get(struct btree_trans *, struct btree_iter *,
+struct btree *bch2_btree_node_get(struct btree_trans *, struct btree_path *,
                                  const struct bkey_i *, unsigned,
                                  enum six_lock_type, unsigned long);
 
 struct btree *bch2_btree_node_get_noiter(struct bch_fs *, const struct bkey_i *,
                                         enum btree_id, unsigned, bool);
 
-int bch2_btree_node_prefetch(struct bch_fs *, struct btree_trans *,
-                            struct btree_iter *, const struct bkey_i *,
-                            enum btree_id, unsigned);
+int bch2_btree_node_prefetch(struct bch_fs *, struct btree_trans *, struct btree_path *,
+                            const struct bkey_i *, enum btree_id, unsigned);
 
 void bch2_btree_node_evict(struct bch_fs *, const struct bkey_i *);
 
index 4a3f39a619a1e3cd9ac0099e410e19ed70b0d06b..66367ab9f20aa58bf6caa24102f38f1b27552eee 100644 (file)
@@ -775,7 +775,7 @@ static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id,
                         bool initial, bool metadata_only)
 {
        struct btree_trans trans;
-       struct btree_iter *iter;
+       struct btree_iter iter;
        struct btree *b;
        unsigned depth = metadata_only                  ? 1
                : bch2_expensive_debug_checks           ? 0
@@ -800,13 +800,13 @@ static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id,
 
                if (!initial) {
                        if (max_stale > 64)
-                               bch2_btree_node_rewrite(&trans, iter,
+                               bch2_btree_node_rewrite(&trans, &iter,
                                                b->data->keys.seq,
                                                BTREE_INSERT_NOWAIT|
                                                BTREE_INSERT_GC_LOCK_HELD);
                        else if (!bch2_btree_gc_rewrite_disabled &&
                                 (bch2_btree_gc_always_rewrite || max_stale > 16))
-                               bch2_btree_node_rewrite(&trans, iter,
+                               bch2_btree_node_rewrite(&trans, &iter,
                                                b->data->keys.seq,
                                                BTREE_INSERT_NOWAIT|
                                                BTREE_INSERT_GC_LOCK_HELD);
@@ -814,7 +814,7 @@ static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id,
 
                bch2_trans_cond_resched(&trans);
        }
-       bch2_trans_iter_put(&trans, iter);
+       bch2_trans_iter_exit(&trans, &iter);
 
        ret = bch2_trans_exit(&trans) ?: ret;
        if (ret)
@@ -1414,7 +1414,7 @@ static int bch2_gc_reflink_done(struct bch_fs *c, bool initial,
                                bool metadata_only)
 {
        struct btree_trans trans;
-       struct btree_iter *iter;
+       struct btree_iter iter;
        struct bkey_s_c k;
        struct reflink_gc *r;
        size_t idx = 0;
@@ -1480,7 +1480,7 @@ static int bch2_gc_reflink_done(struct bch_fs *c, bool initial,
                }
        }
 fsck_err:
-       bch2_trans_iter_put(&trans, iter);
+       bch2_trans_iter_exit(&trans, &iter);
        bch2_trans_exit(&trans);
 out:
        genradix_free(&c->reflink_gc_table);
@@ -1512,7 +1512,7 @@ static int bch2_gc_reflink_start(struct bch_fs *c, bool initial,
                                 bool metadata_only)
 {
        struct btree_trans trans;
-       struct btree_iter *iter;
+       struct btree_iter iter;
        struct bkey_s_c k;
        struct reflink_gc *r;
        int ret;
@@ -1547,7 +1547,7 @@ static int bch2_gc_reflink_start(struct bch_fs *c, bool initial,
                r->size         = k.k->size;
                r->refcount     = 0;
        }
-       bch2_trans_iter_put(&trans, iter);
+       bch2_trans_iter_exit(&trans, &iter);
 
        bch2_trans_exit(&trans);
        return 0;
@@ -1722,7 +1722,7 @@ static bool gc_btree_gens_key(struct bch_fs *c, struct bkey_s_c k)
 static int bch2_gc_btree_gens(struct bch_fs *c, enum btree_id btree_id)
 {
        struct btree_trans trans;
-       struct btree_iter *iter;
+       struct btree_iter iter;
        struct bkey_s_c k;
        struct bkey_buf sk;
        int ret = 0, commit_err = 0;
@@ -1730,13 +1730,13 @@ static int bch2_gc_btree_gens(struct bch_fs *c, enum btree_id btree_id)
        bch2_bkey_buf_init(&sk);
        bch2_trans_init(&trans, c, 0, 0);
 
-       iter = bch2_trans_get_iter(&trans, btree_id, POS_MIN,
-                                  BTREE_ITER_PREFETCH|
-                                  BTREE_ITER_NOT_EXTENTS|
-                                  BTREE_ITER_ALL_SNAPSHOTS);
+       bch2_trans_iter_init(&trans, &iter, btree_id, POS_MIN,
+                            BTREE_ITER_PREFETCH|
+                            BTREE_ITER_NOT_EXTENTS|
+                            BTREE_ITER_ALL_SNAPSHOTS);
 
        while ((bch2_trans_begin(&trans),
-               k = bch2_btree_iter_peek(iter)).k) {
+               k = bch2_btree_iter_peek(&iter)).k) {
                ret = bkey_err(k);
 
                if (ret == -EINTR)
@@ -1744,7 +1744,7 @@ static int bch2_gc_btree_gens(struct bch_fs *c, enum btree_id btree_id)
                if (ret)
                        break;
 
-               c->gc_gens_pos = iter->pos;
+               c->gc_gens_pos = iter.pos;
 
                if (gc_btree_gens_key(c, k) && !commit_err) {
                        bch2_bkey_buf_reassemble(&sk, c, k);
@@ -1752,7 +1752,7 @@ static int bch2_gc_btree_gens(struct bch_fs *c, enum btree_id btree_id)
 
 
                        commit_err =
-                               bch2_trans_update(&trans, iter, sk.k, 0) ?:
+                               bch2_trans_update(&trans, &iter, sk.k, 0) ?:
                                bch2_trans_commit(&trans, NULL, NULL,
                                                       BTREE_INSERT_NOWAIT|
                                                       BTREE_INSERT_NOFAIL);
@@ -1762,9 +1762,9 @@ static int bch2_gc_btree_gens(struct bch_fs *c, enum btree_id btree_id)
                        }
                }
 
-               bch2_btree_iter_advance(iter);
+               bch2_btree_iter_advance(&iter);
        }
-       bch2_trans_iter_put(&trans, iter);
+       bch2_trans_iter_exit(&trans, &iter);
 
        bch2_trans_exit(&trans);
        bch2_bkey_buf_exit(&sk, c);
index 7acec1e6db3dba1738e81a88d5964d88d6d6566e..06379f3e40a648a8d1bd0c308cadbc65a3fb324b 100644 (file)
 
 #include <linux/prefetch.h>
 
-static void btree_iter_set_search_pos(struct btree_iter *, struct bpos);
-static inline void btree_trans_sort_iters(struct btree_trans *);
-static struct btree_iter *btree_iter_child_alloc(struct btree_trans *,
-                                       struct btree_iter *, unsigned long);
-static struct btree_iter *btree_trans_iter_alloc(struct btree_trans *,
-                                                struct btree_iter *);
-static void btree_iter_copy(struct btree_trans *, struct btree_iter *, struct btree_iter *);
+static inline void btree_trans_sort_paths(struct btree_trans *);
 
-static inline int btree_iter_cmp(const struct btree_iter *l,
-                                const struct btree_iter *r)
+static inline void btree_path_list_remove(struct btree_trans *, struct btree_path *);
+static inline void btree_path_list_add(struct btree_trans *, struct btree_path *,
+                                      struct btree_path *);
+
+static struct btree_path *btree_path_alloc(struct btree_trans *, struct btree_path *);
+
+static inline int __btree_path_cmp(const struct btree_path *l,
+                                  enum btree_id        r_btree_id,
+                                  bool                 r_cached,
+                                  struct bpos          r_pos,
+                                  unsigned             r_level)
 {
-       return   cmp_int(l->btree_id,   r->btree_id) ?:
-               -cmp_int(l->cached,     r->cached) ?:
-                bkey_cmp(l->real_pos,  r->real_pos) ?:
-               -cmp_int(l->level,      r->level);
+       return   cmp_int(l->btree_id,   r_btree_id) ?:
+                cmp_int(l->cached,     r_cached) ?:
+                bpos_cmp(l->pos,       r_pos) ?:
+               -cmp_int(l->level,      r_level);
+}
+
+static inline int btree_path_cmp(const struct btree_path *l,
+                                const struct btree_path *r)
+{
+       return __btree_path_cmp(l, r->btree_id, r->cached, r->pos, r->level);
 }
 
 static inline struct bpos bkey_successor(struct btree_iter *iter, struct bpos p)
@@ -60,10 +69,10 @@ static inline struct bpos bkey_predecessor(struct btree_iter *iter, struct bpos
        return p;
 }
 
-static inline bool is_btree_node(struct btree_iter *iter, unsigned l)
+static inline bool is_btree_node(struct btree_path *path, unsigned l)
 {
        return l < BTREE_MAX_DEPTH &&
-               (unsigned long) iter->l[l].b >= 128;
+               (unsigned long) path->l[l].b >= 128;
 }
 
 static inline struct bpos btree_iter_search_key(struct btree_iter *iter)
@@ -76,42 +85,42 @@ static inline struct bpos btree_iter_search_key(struct btree_iter *iter)
        return pos;
 }
 
-static inline bool btree_iter_pos_before_node(struct btree_iter *iter,
+static inline bool btree_path_pos_before_node(struct btree_path *path,
                                              struct btree *b)
 {
-       return bpos_cmp(iter->real_pos, b->data->min_key) < 0;
+       return bpos_cmp(path->pos, b->data->min_key) < 0;
 }
 
-static inline bool btree_iter_pos_after_node(struct btree_iter *iter,
+static inline bool btree_path_pos_after_node(struct btree_path *path,
                                             struct btree *b)
 {
-       return bpos_cmp(b->key.k.p, iter->real_pos) < 0;
+       return bpos_cmp(b->key.k.p, path->pos) < 0;
 }
 
-static inline bool btree_iter_pos_in_node(struct btree_iter *iter,
+static inline bool btree_path_pos_in_node(struct btree_path *path,
                                          struct btree *b)
 {
-       return iter->btree_id == b->c.btree_id &&
-               !btree_iter_pos_before_node(iter, b) &&
-               !btree_iter_pos_after_node(iter, b);
+       return path->btree_id == b->c.btree_id &&
+               !btree_path_pos_before_node(path, b) &&
+               !btree_path_pos_after_node(path, b);
 }
 
 /* Btree node locking: */
 
 void bch2_btree_node_unlock_write(struct btree_trans *trans,
-                       struct btree_iter *iter, struct btree *b)
+                       struct btree_path *path, struct btree *b)
 {
-       bch2_btree_node_unlock_write_inlined(trans, iter, b);
+       bch2_btree_node_unlock_write_inlined(trans, path, b);
 }
 
 void __bch2_btree_node_lock_write(struct btree_trans *trans, struct btree *b)
 {
-       struct btree_iter *iter;
+       struct btree_path *linked;
        unsigned readers = 0;
 
-       trans_for_each_iter(trans, iter)
-               if (iter->l[b->c.level].b == b &&
-                   btree_node_read_locked(iter, b->c.level))
+       trans_for_each_path(trans, linked)
+               if (linked->l[b->c.level].b == b &&
+                   btree_node_read_locked(linked, b->c.level))
                        readers++;
 
        /*
@@ -136,21 +145,21 @@ void __bch2_btree_node_lock_write(struct btree_trans *trans, struct btree *b)
 }
 
 bool __bch2_btree_node_relock(struct btree_trans *trans,
-                             struct btree_iter *iter, unsigned level)
+                             struct btree_path *path, unsigned level)
 {
-       struct btree *b = btree_iter_node(iter, level);
-       int want = __btree_lock_want(iter, level);
+       struct btree *b = btree_path_node(path, level);
+       int want = __btree_lock_want(path, level);
 
-       if (!is_btree_node(iter, level))
+       if (!is_btree_node(path, level))
                return false;
 
        if (race_fault())
                return false;
 
-       if (six_relock_type(&b->c.lock, want, iter->l[level].lock_seq) ||
-           (btree_node_lock_seq_matches(iter, b, level) &&
+       if (six_relock_type(&b->c.lock, want, path->l[level].lock_seq) ||
+           (btree_node_lock_seq_matches(path, b, level) &&
             btree_node_lock_increment(trans, b, level, want))) {
-               mark_btree_node_locked(iter, level, want);
+               mark_btree_node_locked(path, level, want);
                return true;
        } else {
                return false;
@@ -158,88 +167,88 @@ bool __bch2_btree_node_relock(struct btree_trans *trans,
 }
 
 static bool bch2_btree_node_upgrade(struct btree_trans *trans,
-                                   struct btree_iter *iter, unsigned level)
+                                   struct btree_path *path, unsigned level)
 {
-       struct btree *b = iter->l[level].b;
+       struct btree *b = path->l[level].b;
 
-       EBUG_ON(btree_lock_want(iter, level) != BTREE_NODE_INTENT_LOCKED);
+       EBUG_ON(btree_lock_want(path, level) != BTREE_NODE_INTENT_LOCKED);
 
-       if (!is_btree_node(iter, level))
+       if (!is_btree_node(path, level))
                return false;
 
-       if (btree_node_intent_locked(iter, level))
+       if (btree_node_intent_locked(path, level))
                return true;
 
        if (race_fault())
                return false;
 
-       if (btree_node_locked(iter, level)
+       if (btree_node_locked(path, level)
            ? six_lock_tryupgrade(&b->c.lock)
-           : six_relock_type(&b->c.lock, SIX_LOCK_intent, iter->l[level].lock_seq))
+           : six_relock_type(&b->c.lock, SIX_LOCK_intent, path->l[level].lock_seq))
                goto success;
 
-       if (btree_node_lock_seq_matches(iter, b, level) &&
+       if (btree_node_lock_seq_matches(path, b, level) &&
            btree_node_lock_increment(trans, b, level, BTREE_NODE_INTENT_LOCKED)) {
-               btree_node_unlock(iter, level);
+               btree_node_unlock(path, level);
                goto success;
        }
 
        return false;
 success:
-       mark_btree_node_intent_locked(iter, level);
+       mark_btree_node_intent_locked(path, level);
        return true;
 }
 
-static inline bool btree_iter_get_locks(struct btree_trans *trans,
-                                       struct btree_iter *iter,
+static inline bool btree_path_get_locks(struct btree_trans *trans,
+                                       struct btree_path *path,
                                        bool upgrade, unsigned long trace_ip)
 {
-       unsigned l = iter->level;
+       unsigned l = path->level;
        int fail_idx = -1;
 
        do {
-               if (!btree_iter_node(iter, l))
+               if (!btree_path_node(path, l))
                        break;
 
                if (!(upgrade
-                     ? bch2_btree_node_upgrade(trans, iter, l)
-                     : bch2_btree_node_relock(trans, iter, l))) {
+                     ? bch2_btree_node_upgrade(trans, path, l)
+                     : bch2_btree_node_relock(trans, path, l))) {
                        (upgrade
                         ? trace_node_upgrade_fail
                         : trace_node_relock_fail)(trans->ip, trace_ip,
-                                       iter->cached,
-                                       iter->btree_id, &iter->real_pos,
-                                       l, iter->l[l].lock_seq,
-                                       is_btree_node(iter, l)
+                                       path->cached,
+                                       path->btree_id, &path->pos,
+                                       l, path->l[l].lock_seq,
+                                       is_btree_node(path, l)
                                        ? 0
-                                       : (unsigned long) iter->l[l].b,
-                                       is_btree_node(iter, l)
-                                       ? iter->l[l].b->c.lock.state.seq
+                                       : (unsigned long) path->l[l].b,
+                                       is_btree_node(path, l)
+                                       ? path->l[l].b->c.lock.state.seq
                                        : 0);
                        fail_idx = l;
-                       btree_iter_set_dirty(iter, BTREE_ITER_NEED_TRAVERSE);
+                       btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE);
                }
 
                l++;
-       } while (l < iter->locks_want);
+       } while (l < path->locks_want);
 
        /*
         * When we fail to get a lock, we have to ensure that any child nodes
-        * can't be relocked so bch2_btree_iter_traverse has to walk back up to
+        * can't be relocked so bch2_btree_path_traverse has to walk back up to
         * the node that we failed to relock:
         */
        while (fail_idx >= 0) {
-               btree_node_unlock(iter, fail_idx);
-               iter->l[fail_idx].b = BTREE_ITER_NO_NODE_GET_LOCKS;
+               btree_node_unlock(path, fail_idx);
+               path->l[fail_idx].b = BTREE_ITER_NO_NODE_GET_LOCKS;
                --fail_idx;
        }
 
-       if (iter->uptodate == BTREE_ITER_NEED_RELOCK)
-               iter->uptodate = BTREE_ITER_UPTODATE;
+       if (path->uptodate == BTREE_ITER_NEED_RELOCK)
+               path->uptodate = BTREE_ITER_UPTODATE;
 
        bch2_trans_verify_locks(trans);
 
-       return iter->uptodate < BTREE_ITER_NEED_RELOCK;
+       return path->uptodate < BTREE_ITER_NEED_RELOCK;
 }
 
 static struct bpos btree_node_pos(struct btree_bkey_cached_common *_b,
@@ -252,19 +261,20 @@ static struct bpos btree_node_pos(struct btree_bkey_cached_common *_b,
 
 /* Slowpath: */
 bool __bch2_btree_node_lock(struct btree_trans *trans,
-                           struct btree_iter *iter,
-                           struct btree *b, struct bpos pos, unsigned level,
+                           struct btree_path *path,
+                           struct btree *b,
+                           struct bpos pos, unsigned level,
                            enum six_lock_type type,
                            six_lock_should_sleep_fn should_sleep_fn, void *p,
                            unsigned long ip)
 {
-       struct btree_iter *linked, *deadlock_iter = NULL;
+       struct btree_path *linked, *deadlock_path = NULL;
        u64 start_time = local_clock();
        unsigned reason = 9;
        bool ret;
 
        /* Check if it's safe to block: */
-       trans_for_each_iter(trans, linked) {
+       trans_for_each_path(trans, linked) {
                if (!linked->nodes_locked)
                        continue;
 
@@ -282,25 +292,25 @@ bool __bch2_btree_node_lock(struct btree_trans *trans,
                 */
                if (type == SIX_LOCK_intent &&
                    linked->nodes_locked != linked->nodes_intent_locked) {
-                       deadlock_iter = linked;
+                       deadlock_path = linked;
                        reason = 1;
                }
 
-               if (linked->btree_id != iter->btree_id) {
-                       if (linked->btree_id > iter->btree_id) {
-                               deadlock_iter = linked;
+               if (linked->btree_id != path->btree_id) {
+                       if (linked->btree_id > path->btree_id) {
+                               deadlock_path = linked;
                                reason = 3;
                        }
                        continue;
                }
 
                /*
-                * Within the same btree, cached iterators come before non
-                * cached iterators:
+                * Within the same btree, cached paths come before
+                * non-cached paths:
                 */
-               if (linked->cached != iter->cached) {
-                       if (iter->cached) {
-                               deadlock_iter = linked;
+               if (linked->cached != path->cached) {
+                       if (path->cached) {
+                               deadlock_path = linked;
                                reason = 4;
                        }
                        continue;
@@ -308,11 +318,11 @@ bool __bch2_btree_node_lock(struct btree_trans *trans,
 
                /*
                 * Interior nodes must be locked before their descendants: if
-                * another iterator has possible descendants locked of the node
+                * another path has locked possible descendants of the node
                 * we're about to lock, it must have the ancestors locked too:
                 */
                if (level > __fls(linked->nodes_locked)) {
-                       deadlock_iter = linked;
+                       deadlock_path = linked;
                        reason = 5;
                }
 
@@ -320,19 +330,19 @@ bool __bch2_btree_node_lock(struct btree_trans *trans,
                if (btree_node_locked(linked, level) &&
                    bpos_cmp(pos, btree_node_pos((void *) linked->l[level].b,
                                                 linked->cached)) <= 0) {
-                       deadlock_iter = linked;
+                       deadlock_path = linked;
                        reason = 7;
                }
        }
 
-       if (unlikely(deadlock_iter)) {
+       if (unlikely(deadlock_path)) {
                trace_trans_restart_would_deadlock(trans->ip, ip,
                                trans->in_traverse_all, reason,
-                               deadlock_iter->btree_id,
-                               deadlock_iter->cached,
-                               &deadlock_iter->real_pos,
-                               iter->btree_id,
-                               iter->cached,
+                               deadlock_path->btree_id,
+                               deadlock_path->cached,
+                               &deadlock_path->pos,
+                               path->btree_id,
+                               path->cached,
                                &pos);
                btree_trans_restart(trans);
                return false;
@@ -342,9 +352,9 @@ bool __bch2_btree_node_lock(struct btree_trans *trans,
                return true;
 
 #ifdef CONFIG_BCACHEFS_DEBUG
-       trans->locking_iter_idx = iter->idx;
+       trans->locking_path_idx = path->idx;
        trans->locking_pos      = pos;
-       trans->locking_btree_id = iter->btree_id;
+       trans->locking_btree_id = path->btree_id;
        trans->locking_level    = level;
        trans->locking          = b;
 #endif
@@ -363,54 +373,57 @@ bool __bch2_btree_node_lock(struct btree_trans *trans,
 /* Btree iterator locking: */
 
 #ifdef CONFIG_BCACHEFS_DEBUG
-static void bch2_btree_iter_verify_locks(struct btree_iter *iter)
+
+static void bch2_btree_path_verify_locks(struct btree_path *path)
 {
        unsigned l;
 
-       for (l = 0; btree_iter_node(iter, l); l++) {
-               if (iter->uptodate >= BTREE_ITER_NEED_RELOCK &&
-                   !btree_node_locked(iter, l))
+       for (l = 0; btree_path_node(path, l); l++) {
+               if (path->uptodate >= BTREE_ITER_NEED_RELOCK &&
+                   !btree_node_locked(path, l))
                        continue;
 
-               BUG_ON(btree_lock_want(iter, l) !=
-                      btree_node_locked_type(iter, l));
+               BUG_ON(btree_lock_want(path, l) !=
+                      btree_node_locked_type(path, l));
        }
 }
 
 void bch2_trans_verify_locks(struct btree_trans *trans)
 {
-       struct btree_iter *iter;
+       struct btree_path *path;
 
-       trans_for_each_iter(trans, iter)
-               bch2_btree_iter_verify_locks(iter);
+       trans_for_each_path(trans, path)
+               bch2_btree_path_verify_locks(path);
 }
 #else
-static inline void bch2_btree_iter_verify_locks(struct btree_iter *iter) {}
+static inline void bch2_btree_path_verify_locks(struct btree_path *path) {}
 #endif
 
+/* Btree path locking: */
+
 /*
  * Only for btree_cache.c - only relocks intent locks
  */
-bool bch2_btree_iter_relock_intent(struct btree_trans *trans,
-                                  struct btree_iter *iter)
+bool bch2_btree_path_relock_intent(struct btree_trans *trans,
+                                  struct btree_path *path)
 {
        unsigned l;
 
-       for (l = iter->level;
-            l < iter->locks_want && btree_iter_node(iter, l);
+       for (l = path->level;
+            l < path->locks_want && btree_path_node(path, l);
             l++) {
-               if (!bch2_btree_node_relock(trans, iter, l)) {
+               if (!bch2_btree_node_relock(trans, path, l)) {
                        trace_node_relock_fail(trans->ip, _RET_IP_,
-                                       iter->cached,
-                                       iter->btree_id, &iter->real_pos,
-                                       l, iter->l[l].lock_seq,
-                                       is_btree_node(iter, l)
+                                       path->cached,
+                                       path->btree_id, &path->pos,
+                                       l, path->l[l].lock_seq,
+                                       is_btree_node(path, l)
                                        ? 0
-                                       : (unsigned long) iter->l[l].b,
-                                       is_btree_node(iter, l)
-                                       ? iter->l[l].b->c.lock.state.seq
+                                       : (unsigned long) path->l[l].b,
+                                       is_btree_node(path, l)
+                                       ? path->l[l].b->c.lock.state.seq
                                        : 0);
-                       btree_iter_set_dirty(iter, BTREE_ITER_NEED_TRAVERSE);
+                       btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE);
                        btree_trans_restart(trans);
                        return false;
                }
@@ -420,27 +433,27 @@ bool bch2_btree_iter_relock_intent(struct btree_trans *trans,
 }
 
 __flatten
-static bool bch2_btree_iter_relock(struct btree_trans *trans,
-                       struct btree_iter *iter, unsigned long trace_ip)
+static bool bch2_btree_path_relock(struct btree_trans *trans,
+                       struct btree_path *path, unsigned long trace_ip)
 {
-       bool ret = btree_iter_get_locks(trans, iter, false, trace_ip);
+       bool ret = btree_path_get_locks(trans, path, false, trace_ip);
 
        if (!ret)
                btree_trans_restart(trans);
        return ret;
 }
 
-bool __bch2_btree_iter_upgrade(struct btree_trans *trans,
-                              struct btree_iter *iter,
+bool __bch2_btree_path_upgrade(struct btree_trans *trans,
+                              struct btree_path *path,
                               unsigned new_locks_want)
 {
-       struct btree_iter *linked;
+       struct btree_path *linked;
 
-       EBUG_ON(iter->locks_want >= new_locks_want);
+       EBUG_ON(path->locks_want >= new_locks_want);
 
-       iter->locks_want = new_locks_want;
+       path->locks_want = new_locks_want;
 
-       if (btree_iter_get_locks(trans, iter, true, _THIS_IP_))
+       if (btree_path_get_locks(trans, path, true, _THIS_IP_))
                return true;
 
        /*
@@ -448,7 +461,7 @@ bool __bch2_btree_iter_upgrade(struct btree_trans *trans,
         * iterators in the btree_trans here.
         *
         * On failure to upgrade the iterator, setting iter->locks_want and
-        * calling get_locks() is sufficient to make bch2_btree_iter_traverse()
+        * calling get_locks() is sufficient to make bch2_btree_path_traverse()
         * get the locks we want on transaction restart.
         *
         * But if this iterator was a clone, on transaction restart what we did
@@ -460,75 +473,67 @@ bool __bch2_btree_iter_upgrade(struct btree_trans *trans,
         *
         * The code below used to be needed to ensure ancestor nodes get locked
         * before interior nodes - now that's handled by
-        * bch2_btree_iter_traverse_all().
+        * bch2_btree_path_traverse_all().
         */
-       trans_for_each_iter(trans, linked)
-               if (linked != iter &&
-                   linked->cached == iter->cached &&
-                   linked->btree_id == iter->btree_id &&
+       trans_for_each_path(trans, linked)
+               if (linked != path &&
+                   linked->cached == path->cached &&
+                   linked->btree_id == path->btree_id &&
                    linked->locks_want < new_locks_want) {
                        linked->locks_want = new_locks_want;
-                       btree_iter_get_locks(trans, linked, true, _THIS_IP_);
+                       btree_path_get_locks(trans, linked, true, _THIS_IP_);
                }
 
-       if (iter->should_be_locked)
-               btree_trans_restart(trans);
        return false;
 }
 
-void __bch2_btree_iter_downgrade(struct btree_iter *iter,
+void __bch2_btree_path_downgrade(struct btree_path *path,
                                 unsigned new_locks_want)
 {
        unsigned l;
 
-       EBUG_ON(iter->locks_want < new_locks_want);
+       EBUG_ON(path->locks_want < new_locks_want);
 
-       iter->locks_want = new_locks_want;
+       path->locks_want = new_locks_want;
 
-       while (iter->nodes_locked &&
-              (l = __fls(iter->nodes_locked)) >= iter->locks_want) {
-               if (l > iter->level) {
-                       btree_node_unlock(iter, l);
+       while (path->nodes_locked &&
+              (l = __fls(path->nodes_locked)) >= path->locks_want) {
+               if (l > path->level) {
+                       btree_node_unlock(path, l);
                } else {
-                       if (btree_node_intent_locked(iter, l)) {
-                               six_lock_downgrade(&iter->l[l].b->c.lock);
-                               iter->nodes_intent_locked ^= 1 << l;
+                       if (btree_node_intent_locked(path, l)) {
+                               six_lock_downgrade(&path->l[l].b->c.lock);
+                               path->nodes_intent_locked ^= 1 << l;
                        }
                        break;
                }
        }
 
-       bch2_btree_iter_verify_locks(iter);
+       bch2_btree_path_verify_locks(path);
 }
 
 void bch2_trans_downgrade(struct btree_trans *trans)
 {
-       struct btree_iter *iter;
+       struct btree_path *path;
 
-       trans_for_each_iter(trans, iter)
-               bch2_btree_iter_downgrade(iter);
+       trans_for_each_path(trans, path)
+               bch2_btree_path_downgrade(path);
 }
 
 /* Btree transaction locking: */
 
-static inline bool btree_iter_should_be_locked(struct btree_iter *iter)
-{
-       return (iter->flags & BTREE_ITER_KEEP_UNTIL_COMMIT) ||
-               iter->should_be_locked;
-}
-
 bool bch2_trans_relock(struct btree_trans *trans)
 {
-       struct btree_iter *iter;
+       struct btree_path *path;
 
        if (unlikely(trans->restarted))
                return false;
 
-       trans_for_each_iter(trans, iter)
-               if (btree_iter_should_be_locked(iter) &&
-                   !bch2_btree_iter_relock(trans, iter, _RET_IP_)) {
+       trans_for_each_path(trans, path)
+               if (path->should_be_locked &&
+                   !bch2_btree_path_relock(trans, path, _RET_IP_)) {
                        trace_trans_restart_relock(trans->ip, _RET_IP_,
-                                       iter->btree_id, &iter->real_pos);
+                                       path->btree_id, &path->pos);
                        BUG_ON(!trans->restarted);
                        return false;
                }
@@ -537,37 +542,37 @@ bool bch2_trans_relock(struct btree_trans *trans)
 
 void bch2_trans_unlock(struct btree_trans *trans)
 {
-       struct btree_iter *iter;
+       struct btree_path *path;
 
-       trans_for_each_iter(trans, iter)
-               __bch2_btree_iter_unlock(iter);
+       trans_for_each_path(trans, path)
+               __bch2_btree_path_unlock(path);
 }
 
 /* Btree iterator: */
 
 #ifdef CONFIG_BCACHEFS_DEBUG
 
-static void bch2_btree_iter_verify_cached(struct btree_trans *trans,
-                                         struct btree_iter *iter)
+static void bch2_btree_path_verify_cached(struct btree_trans *trans,
+                                         struct btree_path *path)
 {
        struct bkey_cached *ck;
-       bool locked = btree_node_locked(iter, 0);
+       bool locked = btree_node_locked(path, 0);
 
-       if (!bch2_btree_node_relock(trans, iter, 0))
+       if (!bch2_btree_node_relock(trans, path, 0))
                return;
 
-       ck = (void *) iter->l[0].b;
-       BUG_ON(ck->key.btree_id != iter->btree_id ||
-              bkey_cmp(ck->key.pos, iter->pos));
+       ck = (void *) path->l[0].b;
+       BUG_ON(ck->key.btree_id != path->btree_id ||
+              bkey_cmp(ck->key.pos, path->pos));
 
        if (!locked)
-               btree_node_unlock(iter, 0);
+               btree_node_unlock(path, 0);
 }
 
-static void bch2_btree_iter_verify_level(struct btree_trans *trans,
-                               struct btree_iter *iter, unsigned level)
+static void bch2_btree_path_verify_level(struct btree_trans *trans,
+                               struct btree_path *path, unsigned level)
 {
-       struct btree_iter_level *l;
+       struct btree_path_level *l;
        struct btree_node_iter tmp;
        bool locked;
        struct bkey_packed *p, *k;
@@ -577,25 +582,23 @@ static void bch2_btree_iter_verify_level(struct btree_trans *trans,
        if (!bch2_debug_check_iterators)
                return;
 
-       l       = &iter->l[level];
+       l       = &path->l[level];
        tmp     = l->iter;
-       locked  = btree_node_locked(iter, level);
+       locked  = btree_node_locked(path, level);
 
-       if (iter->cached) {
+       if (path->cached) {
                if (!level)
-                       bch2_btree_iter_verify_cached(trans, iter);
+                       bch2_btree_path_verify_cached(trans, path);
                return;
        }
 
-       BUG_ON(iter->level < iter->min_depth);
-
-       if (!btree_iter_node(iter, level))
+       if (!btree_path_node(path, level))
                return;
 
-       if (!bch2_btree_node_relock(trans, iter, level))
+       if (!bch2_btree_node_relock(trans, path, level))
                return;
 
-       BUG_ON(!btree_iter_pos_in_node(iter, l->b));
+       BUG_ON(!btree_path_pos_in_node(path, l->b));
 
        bch2_btree_node_iter_verify(&l->iter, l->b);
 
@@ -606,29 +609,29 @@ static void bch2_btree_iter_verify_level(struct btree_trans *trans,
         * For extents, the iterator may have skipped past deleted keys (but not
         * whiteouts)
         */
-       p = level || btree_node_type_is_extents(iter->btree_id)
+       p = level || btree_node_type_is_extents(path->btree_id)
                ? bch2_btree_node_iter_prev(&tmp, l->b)
                : bch2_btree_node_iter_prev_all(&tmp, l->b);
        k = bch2_btree_node_iter_peek_all(&l->iter, l->b);
 
-       if (p && bkey_iter_pos_cmp(l->b, p, &iter->real_pos) >= 0) {
+       if (p && bkey_iter_pos_cmp(l->b, p, &path->pos) >= 0) {
                msg = "before";
                goto err;
        }
 
-       if (k && bkey_iter_pos_cmp(l->b, k, &iter->real_pos) < 0) {
+       if (k && bkey_iter_pos_cmp(l->b, k, &path->pos) < 0) {
                msg = "after";
                goto err;
        }
 
        if (!locked)
-               btree_node_unlock(iter, level);
+               btree_node_unlock(path, level);
        return;
 err:
        strcpy(buf2, "(none)");
        strcpy(buf3, "(none)");
 
-       bch2_bpos_to_text(&PBUF(buf1), iter->real_pos);
+       bch2_bpos_to_text(&PBUF(buf1), path->pos);
 
        if (p) {
                struct bkey uk = bkey_unpack_key(l->b, p);
@@ -640,20 +643,51 @@ err:
                bch2_bkey_to_text(&PBUF(buf3), &uk);
        }
 
-       panic("iterator should be %s key at level %u:\n"
-             "iter pos %s\n"
+       panic("path should be %s key at level %u:\n"
+             "path pos %s\n"
              "prev key %s\n"
              "cur  key %s\n",
              msg, level, buf1, buf2, buf3);
 }
 
-static void bch2_btree_iter_verify(struct btree_iter *iter)
+static void bch2_btree_path_verify(struct btree_trans *trans,
+                                  struct btree_path *path)
 {
-       struct btree_trans *trans = iter->trans;
        struct bch_fs *c = trans->c;
        unsigned i;
 
-       EBUG_ON(iter->btree_id >= BTREE_ID_NR);
+       EBUG_ON(path->btree_id >= BTREE_ID_NR);
+
+       for (i = 0; i < (!path->cached ? BTREE_MAX_DEPTH : 1); i++) {
+               if (!path->l[i].b) {
+                       BUG_ON(c->btree_roots[path->btree_id].b->c.level > i);
+                       break;
+               }
+
+               bch2_btree_path_verify_level(trans, path, i);
+       }
+
+       bch2_btree_path_verify_locks(path);
+}
+
+void bch2_trans_verify_paths(struct btree_trans *trans)
+{
+       struct btree_path *path;
+
+       if (!bch2_debug_check_iterators)
+               return;
+
+       trans_for_each_path(trans, path)
+               bch2_btree_path_verify(trans, path);
+}
+
+static void bch2_btree_iter_verify(struct btree_iter *iter)
+{
+       struct btree_trans *trans = iter->trans;
+
+       BUG_ON(iter->btree_id >= BTREE_ID_NR);
+
+       BUG_ON(!!(iter->flags & BTREE_ITER_CACHED) != iter->path->cached);
 
        BUG_ON(!(iter->flags & BTREE_ITER_ALL_SNAPSHOTS) &&
               iter->pos.snapshot != iter->snapshot);
@@ -665,16 +699,7 @@ static void bch2_btree_iter_verify(struct btree_iter *iter)
               (iter->flags & BTREE_ITER_ALL_SNAPSHOTS) &&
               !btree_type_has_snapshots(iter->btree_id));
 
-       for (i = 0; i < (!iter->cached ? BTREE_MAX_DEPTH : 1); i++) {
-               if (!iter->l[i].b) {
-                       BUG_ON(c->btree_roots[iter->btree_id].b->c.level > i);
-                       break;
-               }
-
-               bch2_btree_iter_verify_level(trans, iter, i);
-       }
-
-       bch2_btree_iter_verify_locks(iter);
+       bch2_btree_path_verify(trans, iter->path);
 }
 
 static void bch2_btree_iter_verify_entry_exit(struct btree_iter *iter)
@@ -686,26 +711,19 @@ static void bch2_btree_iter_verify_entry_exit(struct btree_iter *iter)
               bkey_cmp(iter->pos, iter->k.p) > 0);
 }
 
-void bch2_trans_verify_iters(struct btree_trans *trans, struct btree *b)
-{
-       struct btree_iter *iter;
-
-       if (!bch2_debug_check_iterators)
-               return;
-
-       trans_for_each_iter_with_node(trans, b, iter)
-               bch2_btree_iter_verify_level(trans, iter, b->c.level);
-}
-
 #else
 
-static inline void bch2_btree_iter_verify_level(struct btree_trans *trans,
-                                       struct btree_iter *iter, unsigned l) {}
+static inline void bch2_btree_path_verify_level(struct btree_trans *trans,
+                                               struct btree_path *path, unsigned l) {}
+static inline void bch2_btree_path_verify(struct btree_trans *trans,
+                                         struct btree_path *path) {}
 static inline void bch2_btree_iter_verify(struct btree_iter *iter) {}
 static inline void bch2_btree_iter_verify_entry_exit(struct btree_iter *iter) {}
 
 #endif
 
+/* Btree path: fixups after btree updates */
+
 static void btree_node_iter_set_set_pos(struct btree_node_iter *iter,
                                        struct btree *b,
                                        struct bset_tree *t,
@@ -723,38 +741,38 @@ static void btree_node_iter_set_set_pos(struct btree_node_iter *iter,
        bch2_btree_node_iter_push(iter, b, k, btree_bkey_last(b, t));
 }
 
-static void __bch2_btree_iter_fix_key_modified(struct btree_iter *iter,
+static void __bch2_btree_path_fix_key_modified(struct btree_path *path,
                                               struct btree *b,
                                               struct bkey_packed *where)
 {
-       struct btree_iter_level *l = &iter->l[b->c.level];
+       struct btree_path_level *l = &path->l[b->c.level];
 
        if (where != bch2_btree_node_iter_peek_all(&l->iter, l->b))
                return;
 
-       if (bkey_iter_pos_cmp(l->b, where, &iter->real_pos) < 0)
+       if (bkey_iter_pos_cmp(l->b, where, &path->pos) < 0)
                bch2_btree_node_iter_advance(&l->iter, l->b);
 }
 
-void bch2_btree_iter_fix_key_modified(struct btree_trans *trans,
+void bch2_btree_path_fix_key_modified(struct btree_trans *trans,
                                      struct btree *b,
                                      struct bkey_packed *where)
 {
-       struct btree_iter *iter;
+       struct btree_path *path;
 
-       trans_for_each_iter_with_node(trans, b, iter) {
-               __bch2_btree_iter_fix_key_modified(iter, b, where);
-               bch2_btree_iter_verify_level(trans, iter, b->c.level);
+       trans_for_each_path_with_node(trans, b, path) {
+               __bch2_btree_path_fix_key_modified(path, b, where);
+               bch2_btree_path_verify_level(trans, path, b->c.level);
        }
 }
 
-static void __bch2_btree_node_iter_fix(struct btree_iter *iter,
-                                     struct btree *b,
-                                     struct btree_node_iter *node_iter,
-                                     struct bset_tree *t,
-                                     struct bkey_packed *where,
-                                     unsigned clobber_u64s,
-                                     unsigned new_u64s)
+static void __bch2_btree_node_iter_fix(struct btree_path *path,
+                                      struct btree *b,
+                                      struct btree_node_iter *node_iter,
+                                      struct bset_tree *t,
+                                      struct bkey_packed *where,
+                                      unsigned clobber_u64s,
+                                      unsigned new_u64s)
 {
        const struct bkey_packed *end = btree_bkey_last(b, t);
        struct btree_node_iter_set *set;
@@ -772,7 +790,7 @@ static void __bch2_btree_node_iter_fix(struct btree_iter *iter,
 
        /* didn't find the bset in the iterator - might have to readd it: */
        if (new_u64s &&
-           bkey_iter_pos_cmp(b, where, &iter->real_pos) >= 0) {
+           bkey_iter_pos_cmp(b, where, &path->pos) >= 0) {
                bch2_btree_node_iter_push(node_iter, b, where, end);
                goto fixup_done;
        } else {
@@ -787,7 +805,7 @@ found:
                return;
 
        if (new_u64s &&
-           bkey_iter_pos_cmp(b, where, &iter->real_pos) >= 0) {
+           bkey_iter_pos_cmp(b, where, &path->pos) >= 0) {
                set->k = offset;
        } else if (set->k < offset + clobber_u64s) {
                set->k = offset + new_u64s;
@@ -814,7 +832,7 @@ fixup_done:
        if (!bch2_btree_node_iter_end(node_iter) &&
            iter_current_key_modified &&
            (b->c.level ||
-            btree_node_type_is_extents(iter->btree_id))) {
+            btree_node_type_is_extents(path->btree_id))) {
                struct bset_tree *t;
                struct bkey_packed *k, *k2, *p;
 
@@ -842,7 +860,7 @@ fixup_done:
 }
 
 void bch2_btree_node_iter_fix(struct btree_trans *trans,
-                             struct btree_iter *iter,
+                             struct btree_path *path,
                              struct btree *b,
                              struct btree_node_iter *node_iter,
                              struct bkey_packed *where,
@@ -850,26 +868,28 @@ void bch2_btree_node_iter_fix(struct btree_trans *trans,
                              unsigned new_u64s)
 {
        struct bset_tree *t = bch2_bkey_to_bset_inlined(b, where);
-       struct btree_iter *linked;
+       struct btree_path *linked;
 
-       if (node_iter != &iter->l[b->c.level].iter) {
-               __bch2_btree_node_iter_fix(iter, b, node_iter, t,
+       if (node_iter != &path->l[b->c.level].iter) {
+               __bch2_btree_node_iter_fix(path, b, node_iter, t,
                                           where, clobber_u64s, new_u64s);
 
                if (bch2_debug_check_iterators)
                        bch2_btree_node_iter_verify(node_iter, b);
        }
 
-       trans_for_each_iter_with_node(trans, b, linked) {
+       trans_for_each_path_with_node(trans, b, linked) {
                __bch2_btree_node_iter_fix(linked, b,
                                           &linked->l[b->c.level].iter, t,
                                           where, clobber_u64s, new_u64s);
-               bch2_btree_iter_verify_level(trans, linked, b->c.level);
+               bch2_btree_path_verify_level(trans, linked, b->c.level);
        }
 }
 
-static inline struct bkey_s_c __btree_iter_unpack(struct btree_iter *iter,
-                                                 struct btree_iter_level *l,
+/* Btree path level: pointer to a particular btree node and node iter */
+
+static inline struct bkey_s_c __btree_iter_unpack(struct bch_fs *c,
+                                                 struct btree_path_level *l,
                                                  struct bkey *u,
                                                  struct bkey_packed *k)
 {
@@ -894,50 +914,54 @@ static inline struct bkey_s_c __btree_iter_unpack(struct btree_iter *iter,
         * assertion here:
         */
        if (bch2_debug_check_bkeys && !bkey_deleted(ret.k))
-               bch2_bkey_debugcheck(iter->trans->c, l->b, ret);
+               bch2_bkey_debugcheck(c, l->b, ret);
 
        return ret;
 }
 
-/* peek_all() doesn't skip deleted keys */
-static inline struct bkey_s_c btree_iter_level_peek_all(struct btree_iter *iter,
-                                                       struct btree_iter_level *l)
+static inline struct bkey_s_c btree_path_level_peek_all(struct bch_fs *c,
+                                                       struct btree_path_level *l,
+                                                       struct bkey *u)
 {
-       return __btree_iter_unpack(iter, l, &iter->k,
+       return __btree_iter_unpack(c, l, u,
                        bch2_btree_node_iter_peek_all(&l->iter, l->b));
 }
 
-static inline struct bkey_s_c btree_iter_level_peek(struct btree_iter *iter,
-                                                   struct btree_iter_level *l)
+static inline struct bkey_s_c btree_path_level_peek(struct btree_trans *trans,
+                                                   struct btree_path *path,
+                                                   struct btree_path_level *l,
+                                                   struct bkey *u)
 {
-       struct bkey_s_c k = __btree_iter_unpack(iter, l, &iter->k,
+       struct bkey_s_c k = __btree_iter_unpack(trans->c, l, u,
                        bch2_btree_node_iter_peek(&l->iter, l->b));
 
-       iter->real_pos = k.k ? k.k->p : l->b->key.k.p;
-       iter->trans->iters_sorted = false;
+       path->pos = k.k ? k.k->p : l->b->key.k.p;
+       trans->paths_sorted = false;
        return k;
 }
 
-static inline struct bkey_s_c btree_iter_level_prev(struct btree_iter *iter,
-                                                   struct btree_iter_level *l)
+static inline struct bkey_s_c btree_path_level_prev(struct btree_trans *trans,
+                                                   struct btree_path *path,
+                                                   struct btree_path_level *l,
+                                                   struct bkey *u)
 {
-       struct bkey_s_c k = __btree_iter_unpack(iter, l, &iter->k,
+       struct bkey_s_c k = __btree_iter_unpack(trans->c, l, u,
                        bch2_btree_node_iter_prev(&l->iter, l->b));
 
-       iter->real_pos = k.k ? k.k->p : l->b->data->min_key;
-       iter->trans->iters_sorted = false;
+       path->pos = k.k ? k.k->p : l->b->data->min_key;
+       trans->paths_sorted = false;
        return k;
 }
 
-static inline bool btree_iter_advance_to_pos(struct btree_iter *iter,
-                                            struct btree_iter_level *l,
+static inline bool btree_path_advance_to_pos(struct btree_path *path,
+                                            struct btree_path_level *l,
                                             int max_advance)
 {
        struct bkey_packed *k;
        int nr_advanced = 0;
 
        while ((k = bch2_btree_node_iter_peek_all(&l->iter, l->b)) &&
-              bkey_iter_pos_cmp(l->b, k, &iter->real_pos) < 0) {
+              bkey_iter_pos_cmp(l->b, k, &path->pos) < 0) {
                if (max_advance > 0 && nr_advanced >= max_advance)
                        return false;
 
@@ -951,10 +975,10 @@ static inline bool btree_iter_advance_to_pos(struct btree_iter *iter,
 /*
  * Verify that iterator for parent node points to child node:
  */
-static void btree_iter_verify_new_node(struct btree_trans *trans,
-                                      struct btree_iter *iter, struct btree *b)
+static void btree_path_verify_new_node(struct btree_trans *trans,
+                                      struct btree_path *path, struct btree *b)
 {
-       struct btree_iter_level *l;
+       struct btree_path_level *l;
        unsigned plevel;
        bool parent_locked;
        struct bkey_packed *k;
@@ -963,15 +987,15 @@ static void btree_iter_verify_new_node(struct btree_trans *trans,
                return;
 
        plevel = b->c.level + 1;
-       if (!btree_iter_node(iter, plevel))
+       if (!btree_path_node(path, plevel))
                return;
 
-       parent_locked = btree_node_locked(iter, plevel);
+       parent_locked = btree_node_locked(path, plevel);
 
-       if (!bch2_btree_node_relock(trans, iter, plevel))
+       if (!bch2_btree_node_relock(trans, path, plevel))
                return;
 
-       l = &iter->l[plevel];
+       l = &path->l[plevel];
        k = bch2_btree_node_iter_peek_all(&l->iter, l->b);
        if (!k ||
            bkey_deleted(k) ||
@@ -983,7 +1007,7 @@ static void btree_iter_verify_new_node(struct btree_trans *trans,
                struct bkey uk = bkey_unpack_key(b, k);
 
                bch2_dump_btree_node(trans->c, l->b);
-               bch2_bpos_to_text(&PBUF(buf1), iter->real_pos);
+               bch2_bpos_to_text(&PBUF(buf1), path->pos);
                bch2_bkey_to_text(&PBUF(buf2), &uk);
                bch2_bpos_to_text(&PBUF(buf3), b->data->min_key);
                bch2_bpos_to_text(&PBUF(buf3), b->data->max_key);
@@ -991,20 +1015,20 @@ static void btree_iter_verify_new_node(struct btree_trans *trans,
                      "iter pos %s %s\n"
                      "iter key %s\n"
                      "new node %s-%s\n",
-                     bch2_btree_ids[iter->btree_id], buf1,
+                     bch2_btree_ids[path->btree_id], buf1,
                      buf2, buf3, buf4);
        }
 
        if (!parent_locked)
-               btree_node_unlock(iter, b->c.level + 1);
+               btree_node_unlock(path, b->c.level + 1);
 }
 
-static inline void __btree_iter_level_init(struct btree_iter *iter,
+static inline void __btree_path_level_init(struct btree_path *path,
                                           unsigned level)
 {
-       struct btree_iter_level *l = &iter->l[level];
+       struct btree_path_level *l = &path->l[level];
 
-       bch2_btree_node_iter_init(&l->iter, l->b, &iter->real_pos);
+       bch2_btree_node_iter_init(&l->iter, l->b, &path->pos);
 
        /*
         * Iterators to interior nodes should always be pointed at the first non
@@ -1014,22 +1038,24 @@ static inline void __btree_iter_level_init(struct btree_iter *iter,
                bch2_btree_node_iter_peek(&l->iter, l->b);
 }
 
-static inline void btree_iter_level_init(struct btree_trans *trans,
-                                        struct btree_iter *iter,
+static inline void btree_path_level_init(struct btree_trans *trans,
+                                        struct btree_path *path,
                                         struct btree *b)
 {
-       BUG_ON(iter->cached);
+       BUG_ON(path->cached);
 
-       btree_iter_verify_new_node(trans, iter, b);
+       btree_path_verify_new_node(trans, path, b);
 
-       EBUG_ON(!btree_iter_pos_in_node(iter, b));
+       EBUG_ON(!btree_path_pos_in_node(path, b));
        EBUG_ON(b->c.lock.state.seq & 1);
 
-       iter->l[b->c.level].lock_seq = b->c.lock.state.seq;
-       iter->l[b->c.level].b = b;
-       __btree_iter_level_init(iter, b->c.level);
+       path->l[b->c.level].lock_seq = b->c.lock.state.seq;
+       path->l[b->c.level].b = b;
+       __btree_path_level_init(path, b->c.level);
 }
 
+/* Btree path: fixups after btree node updates: */
+
 /*
  * A btree node is being replaced - update the iterator to point to the new
  * node:
@@ -1037,37 +1063,37 @@ static inline void btree_iter_level_init(struct btree_trans *trans,
 void bch2_trans_node_add(struct btree_trans *trans, struct btree *b)
 {
        enum btree_node_locked_type t;
-       struct btree_iter *iter;
+       struct btree_path *path;
 
-       trans_for_each_iter(trans, iter)
-               if (!iter->cached &&
-                   btree_iter_pos_in_node(iter, b)) {
+       trans_for_each_path(trans, path)
+               if (!path->cached &&
+                   btree_path_pos_in_node(path, b)) {
                        /*
-                        * bch2_trans_node_drop() has already been called -
+                        * bch2_trans_node_drop() has already been called -
                         * the old node we're replacing has already been
                         * unlocked and the pointer invalidated
                         */
-                       BUG_ON(btree_node_locked(iter, b->c.level));
+                       BUG_ON(btree_node_locked(path, b->c.level));
 
-                       t = btree_lock_want(iter, b->c.level);
+                       t = btree_lock_want(path, b->c.level);
                        if (t != BTREE_NODE_UNLOCKED) {
                                six_lock_increment(&b->c.lock, (enum six_lock_type) t);
-                               mark_btree_node_locked(iter, b->c.level, (enum six_lock_type) t);
+                               mark_btree_node_locked(path, b->c.level, (enum six_lock_type) t);
                        }
 
-                       btree_iter_level_init(trans, iter, b);
+                       btree_path_level_init(trans, path, b);
                }
 }
 
 void bch2_trans_node_drop(struct btree_trans *trans, struct btree *b)
 {
-       struct btree_iter *iter;
+       struct btree_path *path;
        unsigned level = b->c.level;
 
-       trans_for_each_iter(trans, iter)
-               if (iter->l[level].b == b) {
-                       btree_node_unlock(iter, level);
-                       iter->l[level].b = BTREE_ITER_NO_NODE_DROP;
+       trans_for_each_path(trans, path)
+               if (path->l[level].b == b) {
+                       btree_node_unlock(path, level);
+                       path->l[level].b = BTREE_ITER_NO_NODE_DROP;
                }
 }
 
@@ -1077,12 +1103,14 @@ void bch2_trans_node_drop(struct btree_trans *trans, struct btree *b)
  */
 void bch2_trans_node_reinit_iter(struct btree_trans *trans, struct btree *b)
 {
-       struct btree_iter *iter;
+       struct btree_path *path;
 
-       trans_for_each_iter_with_node(trans, b, iter)
-               __btree_iter_level_init(iter, b->c.level);
+       trans_for_each_path_with_node(trans, b, path)
+               __btree_path_level_init(path, b->c.level);
 }
 
+/* Btree path: traverse, set_pos: */
+
 static int lock_root_check_fn(struct six_lock *lock, void *p)
 {
        struct btree *b = container_of(lock, struct btree, c.lock);
@@ -1091,38 +1119,38 @@ static int lock_root_check_fn(struct six_lock *lock, void *p)
        return b == *rootp ? 0 : -1;
 }
 
-static inline int btree_iter_lock_root(struct btree_trans *trans,
-                                      struct btree_iter *iter,
+static inline int btree_path_lock_root(struct btree_trans *trans,
+                                      struct btree_path *path,
                                       unsigned depth_want,
                                       unsigned long trace_ip)
 {
        struct bch_fs *c = trans->c;
-       struct btree *b, **rootp = &c->btree_roots[iter->btree_id].b;
+       struct btree *b, **rootp = &c->btree_roots[path->btree_id].b;
        enum six_lock_type lock_type;
        unsigned i;
 
-       EBUG_ON(iter->nodes_locked);
+       EBUG_ON(path->nodes_locked);
 
        while (1) {
                b = READ_ONCE(*rootp);
-               iter->level = READ_ONCE(b->c.level);
+               path->level = READ_ONCE(b->c.level);
 
-               if (unlikely(iter->level < depth_want)) {
+               if (unlikely(path->level < depth_want)) {
                        /*
                         * the root is at a lower depth than the depth we want:
                         * got to the end of the btree, or we're walking nodes
                         * greater than some depth and there are no nodes >=
                         * that depth
                         */
-                       iter->level = depth_want;
-                       for (i = iter->level; i < BTREE_MAX_DEPTH; i++)
-                               iter->l[i].b = NULL;
+                       path->level = depth_want;
+                       for (i = path->level; i < BTREE_MAX_DEPTH; i++)
+                               path->l[i].b = NULL;
                        return 1;
                }
 
-               lock_type = __btree_lock_want(iter, iter->level);
-               if (unlikely(!btree_node_lock(trans, iter, b, SPOS_MAX,
-                                             iter->level, lock_type,
+               lock_type = __btree_lock_want(path, path->level);
+               if (unlikely(!btree_node_lock(trans, path, b, SPOS_MAX,
+                                             path->level, lock_type,
                                              lock_root_check_fn, rootp,
                                              trace_ip))) {
                        if (trans->restarted)
@@ -1131,16 +1159,16 @@ static inline int btree_iter_lock_root(struct btree_trans *trans,
                }
 
                if (likely(b == READ_ONCE(*rootp) &&
-                          b->c.level == iter->level &&
+                          b->c.level == path->level &&
                           !race_fault())) {
-                       for (i = 0; i < iter->level; i++)
-                               iter->l[i].b = BTREE_ITER_NO_NODE_LOCK_ROOT;
-                       iter->l[iter->level].b = b;
-                       for (i = iter->level + 1; i < BTREE_MAX_DEPTH; i++)
-                               iter->l[i].b = NULL;
-
-                       mark_btree_node_locked(iter, iter->level, lock_type);
-                       btree_iter_level_init(trans, iter, b);
+                       for (i = 0; i < path->level; i++)
+                               path->l[i].b = BTREE_ITER_NO_NODE_LOCK_ROOT;
+                       path->l[path->level].b = b;
+                       for (i = path->level + 1; i < BTREE_MAX_DEPTH; i++)
+                               path->l[i].b = NULL;
+
+                       mark_btree_node_locked(path, path->level, lock_type);
+                       btree_path_level_init(trans, path, b);
                        return 0;
                }
 
@@ -1149,23 +1177,23 @@ static inline int btree_iter_lock_root(struct btree_trans *trans,
 }
 
 noinline
-static int btree_iter_prefetch(struct btree_trans *trans, struct btree_iter *iter)
+static int btree_path_prefetch(struct btree_trans *trans, struct btree_path *path)
 {
        struct bch_fs *c = trans->c;
-       struct btree_iter_level *l = &iter->l[iter->level];
+       struct btree_path_level *l = path_l(path);
        struct btree_node_iter node_iter = l->iter;
        struct bkey_packed *k;
        struct bkey_buf tmp;
        unsigned nr = test_bit(BCH_FS_STARTED, &c->flags)
-               ? (iter->level > 1 ? 0 :  2)
-               : (iter->level > 1 ? 1 : 16);
-       bool was_locked = btree_node_locked(iter, iter->level);
+               ? (path->level > 1 ? 0 :  2)
+               : (path->level > 1 ? 1 : 16);
+       bool was_locked = btree_node_locked(path, path->level);
        int ret = 0;
 
        bch2_bkey_buf_init(&tmp);
 
        while (nr && !ret) {
-               if (!bch2_btree_node_relock(trans, iter, iter->level))
+               if (!bch2_btree_node_relock(trans, path, path->level))
                        break;
 
                bch2_btree_node_iter_advance(&node_iter, l->b);
@@ -1174,27 +1202,27 @@ static int btree_iter_prefetch(struct btree_trans *trans, struct btree_iter *ite
                        break;
 
                bch2_bkey_buf_unpack(&tmp, c, l->b, k);
-               ret = bch2_btree_node_prefetch(c, trans, iter, tmp.k,
-                                       iter->btree_id, iter->level - 1);
+               ret = bch2_btree_node_prefetch(c, trans, path, tmp.k, path->btree_id,
+                                              path->level - 1);
        }
 
        if (!was_locked)
-               btree_node_unlock(iter, iter->level);
+               btree_node_unlock(path, path->level);
 
        bch2_bkey_buf_exit(&tmp, c);
        return ret;
 }
 
 static noinline void btree_node_mem_ptr_set(struct btree_trans *trans,
-                                           struct btree_iter *iter,
+                                           struct btree_path *path,
                                            unsigned plevel, struct btree *b)
 {
-       struct btree_iter_level *l = &iter->l[plevel];
-       bool locked = btree_node_locked(iter, plevel);
+       struct btree_path_level *l = &path->l[plevel];
+       bool locked = btree_node_locked(path, plevel);
        struct bkey_packed *k;
        struct bch_btree_ptr_v2 *bp;
 
-       if (!bch2_btree_node_relock(trans, iter, plevel))
+       if (!bch2_btree_node_relock(trans, path, plevel))
                return;
 
        k = bch2_btree_node_iter_peek_all(&l->iter, l->b);
@@ -1204,60 +1232,61 @@ static noinline void btree_node_mem_ptr_set(struct btree_trans *trans,
        bp->mem_ptr = (unsigned long)b;
 
        if (!locked)
-               btree_node_unlock(iter, plevel);
+               btree_node_unlock(path, plevel);
 }
 
-static __always_inline int btree_iter_down(struct btree_trans *trans,
-                                          struct btree_iter *iter,
+static __always_inline int btree_path_down(struct btree_trans *trans,
+                                          struct btree_path *path,
+                                          unsigned flags,
                                           unsigned long trace_ip)
 {
        struct bch_fs *c = trans->c;
-       struct btree_iter_level *l = &iter->l[iter->level];
+       struct btree_path_level *l = path_l(path);
        struct btree *b;
-       unsigned level = iter->level - 1;
-       enum six_lock_type lock_type = __btree_lock_want(iter, level);
+       unsigned level = path->level - 1;
+       enum six_lock_type lock_type = __btree_lock_want(path, level);
        struct bkey_buf tmp;
        int ret;
 
-       EBUG_ON(!btree_node_locked(iter, iter->level));
+       EBUG_ON(!btree_node_locked(path, path->level));
 
        bch2_bkey_buf_init(&tmp);
        bch2_bkey_buf_unpack(&tmp, c, l->b,
                         bch2_btree_node_iter_peek(&l->iter, l->b));
 
-       b = bch2_btree_node_get(trans, iter, tmp.k, level, lock_type, trace_ip);
+       b = bch2_btree_node_get(trans, path, tmp.k, level, lock_type, trace_ip);
        ret = PTR_ERR_OR_ZERO(b);
        if (unlikely(ret))
                goto err;
 
-       mark_btree_node_locked(iter, level, lock_type);
-       btree_iter_level_init(trans, iter, b);
+       mark_btree_node_locked(path, level, lock_type);
+       btree_path_level_init(trans, path, b);
 
        if (tmp.k->k.type == KEY_TYPE_btree_ptr_v2 &&
            unlikely(b != btree_node_mem_ptr(tmp.k)))
-               btree_node_mem_ptr_set(trans, iter, level + 1, b);
+               btree_node_mem_ptr_set(trans, path, level + 1, b);
 
-       if (iter->flags & BTREE_ITER_PREFETCH)
-               ret = btree_iter_prefetch(trans, iter);
+       if (flags & BTREE_ITER_PREFETCH)
+               ret = btree_path_prefetch(trans, path);
 
-       if (btree_node_read_locked(iter, level + 1))
-               btree_node_unlock(iter, level + 1);
-       iter->level = level;
+       if (btree_node_read_locked(path, level + 1))
+               btree_node_unlock(path, level + 1);
+       path->level = level;
 
-       bch2_btree_iter_verify_locks(iter);
+       bch2_btree_path_verify_locks(path);
 err:
        bch2_bkey_buf_exit(&tmp, c);
        return ret;
 }
 
-static int btree_iter_traverse_one(struct btree_trans *,
-                       struct btree_iter *, unsigned long);
+static int btree_path_traverse_one(struct btree_trans *, struct btree_path *,
+                                  unsigned, unsigned long);
 
-static int __btree_iter_traverse_all(struct btree_trans *trans, int ret,
+static int __btree_path_traverse_all(struct btree_trans *trans, int ret,
                                     unsigned long trace_ip)
 {
        struct bch_fs *c = trans->c;
-       struct btree_iter *iter, *prev = NULL;
+       struct btree_path *path, *prev = NULL;
        int i;
 
        if (trans->in_traverse_all)
@@ -1267,21 +1296,21 @@ static int __btree_iter_traverse_all(struct btree_trans *trans, int ret,
 retry_all:
        trans->restarted = false;
 
-       trans_for_each_iter(trans, iter)
-               iter->should_be_locked = false;
+       trans_for_each_path(trans, path)
+               path->should_be_locked = false;
 
-       btree_trans_sort_iters(trans);
+       btree_trans_sort_paths(trans);
 
-       trans_for_each_iter_inorder_reverse(trans, iter, i) {
+       trans_for_each_path_inorder_reverse(trans, path, i) {
                if (prev) {
-                       if (iter->btree_id == prev->btree_id &&
-                           iter->locks_want < prev->locks_want)
-                               __bch2_btree_iter_upgrade(trans, iter, prev->locks_want);
-                       else if (!iter->locks_want && prev->locks_want)
-                               __bch2_btree_iter_upgrade(trans, iter, 1);
+                       if (path->btree_id == prev->btree_id &&
+                           path->locks_want < prev->locks_want)
+                               __bch2_btree_path_upgrade(trans, path, prev->locks_want);
+                       else if (!path->locks_want && prev->locks_want)
+                               __bch2_btree_path_upgrade(trans, path, 1);
                }
 
-               prev = iter;
+               prev = path;
        }
 
        bch2_trans_unlock(trans);
@@ -1308,27 +1337,27 @@ retry_all:
        /* Now, redo traversals in correct order: */
        i = 0;
        while (i < trans->nr_sorted) {
-               iter = trans->iters + trans->sorted[i];
+               path = trans->paths + trans->sorted[i];
 
-               EBUG_ON(!(trans->iters_linked & (1ULL << iter->idx)));
+               EBUG_ON(!(trans->paths_allocated & (1ULL << path->idx)));
 
-               ret = btree_iter_traverse_one(trans, iter, _THIS_IP_);
+               ret = btree_path_traverse_one(trans, path, 0, _THIS_IP_);
                if (ret)
                        goto retry_all;
 
-               EBUG_ON(!(trans->iters_linked & (1ULL << iter->idx)));
+               EBUG_ON(!(trans->paths_allocated & (1ULL << path->idx)));
 
-               if (iter->nodes_locked)
+               if (path->nodes_locked)
                        i++;
        }
 
        /*
         * BTREE_ITER_NEED_RELOCK is ok here - if we called bch2_trans_unlock()
-        * and relock(), relock() won't relock since iter->should_be_locked
+        * and relock(), relock() won't relock since path->should_be_locked
         * isn't set yet, which is all fine
         */
-       trans_for_each_iter(trans, iter)
-               BUG_ON(iter->uptodate >= BTREE_ITER_NEED_TRAVERSE);
+       trans_for_each_path(trans, path)
+               BUG_ON(path->uptodate >= BTREE_ITER_NEED_TRAVERSE);
 out:
        bch2_btree_cache_cannibalize_unlock(c);
 
@@ -1338,36 +1367,36 @@ out:
        return ret;
 }
 
-static int bch2_btree_iter_traverse_all(struct btree_trans *trans)
+static int bch2_btree_path_traverse_all(struct btree_trans *trans)
 {
-       return __btree_iter_traverse_all(trans, 0, _RET_IP_);
+       return __btree_path_traverse_all(trans, 0, _RET_IP_);
 }
 
-static inline bool btree_iter_good_node(struct btree_trans *trans,
-                                       struct btree_iter *iter,
+static inline bool btree_path_good_node(struct btree_trans *trans,
+                                       struct btree_path *path,
                                        unsigned l, int check_pos)
 {
-       if (!is_btree_node(iter, l) ||
-           !bch2_btree_node_relock(trans, iter, l))
+       if (!is_btree_node(path, l) ||
+           !bch2_btree_node_relock(trans, path, l))
                return false;
 
-       if (check_pos < 0 && btree_iter_pos_before_node(iter, iter->l[l].b))
+       if (check_pos < 0 && btree_path_pos_before_node(path, path->l[l].b))
                return false;
-       if (check_pos > 0 && btree_iter_pos_after_node(iter, iter->l[l].b))
+       if (check_pos > 0 && btree_path_pos_after_node(path, path->l[l].b))
                return false;
        return true;
 }
 
-static inline unsigned btree_iter_up_until_good_node(struct btree_trans *trans,
-                                                    struct btree_iter *iter,
+static inline unsigned btree_path_up_until_good_node(struct btree_trans *trans,
+                                                    struct btree_path *path,
                                                     int check_pos)
 {
-       unsigned l = iter->level;
+       unsigned l = path->level;
 
-       while (btree_iter_node(iter, l) &&
-              !btree_iter_good_node(trans, iter, l, check_pos)) {
-               btree_node_unlock(iter, l);
-               iter->l[l].b = BTREE_ITER_NO_NODE_UP;
+       while (btree_path_node(path, l) &&
+              !btree_path_good_node(trans, path, l, check_pos)) {
+               btree_node_unlock(path, l);
+               path->l[l].b = BTREE_ITER_NO_NODE_UP;
                l++;
        }
 
@@ -1383,53 +1412,54 @@ static inline unsigned btree_iter_up_until_good_node(struct btree_trans *trans,
  * On error, caller (peek_node()/peek_key()) must return NULL; the error is
  * stashed in the iterator and returned from bch2_trans_exit().
  */
-static int btree_iter_traverse_one(struct btree_trans *trans,
-                                  struct btree_iter *iter,
+static int btree_path_traverse_one(struct btree_trans *trans,
+                                  struct btree_path *path,
+                                  unsigned flags,
                                   unsigned long trace_ip)
 {
-       unsigned l, depth_want = iter->level;
+       unsigned l, depth_want = path->level;
        int ret = 0;
 
        /*
-        * Ensure we obey iter->should_be_locked: if it's set, we can't unlock
-        * and re-traverse the iterator without a transaction restart:
+        * Ensure we obey path->should_be_locked: if it's set, we can't unlock
+        * and re-traverse the path without a transaction restart:
         */
-       if (iter->should_be_locked) {
-               ret = bch2_btree_iter_relock(trans, iter, trace_ip) ? 0 : -EINTR;
+       if (path->should_be_locked) {
+               ret = bch2_btree_path_relock(trans, path, trace_ip) ? 0 : -EINTR;
                goto out;
        }
 
-       if (iter->cached) {
-               ret = bch2_btree_iter_traverse_cached(trans, iter);
+       if (path->cached) {
+               ret = bch2_btree_path_traverse_cached(trans, path, flags);
                goto out;
        }
 
-       if (unlikely(iter->level >= BTREE_MAX_DEPTH))
+       if (unlikely(path->level >= BTREE_MAX_DEPTH))
                goto out;
 
-       iter->level = btree_iter_up_until_good_node(trans, iter, 0);
+       path->level = btree_path_up_until_good_node(trans, path, 0);
 
        /* If we need intent locks, take them too: */
-       for (l = iter->level + 1;
-            l < iter->locks_want && btree_iter_node(iter, l);
+       for (l = path->level + 1;
+            l < path->locks_want && btree_path_node(path, l);
             l++)
-               if (!bch2_btree_node_relock(trans, iter, l))
-                       while (iter->level <= l) {
-                               btree_node_unlock(iter, iter->level);
-                               iter->l[iter->level].b = BTREE_ITER_NO_NODE_UP;
-                               iter->level++;
+               if (!bch2_btree_node_relock(trans, path, l))
+                       while (path->level <= l) {
+                               btree_node_unlock(path, path->level);
+                               path->l[path->level].b = BTREE_ITER_NO_NODE_UP;
+                               path->level++;
                        }
 
        /*
-        * Note: iter->nodes[iter->level] may be temporarily NULL here - that
+        * Note: path->l[path->level].b may be temporarily NULL here - that
         * would indicate to other code that we got to the end of the btree,
         * here it indicates that relocking the root failed - it's critical that
-        * btree_iter_lock_root() comes next and that it can't fail
+        * btree_path_lock_root() comes next and that it can't fail
         */
-       while (iter->level > depth_want) {
-               ret = btree_iter_node(iter, iter->level)
-                       ? btree_iter_down(trans, iter, trace_ip)
-                       : btree_iter_lock_root(trans, iter, depth_want, trace_ip);
+       while (path->level > depth_want) {
+               ret = btree_path_node(path, path->level)
+                       ? btree_path_down(trans, path, flags, trace_ip)
+                       : btree_path_lock_root(trans, path, depth_want, trace_ip);
                if (unlikely(ret)) {
                        if (ret == 1) {
                                /*
@@ -1440,74 +1470,405 @@ static int btree_iter_traverse_one(struct btree_trans *trans,
                                goto out;
                        }
 
-                       __bch2_btree_iter_unlock(iter);
-                       iter->level = depth_want;
+                       __bch2_btree_path_unlock(path);
+                       path->level = depth_want;
 
-                       if (ret == -EIO) {
-                               iter->flags |= BTREE_ITER_ERROR;
-                               iter->l[iter->level].b =
+                       if (ret == -EIO)
+                               path->l[path->level].b =
                                        BTREE_ITER_NO_NODE_ERROR;
-                       } else {
-                               iter->l[iter->level].b =
+                       else
+                               path->l[path->level].b =
                                        BTREE_ITER_NO_NODE_DOWN;
-                       }
                        goto out;
                }
        }
 
-       iter->uptodate = BTREE_ITER_UPTODATE;
+       path->uptodate = BTREE_ITER_UPTODATE;
 out:
        BUG_ON((ret == -EINTR) != !!trans->restarted);
        trace_iter_traverse(trans->ip, trace_ip,
-                           iter->cached,
-                           iter->btree_id, &iter->real_pos, ret);
-       bch2_btree_iter_verify(iter);
+                           path->cached,
+                           path->btree_id, &path->pos, ret);
+       bch2_btree_path_verify(trans, path);
        return ret;
 }
 
-static int __must_check __bch2_btree_iter_traverse(struct btree_iter *iter)
+static int __btree_path_traverse_all(struct btree_trans *, int, unsigned long);
+
+int __must_check bch2_btree_path_traverse(struct btree_trans *trans,
+                                         struct btree_path *path, unsigned flags)
 {
-       struct btree_trans *trans = iter->trans;
        int ret;
 
+       if (path->uptodate < BTREE_ITER_NEED_RELOCK)
+               return 0;
+
        ret =   bch2_trans_cond_resched(trans) ?:
-               btree_iter_traverse_one(trans, iter, _RET_IP_);
-       if (unlikely(ret) && hweight64(trans->iters_linked) == 1) {
-               ret = __btree_iter_traverse_all(trans, ret, _RET_IP_);
+               btree_path_traverse_one(trans, path, flags, _RET_IP_);
+       if (unlikely(ret) && hweight64(trans->paths_allocated) == 1) {
+               ret = __btree_path_traverse_all(trans, ret, _RET_IP_);
                BUG_ON(ret == -EINTR);
        }
 
        return ret;
 }
 
-/*
- * Note:
- * bch2_btree_iter_traverse() is for external users, btree_iter_traverse() is
- * for internal btree iterator users
- *
- * bch2_btree_iter_traverse sets iter->real_pos to iter->pos,
- * btree_iter_traverse() does not:
- */
-static inline int __must_check
-btree_iter_traverse(struct btree_iter *iter)
+static void btree_path_copy(struct btree_trans *trans, struct btree_path *dst,
+                           struct btree_path *src)
+{
+       unsigned i, offset = offsetof(struct btree_path, pos);
+
+       memcpy((void *) dst + offset,
+              (void *) src + offset,
+              sizeof(struct btree_path) - offset);
+
+       for (i = 0; i < BTREE_MAX_DEPTH; i++)
+               if (btree_node_locked(dst, i))
+                       six_lock_increment(&dst->l[i].b->c.lock,
+                                          __btree_lock_want(dst, i));
+
+       trans->paths_sorted = false;
+}
+
+struct btree_path * __must_check
+__bch2_btree_path_make_mut(struct btree_trans *trans,
+                        struct btree_path *path, bool intent)
+{
+       struct btree_path *new = btree_path_alloc(trans, path);
+
+       btree_path_copy(trans, new, path);
+       __btree_path_get(new, intent);
+       __btree_path_put(path, intent);
+       path = new;
+       path->preserve = false;
+#ifdef CONFIG_BCACHEFS_DEBUG
+       path->ip_allocated = _RET_IP_;
+#endif
+       return path;
+}
+
+static struct btree_path * __must_check
+__bch2_btree_path_set_pos(struct btree_trans *trans,
+                         struct btree_path *path, struct bpos new_pos,
+                         bool intent, int cmp)
+{
+#ifdef CONFIG_BCACHEFS_DEBUG
+       struct bpos old_pos = path->pos;
+#endif
+       unsigned l = path->level;
+
+       EBUG_ON(trans->restarted);
+       EBUG_ON(!path->ref);
+
+       path = bch2_btree_path_make_mut(trans, path, intent);
+
+       path->pos               = new_pos;
+       path->should_be_locked  = false;
+       trans->paths_sorted     = false;
+
+       if (unlikely(path->cached)) {
+               btree_node_unlock(path, 0);
+               path->l[0].b = BTREE_ITER_NO_NODE_CACHED;
+               btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE);
+               goto out;
+       }
+
+       l = btree_path_up_until_good_node(trans, path, cmp);
+
+       if (btree_path_node(path, l)) {
+               /*
+                * We might have to skip over many keys, or just a few: try
+                * advancing the node iterator, and if we have to skip over too
+                * many keys just reinit it (or if we're rewinding, since that
+                * is expensive).
+                */
+               if (cmp < 0 ||
+                   !btree_path_advance_to_pos(path, &path->l[l], 8))
+                       __btree_path_level_init(path, l);
+
+               /* Don't leave it locked if we're not supposed to: */
+               if (btree_lock_want(path, l) == BTREE_NODE_UNLOCKED)
+                       btree_node_unlock(path, l);
+       }
+
+       if (l != path->level)
+               btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE);
+out:
+       bch2_btree_path_verify(trans, path);
+#ifdef CONFIG_BCACHEFS_DEBUG
+       trace_path_set_pos(trans->ip, _RET_IP_, path->btree_id,
+                          &old_pos, &new_pos, l);
+#endif
+       return path;
+}
+
+static inline struct btree_path * __must_check
+btree_path_set_pos(struct btree_trans *trans,
+                  struct btree_path *path, struct bpos new_pos,
+                  bool intent)
+{
+       int cmp = bpos_cmp(new_pos, path->pos);
+
+       return cmp
+               ? __bch2_btree_path_set_pos(trans, path, new_pos, intent, cmp)
+               : path;
+}
+
+/* Btree path: main interface: */
+
+static struct btree_path *have_path_at_pos(struct btree_trans *trans, struct btree_path *path)
+{
+       struct btree_path *next;
+
+       next = prev_btree_path(trans, path);
+       if (next && !btree_path_cmp(next, path))
+               return next;
+
+       next = next_btree_path(trans, path);
+       if (next && !btree_path_cmp(next, path))
+               return next;
+
+       return NULL;
+}
+
+static bool have_node_at_pos(struct btree_trans *trans, struct btree_path *path)
+{
+       struct btree_path *next;
+
+       next = prev_btree_path(trans, path);
+       if (next && path_l(next)->b == path_l(path)->b)
+               return true;
+
+       next = next_btree_path(trans, path);
+       if (next && path_l(next)->b == path_l(path)->b)
+               return true;
+
+       return false;
+}
+
+static inline void __bch2_path_free(struct btree_trans *trans, struct btree_path *path)
 {
-       return iter->uptodate >= BTREE_ITER_NEED_RELOCK
-               ? __bch2_btree_iter_traverse(iter)
-               : 0;
+       __bch2_btree_path_unlock(path);
+       btree_path_list_remove(trans, path);
+       trans->paths_allocated &= ~(1ULL << path->idx);
 }
 
+void bch2_path_put(struct btree_trans *trans, struct btree_path *path, bool intent)
+{
+       struct btree_path *dup;
+
+       EBUG_ON(trans->paths + path->idx != path);
+       EBUG_ON(!path->ref);
+
+       if (!__btree_path_put(path, intent))
+               return;
+
+       /*
+        * Perhaps instead we should check for duplicate paths in traverse_all:
+        */
+       if (path->preserve &&
+           (dup = have_path_at_pos(trans, path))) {
+               dup->preserve = true;
+               path->preserve = false;
+       }
+
+       if (!path->preserve &&
+           have_node_at_pos(trans, path))
+               __bch2_path_free(trans, path);
+}
+
+noinline __cold
+void bch2_dump_trans_paths_updates(struct btree_trans *trans)
+{
+       struct btree_path *path;
+       struct btree_insert_entry *i;
+       unsigned idx;
+       char buf[300];
+
+       btree_trans_sort_paths(trans);
+
+       trans_for_each_path_inorder(trans, path, idx)
+               printk(KERN_ERR "path: idx %u ref %u:%u%s btree %s pos %s %pS\n",
+                      path->idx, path->ref, path->intent_ref,
+                      path->preserve ? " preserve" : "",
+                      bch2_btree_ids[path->btree_id],
+                      (bch2_bpos_to_text(&PBUF(buf), path->pos), buf),
+#ifdef CONFIG_BCACHEFS_DEBUG
+                      (void *) path->ip_allocated
+#else
+                      NULL
+#endif
+                      );
+
+       trans_for_each_update(trans, i)
+               printk(KERN_ERR "update: btree %s %s %pS\n",
+                      bch2_btree_ids[i->btree_id],
+                      (bch2_bkey_val_to_text(&PBUF(buf), trans->c, bkey_i_to_s_c(i->k)), buf),
+                      (void *) i->ip_allocated);
+}
+
+static struct btree_path *btree_path_alloc(struct btree_trans *trans,
+                                          struct btree_path *pos)
+{
+       struct btree_path *path;
+       unsigned idx;
+
+       if (unlikely(trans->paths_allocated ==
+                    ~((~0ULL << 1) << (BTREE_ITER_MAX - 1)))) {
+               bch2_dump_trans_paths_updates(trans);
+               panic("trans path overflow\n");
+       }
+
+       idx = __ffs64(~trans->paths_allocated);
+       trans->paths_allocated |= 1ULL << idx;
+
+       path = &trans->paths[idx];
+
+       path->idx               = idx;
+       path->ref               = 0;
+       path->intent_ref        = 0;
+       path->nodes_locked      = 0;
+       path->nodes_intent_locked = 0;
+
+       btree_path_list_add(trans, pos, path);
+       return path;
+}
+
+struct btree_path *bch2_path_get(struct btree_trans *trans, bool cached,
+                                enum btree_id btree_id, struct bpos pos,
+                                unsigned locks_want, unsigned level,
+                                bool intent)
+{
+       struct btree_path *path, *best = NULL;
+       struct bpos pos_min = POS_MIN;
+       int i;
+
+       BUG_ON(trans->restarted);
+
+       trans_for_each_path(trans, path) {
+               if (path->cached != cached ||
+                   path->btree_id != btree_id ||
+                   path->level != level)
+                       continue;
+
+               if (best) {
+                       int cmp = bkey_cmp(bpos_diff(best->pos, pos),
+                                          bpos_diff(path->pos, pos));
+
+                       if (cmp < 0 ||
+                           ((cmp == 0 && (path->ref || path->preserve))))
+                               continue;
+               }
+
+               best = path;
+       }
+
+       if (best) {
+               __btree_path_get(best, intent);
+               path = btree_path_set_pos(trans, best, pos, intent);
+               path->preserve = true;
+       } else {
+               path = btree_path_alloc(trans, NULL);
+
+               __btree_path_get(path, intent);
+               path->pos                       = pos;
+               path->btree_id                  = btree_id;
+               path->cached                    = cached;
+               path->preserve                  = true;
+               path->uptodate                  = BTREE_ITER_NEED_TRAVERSE;
+               path->should_be_locked          = false;
+               path->level                     = level;
+               path->locks_want                = locks_want;
+               path->nodes_locked              = 0;
+               path->nodes_intent_locked       = 0;
+               for (i = 0; i < ARRAY_SIZE(path->l); i++)
+                       path->l[i].b            = BTREE_ITER_NO_NODE_INIT;
+#ifdef CONFIG_BCACHEFS_DEBUG
+               path->ip_allocated              = _RET_IP_;
+#endif
+               trans->paths_sorted             = false;
+       }
+
+       if (path->intent_ref)
+               locks_want = max(locks_want, level + 1);
+
+       /*
+        * If the path has locks_want greater than requested, we don't downgrade
+        * it here - on transaction restart because btree node split needs to
+        * upgrade locks, we might be putting/getting the iterator again.
+        * Downgrading iterators only happens via bch2_trans_downgrade(), after
+        * a successful transaction commit.
+        */
+
+       locks_want = min(locks_want, BTREE_MAX_DEPTH);
+       if (locks_want > path->locks_want) {
+               path->locks_want = locks_want;
+               btree_path_get_locks(trans, path, true, _THIS_IP_);
+       }
+
+       trace_trans_get_path(_RET_IP_, trans->ip, btree_id,
+                            &pos, locks_want, path->uptodate,
+                            best ? &best->pos          : &pos_min,
+                            best ? best->locks_want    : U8_MAX,
+                            best ? best->uptodate      : U8_MAX);
+
+       return path;
+}
+
+inline struct bkey_s_c bch2_btree_path_peek_slot(struct btree_path *path, struct bkey *u)
+{
+
+       struct bkey_s_c k;
+
+       BUG_ON(path->uptodate != BTREE_ITER_UPTODATE);
+
+       if (!path->cached) {
+               struct btree_path_level *l = path_l(path);
+               struct bkey_packed *_k =
+                       bch2_btree_node_iter_peek_all(&l->iter, l->b);
+
+               k = _k ? bkey_disassemble(l->b, _k, u) : bkey_s_c_null;
+
+               EBUG_ON(k.k && bkey_deleted(k.k) && bpos_cmp(k.k->p, path->pos) == 0);
+
+               if (!k.k || bpos_cmp(path->pos, k.k->p))
+                       goto hole;
+       } else {
+               struct bkey_cached *ck = (void *) path->l[0].b;
+
+               EBUG_ON(path->btree_id != ck->key.btree_id ||
+                       bkey_cmp(path->pos, ck->key.pos));
+
+               /* BTREE_ITER_CACHED_NOFILL? */
+               if (unlikely(!ck->valid))
+                       goto hole;
+
+               k = bkey_i_to_s_c(ck->k);
+       }
+
+       return k;
+hole:
+       bkey_init(u);
+       u->p = path->pos;
+       return (struct bkey_s_c) { u, NULL };
+}
+
+/* Btree iterators: */
+
 int __must_check
 bch2_btree_iter_traverse(struct btree_iter *iter)
 {
        int ret;
 
-       btree_iter_set_search_pos(iter, btree_iter_search_key(iter));
+       iter->path = btree_path_set_pos(iter->trans, iter->path,
+                                       btree_iter_search_key(iter),
+                                       iter->flags & BTREE_ITER_INTENT);
 
-       ret = btree_iter_traverse(iter);
+       ret = bch2_btree_path_traverse(iter->trans, iter->path, iter->flags);
        if (ret)
                return ret;
 
-       iter->should_be_locked = true;
+       iter->path->should_be_locked = true;
        return 0;
 }
 
@@ -1518,23 +1879,22 @@ struct btree *bch2_btree_iter_peek_node(struct btree_iter *iter)
        struct btree *b = NULL;
        int ret;
 
-       EBUG_ON(iter->cached);
+       EBUG_ON(iter->path->cached);
        bch2_btree_iter_verify(iter);
 
-       ret = btree_iter_traverse(iter);
+       ret = bch2_btree_path_traverse(iter->trans, iter->path, iter->flags);
        if (ret)
                goto out;
 
-       b = btree_iter_node(iter, iter->level);
+       b = btree_path_node(iter->path, iter->path->level);
        if (!b)
                goto out;
 
        BUG_ON(bpos_cmp(b->key.k.p, iter->pos) < 0);
 
        bkey_init(&iter->k);
-       iter->k.p = iter->pos = iter->real_pos = b->key.k.p;
-       iter->trans->iters_sorted = false;
-       iter->should_be_locked = true;
+       iter->k.p = iter->pos = b->key.k.p;
+       iter->path->should_be_locked = true;
 out:
        bch2_btree_iter_verify_entry_exit(iter);
        bch2_btree_iter_verify(iter);
@@ -1544,29 +1904,31 @@ out:
 
 struct btree *bch2_btree_iter_next_node(struct btree_iter *iter)
 {
+       struct btree_trans *trans = iter->trans;
+       struct btree_path *path = iter->path;
        struct btree *b = NULL;
        int ret;
 
-       EBUG_ON(iter->cached);
+       EBUG_ON(iter->path->cached);
        bch2_btree_iter_verify(iter);
 
        /* already got to end? */
-       if (!btree_iter_node(iter, iter->level))
+       if (!btree_path_node(path, path->level))
                goto out;
 
-       bch2_trans_cond_resched(iter->trans);
+       bch2_trans_cond_resched(trans);
 
-       btree_node_unlock(iter, iter->level);
-       iter->l[iter->level].b = BTREE_ITER_NO_NODE_UP;
-       iter->level++;
+       btree_node_unlock(path, path->level);
+       path->l[path->level].b = BTREE_ITER_NO_NODE_UP;
+       path->level++;
 
-       btree_iter_set_dirty(iter, BTREE_ITER_NEED_TRAVERSE);
-       ret = btree_iter_traverse(iter);
+       btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE);
+       ret = bch2_btree_path_traverse(trans, path, iter->flags);
        if (ret)
                goto out;
 
        /* got to end? */
-       b = btree_iter_node(iter, iter->level);
+       b = btree_path_node(path, path->level);
        if (!b)
                goto out;
 
@@ -1575,28 +1937,29 @@ struct btree *bch2_btree_iter_next_node(struct btree_iter *iter)
                 * Haven't gotten to the end of the parent node: go back down to
                 * the next child node
                 */
-               btree_iter_set_search_pos(iter, bpos_successor(iter->pos));
+               path = iter->path =
+                       btree_path_set_pos(trans, path, bpos_successor(iter->pos),
+                                          iter->flags & BTREE_ITER_INTENT);
 
                /* Unlock to avoid screwing up our lock invariants: */
-               btree_node_unlock(iter, iter->level);
+               btree_node_unlock(path, path->level);
 
-               iter->level = iter->min_depth;
-               btree_iter_set_dirty(iter, BTREE_ITER_NEED_TRAVERSE);
+               path->level = iter->min_depth;
+               btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE);
                bch2_btree_iter_verify(iter);
 
-               ret = btree_iter_traverse(iter);
+               ret = bch2_btree_path_traverse(trans, path, iter->flags);
                if (ret) {
                        b = NULL;
                        goto out;
                }
 
-               b = iter->l[iter->level].b;
+               b = path->l[path->level].b;
        }
 
        bkey_init(&iter->k);
-       iter->k.p = iter->pos = iter->real_pos = b->key.k.p;
-       iter->trans->iters_sorted = false;
-       iter->should_be_locked = true;
+       iter->k.p = iter->pos = b->key.k.p;
+       iter->path->should_be_locked = true;
 out:
        bch2_btree_iter_verify_entry_exit(iter);
        bch2_btree_iter_verify(iter);
@@ -1606,60 +1969,6 @@ out:
 
 /* Iterate across keys (in leaf nodes only) */
 
-static void btree_iter_set_search_pos(struct btree_iter *iter, struct bpos new_pos)
-{
-       struct btree_trans *trans = iter->trans;
-#ifdef CONFIG_BCACHEFS_DEBUG
-       struct bpos old_pos = iter->real_pos;
-#endif
-       int cmp = bpos_cmp(new_pos, iter->real_pos);
-       unsigned l = iter->level;
-
-       EBUG_ON(trans->restarted);
-
-       if (!cmp)
-               goto out;
-
-       iter->real_pos = new_pos;
-       iter->should_be_locked = false;
-       trans->iters_sorted = false;
-
-       if (unlikely(iter->cached)) {
-               btree_node_unlock(iter, 0);
-               iter->l[0].b = BTREE_ITER_NO_NODE_CACHED;
-               btree_iter_set_dirty(iter, BTREE_ITER_NEED_TRAVERSE);
-               return;
-       }
-
-       l = btree_iter_up_until_good_node(trans, iter, cmp);
-
-       if (btree_iter_node(iter, l)) {
-               /*
-                * We might have to skip over many keys, or just a few: try
-                * advancing the node iterator, and if we have to skip over too
-                * many keys just reinit it (or if we're rewinding, since that
-                * is expensive).
-                */
-               if (cmp < 0 ||
-                   !btree_iter_advance_to_pos(iter, &iter->l[l], 8))
-                       __btree_iter_level_init(iter, l);
-
-               /* Don't leave it locked if we're not supposed to: */
-               if (btree_lock_want(iter, l) == BTREE_NODE_UNLOCKED)
-                       btree_node_unlock(iter, l);
-       }
-out:
-       if (l != iter->level)
-               btree_iter_set_dirty(iter, BTREE_ITER_NEED_TRAVERSE);
-
-       bch2_btree_iter_verify(iter);
-#ifdef CONFIG_BCACHEFS_DEBUG
-       trace_iter_set_search_pos(trans->ip, _RET_IP_,
-                                 iter->btree_id,
-                                 &old_pos, &new_pos, l);
-#endif
-}
-
 inline bool bch2_btree_iter_advance(struct btree_iter *iter)
 {
        struct bpos pos = iter->k.p;
@@ -1684,7 +1993,7 @@ inline bool bch2_btree_iter_rewind(struct btree_iter *iter)
        return ret;
 }
 
-static noinline struct bkey_i *__btree_trans_peek_updates(struct btree_iter *iter)
+struct bkey_i *__bch2_btree_trans_peek_updates(struct btree_iter *iter)
 {
        struct btree_insert_entry *i;
        struct bkey_i *ret = NULL;
@@ -1694,7 +2003,7 @@ static noinline struct bkey_i *__btree_trans_peek_updates(struct btree_iter *ite
                        continue;
                if (i->btree_id > iter->btree_id)
                        break;
-               if (bpos_cmp(i->k->k.p, iter->real_pos) < 0)
+               if (bpos_cmp(i->k->k.p, iter->path->pos) < 0)
                        continue;
                if (!ret || bpos_cmp(i->k->k.p, ret->k.p) < 0)
                        ret = i->k;
@@ -1703,33 +2012,27 @@ static noinline struct bkey_i *__btree_trans_peek_updates(struct btree_iter *ite
        return ret;
 }
 
-static inline struct bkey_i *btree_trans_peek_updates(struct btree_iter *iter)
-{
-       return iter->flags & BTREE_ITER_WITH_UPDATES
-               ? __btree_trans_peek_updates(iter)
-               : NULL;
-}
-
 /**
  * bch2_btree_iter_peek: returns first key greater than or equal to iterator's
  * current position
  */
 struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter)
 {
-       struct btree_iter_level *l = &iter->l[0];
+       struct btree_trans *trans = iter->trans;
        struct bpos search_key = btree_iter_search_key(iter);
        struct bkey_i *next_update;
        struct bkey_s_c k;
-       int ret;
+       int ret, cmp;
 
-       EBUG_ON(iter->cached || iter->level);
+       EBUG_ON(iter->path->cached || iter->path->level);
        bch2_btree_iter_verify(iter);
        bch2_btree_iter_verify_entry_exit(iter);
 
        while (1) {
-               btree_iter_set_search_pos(iter, search_key);
+               iter->path = btree_path_set_pos(trans, iter->path, search_key,
+                                  iter->flags & BTREE_ITER_INTENT);
 
-               ret = btree_iter_traverse(iter);
+               ret = bch2_btree_path_traverse(trans, iter->path, iter->flags);
                if (unlikely(ret)) {
                        /* ensure that iter->k is consistent with iter->pos: */
                        bch2_btree_iter_set_pos(iter, iter->pos);
@@ -1738,7 +2041,7 @@ struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter)
                }
 
                next_update = btree_trans_peek_updates(iter);
-               k = btree_iter_level_peek_all(iter, l);
+               k = btree_path_level_peek_all(trans->c, &iter->path->l[0], &iter->k);
 
                /* * In the btree, deleted keys sort before non deleted: */
                if (k.k && bkey_deleted(k.k) &&
@@ -1750,7 +2053,7 @@ struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter)
 
                if (next_update &&
                    bpos_cmp(next_update->k.p,
-                            k.k ? k.k->p : l->b->key.k.p) <= 0) {
+                            k.k ? k.k->p : iter->path->l[0].b->key.k.p) <= 0) {
                        iter->k = next_update->k;
                        k = bkey_i_to_s_c(next_update);
                }
@@ -1761,13 +2064,12 @@ struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter)
 
                        /* Advance to next key: */
                        search_key = bkey_successor(iter, k.k->p);
-               } else if (likely(bpos_cmp(l->b->key.k.p, SPOS_MAX))) {
+               } else if (likely(bpos_cmp(iter->path->l[0].b->key.k.p, SPOS_MAX))) {
                        /* Advance to next leaf node: */
-                       search_key = bpos_successor(l->b->key.k.p);
+                       search_key = bpos_successor(iter->path->l[0].b->key.k.p);
                } else {
                        /* End of btree: */
                        bch2_btree_iter_set_pos(iter, SPOS_MAX);
-                       iter->real_pos = SPOS_MAX;
                        k = bkey_s_c_null;
                        goto out;
                }
@@ -1781,9 +2083,15 @@ struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter)
                iter->pos = k.k->p;
        else if (bkey_cmp(bkey_start_pos(k.k), iter->pos) > 0)
                iter->pos = bkey_start_pos(k.k);
-       iter->real_pos = k.k->p;
+
+       cmp = bpos_cmp(k.k->p, iter->path->pos);
+       if (cmp) {
+               iter->path->pos = k.k->p;
+               trans->paths_sorted = false;
+       }
 out:
-       iter->should_be_locked = true;
+       iter->path->should_be_locked = true;
+
        bch2_btree_iter_verify_entry_exit(iter);
        bch2_btree_iter_verify(iter);
        return k;
@@ -1807,20 +2115,21 @@ struct bkey_s_c bch2_btree_iter_next(struct btree_iter *iter)
  */
 struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *iter)
 {
+       struct btree_trans *trans = iter->trans;
        struct bpos search_key = iter->pos;
-       struct btree_iter_level *l = &iter->l[0];
        struct bkey_s_c k;
        int ret;
 
-       EBUG_ON(iter->cached || iter->level);
+       EBUG_ON(iter->path->cached || iter->path->level);
        EBUG_ON(iter->flags & BTREE_ITER_WITH_UPDATES);
        bch2_btree_iter_verify(iter);
        bch2_btree_iter_verify_entry_exit(iter);
 
        while (1) {
-               btree_iter_set_search_pos(iter, search_key);
+               iter->path = btree_path_set_pos(trans, iter->path, search_key,
+                                               iter->flags & BTREE_ITER_INTENT);
 
-               ret = btree_iter_traverse(iter);
+               ret = bch2_btree_path_traverse(trans, iter->path, iter->flags);
                if (unlikely(ret)) {
                        /* ensure that iter->k is consistent with iter->pos: */
                        bch2_btree_iter_set_pos(iter, iter->pos);
@@ -1828,18 +2137,20 @@ struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *iter)
                        goto out;
                }
 
-               k = btree_iter_level_peek(iter, l);
+               k = btree_path_level_peek(trans, iter->path,
+                                         &iter->path->l[0], &iter->k);
                if (!k.k ||
                    ((iter->flags & BTREE_ITER_IS_EXTENTS)
                     ? bkey_cmp(bkey_start_pos(k.k), iter->pos) >= 0
                     : bkey_cmp(k.k->p, iter->pos) > 0))
-                       k = btree_iter_level_prev(iter, l);
+                       k = btree_path_level_prev(trans, iter->path,
+                                                 &iter->path->l[0], &iter->k);
 
                if (likely(k.k)) {
                        break;
-               } else if (likely(bpos_cmp(l->b->data->min_key, POS_MIN))) {
+               } else if (likely(bpos_cmp(iter->path->l[0].b->data->min_key, POS_MIN))) {
                        /* Advance to previous leaf node: */
-                       search_key = bpos_predecessor(l->b->data->min_key);
+                       search_key = bpos_predecessor(iter->path->l[0].b->data->min_key);
                } else {
                        /* Start of btree: */
                        bch2_btree_iter_set_pos(iter, POS_MIN);
@@ -1854,9 +2165,11 @@ struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *iter)
        if (bkey_cmp(k.k->p, iter->pos) < 0)
                iter->pos = k.k->p;
 out:
-       iter->should_be_locked = true;
+       iter->path->should_be_locked = true;
+
        bch2_btree_iter_verify_entry_exit(iter);
        bch2_btree_iter_verify(iter);
+
        return k;
 }
 
@@ -1879,7 +2192,7 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter)
        struct bkey_s_c k;
        int ret;
 
-       EBUG_ON(iter->level);
+       EBUG_ON(iter->path->level);
        bch2_btree_iter_verify(iter);
        bch2_btree_iter_verify_entry_exit(iter);
 
@@ -1893,9 +2206,10 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter)
        }
 
        search_key = btree_iter_search_key(iter);
-       btree_iter_set_search_pos(iter, search_key);
+       iter->path = btree_path_set_pos(trans, iter->path, search_key,
+                                       iter->flags & BTREE_ITER_INTENT);
 
-       ret = btree_iter_traverse(iter);
+       ret = bch2_btree_path_traverse(trans, iter->path, iter->flags);
        if (unlikely(ret))
                return bkey_s_c_err(ret);
 
@@ -1903,23 +2217,12 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter)
                struct bkey_i *next_update;
 
                next_update = btree_trans_peek_updates(iter);
-
-               if (!iter->cached) {
-                       k = btree_iter_level_peek_all(iter, &iter->l[0]);
-                       EBUG_ON(k.k && bkey_deleted(k.k) && bpos_cmp(k.k->p, iter->pos) == 0);
-               } else {
-                       struct bkey_cached *ck = (void *) iter->l[0].b;
-                       EBUG_ON(iter->btree_id != ck->key.btree_id ||
-                               bkey_cmp(iter->pos, ck->key.pos));
-                       BUG_ON(!ck->valid);
-
-                       k = bkey_i_to_s_c(ck->k);
-               }
-
                if (next_update &&
-                   (!k.k || bpos_cmp(next_update->k.p, k.k->p) <= 0)) {
+                   !bpos_cmp(next_update->k.p, iter->pos)) {
                        iter->k = next_update->k;
                        k = bkey_i_to_s_c(next_update);
+               } else {
+                       k = bch2_btree_path_peek_slot(iter->path, &iter->k);
                }
 
                if (!k.k ||
@@ -1934,14 +2237,16 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter)
                struct bpos next;
 
                if (iter->flags & BTREE_ITER_INTENT) {
-                       struct btree_iter *child =
-                               btree_iter_child_alloc(trans, iter, _THIS_IP_);
+                       struct btree_iter iter2;
 
-                       btree_iter_copy(trans, child, iter);
-                       k = bch2_btree_iter_peek(child);
+                       bch2_trans_copy_iter(&iter2, iter);
+                       k = bch2_btree_iter_peek(&iter2);
 
-                       if (k.k && !bkey_err(k))
-                               iter->k = child->k;
+                       if (k.k && !bkey_err(k)) {
+                               iter->k = iter2.k;
+                               k.k = &iter->k;
+                       }
+                       bch2_trans_iter_exit(trans, &iter2);
                } else {
                        struct bpos pos = iter->pos;
 
@@ -1969,9 +2274,10 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter)
                }
        }
 
+       iter->path->should_be_locked = true;
+
        bch2_btree_iter_verify_entry_exit(iter);
        bch2_btree_iter_verify(iter);
-       iter->should_be_locked = true;
 
        return k;
 }
@@ -1992,47 +2298,26 @@ struct bkey_s_c bch2_btree_iter_prev_slot(struct btree_iter *iter)
        return bch2_btree_iter_peek_slot(iter);
 }
 
-static inline void bch2_btree_iter_init(struct btree_trans *trans,
-                       struct btree_iter *iter, enum btree_id btree_id)
-{
-       struct bch_fs *c = trans->c;
-       unsigned i;
-
-       iter->trans                     = trans;
-       iter->uptodate                  = BTREE_ITER_NEED_TRAVERSE;
-       iter->btree_id                  = btree_id;
-       iter->real_pos                  = POS_MIN;
-       iter->level                     = 0;
-       iter->min_depth                 = 0;
-       iter->locks_want                = 0;
-       iter->nodes_locked              = 0;
-       iter->nodes_intent_locked       = 0;
-       for (i = 0; i < ARRAY_SIZE(iter->l); i++)
-               iter->l[i].b            = BTREE_ITER_NO_NODE_INIT;
-
-       prefetch(c->btree_roots[btree_id].b);
-}
-
 /* new transactional stuff: */
 
 #ifdef CONFIG_BCACHEFS_DEBUG
 static void btree_trans_verify_sorted_refs(struct btree_trans *trans)
 {
-       struct btree_iter *iter;
+       struct btree_path *path;
        unsigned i;
 
-       BUG_ON(trans->nr_sorted != hweight64(trans->iters_linked));
+       BUG_ON(trans->nr_sorted != hweight64(trans->paths_allocated)); /* one sorted[] slot per bit set in paths_allocated */
 
-       trans_for_each_iter(trans, iter) {
-               BUG_ON(iter->sorted_idx >= trans->nr_sorted);
-               BUG_ON(trans->sorted[iter->sorted_idx] != iter->idx);
+       trans_for_each_path(trans, path) {      /* forward check: path -> sorted[] */
+               BUG_ON(path->sorted_idx >= trans->nr_sorted);
+               BUG_ON(trans->sorted[path->sorted_idx] != path->idx);
        }
 
        for (i = 0; i < trans->nr_sorted; i++) {
                unsigned idx = trans->sorted[i];
 
-               EBUG_ON(!(trans->iters_linked & (1ULL << idx)));
-               BUG_ON(trans->iters[idx].sorted_idx != i);
+               EBUG_ON(!(trans->paths_allocated & (1ULL << idx))); /* reverse check: sorted[] -> path */
+               BUG_ON(trans->paths[idx].sorted_idx != i);
        }
 }
 #else
@@ -2042,17 +2327,17 @@ static inline void btree_trans_verify_sorted_refs(struct btree_trans *trans) {}
 static void btree_trans_verify_sorted(struct btree_trans *trans)
 {
 #ifdef CONFIG_BCACHEFS_DEBUG
-       struct btree_iter *iter, *prev = NULL;
+       struct btree_path *path, *prev = NULL;  /* prev: path seen on the previous loop iteration */
        unsigned i;
 
-       trans_for_each_iter_inorder(trans, iter, i) {
-               BUG_ON(prev && btree_iter_cmp(prev, iter) > 0);
-               prev = iter;
+       trans_for_each_path_inorder(trans, path, i) {
+               BUG_ON(prev && btree_path_cmp(prev, path) > 0); /* a path must never compare below its predecessor */
+               prev = path;
        }
 #endif
 }
 
-static noinline void __btree_trans_sort_iters(struct btree_trans *trans)
+static noinline void __btree_trans_sort_paths(struct btree_trans *trans)
 {
        int i, l = 0, r = trans->nr_sorted, inc = 1;
        bool swapped;
@@ -2067,11 +2352,11 @@ static noinline void __btree_trans_sort_iters(struct btree_trans *trans)
                for (i = inc > 0 ? l : r - 2;
                     i + 1 < r && i >= l;
                     i += inc) {
-                       if (btree_iter_cmp(trans->iters + trans->sorted[i],
-                                          trans->iters + trans->sorted[i + 1]) > 0) {
+                       if (btree_path_cmp(trans->paths + trans->sorted[i],
+                                          trans->paths + trans->sorted[i + 1]) > 0) {
                                swap(trans->sorted[i], trans->sorted[i + 1]);
-                               trans->iters[trans->sorted[i]].sorted_idx = i;
-                               trans->iters[trans->sorted[i + 1]].sorted_idx = i + 1;
+                               trans->paths[trans->sorted[i]].sorted_idx = i;
+                               trans->paths[trans->sorted[i + 1]].sorted_idx = i + 1;
                                swapped = true;
                        }
                }
@@ -2083,246 +2368,82 @@ static noinline void __btree_trans_sort_iters(struct btree_trans *trans)
                inc = -inc;
        } while (swapped);
 
-       trans->iters_sorted = true;
+       trans->paths_sorted = true;
 
        btree_trans_verify_sorted(trans);
 }
 
-static inline void btree_trans_sort_iters(struct btree_trans *trans)
+static inline void btree_trans_sort_paths(struct btree_trans *trans)
 {
        btree_trans_verify_sorted_refs(trans);
 
-       if (trans->iters_sorted) {
+       if (trans->paths_sorted) {      /* fast path: flag says the order is still valid */
                if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG))
                        btree_trans_verify_sorted(trans);
                return;
        }
-       __btree_trans_sort_iters(trans);
+       __btree_trans_sort_paths(trans);        /* slow path; the helper is noinline and sets paths_sorted */
 }
 
-static inline void btree_iter_list_remove(struct btree_trans *trans,
-                                         struct btree_iter *iter)
+static inline void btree_path_list_remove(struct btree_trans *trans,
+                                         struct btree_path *path)
 {
        unsigned i;
 
-       EBUG_ON(iter->sorted_idx >= trans->nr_sorted);
+       EBUG_ON(path->sorted_idx >= trans->nr_sorted);  /* path must currently occupy a valid sorted[] slot */
 #ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
        trans->nr_sorted--;
-       memmove_u64s_down_small(trans->sorted + iter->sorted_idx,
-                               trans->sorted + iter->sorted_idx + 1,
-                               DIV_ROUND_UP(trans->nr_sorted - iter->sorted_idx, 8));
+       memmove_u64s_down_small(trans->sorted + path->sorted_idx,
+                               trans->sorted + path->sorted_idx + 1,
+                               DIV_ROUND_UP(trans->nr_sorted - path->sorted_idx, 8)); /* tail length in u64 words; the /8 suggests one-byte sorted[] entries - TODO confirm */
 #else
-       array_remove_item(trans->sorted, trans->nr_sorted, iter->sorted_idx);
+       array_remove_item(trans->sorted, trans->nr_sorted, path->sorted_idx);
 #endif
-       for (i = iter->sorted_idx; i < trans->nr_sorted; i++)
-               trans->iters[trans->sorted[i]].sorted_idx = i;
+       for (i = path->sorted_idx; i < trans->nr_sorted; i++)   /* entries after the hole moved down by one */
+               trans->paths[trans->sorted[i]].sorted_idx = i;  /* so refresh their back-pointers */
 
-       iter->sorted_idx = U8_MAX;
+       path->sorted_idx = U8_MAX;      /* sentinel; presumably means "not in sorted[]" - TODO confirm */
 }
 
-static inline void btree_iter_list_add(struct btree_trans *trans,
-                                      struct btree_iter *pos,
-                                      struct btree_iter *iter)
+static inline void btree_path_list_add(struct btree_trans *trans,
+                                      struct btree_path *pos,
+                                      struct btree_path *path)
 {
        unsigned i;
 
-       iter->sorted_idx = pos ? pos->sorted_idx + 1 : 0;
+       path->sorted_idx = pos ? pos->sorted_idx + 1 : 0;       /* slot right after @pos, or slot 0 when pos is NULL */
 
 #ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
-       memmove_u64s_up_small(trans->sorted + iter->sorted_idx + 1,
-                             trans->sorted + iter->sorted_idx,
-                             DIV_ROUND_UP(trans->nr_sorted - iter->sorted_idx, 8));
+       memmove_u64s_up_small(trans->sorted + path->sorted_idx + 1,
+                             trans->sorted + path->sorted_idx,
+                             DIV_ROUND_UP(trans->nr_sorted - path->sorted_idx, 8)); /* tail length in u64 words; the /8 suggests one-byte sorted[] entries - TODO confirm */
        trans->nr_sorted++;
-       trans->sorted[iter->sorted_idx] = iter->idx;
+       trans->sorted[path->sorted_idx] = path->idx;    /* fill the slot opened up by the move */
 #else
-       array_insert_item(trans->sorted, trans->nr_sorted, iter->sorted_idx, iter->idx);
+       array_insert_item(trans->sorted, trans->nr_sorted, path->sorted_idx, path->idx);
 #endif
 
-       for (i = iter->sorted_idx; i < trans->nr_sorted; i++)
-               trans->iters[trans->sorted[i]].sorted_idx = i;
-
-       btree_trans_verify_sorted_refs(trans);
-}
-
-static void btree_iter_child_free(struct btree_trans *trans, struct btree_iter *iter)
-{
-       struct btree_iter *child = btree_iter_child(trans, iter);
-
-       if (child) {
-               bch2_trans_iter_free(trans, child);
-               iter->child_idx = U8_MAX;
-       }
-}
-
-static struct btree_iter *btree_iter_child_alloc(struct btree_trans *trans,
-                                                struct btree_iter *iter,
-                                                unsigned long ip)
-{
-       struct btree_iter *child = btree_iter_child(trans, iter);
-
-       if (!child) {
-               child = btree_trans_iter_alloc(trans, iter);
-               child->ip_allocated     = ip;
-               iter->child_idx         = child->idx;
-
-               trans->iters_live       |= 1ULL << child->idx;
-               trans->iters_touched    |= 1ULL << child->idx;
-       }
-
-       return child;
-}
-
-static inline void __bch2_trans_iter_free(struct btree_trans *trans,
-                                         unsigned idx)
-{
-       btree_iter_child_free(trans, &trans->iters[idx]);
-
-       btree_iter_list_remove(trans, &trans->iters[idx]);
-
-       __bch2_btree_iter_unlock(&trans->iters[idx]);
-       trans->iters_linked             &= ~(1ULL << idx);
-       trans->iters_live               &= ~(1ULL << idx);
-       trans->iters_touched            &= ~(1ULL << idx);
-
-       btree_trans_verify_sorted_refs(trans);
-}
-
-static bool have_iter_at_pos(struct btree_trans *trans,
-                            struct btree_iter *iter)
-{
-       struct btree_iter *n;
-
-       n = prev_btree_iter(trans, iter);
-       if (n && !btree_iter_cmp(n, iter))
-               return true;
-
-       n = next_btree_iter(trans, iter);
-       if (n && !btree_iter_cmp(n, iter))
-               return true;
-
-       return false;
-}
-
-int bch2_trans_iter_put(struct btree_trans *trans,
-                       struct btree_iter *iter)
-{
-       int ret;
-
-       if (IS_ERR_OR_NULL(iter))
-               return 0;
-
-       BUG_ON(trans->iters + iter->idx != iter);
-       BUG_ON(!btree_iter_live(trans, iter));
-
-       ret = btree_iter_err(iter);
-
-       if (!(iter->flags & BTREE_ITER_KEEP_UNTIL_COMMIT) &&
-           (!(trans->iters_touched & (1ULL << iter->idx)) ||
-            have_iter_at_pos(trans, iter)))
-               __bch2_trans_iter_free(trans, iter->idx);
-
-       trans->iters_live       &= ~(1ULL << iter->idx);
-       return ret;
-}
-
-int bch2_trans_iter_free(struct btree_trans *trans,
-                        struct btree_iter *iter)
-{
-       if (IS_ERR_OR_NULL(iter))
-               return 0;
-
-       set_btree_iter_dontneed(trans, iter);
-
-       return bch2_trans_iter_put(trans, iter);
-}
-
-noinline __cold
-void bch2_dump_trans_iters_updates(struct btree_trans *trans)
-{
-       struct btree_iter *iter;
-       struct btree_insert_entry *i;
-       unsigned idx;
-       char buf1[300], buf2[100];
-
-       btree_trans_sort_iters(trans);
-
-       trans_for_each_iter_inorder(trans, iter, idx)
-               printk(KERN_ERR "iter: btree %s pos %s real_pos %s%s%s%s %pS\n",
-                      bch2_btree_ids[iter->btree_id],
-                      (bch2_bpos_to_text(&PBUF(buf1), iter->pos), buf1),
-                      (bch2_bpos_to_text(&PBUF(buf2), iter->real_pos), buf2),
-                      btree_iter_live(trans, iter) ? " live" : "",
-                      (trans->iters_touched & (1ULL << iter->idx)) ? " touched" : "",
-                      iter->flags & BTREE_ITER_KEEP_UNTIL_COMMIT ? " keep" : "",
-                      (void *) iter->ip_allocated);
-
-       trans_for_each_update(trans, i)
-               printk(KERN_ERR "update: btree %s %s %pS\n",
-                      bch2_btree_ids[i->btree_id],
-                      (bch2_bkey_val_to_text(&PBUF(buf1), trans->c, bkey_i_to_s_c(i->k)), buf1),
-                      (void *) i->ip_allocated);
-}
-
-static struct btree_iter *btree_trans_iter_alloc(struct btree_trans *trans,
-                                                struct btree_iter *pos)
-{
-       struct btree_iter *iter;
-       unsigned idx;
+       for (i = path->sorted_idx; i < trans->nr_sorted; i++)   /* the new entry and everything after it */
+               trans->paths[trans->sorted[i]].sorted_idx = i;  /* get their back-pointers refreshed */
 
        btree_trans_verify_sorted_refs(trans);
-
-       if (unlikely(trans->iters_linked ==
-                    ~((~0ULL << 1) << (BTREE_ITER_MAX - 1)))) {
-               bch2_dump_trans_iters_updates(trans);
-               panic("trans iter oveflow\n");
-       }
-
-       idx = __ffs64(~trans->iters_linked);
-       iter = &trans->iters[idx];
-
-       iter->trans             = trans;
-       iter->idx               = idx;
-       iter->child_idx         = U8_MAX;
-       iter->sorted_idx        = U8_MAX;
-       iter->flags             = 0;
-       iter->nodes_locked      = 0;
-       iter->nodes_intent_locked = 0;
-       trans->iters_linked     |= 1ULL << idx;
-
-       btree_iter_list_add(trans, pos, iter);
-       return iter;
 }
 
-static void btree_iter_copy(struct btree_trans *trans, struct btree_iter *dst,
-                           struct btree_iter *src)
+void bch2_trans_iter_exit(struct btree_trans *trans, struct btree_iter *iter)
 {
-       unsigned i, offset = offsetof(struct btree_iter, flags);
-
-       __bch2_btree_iter_unlock(dst);
-       btree_iter_child_free(trans, dst);
-
-       memcpy((void *) dst + offset,
-              (void *) src + offset,
-              sizeof(struct btree_iter) - offset);
-
-       for (i = 0; i < BTREE_MAX_DEPTH; i++)
-               if (btree_node_locked(dst, i))
-                       six_lock_increment(&dst->l[i].b->c.lock,
-                                          __btree_lock_want(dst, i));
-
-       dst->flags &= ~BTREE_ITER_KEEP_UNTIL_COMMIT;
-       trans->iters_sorted = false;
+       if (iter->path)         /* nothing to release when the iterator holds no path */
+               bch2_path_put(trans, iter->path,
+                             iter->flags & BTREE_ITER_INTENT); /* same intent bit that init passes to bch2_path_get() */
+       iter->path = NULL;      /* cleared unconditionally, so a second exit skips the put */
 }
 
-struct btree_iter *__bch2_trans_get_iter(struct btree_trans *trans,
-                                        enum btree_id btree_id, struct bpos pos,
-                                        unsigned locks_want,
-                                        unsigned depth,
-                                        unsigned flags)
+static void __bch2_trans_iter_init(struct btree_trans *trans,
+                                  struct btree_iter *iter,
+                                  enum btree_id btree_id, struct bpos pos,
+                                  unsigned locks_want,
+                                  unsigned depth,
+                                  unsigned flags)
 {
-       struct btree_iter *iter, *best = NULL;
-       struct bpos real_pos, pos_min = POS_MIN;
-
        EBUG_ON(trans->restarted);
 
        if (!(flags & (BTREE_ITER_ALL_SNAPSHOTS|BTREE_ITER_NOT_EXTENTS)) &&
@@ -2337,127 +2458,58 @@ struct btree_iter *__bch2_trans_get_iter(struct btree_trans *trans,
                pos.snapshot = btree_type_has_snapshots(btree_id)
                        ? U32_MAX : 0;
 
-       real_pos = pos;
-
-       if ((flags & BTREE_ITER_IS_EXTENTS) &&
-           bkey_cmp(pos, POS_MAX))
-               real_pos = bpos_nosnap_successor(pos);
-
-       trans_for_each_iter(trans, iter) {
-               if (iter->cached != (flags & BTREE_ITER_CACHED))
-                       continue;
-
-               if (iter->btree_id != btree_id)
-                       continue;
-
-               if (best) {
-                       int cmp = bkey_cmp(bpos_diff(best->real_pos, real_pos),
-                                          bpos_diff(iter->real_pos, real_pos));
-
-                       if (cmp < 0 ||
-                           ((cmp == 0 && btree_iter_keep(trans, iter))))
-                               continue;
-               }
-
-               best = iter;
-       }
-
-       if (!best) {
-               iter = btree_trans_iter_alloc(trans, best);
-               bch2_btree_iter_init(trans, iter, btree_id);
-       } else if (btree_iter_keep(trans, best)) {
-               iter = btree_trans_iter_alloc(trans, best);
-               btree_iter_copy(trans, iter, best);
-       } else {
-               iter = best;
-       }
-
-       trans->iters_live       |= 1ULL << iter->idx;
-       trans->iters_touched    |= 1ULL << iter->idx;
-
-       iter->cached    = flags & BTREE_ITER_CACHED;
+       iter->trans     = trans;
+       iter->path      = NULL;
+       iter->btree_id  = btree_id;
+       iter->min_depth = depth;
        iter->flags     = flags;
        iter->snapshot  = pos.snapshot;
+       iter->pos       = pos;
+       iter->k.type    = KEY_TYPE_deleted;
+       iter->k.p       = pos;
+       iter->k.size    = 0;
 
-       /*
-        * If the iterator has locks_want greater than requested, we explicitly
-        * do not downgrade it here - on transaction restart because btree node
-        * split needs to upgrade locks, we might be putting/getting the
-        * iterator again. Downgrading iterators only happens via an explicit
-        * bch2_trans_downgrade().
-        */
-
-       locks_want = min(locks_want, BTREE_MAX_DEPTH);
-       if (locks_want > iter->locks_want) {
-               iter->locks_want = locks_want;
-               btree_iter_get_locks(trans, iter, true, _THIS_IP_);
-       }
-
-       while (iter->level != depth) {
-               btree_node_unlock(iter, iter->level);
-               iter->l[iter->level].b = BTREE_ITER_NO_NODE_INIT;
-               iter->uptodate = BTREE_ITER_NEED_TRAVERSE;
-               if (iter->level < depth)
-                       iter->level++;
-               else
-                       iter->level--;
-       }
-
-       iter->min_depth = depth;
-
-       bch2_btree_iter_set_pos(iter, pos);
-       btree_iter_set_search_pos(iter, real_pos);
-
-       trace_trans_get_iter(_RET_IP_, trans->ip,
-                            btree_id,
-                            &real_pos, locks_want, iter->uptodate,
-                            best ? &best->real_pos     : &pos_min,
-                            best ? best->locks_want    : U8_MAX,
-                            best ? best->uptodate      : U8_MAX);
-
-       return iter;
+       iter->path = bch2_path_get(trans,
+                                  flags & BTREE_ITER_CACHED,
+                                  btree_id,
+                                  btree_iter_search_key(iter),
+                                  locks_want,
+                                  depth,
+                                  flags & BTREE_ITER_INTENT);
 }
 
-struct btree_iter *bch2_trans_get_node_iter(struct btree_trans *trans,
-                                           enum btree_id btree_id,
-                                           struct bpos pos,
-                                           unsigned locks_want,
-                                           unsigned depth,
-                                           unsigned flags)
+void bch2_trans_iter_init(struct btree_trans *trans,
+                         struct btree_iter *iter,      /* caller-provided storage, filled in by the helper */
+                         unsigned btree_id, struct bpos pos,
+                         unsigned flags)
 {
-       struct btree_iter *iter =
-               __bch2_trans_get_iter(trans, btree_id, pos,
-                                     locks_want, depth,
-                                     BTREE_ITER_NOT_EXTENTS|
-                                     __BTREE_ITER_ALL_SNAPSHOTS|
-                                     BTREE_ITER_ALL_SNAPSHOTS|
-                                     flags);
-
-       BUG_ON(bkey_cmp(iter->pos, pos));
-       BUG_ON(iter->locks_want != min(locks_want, BTREE_MAX_DEPTH));
-       BUG_ON(iter->level      != depth);
-       BUG_ON(iter->min_depth  != depth);
-       iter->ip_allocated = _RET_IP_;
-
-       return iter;
+       __bch2_trans_iter_init(trans, iter, btree_id, pos,
+                              0, 0, flags);    /* locks_want = 0, depth = 0 */
 }
 
-struct btree_iter *__bch2_trans_copy_iter(struct btree_trans *trans,
-                                         struct btree_iter *src)
+void bch2_trans_node_iter_init(struct btree_trans *trans,
+                              struct btree_iter *iter,
+                              enum btree_id btree_id,
+                              struct bpos pos,
+                              unsigned locks_want,
+                              unsigned depth,
+                              unsigned flags)
 {
-       struct btree_iter *iter;
-
-       iter = btree_trans_iter_alloc(trans, src);
-       btree_iter_copy(trans, iter, src);
-
-       trans->iters_live |= 1ULL << iter->idx;
-       /*
-        * We don't need to preserve this iter since it's cheap to copy it
-        * again - this will cause trans_iter_put() to free it right away:
-        */
-       set_btree_iter_dontneed(trans, iter);
+       __bch2_trans_iter_init(trans, iter, btree_id, pos, locks_want, depth,
+                              BTREE_ITER_NOT_EXTENTS|  /* these three flags are always forced on here */
+                              __BTREE_ITER_ALL_SNAPSHOTS|
+                              BTREE_ITER_ALL_SNAPSHOTS|
+                              flags);  /* plus whatever the caller passed */
+       BUG_ON(iter->path->locks_want    < min(locks_want, BTREE_MAX_DEPTH)); /* lower bound only: more locks than requested is accepted */
+       BUG_ON(iter->path->level        != depth);
+       BUG_ON(iter->min_depth          != depth);
+}
 
-       return iter;
+void bch2_trans_copy_iter(struct btree_iter *dst, struct btree_iter *src)
+{
+       *dst = *src;    /* plain struct copy: dst->path now aliases src->path */
+       if (src->path)  /* both iterators point at the path, so account for the new user */
+               __btree_path_get(src->path, src->flags & BTREE_ITER_INTENT); /* presumably takes a reference - TODO confirm against helper */
 }
 
 void *bch2_trans_kmalloc(struct btree_trans *trans, size_t size)
@@ -2498,20 +2550,6 @@ void *bch2_trans_kmalloc(struct btree_trans *trans, size_t size)
        return p;
 }
 
-inline void bch2_trans_unlink_iters(struct btree_trans *trans)
-{
-       u64 iters = trans->iters_linked &
-               ~trans->iters_touched &
-               ~trans->iters_live;
-
-       while (iters) {
-               unsigned idx = __ffs64(iters);
-
-               iters &= ~(1ULL << idx);
-               __bch2_trans_iter_free(trans, idx);
-       }
-}
-
 /**
  * bch2_trans_begin() - reset a transaction after a interrupted attempt
  * @trans: transaction to reset
@@ -2522,17 +2560,11 @@ inline void bch2_trans_unlink_iters(struct btree_trans *trans)
  */
 void bch2_trans_begin(struct btree_trans *trans)
 {
-       struct btree_iter *iter;
-
-       trans_for_each_iter(trans, iter)
-               iter->flags &= ~BTREE_ITER_KEEP_UNTIL_COMMIT;
+       struct btree_insert_entry *i;
+       struct btree_path *path;
 
-       /*
-        * XXX: we shouldn't be doing this if the transaction was restarted, but
-        * currently we still overflow transaction iterators if we do that
-        * */
-       bch2_trans_unlink_iters(trans);
-       trans->iters_touched &= trans->iters_live;
+       trans_for_each_update(trans, i)
+               __btree_path_put(i->path, true);
 
        trans->extra_journal_res        = 0;
        trans->nr_updates               = 0;
@@ -2550,29 +2582,41 @@ void bch2_trans_begin(struct btree_trans *trans)
                       (void *) &trans->fs_usage_deltas->memset_start);
        }
 
+       trans_for_each_path(trans, path) {
+               /*
+                * XXX: we probably shouldn't be doing this if the transaction
+                * was restarted, but currently we still overflow transaction
+                * iterators if we do that
+                */
+               if (!path->ref && !path->preserve)
+                       __bch2_path_free(trans, path);
+               else
+                       path->preserve = path->should_be_locked = false;
+       }
+
        bch2_trans_cond_resched(trans);
 
        if (trans->restarted)
-               bch2_btree_iter_traverse_all(trans);
+               bch2_btree_path_traverse_all(trans);
 
        trans->restarted = false;
 }
 
-static void bch2_trans_alloc_iters(struct btree_trans *trans, struct bch_fs *c)
+static void bch2_trans_alloc_paths(struct btree_trans *trans, struct bch_fs *c)
 {
-       size_t iters_bytes      = sizeof(struct btree_iter) * BTREE_ITER_MAX;
+       size_t paths_bytes      = sizeof(struct btree_path) * BTREE_ITER_MAX;  /* room for the maximum number of paths */
        size_t updates_bytes    = sizeof(struct btree_insert_entry) * BTREE_ITER_MAX;
        void *p = NULL;
 
        BUG_ON(trans->used_mempool);
 
 #ifdef __KERNEL__
-       p = this_cpu_xchg(c->btree_iters_bufs->iter, NULL);
+       p = this_cpu_xchg(c->btree_paths_bufs->path, NULL);     /* take this CPU's cached buffer, if any */
 #endif
        if (!p)
-               p = mempool_alloc(&trans->c->btree_iters_pool, GFP_NOFS);
+               p = mempool_alloc(&trans->c->btree_paths_pool, GFP_NOFS);       /* no cached buffer: fall back to the mempool */
 
-       trans->iters            = p; p += iters_bytes;
+       trans->paths            = p; p += paths_bytes;  /* paths sit at the front of the buffer; updates follow */
        trans->updates          = p; p += updates_bytes;
 }
 
@@ -2585,11 +2629,7 @@ void bch2_trans_init(struct btree_trans *trans, struct bch_fs *c,
        trans->c                = c;
        trans->ip               = _RET_IP_;
 
-       /*
-        * reallocating iterators currently completely breaks
-        * bch2_trans_iter_put(), we always allocate the max:
-        */
-       bch2_trans_alloc_iters(trans, c);
+       bch2_trans_alloc_paths(trans, c);
 
        if (expected_mem_bytes) {
                expected_mem_bytes = roundup_pow_of_two(expected_mem_bytes);
@@ -2613,54 +2653,63 @@ void bch2_trans_init(struct btree_trans *trans, struct bch_fs *c,
 #endif
 }
 
+static void check_btree_paths_leaked(struct btree_trans *trans)
+{
+#ifdef CONFIG_BCACHEFS_DEBUG   /* the whole check compiles away without this option */
+       struct bch_fs *c = trans->c;
+       struct btree_path *path;
+
+       trans_for_each_path(trans, path)
+               if (path->ref)          /* nonzero ref on any path is treated as a leak */
+                       goto leaked;
+       return;
+leaked:
+       bch_err(c, "btree paths leaked from %pS!", (void *) trans->ip);
+       trans_for_each_path(trans, path)        /* second pass: print one line per leaked path */
+               if (path->ref)
+                       printk(KERN_ERR "  btree %s %pS\n",
+                              bch2_btree_ids[path->btree_id],
+                              (void *) path->ip_allocated);
+       /* Be noisy about this: escalate via bch2_fatal_error() instead of only logging */
+       bch2_fatal_error(c);
+#endif
+}
+
 int bch2_trans_exit(struct btree_trans *trans)
        __releases(&c->btree_trans_barrier)
 {
+       struct btree_insert_entry *i;
        struct bch_fs *c = trans->c;
 
        bch2_trans_unlock(trans);
 
-#ifdef CONFIG_BCACHEFS_DEBUG
-       if (trans->iters_live) {
-               struct btree_iter *iter;
-
-               trans_for_each_iter(trans, iter)
-                       btree_iter_child_free(trans, iter);
-       }
+       trans_for_each_update(trans, i)
+               __btree_path_put(i->path, true);
+       trans->nr_updates               = 0;
 
-       if (trans->iters_live) {
-               struct btree_iter *iter;
-
-               bch_err(c, "btree iterators leaked!");
-               trans_for_each_iter(trans, iter)
-                       if (btree_iter_live(trans, iter))
-                               printk(KERN_ERR "  btree %s allocated at %pS\n",
-                                      bch2_btree_ids[iter->btree_id],
-                                      (void *) iter->ip_allocated);
-               /* Be noisy about this: */
-               bch2_fatal_error(c);
-       }
+       check_btree_paths_leaked(trans);
 
-       mutex_lock(&trans->c->btree_trans_lock);
+#ifdef CONFIG_BCACHEFS_DEBUG
+       mutex_lock(&c->btree_trans_lock);
        list_del(&trans->list);
-       mutex_unlock(&trans->c->btree_trans_lock);
+       mutex_unlock(&c->btree_trans_lock);
 #endif
 
        srcu_read_unlock(&c->btree_trans_barrier, trans->srcu_idx);
 
-       bch2_journal_preres_put(&trans->c->journal, &trans->journal_preres);
+       bch2_journal_preres_put(&c->journal, &trans->journal_preres);
 
        if (trans->fs_usage_deltas) {
                if (trans->fs_usage_deltas->size + sizeof(trans->fs_usage_deltas) ==
                    REPLICAS_DELTA_LIST_MAX)
                        mempool_free(trans->fs_usage_deltas,
-                                    &trans->c->replicas_delta_pool);
+                                    &c->replicas_delta_pool);
                else
                        kfree(trans->fs_usage_deltas);
        }
 
        if (trans->mem_bytes == BTREE_TRANS_MEM_MAX)
-               mempool_free(trans->mem, &trans->c->btree_trans_mem_pool);
+               mempool_free(trans->mem, &c->btree_trans_mem_pool);
        else
                kfree(trans->mem);
 
@@ -2668,20 +2717,20 @@ int bch2_trans_exit(struct btree_trans *trans)
        /*
         * Userspace doesn't have a real percpu implementation:
         */
-       trans->iters = this_cpu_xchg(c->btree_iters_bufs->iter, trans->iters);
+       trans->paths = this_cpu_xchg(c->btree_paths_bufs->path, trans->paths);
 #endif
 
-       if (trans->iters)
-               mempool_free(trans->iters, &trans->c->btree_iters_pool);
+       if (trans->paths)
+               mempool_free(trans->paths, &c->btree_paths_pool);
 
        trans->mem      = (void *) 0x1;
-       trans->iters    = (void *) 0x1;
+       trans->paths    = (void *) 0x1;
 
        return trans->error ? -EIO : 0;
 }
 
 static void __maybe_unused
-bch2_btree_iter_node_to_text(struct printbuf *out,
+bch2_btree_path_node_to_text(struct printbuf *out,
                             struct btree_bkey_cached_common *_b,
                             bool cached)
 {
@@ -2693,10 +2742,10 @@ bch2_btree_iter_node_to_text(struct printbuf *out,
 #ifdef CONFIG_BCACHEFS_DEBUG
 static bool trans_has_locks(struct btree_trans *trans)
 {
-       struct btree_iter *iter;
+       struct btree_path *path;
 
-       trans_for_each_iter(trans, iter)
-               if (iter->nodes_locked)
+       trans_for_each_path(trans, path)        /* true as soon as one path reports a lock */
+               if (path->nodes_locked)         /* any nonzero value counts as "holds a lock" */
                        return true;
        return false;
 }
@@ -2706,7 +2755,7 @@ void bch2_btree_trans_to_text(struct printbuf *out, struct bch_fs *c)
 {
 #ifdef CONFIG_BCACHEFS_DEBUG
        struct btree_trans *trans;
-       struct btree_iter *iter;
+       struct btree_path *path;
        struct btree *b;
        unsigned l;
 
@@ -2717,24 +2766,24 @@ void bch2_btree_trans_to_text(struct printbuf *out, struct bch_fs *c)
 
                pr_buf(out, "%i %ps\n", trans->pid, (void *) trans->ip);
 
-               trans_for_each_iter(trans, iter) {
-                       if (!iter->nodes_locked)
+               trans_for_each_path(trans, path) {
+                       if (!path->nodes_locked)
                                continue;
 
-                       pr_buf(out, "  iter %u %c %s:",
-                              iter->idx,
-                              iter->cached ? 'c' : 'b',
-                              bch2_btree_ids[iter->btree_id]);
-                       bch2_bpos_to_text(out, iter->pos);
+                       pr_buf(out, "  path %u %c %s:",
+                              path->idx,
+                              path->cached ? 'c' : 'b',
+                              bch2_btree_ids[path->btree_id]);
+                       bch2_bpos_to_text(out, path->pos);
                        pr_buf(out, "\n");
 
                        for (l = 0; l < BTREE_MAX_DEPTH; l++) {
-                               if (btree_node_locked(iter, l)) {
+                               if (btree_node_locked(path, l)) {
                                        pr_buf(out, "    %s l=%u ",
-                                              btree_node_intent_locked(iter, l) ? "i" : "r", l);
-                                       bch2_btree_iter_node_to_text(out,
-                                                       (void *) iter->l[l].b,
-                                                       iter->cached);
+                                              btree_node_intent_locked(path, l) ? "i" : "r", l);
+                                       bch2_btree_path_node_to_text(out,
+                                                       (void *) path->l[l].b,
+                                                       path->cached);
                                        pr_buf(out, "\n");
                                }
                        }
@@ -2742,18 +2791,17 @@ void bch2_btree_trans_to_text(struct printbuf *out, struct bch_fs *c)
 
                b = READ_ONCE(trans->locking);
                if (b) {
-                       iter = &trans->iters[trans->locking_iter_idx];
-                       pr_buf(out, "  locking iter %u %c l=%u %s:",
-                              trans->locking_iter_idx,
-                              iter->cached ? 'c' : 'b',
+                       path = &trans->paths[trans->locking_path_idx];
+                       pr_buf(out, "  locking path %u %c l=%u %s:",
+                              trans->locking_path_idx,
+                              path->cached ? 'c' : 'b',
                               trans->locking_level,
                               bch2_btree_ids[trans->locking_btree_id]);
                        bch2_bpos_to_text(out, trans->locking_pos);
 
                        pr_buf(out, " node ");
-                       bch2_btree_iter_node_to_text(out,
-                                       (void *) b,
-                                       iter->cached);
+                       bch2_btree_path_node_to_text(out,
+                                       (void *) b, path->cached);
                        pr_buf(out, "\n");
                }
        }
@@ -2764,7 +2812,7 @@ void bch2_btree_trans_to_text(struct printbuf *out, struct bch_fs *c)
 void bch2_fs_btree_iter_exit(struct bch_fs *c)
 {
        mempool_exit(&c->btree_trans_mem_pool);
-       mempool_exit(&c->btree_iters_pool);
+       mempool_exit(&c->btree_paths_pool);
        cleanup_srcu_struct(&c->btree_trans_barrier);
 }
 
@@ -2776,8 +2824,8 @@ int bch2_fs_btree_iter_init(struct bch_fs *c)
        mutex_init(&c->btree_trans_lock);
 
        return  init_srcu_struct(&c->btree_trans_barrier) ?:
-               mempool_init_kmalloc_pool(&c->btree_iters_pool, 1,
-                       sizeof(struct btree_iter) * nr +
+               mempool_init_kmalloc_pool(&c->btree_paths_pool, 1,
+                       sizeof(struct btree_path) * nr +
                        sizeof(struct btree_insert_entry) * nr) ?:
                mempool_init_kmalloc_pool(&c->btree_trans_mem_pool, 1,
                                          BTREE_TRANS_MEM_MAX);
index 4ba55e02d4b74f49d5eb44ab3484a9393a4c0158..983d611224580fbac654d749a07decc15df1a3e3 100644 (file)
@@ -5,40 +5,49 @@
 #include "bset.h"
 #include "btree_types.h"
 
-static inline void btree_iter_set_dirty(struct btree_iter *iter,
-                                       enum btree_iter_uptodate u)
+static inline void __btree_path_get(struct btree_path *path, bool intent)
 {
-       iter->uptodate = max_t(unsigned, iter->uptodate, u);
+       path->ref++;
+       path->intent_ref += intent;
 }
 
-static inline struct btree *btree_iter_node(struct btree_iter *iter,
+static inline bool __btree_path_put(struct btree_path *path, bool intent)
+{
+       EBUG_ON(!path->ref);
+       EBUG_ON(!path->intent_ref && intent);
+       path->intent_ref -= intent;
+       return --path->ref == 0;
+}
+
+static inline void btree_path_set_dirty(struct btree_path *path,
+                                       enum btree_path_uptodate u)
+{
+       path->uptodate = max_t(unsigned, path->uptodate, u);
+}
+
+static inline struct btree *btree_path_node(struct btree_path *path,
                                            unsigned level)
 {
-       return level < BTREE_MAX_DEPTH ? iter->l[level].b : NULL;
+       return level < BTREE_MAX_DEPTH ? path->l[level].b : NULL;
 }
 
-static inline bool btree_node_lock_seq_matches(const struct btree_iter *iter,
+static inline bool btree_node_lock_seq_matches(const struct btree_path *path,
                                        const struct btree *b, unsigned level)
 {
        /*
         * We don't compare the low bits of the lock sequence numbers because
-        * @iter might have taken a write lock on @b, and we don't want to skip
-        * the linked iterator if the sequence numbers were equal before taking
-        * that write lock. The lock sequence number is incremented by taking
-        * and releasing write locks and is even when unlocked:
+        * @path might have taken a write lock on @b, and we don't want to skip
+        * the linked path if the sequence numbers were equal before taking that
+        * write lock. The lock sequence number is incremented by taking and
+        * releasing write locks and is even when unlocked:
         */
-       return iter->l[level].lock_seq >> 1 == b->c.lock.state.seq >> 1;
+       return path->l[level].lock_seq >> 1 == b->c.lock.state.seq >> 1;
 }
 
-static inline struct btree *btree_node_parent(struct btree_iter *iter,
+static inline struct btree *btree_node_parent(struct btree_path *path,
                                              struct btree *b)
 {
-       return btree_iter_node(iter, b->c.level + 1);
-}
-
-static inline bool btree_trans_has_multiple_iters(const struct btree_trans *trans)
-{
-       return hweight64(trans->iters_linked) > 1;
+       return btree_path_node(path, b->c.level + 1);
 }
 
 static inline int btree_iter_err(const struct btree_iter *iter)
@@ -46,102 +55,121 @@ static inline int btree_iter_err(const struct btree_iter *iter)
        return iter->flags & BTREE_ITER_ERROR ? -EIO : 0;
 }
 
-/* Iterate over iters within a transaction: */
+/* Iterate over paths within a transaction: */
 
-static inline struct btree_iter *
-__trans_next_iter(struct btree_trans *trans, unsigned idx)
+static inline struct btree_path *
+__trans_next_path(struct btree_trans *trans, unsigned idx)
 {
        u64 l;
 
        if (idx == BTREE_ITER_MAX)
                return NULL;
 
-       l = trans->iters_linked >> idx;
+       l = trans->paths_allocated >> idx;
        if (!l)
                return NULL;
 
        idx += __ffs64(l);
        EBUG_ON(idx >= BTREE_ITER_MAX);
-       EBUG_ON(trans->iters[idx].idx != idx);
-       return &trans->iters[idx];
+       EBUG_ON(trans->paths[idx].idx != idx);
+       return &trans->paths[idx];
 }
 
-#define trans_for_each_iter(_trans, _iter)                             \
-       for (_iter = __trans_next_iter((_trans), 0);                    \
-            (_iter);                                                   \
-            _iter = __trans_next_iter((_trans), (_iter)->idx + 1))
+#define trans_for_each_path(_trans, _path)                             \
+       for (_path = __trans_next_path((_trans), 0);                    \
+            (_path);                                                   \
+            _path = __trans_next_path((_trans), (_path)->idx + 1))
 
-static inline struct btree_iter *next_btree_iter(struct btree_trans *trans, struct btree_iter *iter)
+static inline struct btree_path *next_btree_path(struct btree_trans *trans, struct btree_path *path)
 {
-       unsigned idx = iter ? iter->sorted_idx + 1 : 0;
+       unsigned idx = path ? path->sorted_idx + 1 : 0;
 
        EBUG_ON(idx > trans->nr_sorted);
 
        return idx < trans->nr_sorted
-               ? trans->iters + trans->sorted[idx]
+               ? trans->paths + trans->sorted[idx]
                : NULL;
 }
 
-static inline struct btree_iter *prev_btree_iter(struct btree_trans *trans, struct btree_iter *iter)
+static inline struct btree_path *prev_btree_path(struct btree_trans *trans, struct btree_path *path)
 {
-       unsigned idx = iter ? iter->sorted_idx : trans->nr_sorted;
+       unsigned idx = path ? path->sorted_idx : trans->nr_sorted;
 
        return idx
-               ? trans->iters + trans->sorted[idx - 1]
+               ? trans->paths + trans->sorted[idx - 1]
                : NULL;
 }
 
-#define trans_for_each_iter_inorder(_trans, _iter, _i)                 \
+#define trans_for_each_path_inorder(_trans, _path, _i)                 \
        for (_i = 0;                                                    \
-            ((_iter) = (_trans)->iters + trans->sorted[_i]), (_i) < (_trans)->nr_sorted;\
+            ((_path) = (_trans)->paths + (_trans)->sorted[_i]), (_i) < (_trans)->nr_sorted;\
             _i++)
 
-#define trans_for_each_iter_inorder_reverse(_trans, _iter, _i)         \
-       for (_i = trans->nr_sorted - 1;                                 \
-            ((_iter) = (_trans)->iters + trans->sorted[_i]), (_i) >= 0;\
+#define trans_for_each_path_inorder_reverse(_trans, _path, _i)         \
+       for (_i = (_trans)->nr_sorted - 1;                              \
+            ((_path) = (_trans)->paths + (_trans)->sorted[_i]), (_i) >= 0;\
             --_i)
 
-static inline bool __iter_has_node(const struct btree_iter *iter,
+static inline bool __path_has_node(const struct btree_path *path,
                                   const struct btree *b)
 {
-       return iter->l[b->c.level].b == b &&
-               btree_node_lock_seq_matches(iter, b, b->c.level);
+       return path->l[b->c.level].b == b &&
+               btree_node_lock_seq_matches(path, b, b->c.level);
 }
 
-static inline struct btree_iter *
-__trans_next_iter_with_node(struct btree_trans *trans, struct btree *b,
+static inline struct btree_path *
+__trans_next_path_with_node(struct btree_trans *trans, struct btree *b,
                            unsigned idx)
 {
-       struct btree_iter *iter = __trans_next_iter(trans, idx);
+       struct btree_path *path = __trans_next_path(trans, idx);
+
+       while (path && !__path_has_node(path, b))
+               path = __trans_next_path(trans, path->idx + 1);
 
-       while (iter && !__iter_has_node(iter, b))
-               iter = __trans_next_iter(trans, iter->idx + 1);
+       return path;
+}
+
+#define trans_for_each_path_with_node(_trans, _b, _path)               \
+       for (_path = __trans_next_path_with_node((_trans), (_b), 0);    \
+            (_path);                                                   \
+            _path = __trans_next_path_with_node((_trans), (_b),        \
+                                                (_path)->idx + 1))
+
+struct btree_path *__bch2_btree_path_make_mut(struct btree_trans *,
+                                             struct btree_path *, bool);
 
-       return iter;
+static inline struct btree_path * __must_check
+bch2_btree_path_make_mut(struct btree_trans *trans,
+                        struct btree_path *path, bool intent)
+{
+       if (path->ref > 1 || path->preserve)
+               path = __bch2_btree_path_make_mut(trans, path, intent);
+       return path;
 }
 
-#define trans_for_each_iter_with_node(_trans, _b, _iter)               \
-       for (_iter = __trans_next_iter_with_node((_trans), (_b), 0);    \
-            (_iter);                                                   \
-            _iter = __trans_next_iter_with_node((_trans), (_b),        \
-                                                (_iter)->idx + 1))
+int __must_check bch2_btree_path_traverse(struct btree_trans *,
+                                         struct btree_path *, unsigned);
+struct btree_path *bch2_path_get(struct btree_trans *, bool, enum btree_id,
+                                struct bpos, unsigned, unsigned, bool);
+struct bkey_s_c bch2_btree_path_peek_slot(struct btree_path *, struct bkey *);
 
 #ifdef CONFIG_BCACHEFS_DEBUG
-void bch2_trans_verify_iters(struct btree_trans *, struct btree *);
+void bch2_trans_verify_paths(struct btree_trans *);
 void bch2_trans_verify_locks(struct btree_trans *);
 #else
-static inline void bch2_trans_verify_iters(struct btree_trans *trans,
-                                          struct btree *b) {}
-static inline void bch2_trans_verify_locks(struct btree_trans *iter) {}
+static inline void bch2_trans_verify_paths(struct btree_trans *trans) {}
+static inline void bch2_trans_verify_locks(struct btree_trans *trans) {}
 #endif
 
-void bch2_btree_iter_fix_key_modified(struct btree_trans *trans,
+void bch2_btree_path_fix_key_modified(struct btree_trans *trans,
                                      struct btree *, struct bkey_packed *);
-void bch2_btree_node_iter_fix(struct btree_trans *trans, struct btree_iter *,
+void bch2_btree_node_iter_fix(struct btree_trans *trans, struct btree_path *,
                              struct btree *, struct btree_node_iter *,
                              struct bkey_packed *, unsigned, unsigned);
 
-bool bch2_btree_iter_relock_intent(struct btree_trans *, struct btree_iter *);
+bool bch2_btree_path_relock_intent(struct btree_trans *, struct btree_path *);
+
+void bch2_path_put(struct btree_trans *, struct btree_path *, bool);
 
 bool bch2_trans_relock(struct btree_trans *);
 void bch2_trans_unlock(struct btree_trans *);
@@ -154,28 +182,28 @@ static inline int btree_trans_restart(struct btree_trans *trans)
        return -EINTR;
 }
 
-bool __bch2_btree_iter_upgrade(struct btree_trans *,
-                              struct btree_iter *, unsigned);
+bool __bch2_btree_path_upgrade(struct btree_trans *,
+                              struct btree_path *, unsigned);
 
-static inline bool bch2_btree_iter_upgrade(struct btree_trans *trans,
-                                          struct btree_iter *iter,
+static inline bool bch2_btree_path_upgrade(struct btree_trans *trans,
+                                          struct btree_path *path,
                                           unsigned new_locks_want)
 {
        new_locks_want = min(new_locks_want, BTREE_MAX_DEPTH);
 
-       return iter->locks_want < new_locks_want
-               ? __bch2_btree_iter_upgrade(trans, iter, new_locks_want)
-               : iter->uptodate == BTREE_ITER_UPTODATE;
+       return path->locks_want < new_locks_want
+               ? __bch2_btree_path_upgrade(trans, path, new_locks_want)
+               : path->uptodate == BTREE_ITER_UPTODATE;
 }
 
-void __bch2_btree_iter_downgrade(struct btree_iter *, unsigned);
+void __bch2_btree_path_downgrade(struct btree_path *, unsigned);
 
-static inline void bch2_btree_iter_downgrade(struct btree_iter *iter)
+static inline void bch2_btree_path_downgrade(struct btree_path *path)
 {
-       unsigned new_locks_want = iter->level + !!(iter->flags & BTREE_ITER_INTENT);
+       unsigned new_locks_want = path->level + !!path->intent_ref;
 
-       if (iter->locks_want > new_locks_want)
-               __bch2_btree_iter_downgrade(iter, new_locks_want);
+       if (path->locks_want > new_locks_want)
+               __bch2_btree_path_downgrade(path, new_locks_want);
 }
 
 void bch2_trans_downgrade(struct btree_trans *);
@@ -212,7 +240,8 @@ static inline void bch2_btree_iter_set_pos(struct btree_iter *iter, struct bpos
        iter->k.p.offset        = iter->pos.offset      = new_pos.offset;
        iter->k.p.snapshot      = iter->pos.snapshot    = new_pos.snapshot;
        iter->k.size = 0;
-       iter->should_be_locked = false;
+       if (iter->path->ref == 1)
+               iter->path->should_be_locked = false;
 }
 
 static inline void bch2_btree_iter_set_pos_to_extent_start(struct btree_iter *iter)
@@ -221,17 +250,6 @@ static inline void bch2_btree_iter_set_pos_to_extent_start(struct btree_iter *it
        iter->pos = bkey_start_pos(&iter->k);
 }
 
-static inline struct btree_iter *idx_to_btree_iter(struct btree_trans *trans, unsigned idx)
-{
-       return idx != U8_MAX ? trans->iters + idx : NULL;
-}
-
-static inline struct btree_iter *btree_iter_child(struct btree_trans *trans,
-                                                 struct btree_iter *iter)
-{
-       return idx_to_btree_iter(trans, iter->child_idx);
-}
-
 /*
  * Unlocks before scheduling
  * Note: does not revalidate iterator
@@ -249,11 +267,11 @@ static inline int bch2_trans_cond_resched(struct btree_trans *trans)
 
 #define __for_each_btree_node(_trans, _iter, _btree_id, _start,        \
                              _locks_want, _depth, _flags, _b)          \
-       for (iter = bch2_trans_get_node_iter((_trans), (_btree_id),     \
+       for (bch2_trans_node_iter_init((_trans), &(_iter), (_btree_id), \
                                _start, _locks_want, _depth, _flags),   \
-            _b = bch2_btree_iter_peek_node(_iter);                     \
+            _b = bch2_btree_iter_peek_node(&(_iter));                  \
             (_b);                                                      \
-            (_b) = bch2_btree_iter_next_node(_iter))
+            (_b) = bch2_btree_iter_next_node(&(_iter)))
 
 #define for_each_btree_node(_trans, _iter, _btree_id, _start,          \
                            _flags, _b)                                 \
@@ -283,77 +301,36 @@ static inline int bkey_err(struct bkey_s_c k)
 
 #define for_each_btree_key(_trans, _iter, _btree_id,                   \
                           _start, _flags, _k, _ret)                    \
-       for ((_iter) = bch2_trans_get_iter((_trans), (_btree_id),       \
-                                          (_start), (_flags)),         \
-            (_k) = __bch2_btree_iter_peek(_iter, _flags);              \
+       for (bch2_trans_iter_init((_trans), &(_iter), (_btree_id),      \
+                                 (_start), (_flags)),                  \
+            (_k) = __bch2_btree_iter_peek(&(_iter), _flags);           \
             !((_ret) = bkey_err(_k)) && (_k).k;                        \
-            (_k) = __bch2_btree_iter_next(_iter, _flags))
+            (_k) = __bch2_btree_iter_next(&(_iter), _flags))
 
 #define for_each_btree_key_continue(_iter, _flags, _k, _ret)           \
-       for ((_k) = __bch2_btree_iter_peek(_iter, _flags);              \
+       for ((_k) = __bch2_btree_iter_peek(&(_iter), _flags);           \
             !((_ret) = bkey_err(_k)) && (_k).k;                        \
-            (_k) = __bch2_btree_iter_next(_iter, _flags))
+            (_k) = __bch2_btree_iter_next(&(_iter), _flags))
 
 /* new multiple iterator interface: */
 
-void bch2_dump_trans_iters_updates(struct btree_trans *);
-
-int bch2_trans_iter_put(struct btree_trans *, struct btree_iter *);
-int bch2_trans_iter_free(struct btree_trans *, struct btree_iter *);
-
-void bch2_trans_unlink_iters(struct btree_trans *);
-
-struct btree_iter *__bch2_trans_get_iter(struct btree_trans *, enum btree_id,
-                                        struct bpos, unsigned,
-                                        unsigned, unsigned);
-
-static inline struct btree_iter *
-bch2_trans_get_iter(struct btree_trans *trans, enum btree_id btree_id,
-                   struct bpos pos, unsigned flags)
-{
-       struct btree_iter *iter =
-               __bch2_trans_get_iter(trans, btree_id, pos,
-                                     (flags & BTREE_ITER_INTENT) != 0, 0,
-                                     flags);
-       iter->ip_allocated = _THIS_IP_;
-       return iter;
-}
-
-struct btree_iter *__bch2_trans_copy_iter(struct btree_trans *,
-                                         struct btree_iter *);
-static inline struct btree_iter *
-bch2_trans_copy_iter(struct btree_trans *trans, struct btree_iter *src)
-{
-       struct btree_iter *iter =
-               __bch2_trans_copy_iter(trans, src);
-
-       iter->ip_allocated = _THIS_IP_;
-       return iter;
-}
-
-struct btree_iter *bch2_trans_get_node_iter(struct btree_trans *,
-                               enum btree_id, struct bpos,
-                               unsigned, unsigned, unsigned);
-
-static inline bool btree_iter_live(struct btree_trans *trans, struct btree_iter *iter)
-{
-       return (trans->iters_live & (1ULL << iter->idx)) != 0;
-}
+void bch2_dump_trans_paths_updates(struct btree_trans *);
 
-static inline bool btree_iter_keep(struct btree_trans *trans, struct btree_iter *iter)
-{
-       return btree_iter_live(trans, iter) ||
-               (iter->flags & BTREE_ITER_KEEP_UNTIL_COMMIT);
-}
+void bch2_trans_iter_exit(struct btree_trans *, struct btree_iter *);
+void bch2_trans_iter_init(struct btree_trans *, struct btree_iter *,
+                         unsigned, struct bpos, unsigned);
+void bch2_trans_node_iter_init(struct btree_trans *, struct btree_iter *,
+                              enum btree_id, struct bpos,
+                              unsigned, unsigned, unsigned);
+void bch2_trans_copy_iter(struct btree_iter *, struct btree_iter *);
 
-static inline void set_btree_iter_dontneed(struct btree_trans *trans, struct btree_iter *iter)
+static inline void set_btree_iter_dontneed(struct btree_iter *iter)
 {
-       trans->iters_touched &= ~(1ULL << iter->idx);
+       iter->path->preserve = false;
 }
 
-void bch2_trans_begin(struct btree_trans *);
-
 void *bch2_trans_kmalloc(struct btree_trans *, size_t);
+void bch2_trans_begin(struct btree_trans *);
 void bch2_trans_init(struct btree_trans *, struct bch_fs *, unsigned, size_t);
 int bch2_trans_exit(struct btree_trans *);
 
index 61210db57f56472cd55caf70876f1c343b1e6d87..9bdc2c3f21bf4869bf9af0ca45c8080edca6cef2 100644 (file)
@@ -197,23 +197,23 @@ btree_key_cache_create(struct btree_key_cache *c,
 }
 
 static int btree_key_cache_fill(struct btree_trans *trans,
-                               struct btree_iter *ck_iter,
+                               struct btree_path *ck_path,
                                struct bkey_cached *ck)
 {
-       struct btree_iter *iter;
+       struct btree_iter iter;
        struct bkey_s_c k;
        unsigned new_u64s = 0;
        struct bkey_i *new_k = NULL;
        int ret;
 
-       iter = bch2_trans_get_iter(trans, ck->key.btree_id,
-                                  ck->key.pos, BTREE_ITER_SLOTS);
-       k = bch2_btree_iter_peek_slot(iter);
+       bch2_trans_iter_init(trans, &iter, ck->key.btree_id,
+                            ck->key.pos, BTREE_ITER_SLOTS);
+       k = bch2_btree_iter_peek_slot(&iter);
        ret = bkey_err(k);
        if (ret)
                goto err;
 
-       if (!bch2_btree_node_relock(trans, ck_iter, 0)) {
+       if (!bch2_btree_node_relock(trans, ck_path, 0)) {
                trace_transaction_restart_ip(trans->ip, _THIS_IP_);
                ret = btree_trans_restart(trans);
                goto err;
@@ -238,7 +238,7 @@ static int btree_key_cache_fill(struct btree_trans *trans,
         * XXX: not allowed to be holding read locks when we take a write lock,
         * currently
         */
-       bch2_btree_node_lock_write(trans, ck_iter, ck_iter->l[0].b);
+       bch2_btree_node_lock_write(trans, ck_path, ck_path->l[0].b);
        if (new_k) {
                kfree(ck->k);
                ck->u64s = new_u64s;
@@ -247,62 +247,64 @@ static int btree_key_cache_fill(struct btree_trans *trans,
 
        bkey_reassemble(ck->k, k);
        ck->valid = true;
-       bch2_btree_node_unlock_write(trans, ck_iter, ck_iter->l[0].b);
+       bch2_btree_node_unlock_write(trans, ck_path, ck_path->l[0].b);
 
        /* We're not likely to need this iterator again: */
-       set_btree_iter_dontneed(trans, iter);
+       set_btree_iter_dontneed(&iter);
 err:
-       bch2_trans_iter_put(trans, iter);
+       bch2_trans_iter_exit(trans, &iter);
        return ret;
 }
 
 static int bkey_cached_check_fn(struct six_lock *lock, void *p)
 {
        struct bkey_cached *ck = container_of(lock, struct bkey_cached, c.lock);
-       const struct btree_iter *iter = p;
+       const struct btree_path *path = p;
 
-       return ck->key.btree_id == iter->btree_id &&
-               !bpos_cmp(ck->key.pos, iter->pos) ? 0 : -1;
+       return ck->key.btree_id == path->btree_id &&
+               !bpos_cmp(ck->key.pos, path->pos) ? 0 : -1;
 }
 
 __flatten
-int bch2_btree_iter_traverse_cached(struct btree_trans *trans, struct btree_iter *iter)
+int bch2_btree_path_traverse_cached(struct btree_trans *trans, struct btree_path *path,
+                                   unsigned flags)
 {
        struct bch_fs *c = trans->c;
        struct bkey_cached *ck;
        int ret = 0;
 
-       BUG_ON(iter->level);
+       BUG_ON(path->level);
 
-       iter->l[1].b = NULL;
+       path->l[1].b = NULL;
 
-       if (bch2_btree_node_relock(trans, iter, 0)) {
-               ck = (void *) iter->l[0].b;
+       if (bch2_btree_node_relock(trans, path, 0)) {
+               ck = (void *) path->l[0].b;
                goto fill;
        }
 retry:
-       ck = bch2_btree_key_cache_find(c, iter->btree_id, iter->pos);
+       ck = bch2_btree_key_cache_find(c, path->btree_id, path->pos);
        if (!ck) {
-               if (iter->flags & BTREE_ITER_CACHED_NOCREATE) {
-                       iter->l[0].b = NULL;
+               if (flags & BTREE_ITER_CACHED_NOCREATE) {
+                       path->l[0].b = NULL;
                        return 0;
                }
 
                ck = btree_key_cache_create(&c->btree_key_cache,
-                                           iter->btree_id, iter->pos);
+                                           path->btree_id, path->pos);
                ret = PTR_ERR_OR_ZERO(ck);
                if (ret)
                        goto err;
                if (!ck)
                        goto retry;
 
-               mark_btree_node_locked(iter, 0, SIX_LOCK_intent);
-               iter->locks_want = 1;
+               mark_btree_node_locked(path, 0, SIX_LOCK_intent);
+               path->locks_want = 1;
        } else {
-               enum six_lock_type lock_want = __btree_lock_want(iter, 0);
+               enum six_lock_type lock_want = __btree_lock_want(path, 0);
 
-               if (!btree_node_lock(trans, iter, (void *) ck, iter->pos, 0, lock_want,
-                                    bkey_cached_check_fn, iter, _THIS_IP_)) {
+               if (!btree_node_lock(trans, path, (void *) ck, path->pos, 0,
+                                    lock_want,
+                                    bkey_cached_check_fn, path, _THIS_IP_)) {
                        if (!trans->restarted)
                                goto retry;
 
@@ -311,28 +313,27 @@ retry:
                        goto err;
                }
 
-               if (ck->key.btree_id != iter->btree_id ||
-                   bpos_cmp(ck->key.pos, iter->pos)) {
+               if (ck->key.btree_id != path->btree_id ||
+                   bpos_cmp(ck->key.pos, path->pos)) {
                        six_unlock_type(&ck->c.lock, lock_want);
                        goto retry;
                }
 
-               mark_btree_node_locked(iter, 0, lock_want);
+               mark_btree_node_locked(path, 0, lock_want);
        }
 
-       iter->l[0].lock_seq     = ck->c.lock.state.seq;
-       iter->l[0].b            = (void *) ck;
+       path->l[0].lock_seq     = ck->c.lock.state.seq;
+       path->l[0].b            = (void *) ck;
 fill:
-       if (!ck->valid && !(iter->flags & BTREE_ITER_CACHED_NOFILL)) {
-               if (!iter->locks_want &&
-                   !!__bch2_btree_iter_upgrade(trans, iter, 1)) {
+       if (!ck->valid && !(flags & BTREE_ITER_CACHED_NOFILL)) {
+               if (!path->locks_want &&
+                   !__bch2_btree_path_upgrade(trans, path, 1)) {
                        trace_transaction_restart_ip(trans->ip, _THIS_IP_);
-                       BUG_ON(!trans->restarted);
-                       ret = -EINTR;
+                       ret = btree_trans_restart(trans);
                        goto err;
                }
 
-               ret = btree_key_cache_fill(trans, iter, ck);
+               ret = btree_key_cache_fill(trans, path, ck);
                if (ret)
                        goto err;
        }
@@ -340,22 +341,14 @@ fill:
        if (!test_bit(BKEY_CACHED_ACCESSED, &ck->flags))
                set_bit(BKEY_CACHED_ACCESSED, &ck->flags);
 
-       iter->uptodate = BTREE_ITER_UPTODATE;
-
-       if ((iter->flags & BTREE_ITER_INTENT) &&
-           !bch2_btree_iter_upgrade(trans, iter, 1)) {
-               BUG_ON(!trans->restarted);
-               ret = -EINTR;
-       }
-
-       BUG_ON(!ret && !btree_node_locked(iter, 0));
+       path->uptodate = BTREE_ITER_UPTODATE;
+       BUG_ON(btree_node_locked_type(path, 0) != btree_lock_want(path, 0));
 
        return ret;
 err:
        if (ret != -EINTR) {
-               btree_node_unlock(iter, 0);
-               iter->flags |= BTREE_ITER_ERROR;
-               iter->l[0].b = BTREE_ITER_NO_NODE_ERROR;
+               btree_node_unlock(path, 0);
+               path->l[0].b = BTREE_ITER_NO_NODE_ERROR;
        }
        return ret;
 }
@@ -368,23 +361,23 @@ static int btree_key_cache_flush_pos(struct btree_trans *trans,
 {
        struct bch_fs *c = trans->c;
        struct journal *j = &c->journal;
-       struct btree_iter *c_iter = NULL, *b_iter = NULL;
+       struct btree_iter c_iter, b_iter;
        struct bkey_cached *ck = NULL;
        int ret;
 
-       b_iter = bch2_trans_get_iter(trans, key.btree_id, key.pos,
-                                    BTREE_ITER_SLOTS|
-                                    BTREE_ITER_INTENT);
-       c_iter = bch2_trans_get_iter(trans, key.btree_id, key.pos,
-                                    BTREE_ITER_CACHED|
-                                    BTREE_ITER_CACHED_NOFILL|
-                                    BTREE_ITER_CACHED_NOCREATE|
-                                    BTREE_ITER_INTENT);
-       ret = bch2_btree_iter_traverse(c_iter);
+       bch2_trans_iter_init(trans, &b_iter, key.btree_id, key.pos,
+                            BTREE_ITER_SLOTS|
+                            BTREE_ITER_INTENT);
+       bch2_trans_iter_init(trans, &c_iter, key.btree_id, key.pos,
+                            BTREE_ITER_CACHED|
+                            BTREE_ITER_CACHED_NOFILL|
+                            BTREE_ITER_CACHED_NOCREATE|
+                            BTREE_ITER_INTENT);
+       ret = bch2_btree_iter_traverse(&c_iter);
        if (ret)
                goto out;
 
-       ck = (void *) c_iter->l[0].b;
+       ck = (void *) c_iter.path->l[0].b;
        if (!ck ||
            (journal_seq && ck->journal.seq != journal_seq))
                goto out;
@@ -400,8 +393,8 @@ static int btree_key_cache_flush_pos(struct btree_trans *trans,
         * allocator/copygc depend on journal reclaim making progress, we need
         * to be using alloc reserves:
         * */
-       ret   = bch2_btree_iter_traverse(b_iter) ?:
-               bch2_trans_update(trans, b_iter, ck->k,
+       ret   = bch2_btree_iter_traverse(&b_iter) ?:
+               bch2_trans_update(trans, &b_iter, ck->k,
                                  BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE|
                                  BTREE_TRIGGER_NORUN) ?:
                bch2_trans_commit(trans, NULL, NULL,
@@ -423,7 +416,7 @@ static int btree_key_cache_flush_pos(struct btree_trans *trans,
        bch2_journal_pin_drop(j, &ck->journal);
        bch2_journal_preres_put(j, &ck->res);
 
-       BUG_ON(!btree_node_locked(c_iter, 0));
+       BUG_ON(!btree_node_locked(c_iter.path, 0));
 
        if (!evict) {
                if (test_bit(BKEY_CACHED_DIRTY, &ck->flags)) {
@@ -432,10 +425,10 @@ static int btree_key_cache_flush_pos(struct btree_trans *trans,
                }
        } else {
 evict:
-               BUG_ON(!btree_node_intent_locked(c_iter, 0));
+               BUG_ON(!btree_node_intent_locked(c_iter.path, 0));
 
-               mark_btree_node_unlocked(c_iter, 0);
-               c_iter->l[0].b = NULL;
+               mark_btree_node_unlocked(c_iter.path, 0);
+               c_iter.path->l[0].b = NULL;
 
                six_lock_write(&ck->c.lock, NULL, NULL);
 
@@ -451,8 +444,8 @@ evict:
                mutex_unlock(&c->btree_key_cache.lock);
        }
 out:
-       bch2_trans_iter_put(trans, b_iter);
-       bch2_trans_iter_put(trans, c_iter);
+       bch2_trans_iter_exit(trans, &b_iter);
+       bch2_trans_iter_exit(trans, &c_iter);
        return ret;
 }
 
@@ -503,11 +496,11 @@ int bch2_btree_key_cache_flush(struct btree_trans *trans,
 }
 
 bool bch2_btree_insert_key_cached(struct btree_trans *trans,
-                                 struct btree_iter *iter,
+                                 struct btree_path *path,
                                  struct bkey_i *insert)
 {
        struct bch_fs *c = trans->c;
-       struct bkey_cached *ck = (void *) iter->l[0].b;
+       struct bkey_cached *ck = (void *) path->l[0].b;
        bool kick_reclaim = false;
 
        BUG_ON(insert->u64s > ck->u64s);
index d890632e44253af5c47193bfdcf03b6c9eff16df..0768ef3ca77600d96b7b49e6bd101dcd09e4ff87 100644 (file)
@@ -26,10 +26,11 @@ int bch2_btree_key_cache_journal_flush(struct journal *,
 struct bkey_cached *
 bch2_btree_key_cache_find(struct bch_fs *, enum btree_id, struct bpos);
 
-int bch2_btree_iter_traverse_cached(struct btree_trans *, struct btree_iter *);
+int bch2_btree_path_traverse_cached(struct btree_trans *, struct btree_path *,
+                                   unsigned);
 
 bool bch2_btree_insert_key_cached(struct btree_trans *,
-                       struct btree_iter *, struct bkey_i *);
+                       struct btree_path *, struct bkey_i *);
 int bch2_btree_key_cache_flush(struct btree_trans *,
                               enum btree_id, struct bpos);
 #ifdef CONFIG_BCACHEFS_DEBUG
index b490e48086311c3e5b4e8b0a79b8e70b476d16b0..d05689180c63e855cbd4d34ae303206cb2194e09 100644 (file)
@@ -20,7 +20,7 @@ enum btree_node_locked_type {
        BTREE_NODE_INTENT_LOCKED        = SIX_LOCK_intent,
 };
 
-static inline int btree_node_locked_type(struct btree_iter *iter,
+static inline int btree_node_locked_type(struct btree_path *path,
                                         unsigned level)
 {
        /*
@@ -29,35 +29,35 @@ static inline int btree_node_locked_type(struct btree_iter *iter,
         * branches:
         */
        return BTREE_NODE_UNLOCKED +
-               ((iter->nodes_locked >> level) & 1) +
-               ((iter->nodes_intent_locked >> level) & 1);
+               ((path->nodes_locked >> level) & 1) +
+               ((path->nodes_intent_locked >> level) & 1);
 }
 
-static inline bool btree_node_intent_locked(struct btree_iter *iter,
+static inline bool btree_node_intent_locked(struct btree_path *path,
                                            unsigned level)
 {
-       return btree_node_locked_type(iter, level) == BTREE_NODE_INTENT_LOCKED;
+       return btree_node_locked_type(path, level) == BTREE_NODE_INTENT_LOCKED;
 }
 
-static inline bool btree_node_read_locked(struct btree_iter *iter,
+static inline bool btree_node_read_locked(struct btree_path *path,
                                          unsigned level)
 {
-       return btree_node_locked_type(iter, level) == BTREE_NODE_READ_LOCKED;
+       return btree_node_locked_type(path, level) == BTREE_NODE_READ_LOCKED;
 }
 
-static inline bool btree_node_locked(struct btree_iter *iter, unsigned level)
+static inline bool btree_node_locked(struct btree_path *path, unsigned level)
 {
-       return iter->nodes_locked & (1 << level);
+       return path->nodes_locked & (1 << level);
 }
 
-static inline void mark_btree_node_unlocked(struct btree_iter *iter,
+static inline void mark_btree_node_unlocked(struct btree_path *path,
                                            unsigned level)
 {
-       iter->nodes_locked &= ~(1 << level);
-       iter->nodes_intent_locked &= ~(1 << level);
+       path->nodes_locked &= ~(1 << level);
+       path->nodes_intent_locked &= ~(1 << level);
 }
 
-static inline void mark_btree_node_locked(struct btree_iter *iter,
+static inline void mark_btree_node_locked(struct btree_path *path,
                                          unsigned level,
                                          enum six_lock_type type)
 {
@@ -65,52 +65,52 @@ static inline void mark_btree_node_locked(struct btree_iter *iter,
        BUILD_BUG_ON(SIX_LOCK_read   != 0);
        BUILD_BUG_ON(SIX_LOCK_intent != 1);
 
-       iter->nodes_locked |= 1 << level;
-       iter->nodes_intent_locked |= type << level;
+       path->nodes_locked |= 1 << level;
+       path->nodes_intent_locked |= type << level;
 }
 
-static inline void mark_btree_node_intent_locked(struct btree_iter *iter,
+static inline void mark_btree_node_intent_locked(struct btree_path *path,
                                                 unsigned level)
 {
-       mark_btree_node_locked(iter, level, SIX_LOCK_intent);
+       mark_btree_node_locked(path, level, SIX_LOCK_intent);
 }
 
-static inline enum six_lock_type __btree_lock_want(struct btree_iter *iter, int level)
+static inline enum six_lock_type __btree_lock_want(struct btree_path *path, int level)
 {
-       return level < iter->locks_want
+       return level < path->locks_want
                ? SIX_LOCK_intent
                : SIX_LOCK_read;
 }
 
 static inline enum btree_node_locked_type
-btree_lock_want(struct btree_iter *iter, int level)
+btree_lock_want(struct btree_path *path, int level)
 {
-       if (level < iter->level)
+       if (level < path->level)
                return BTREE_NODE_UNLOCKED;
-       if (level < iter->locks_want)
+       if (level < path->locks_want)
                return BTREE_NODE_INTENT_LOCKED;
-       if (level == iter->level)
+       if (level == path->level)
                return BTREE_NODE_READ_LOCKED;
        return BTREE_NODE_UNLOCKED;
 }
 
-static inline void btree_node_unlock(struct btree_iter *iter, unsigned level)
+static inline void btree_node_unlock(struct btree_path *path, unsigned level)
 {
-       int lock_type = btree_node_locked_type(iter, level);
+       int lock_type = btree_node_locked_type(path, level);
 
        EBUG_ON(level >= BTREE_MAX_DEPTH);
 
        if (lock_type != BTREE_NODE_UNLOCKED)
-               six_unlock_type(&iter->l[level].b->c.lock, lock_type);
-       mark_btree_node_unlocked(iter, level);
+               six_unlock_type(&path->l[level].b->c.lock, lock_type);
+       mark_btree_node_unlocked(path, level);
 }
 
-static inline void __bch2_btree_iter_unlock(struct btree_iter *iter)
+static inline void __bch2_btree_path_unlock(struct btree_path *path)
 {
-       btree_iter_set_dirty(iter, BTREE_ITER_NEED_RELOCK);
+       btree_path_set_dirty(path, BTREE_ITER_NEED_RELOCK);
 
-       while (iter->nodes_locked)
-               btree_node_unlock(iter, __ffs(iter->nodes_locked));
+       while (path->nodes_locked)
+               btree_node_unlock(path, __ffs(path->nodes_locked));
 }
 
 static inline enum bch_time_stats lock_to_time_stat(enum six_lock_type type)
@@ -154,11 +154,11 @@ static inline bool btree_node_lock_increment(struct btree_trans *trans,
                                             struct btree *b, unsigned level,
                                             enum btree_node_locked_type want)
 {
-       struct btree_iter *iter;
+       struct btree_path *path;
 
-       trans_for_each_iter(trans, iter)
-               if (iter->l[level].b == b &&
-                   btree_node_locked_type(iter, level) >= want) {
+       trans_for_each_path(trans, path)
+               if (path->l[level].b == b &&
+                   btree_node_locked_type(path, level) >= want) {
                        six_lock_increment(&b->c.lock, want);
                        return true;
                }
@@ -166,38 +166,39 @@ static inline bool btree_node_lock_increment(struct btree_trans *trans,
        return false;
 }
 
-bool __bch2_btree_node_lock(struct btree_trans *, struct btree_iter *,
+bool __bch2_btree_node_lock(struct btree_trans *, struct btree_path *,
                            struct btree *, struct bpos, unsigned,
-                           enum six_lock_type, six_lock_should_sleep_fn,
-                           void *, unsigned long);
+                           enum six_lock_type,
+                           six_lock_should_sleep_fn, void *,
+                           unsigned long);
 
 static inline bool btree_node_lock(struct btree_trans *trans,
-                       struct btree_iter *iter,
+                       struct btree_path *path,
                        struct btree *b, struct bpos pos, unsigned level,
                        enum six_lock_type type,
                        six_lock_should_sleep_fn should_sleep_fn, void *p,
                        unsigned long ip)
 {
        EBUG_ON(level >= BTREE_MAX_DEPTH);
-       EBUG_ON(!(trans->iters_linked & (1ULL << iter->idx)));
+       EBUG_ON(!(trans->paths_allocated & (1ULL << path->idx)));
 
        return likely(six_trylock_type(&b->c.lock, type)) ||
                btree_node_lock_increment(trans, b, level, type) ||
-               __bch2_btree_node_lock(trans, iter, b, pos, level, type,
+               __bch2_btree_node_lock(trans, path, b, pos, level, type,
                                       should_sleep_fn, p, ip);
 }
 
-bool __bch2_btree_node_relock(struct btree_trans *, struct btree_iter *, unsigned);
+bool __bch2_btree_node_relock(struct btree_trans *, struct btree_path *, unsigned);
 
 static inline bool bch2_btree_node_relock(struct btree_trans *trans,
-                                         struct btree_iter *iter, unsigned level)
+                                         struct btree_path *path, unsigned level)
 {
-       EBUG_ON(btree_node_locked(iter, level) &&
-               btree_node_locked_type(iter, level) !=
-               __btree_lock_want(iter, level));
+       EBUG_ON(btree_node_locked(path, level) &&
+               btree_node_locked_type(path, level) !=
+               __btree_lock_want(path, level));
 
-       return likely(btree_node_locked(iter, level)) ||
-               __bch2_btree_node_relock(trans, iter, level);
+       return likely(btree_node_locked(path, level)) ||
+               __bch2_btree_node_relock(trans, path, level);
 }
 
 /*
@@ -205,32 +206,32 @@ static inline bool bch2_btree_node_relock(struct btree_trans *trans,
  * succeed:
  */
 static inline void
-bch2_btree_node_unlock_write_inlined(struct btree_trans *trans, struct btree_iter *iter,
+bch2_btree_node_unlock_write_inlined(struct btree_trans *trans, struct btree_path *path,
                                     struct btree *b)
 {
-       struct btree_iter *linked;
+       struct btree_path *linked;
 
-       EBUG_ON(iter->l[b->c.level].b != b);
-       EBUG_ON(iter->l[b->c.level].lock_seq + 1 != b->c.lock.state.seq);
+       EBUG_ON(path->l[b->c.level].b != b);
+       EBUG_ON(path->l[b->c.level].lock_seq + 1 != b->c.lock.state.seq);
 
-       trans_for_each_iter_with_node(trans, b, linked)
+       trans_for_each_path_with_node(trans, b, linked)
                linked->l[b->c.level].lock_seq += 2;
 
        six_unlock_write(&b->c.lock);
 }
 
 void bch2_btree_node_unlock_write(struct btree_trans *,
-                       struct btree_iter *, struct btree *);
+                       struct btree_path *, struct btree *);
 
 void __bch2_btree_node_lock_write(struct btree_trans *, struct btree *);
 
 static inline void bch2_btree_node_lock_write(struct btree_trans *trans,
-                                             struct btree_iter *iter,
+                                             struct btree_path *path,
                                              struct btree *b)
 {
-       EBUG_ON(iter->l[b->c.level].b != b);
-       EBUG_ON(iter->l[b->c.level].lock_seq != b->c.lock.state.seq);
-       EBUG_ON(!btree_node_intent_locked(iter, b->c.level));
+       EBUG_ON(path->l[b->c.level].b != b);
+       EBUG_ON(path->l[b->c.level].lock_seq != b->c.lock.state.seq);
+       EBUG_ON(!btree_node_intent_locked(path, b->c.level));
 
        if (unlikely(!six_trylock_write(&b->c.lock)))
                __bch2_btree_node_lock_write(trans, b);
index 56dc5fbb7c91e729ad9e716cb5bf04935be0010f..b7cded2095fffa4069694ee0a0ded9608e4bda48 100644 (file)
@@ -210,7 +210,7 @@ struct btree_node_iter {
 #define __BTREE_ITER_ALL_SNAPSHOTS     (1 << 11)
 #define BTREE_ITER_ALL_SNAPSHOTS       (1 << 12)
 
-enum btree_iter_uptodate {
+enum btree_path_uptodate {
        BTREE_ITER_UPTODATE             = 0,
        BTREE_ITER_NEED_RELOCK          = 1,
        BTREE_ITER_NEED_TRAVERSE        = 2,
@@ -225,51 +225,66 @@ enum btree_iter_uptodate {
 #define BTREE_ITER_NO_NODE_ERROR       ((struct btree *) 7)
 #define BTREE_ITER_NO_NODE_CACHED      ((struct btree *) 8)
 
-/*
- * @pos                        - iterator's current position
- * @level              - current btree depth
- * @locks_want         - btree level below which we start taking intent locks
- * @nodes_locked       - bitmask indicating which nodes in @nodes are locked
- * @nodes_intent_locked        - bitmask indicating which locks are intent locks
- */
-struct btree_iter {
-       struct btree_trans      *trans;
-       unsigned long           ip_allocated;
-
+struct btree_path {
        u8                      idx;
-       u8                      child_idx;
        u8                      sorted_idx;
+       u8                      ref;
+       u8                      intent_ref;
 
        /* btree_iter_copy starts here: */
-       u16                     flags;
-
-       /* When we're filtering by snapshot, the snapshot ID we're looking for: */
-       unsigned                snapshot;
-
        struct bpos             pos;
-       struct bpos             real_pos;
 
        enum btree_id           btree_id:4;
        bool                    cached:1;
-       enum btree_iter_uptodate uptodate:2;
+       bool                    preserve:1;
+       enum btree_path_uptodate uptodate:2;
        /*
-        * True if we've returned a key (and thus are expected to keep it
-        * locked), false after set_pos - for avoiding spurious transaction
-        * restarts in bch2_trans_relock():
+        * When true, failing to relock this path will cause the transaction to
+        * restart:
         */
        bool                    should_be_locked:1;
-       unsigned                level:4,
-                               min_depth:4,
+       unsigned                level:3,
                                locks_want:4,
                                nodes_locked:4,
                                nodes_intent_locked:4;
 
-       struct btree_iter_level {
+       struct btree_path_level {
                struct btree    *b;
                struct btree_node_iter iter;
                u32             lock_seq;
        }                       l[BTREE_MAX_DEPTH];
+#ifdef CONFIG_BCACHEFS_DEBUG
+       unsigned long           ip_allocated;
+#endif
+};
 
+static inline struct btree_path_level *path_l(struct btree_path *path)
+{
+       return path->l + path->level;
+}
+
+/*
+ * @pos                        - iterator's current position
+ * @path->level       - current btree depth
+ * @path->locks_want   - btree level below which we start taking intent locks
+ * @path->nodes_locked - bitmask indicating which nodes in @path->l are locked
+ * @path->nodes_intent_locked - bitmask indicating which locks are intent locks
+ */
+struct btree_iter {
+       struct btree_trans      *trans;
+       struct btree_path       *path;
+
+       enum btree_id           btree_id:4;
+       unsigned                min_depth:4;
+
+       /* btree_iter_copy starts here: */
+       u16                     flags;
+
+       /* When we're filtering by snapshot, the snapshot ID we're looking for: */
+       unsigned                snapshot;
+
+       struct bpos             pos;
+       struct bpos             pos_after_commit;
        /*
         * Current unpacked key - so that bch2_btree_iter_next()/
         * bch2_btree_iter_next_slot() can correctly advance pos.
@@ -277,11 +292,6 @@ struct btree_iter {
        struct bkey             k;
 };
 
-static inline struct btree_iter_level *iter_l(struct btree_iter *iter)
-{
-       return iter->l + iter->level;
-}
-
 struct btree_key_cache {
        struct mutex            lock;
        struct rhashtable       table;
@@ -329,7 +339,7 @@ struct btree_insert_entry {
        bool                    cached:1;
        bool                    trans_triggers_run:1;
        struct bkey_i           *k;
-       struct btree_iter       *iter;
+       struct btree_path       *path;
        unsigned long           ip_allocated;
 };
 
@@ -354,7 +364,7 @@ struct btree_trans {
 #ifdef CONFIG_BCACHEFS_DEBUG
        struct list_head        list;
        struct btree            *locking;
-       unsigned                locking_iter_idx;
+       unsigned                locking_path_idx;
        struct bpos             locking_pos;
        u8                      locking_btree_id;
        u8                      locking_level;
@@ -369,23 +379,21 @@ struct btree_trans {
        bool                    error:1;
        bool                    in_traverse_all:1;
        bool                    restarted:1;
-       bool                    iters_sorted:1;
+       bool                    paths_sorted:1;
        /*
         * For when bch2_trans_update notices we'll be splitting a compressed
         * extent:
         */
        unsigned                extra_journal_res;
 
-       u64                     iters_linked;
-       u64                     iters_live;
-       u64                     iters_touched;
+       u64                     paths_allocated;
 
        unsigned                mem_top;
        unsigned                mem_bytes;
        void                    *mem;
 
        u8                      sorted[BTREE_ITER_MAX + 8];
-       struct btree_iter       *iters;
+       struct btree_path       *paths;
        struct btree_insert_entry *updates;
 
        /* update path: */
@@ -589,16 +597,6 @@ static inline bool btree_node_is_extents(struct btree *b)
        return btree_node_type_is_extents(btree_node_type(b));
 }
 
-static inline enum btree_node_type btree_iter_key_type(struct btree_iter *iter)
-{
-       return __btree_node_type(iter->level, iter->btree_id);
-}
-
-static inline bool btree_iter_is_extents(struct btree_iter *iter)
-{
-       return btree_node_type_is_extents(btree_iter_key_type(iter));
-}
-
 #define BTREE_NODE_TYPE_HAS_TRANS_TRIGGERS             \
        ((1U << BKEY_TYPE_extents)|                     \
         (1U << BKEY_TYPE_inodes)|                      \
index 5707baf10262298d94fc2bb8e1a5a5a40133dacd..058d283a105c58159ad7b9297330d54dd502aecd 100644 (file)
@@ -8,9 +8,9 @@
 struct bch_fs;
 struct btree;
 
-void bch2_btree_node_lock_for_insert(struct btree_trans *, struct btree_iter *,
+void bch2_btree_node_lock_for_insert(struct btree_trans *, struct btree_path *,
                                     struct btree *);
-bool bch2_btree_bset_insert_key(struct btree_trans *, struct btree_iter *,
+bool bch2_btree_bset_insert_key(struct btree_trans *, struct btree_path *,
                                struct btree *, struct btree_node_iter *,
                                struct bkey_i *);
 void bch2_btree_add_journal_pin(struct bch_fs *, struct btree *, u64);
@@ -135,4 +135,13 @@ static inline int bch2_trans_commit(struct btree_trans *trans,
             (_i) < (_trans)->updates + (_trans)->nr_updates;           \
             (_i)++)
 
+struct bkey_i *__bch2_btree_trans_peek_updates(struct btree_iter *);
+
+static inline struct bkey_i *btree_trans_peek_updates(struct btree_iter *iter)
+{
+       return iter->flags & BTREE_ITER_WITH_UPDATES
+               ? __bch2_btree_trans_peek_updates(iter)
+               : NULL;
+}
+
 #endif /* _BCACHEFS_BTREE_UPDATE_H */
index 652f08dea8040baf1d5d55ffa4323d063be7ff7f..6dcce175fd8bb06ce87d9afbc593dc5cda1d54d2 100644 (file)
@@ -23,7 +23,7 @@
 #include <linux/random.h>
 
 static void bch2_btree_insert_node(struct btree_update *, struct btree_trans *,
-                                  struct btree_iter *, struct btree *,
+                                  struct btree_path *, struct btree *,
                                   struct keylist *, unsigned);
 static void bch2_btree_update_add_new_node(struct btree_update *, struct btree *);
 
@@ -162,10 +162,10 @@ static void bch2_btree_node_free_inmem(struct btree_trans *trans,
                                       struct btree *b)
 {
        struct bch_fs *c = trans->c;
-       struct btree_iter *iter;
+       struct btree_path *path;
 
-       trans_for_each_iter(trans, iter)
-               BUG_ON(iter->l[b->c.level].b == b);
+       trans_for_each_path(trans, path)
+               BUG_ON(path->l[b->c.level].b == b);
 
        six_lock_write(&b->c.lock, NULL, NULL);
 
@@ -914,7 +914,7 @@ static void bch2_btree_update_done(struct btree_update *as)
 }
 
 static struct btree_update *
-bch2_btree_update_start(struct btree_trans *trans, struct btree_iter *iter,
+bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
                        unsigned level, unsigned nr_nodes, unsigned flags)
 {
        struct bch_fs *c = trans->c;
@@ -925,7 +925,7 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_iter *iter,
        int journal_flags = 0;
        int ret = 0;
 
-       BUG_ON(!iter->should_be_locked);
+       BUG_ON(!path->should_be_locked);
 
        if (flags & BTREE_INSERT_JOURNAL_RESERVED)
                journal_flags |= JOURNAL_RES_GET_RESERVED;
@@ -937,11 +937,11 @@ retry:
         * XXX: figure out how far we might need to split,
         * instead of locking/reserving all the way to the root:
         */
-       if (!bch2_btree_iter_upgrade(trans, iter, U8_MAX)) {
+       if (!bch2_btree_path_upgrade(trans, path, U8_MAX)) {
                trace_trans_restart_iter_upgrade(trans->ip, _RET_IP_,
-                                                iter->btree_id,
-                                                &iter->real_pos);
-               return ERR_PTR(-EINTR);
+                                                path->btree_id, &path->pos);
+               ret = btree_trans_restart(trans);
+               return ERR_PTR(ret);
        }
 
        if (flags & BTREE_INSERT_GC_LOCK_HELD)
@@ -961,7 +961,7 @@ retry:
        as->c           = c;
        as->mode        = BTREE_INTERIOR_NO_UPDATE;
        as->took_gc_lock = !(flags & BTREE_INSERT_GC_LOCK_HELD);
-       as->btree_id    = iter->btree_id;
+       as->btree_id    = path->btree_id;
        INIT_LIST_HEAD(&as->list);
        INIT_LIST_HEAD(&as->unwritten_list);
        INIT_LIST_HEAD(&as->write_blocked_list);
@@ -1081,7 +1081,7 @@ static void bch2_btree_set_root_inmem(struct bch_fs *c, struct btree *b)
  */
 static void bch2_btree_set_root(struct btree_update *as,
                                struct btree_trans *trans,
-                               struct btree_iter *iter,
+                               struct btree_path *path,
                                struct btree *b)
 {
        struct bch_fs *c = as->c;
@@ -1097,7 +1097,7 @@ static void bch2_btree_set_root(struct btree_update *as,
         * Ensure no one is using the old root while we switch to the
         * new root:
         */
-       bch2_btree_node_lock_write(trans, iter, old);
+       bch2_btree_node_lock_write(trans, path, old);
 
        bch2_btree_set_root_inmem(c, b);
 
@@ -1110,14 +1110,14 @@ static void bch2_btree_set_root(struct btree_update *as,
         * an intent lock on the new root, and any updates that would
         * depend on the new root would have to update the new root.
         */
-       bch2_btree_node_unlock_write(trans, iter, old);
+       bch2_btree_node_unlock_write(trans, path, old);
 }
 
 /* Interior node updates: */
 
 static void bch2_insert_fixup_btree_ptr(struct btree_update *as,
                                        struct btree_trans *trans,
-                                       struct btree_iter *iter,
+                                       struct btree_path *path,
                                        struct btree *b,
                                        struct btree_node_iter *node_iter,
                                        struct bkey_i *insert)
@@ -1152,7 +1152,7 @@ static void bch2_insert_fixup_btree_ptr(struct btree_update *as,
               bkey_iter_pos_cmp(b, k, &insert->k.p) < 0)
                bch2_btree_node_iter_advance(node_iter, b);
 
-       bch2_btree_bset_insert_key(trans, iter, b, node_iter, insert);
+       bch2_btree_bset_insert_key(trans, path, b, node_iter, insert);
        set_btree_node_dirty(c, b);
        set_btree_node_need_write(b);
 }
@@ -1160,7 +1160,7 @@ static void bch2_insert_fixup_btree_ptr(struct btree_update *as,
 static void
 __bch2_btree_insert_keys_interior(struct btree_update *as,
                                  struct btree_trans *trans,
-                                 struct btree_iter *iter,
+                                 struct btree_path *path,
                                  struct btree *b,
                                  struct btree_node_iter node_iter,
                                  struct keylist *keys)
@@ -1175,7 +1175,7 @@ __bch2_btree_insert_keys_interior(struct btree_update *as,
                ;
 
        while (!bch2_keylist_empty(keys)) {
-               bch2_insert_fixup_btree_ptr(as, trans, iter, b,
+               bch2_insert_fixup_btree_ptr(as, trans, path, b,
                                &node_iter, bch2_keylist_front(keys));
                bch2_keylist_pop_front(keys);
        }
@@ -1186,8 +1186,7 @@ __bch2_btree_insert_keys_interior(struct btree_update *as,
  * node)
  */
 static struct btree *__btree_split_node(struct btree_update *as,
-                                       struct btree *n1,
-                                       struct btree_iter *iter)
+                                       struct btree *n1)
 {
        struct bkey_format_state s;
        size_t nr_packed = 0, nr_unpacked = 0;
@@ -1304,7 +1303,7 @@ static struct btree *__btree_split_node(struct btree_update *as,
  */
 static void btree_split_insert_keys(struct btree_update *as,
                                    struct btree_trans *trans,
-                                   struct btree_iter *iter,
+                                   struct btree_path *path,
                                    struct btree *b,
                                    struct keylist *keys)
 {
@@ -1315,7 +1314,7 @@ static void btree_split_insert_keys(struct btree_update *as,
 
        bch2_btree_node_iter_init(&node_iter, b, &k->k.p);
 
-       __bch2_btree_insert_keys_interior(as, trans, iter, b, node_iter, keys);
+       __bch2_btree_insert_keys_interior(as, trans, path, b, node_iter, keys);
 
        /*
         * We can't tolerate whiteouts here - with whiteouts there can be
@@ -1345,18 +1344,17 @@ static void btree_split_insert_keys(struct btree_update *as,
        btree_node_interior_verify(as->c, b);
 }
 
-static void btree_split(struct btree_update *as,
-                       struct btree_trans *trans, struct btree_iter *iter,
-                       struct btree *b, struct keylist *keys,
-                       unsigned flags)
+static void btree_split(struct btree_update *as, struct btree_trans *trans,
+                       struct btree_path *path, struct btree *b,
+                       struct keylist *keys, unsigned flags)
 {
        struct bch_fs *c = as->c;
-       struct btree *parent = btree_node_parent(iter, b);
+       struct btree *parent = btree_node_parent(path, b);
        struct btree *n1, *n2 = NULL, *n3 = NULL;
        u64 start_time = local_clock();
 
        BUG_ON(!parent && (b != btree_node_root(c, b)));
-       BUG_ON(!btree_node_intent_locked(iter, btree_node_root(c, b)->c.level));
+       BUG_ON(!btree_node_intent_locked(path, btree_node_root(c, b)->c.level));
 
        bch2_btree_interior_update_will_free_node(as, b);
 
@@ -1364,12 +1362,12 @@ static void btree_split(struct btree_update *as,
        bch2_btree_update_add_new_node(as, n1);
 
        if (keys)
-               btree_split_insert_keys(as, trans, iter, n1, keys);
+               btree_split_insert_keys(as, trans, path, n1, keys);
 
        if (bset_u64s(&n1->set[0]) > BTREE_SPLIT_THRESHOLD(c)) {
                trace_btree_split(c, b);
 
-               n2 = __btree_split_node(as, n1, iter);
+               n2 = __btree_split_node(as, n1);
 
                bch2_btree_build_aux_trees(n2);
                bch2_btree_build_aux_trees(n1);
@@ -1394,7 +1392,7 @@ static void btree_split(struct btree_update *as,
                        n3->sib_u64s[0] = U16_MAX;
                        n3->sib_u64s[1] = U16_MAX;
 
-                       btree_split_insert_keys(as, trans, iter, n3, &as->parent_keys);
+                       btree_split_insert_keys(as, trans, path, n3, &as->parent_keys);
 
                        bch2_btree_node_write(c, n3, SIX_LOCK_intent);
                }
@@ -1414,12 +1412,12 @@ static void btree_split(struct btree_update *as,
 
        if (parent) {
                /* Split a non root node */
-               bch2_btree_insert_node(as, trans, iter, parent, &as->parent_keys, flags);
+               bch2_btree_insert_node(as, trans, path, parent, &as->parent_keys, flags);
        } else if (n3) {
-               bch2_btree_set_root(as, trans, iter, n3);
+               bch2_btree_set_root(as, trans, path, n3);
        } else {
                /* Root filled up but didn't need to be split */
-               bch2_btree_set_root(as, trans, iter, n1);
+               bch2_btree_set_root(as, trans, path, n1);
        }
 
        bch2_btree_update_get_open_buckets(as, n1);
@@ -1428,7 +1426,7 @@ static void btree_split(struct btree_update *as,
        if (n3)
                bch2_btree_update_get_open_buckets(as, n3);
 
-       /* Successful split, update the iterator to point to the new nodes: */
+       /* Successful split, update the path to point to the new nodes: */
 
        six_lock_increment(&b->c.lock, SIX_LOCK_intent);
        bch2_trans_node_drop(trans, b);
@@ -1461,21 +1459,21 @@ static void btree_split(struct btree_update *as,
 static void
 bch2_btree_insert_keys_interior(struct btree_update *as,
                                struct btree_trans *trans,
-                               struct btree_iter *iter,
+                               struct btree_path *path,
                                struct btree *b,
                                struct keylist *keys)
 {
-       struct btree_iter *linked;
+       struct btree_path *linked;
 
-       __bch2_btree_insert_keys_interior(as, trans, iter, b,
-                                         iter->l[b->c.level].iter, keys);
+       __bch2_btree_insert_keys_interior(as, trans, path, b,
+                                         path->l[b->c.level].iter, keys);
 
        btree_update_updated_node(as, b);
 
-       trans_for_each_iter_with_node(trans, b, linked)
+       trans_for_each_path_with_node(trans, b, linked)
                bch2_btree_node_iter_peek(&linked->l[b->c.level].iter, b);
 
-       bch2_trans_verify_iters(trans, b);
+       bch2_trans_verify_paths(trans);
 }
 
 /**
@@ -1490,10 +1488,9 @@ bch2_btree_insert_keys_interior(struct btree_update *as,
  * If a split occurred, this function will return early. This can only happen
  * for leaf nodes -- inserts into interior nodes have to be atomic.
  */
-static void bch2_btree_insert_node(struct btree_update *as,
-                                  struct btree_trans *trans, struct btree_iter *iter,
-                                  struct btree *b, struct keylist *keys,
-                                  unsigned flags)
+static void bch2_btree_insert_node(struct btree_update *as, struct btree_trans *trans,
+                                  struct btree_path *path, struct btree *b,
+                                  struct keylist *keys, unsigned flags)
 {
        struct bch_fs *c = as->c;
        int old_u64s = le16_to_cpu(btree_bset_last(b)->u64s);
@@ -1501,21 +1498,21 @@ static void bch2_btree_insert_node(struct btree_update *as,
        int live_u64s_added, u64s_added;
 
        lockdep_assert_held(&c->gc_lock);
-       BUG_ON(!btree_node_intent_locked(iter, btree_node_root(c, b)->c.level));
+       BUG_ON(!btree_node_intent_locked(path, btree_node_root(c, b)->c.level));
        BUG_ON(!b->c.level);
        BUG_ON(!as || as->b);
        bch2_verify_keylist_sorted(keys);
 
-       bch2_btree_node_lock_for_insert(trans, iter, b);
+       bch2_btree_node_lock_for_insert(trans, path, b);
 
        if (!bch2_btree_node_insert_fits(c, b, bch2_keylist_u64s(keys))) {
-               bch2_btree_node_unlock_write(trans, iter, b);
+               bch2_btree_node_unlock_write(trans, path, b);
                goto split;
        }
 
        btree_node_interior_verify(c, b);
 
-       bch2_btree_insert_keys_interior(as, trans, iter, b, keys);
+       bch2_btree_insert_keys_interior(as, trans, path, b, keys);
 
        live_u64s_added = (int) b->nr.live_u64s - old_live_u64s;
        u64s_added = (int) le16_to_cpu(btree_bset_last(b)->u64s) - old_u64s;
@@ -1529,46 +1526,46 @@ static void bch2_btree_insert_node(struct btree_update *as,
            bch2_maybe_compact_whiteouts(c, b))
                bch2_trans_node_reinit_iter(trans, b);
 
-       bch2_btree_node_unlock_write(trans, iter, b);
+       bch2_btree_node_unlock_write(trans, path, b);
 
        btree_node_interior_verify(c, b);
        return;
 split:
-       btree_split(as, trans, iter, b, keys, flags);
+       btree_split(as, trans, path, b, keys, flags);
 }
 
 int bch2_btree_split_leaf(struct btree_trans *trans,
-                         struct btree_iter *iter,
+                         struct btree_path *path,
                          unsigned flags)
 {
        struct bch_fs *c = trans->c;
-       struct btree *b = iter_l(iter)->b;
+       struct btree *b = path_l(path)->b;
        struct btree_update *as;
        unsigned l;
        int ret = 0;
 
-       as = bch2_btree_update_start(trans, iter, iter->level,
+       as = bch2_btree_update_start(trans, path, path->level,
                btree_update_reserve_required(c, b), flags);
        if (IS_ERR(as))
                return PTR_ERR(as);
 
-       btree_split(as, trans, iter, b, NULL, flags);
+       btree_split(as, trans, path, b, NULL, flags);
        bch2_btree_update_done(as);
 
-       for (l = iter->level + 1; btree_iter_node(iter, l) && !ret; l++)
-               ret = bch2_foreground_maybe_merge(trans, iter, l, flags);
+       for (l = path->level + 1; btree_path_node(path, l) && !ret; l++)
+               ret = bch2_foreground_maybe_merge(trans, path, l, flags);
 
        return ret;
 }
 
 int __bch2_foreground_maybe_merge(struct btree_trans *trans,
-                                 struct btree_iter *iter,
+                                 struct btree_path *path,
                                  unsigned level,
                                  unsigned flags,
                                  enum btree_node_sibling sib)
 {
        struct bch_fs *c = trans->c;
-       struct btree_iter *sib_iter = NULL;
+       struct btree_path *sib_path = NULL;
        struct btree_update *as;
        struct bkey_format_state new_s;
        struct bkey_format new_f;
@@ -1579,14 +1576,14 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans,
        int ret = 0, ret2 = 0;
 
 retry:
-       ret = bch2_btree_iter_traverse(iter);
+       ret = bch2_btree_path_traverse(trans, path, false);
        if (ret)
                return ret;
 
-       BUG_ON(!iter->should_be_locked);
-       BUG_ON(!btree_node_locked(iter, level));
+       BUG_ON(!path->should_be_locked);
+       BUG_ON(!btree_node_locked(path, level));
 
-       b = iter->l[level].b;
+       b = path->l[level].b;
 
        if ((sib == btree_prev_sib && !bpos_cmp(b->data->min_key, POS_MIN)) ||
            (sib == btree_next_sib && !bpos_cmp(b->data->max_key, SPOS_MAX))) {
@@ -1598,17 +1595,18 @@ retry:
                ? bpos_predecessor(b->data->min_key)
                : bpos_successor(b->data->max_key);
 
-       sib_iter = bch2_trans_get_node_iter(trans, iter->btree_id,
-                                           sib_pos, U8_MAX, level,
-                                           BTREE_ITER_INTENT);
-       ret = bch2_btree_iter_traverse(sib_iter);
+       sib_path = bch2_path_get(trans, false, path->btree_id,
+                                sib_pos, U8_MAX, level, true);
+       ret = bch2_btree_path_traverse(trans, sib_path, false);
        if (ret)
                goto err;
 
-       m = sib_iter->l[level].b;
+       sib_path->should_be_locked = true;
+
+       m = sib_path->l[level].b;
 
-       if (btree_node_parent(iter, b) !=
-           btree_node_parent(sib_iter, m)) {
+       if (btree_node_parent(path, b) !=
+           btree_node_parent(sib_path, m)) {
                b->sib_u64s[sib] = U16_MAX;
                goto out;
        }
@@ -1659,8 +1657,8 @@ retry:
        if (b->sib_u64s[sib] > c->btree_foreground_merge_threshold)
                goto out;
 
-       parent = btree_node_parent(iter, b);
-       as = bch2_btree_update_start(trans, iter, level,
+       parent = btree_node_parent(path, b);
+       as = bch2_btree_update_start(trans, path, level,
                         btree_update_reserve_required(c, parent) + 1,
                         flags|
                         BTREE_INSERT_NOFAIL|
@@ -1696,7 +1694,7 @@ retry:
        bch2_keylist_add(&as->parent_keys, &delete);
        bch2_keylist_add(&as->parent_keys, &n->key);
 
-       bch2_btree_insert_node(as, trans, iter, parent, &as->parent_keys, flags);
+       bch2_btree_insert_node(as, trans, path, parent, &as->parent_keys, flags);
 
        bch2_btree_update_get_open_buckets(as, n);
 
@@ -1707,7 +1705,7 @@ retry:
 
        bch2_trans_node_add(trans, n);
 
-       bch2_trans_verify_iters(trans, n);
+       bch2_trans_verify_paths(trans);
 
        bch2_btree_node_free_inmem(trans, b);
        bch2_btree_node_free_inmem(trans, m);
@@ -1717,7 +1715,8 @@ retry:
        bch2_btree_update_done(as);
 out:
        bch2_trans_verify_locks(trans);
-       bch2_trans_iter_free(trans, sib_iter);
+       if (sib_path)
+               bch2_path_put(trans, sib_path, true);
 
        /*
         * Don't downgrade locks here: we're called after successful insert,
@@ -1730,8 +1729,9 @@ out:
         */
        return ret ?: ret2;
 err:
-       bch2_trans_iter_put(trans, sib_iter);
-       sib_iter = NULL;
+       if (sib_path)
+               bch2_path_put(trans, sib_path, true);
+       sib_path = NULL;
 
        if (ret == -EINTR && bch2_trans_relock(trans))
                goto retry;
@@ -1761,8 +1761,8 @@ retry:
        if (!b || b->data->keys.seq != seq)
                goto out;
 
-       parent = btree_node_parent(iter, b);
-       as = bch2_btree_update_start(trans, iter, b->c.level,
+       parent = btree_node_parent(iter->path, b);
+       as = bch2_btree_update_start(trans, iter->path, b->c.level,
                (parent
                 ? btree_update_reserve_required(c, parent)
                 : 0) + 1,
@@ -1789,10 +1789,10 @@ retry:
 
        if (parent) {
                bch2_keylist_add(&as->parent_keys, &n->key);
-               bch2_btree_insert_node(as, trans, iter, parent,
+               bch2_btree_insert_node(as, trans, iter->path, parent,
                                       &as->parent_keys, flags);
        } else {
-               bch2_btree_set_root(as, trans, iter, n);
+               bch2_btree_set_root(as, trans, iter->path, n);
        }
 
        bch2_btree_update_get_open_buckets(as, n);
@@ -1805,7 +1805,7 @@ retry:
 
        bch2_btree_update_done(as);
 out:
-       bch2_btree_iter_downgrade(iter);
+       bch2_btree_path_downgrade(iter->path);
        return ret;
 }
 
@@ -1824,13 +1824,13 @@ void async_btree_node_rewrite_work(struct work_struct *work)
                container_of(work, struct async_btree_rewrite, work);
        struct bch_fs *c = a->c;
        struct btree_trans trans;
-       struct btree_iter *iter;
+       struct btree_iter iter;
 
        bch2_trans_init(&trans, c, 0, 0);
-       iter = bch2_trans_get_node_iter(&trans, a->btree_id, a->pos,
+       bch2_trans_node_iter_init(&trans, &iter, a->btree_id, a->pos,
                                        BTREE_MAX_DEPTH, a->level, 0);
-       bch2_btree_node_rewrite(&trans, iter, a->seq, 0);
-       bch2_trans_iter_put(&trans, iter);
+       bch2_btree_node_rewrite(&trans, &iter, a->seq, 0);
+       bch2_trans_iter_exit(&trans, &iter);
        bch2_trans_exit(&trans);
        percpu_ref_put(&c->writes);
        kfree(a);
@@ -1869,7 +1869,7 @@ static int __bch2_btree_node_update_key(struct btree_trans *trans,
                                        bool skip_triggers)
 {
        struct bch_fs *c = trans->c;
-       struct btree_iter *iter2 = NULL;
+       struct btree_iter iter2 = { NULL };
        struct btree *parent;
        u64 journal_entries[BKEY_BTREE_PTR_U64s_MAX];
        int ret;
@@ -1897,19 +1897,22 @@ static int __bch2_btree_node_update_key(struct btree_trans *trans,
                BUG_ON(ret);
        }
 
-       parent = btree_node_parent(iter, b);
+       parent = btree_node_parent(iter->path, b);
        if (parent) {
-               iter2 = bch2_trans_copy_iter(trans, iter);
+               bch2_trans_copy_iter(&iter2, iter);
 
-               BUG_ON(iter2->level != b->c.level);
-               BUG_ON(bpos_cmp(iter2->pos, new_key->k.p));
+               iter2.path = bch2_btree_path_make_mut(trans, iter2.path,
+                               iter2.flags & BTREE_ITER_INTENT);
 
-               btree_node_unlock(iter2, iter2->level);
-               iter2->l[iter2->level].b = BTREE_ITER_NO_NODE_UP;
-               iter2->level++;
+               BUG_ON(iter2.path->level != b->c.level);
+               BUG_ON(bpos_cmp(iter2.path->pos, new_key->k.p));
 
-               ret   = bch2_btree_iter_traverse(iter2) ?:
-                       bch2_trans_update(trans, iter2, new_key, BTREE_TRIGGER_NORUN);
+               btree_node_unlock(iter2.path, iter2.path->level);
+               path_l(iter2.path)->b = BTREE_ITER_NO_NODE_UP;
+               iter2.path->level++;
+
+               ret   = bch2_btree_iter_traverse(&iter2) ?:
+                       bch2_trans_update(trans, &iter2, new_key, BTREE_TRIGGER_NORUN);
                if (ret)
                        goto err;
        } else {
@@ -1931,7 +1934,7 @@ static int __bch2_btree_node_update_key(struct btree_trans *trans,
        if (ret)
                goto err;
 
-       bch2_btree_node_lock_write(trans, iter, b);
+       bch2_btree_node_lock_write(trans, iter->path, b);
 
        if (new_hash) {
                mutex_lock(&c->btree_cache.lock);
@@ -1946,9 +1949,9 @@ static int __bch2_btree_node_update_key(struct btree_trans *trans,
                bkey_copy(&b->key, new_key);
        }
 
-       bch2_btree_node_unlock_write(trans, iter, b);
+       bch2_btree_node_unlock_write(trans, iter->path, b);
 out:
-       bch2_trans_iter_put(trans, iter2);
+       bch2_trans_iter_exit(trans, &iter2);
        return ret;
 err:
        if (new_hash) {
@@ -2006,18 +2009,18 @@ int bch2_btree_node_update_key_get_iter(struct btree_trans *trans,
                                        struct btree *b, struct bkey_i *new_key,
                                        bool skip_triggers)
 {
-       struct btree_iter *iter;
+       struct btree_iter iter;
        int ret;
 
-       iter = bch2_trans_get_node_iter(trans, b->c.btree_id, b->key.k.p,
-                                       BTREE_MAX_DEPTH, b->c.level,
-                                       BTREE_ITER_INTENT);
-       ret = bch2_btree_iter_traverse(iter);
+       bch2_trans_node_iter_init(trans, &iter, b->c.btree_id, b->key.k.p,
+                                 BTREE_MAX_DEPTH, b->c.level,
+                                 BTREE_ITER_INTENT);
+       ret = bch2_btree_iter_traverse(&iter);
        if (ret)
                goto out;
 
        /* has node been freed? */
-       if (iter->l[b->c.level].b != b) {
+       if (iter.path->l[b->c.level].b != b) {
                /* node has been freed: */
                BUG_ON(!btree_node_dying(b));
                goto out;
@@ -2025,9 +2028,9 @@ int bch2_btree_node_update_key_get_iter(struct btree_trans *trans,
 
        BUG_ON(!btree_node_hashed(b));
 
-       ret = bch2_btree_node_update_key(trans, iter, b, new_key, skip_triggers);
+       ret = bch2_btree_node_update_key(trans, &iter, b, new_key, skip_triggers);
 out:
-       bch2_trans_iter_put(trans, iter);
+       bch2_trans_iter_exit(trans, &iter);
        return ret;
 }
 
index 13b3a1bf0f4fbb6fce8d11f42f4887ccf4dc09fc..c06cfcc66db7d15a6e1cde1c51e5c5a29ac25266 100644 (file)
@@ -117,39 +117,39 @@ struct btree *__bch2_btree_node_alloc_replacement(struct btree_update *,
                                                  struct btree *,
                                                  struct bkey_format);
 
-int bch2_btree_split_leaf(struct btree_trans *, struct btree_iter *, unsigned);
+int bch2_btree_split_leaf(struct btree_trans *, struct btree_path *, unsigned);
 
-int __bch2_foreground_maybe_merge(struct btree_trans *, struct btree_iter *,
+int __bch2_foreground_maybe_merge(struct btree_trans *, struct btree_path *,
                                  unsigned, unsigned, enum btree_node_sibling);
 
 static inline int bch2_foreground_maybe_merge_sibling(struct btree_trans *trans,
-                                       struct btree_iter *iter,
+                                       struct btree_path *path,
                                        unsigned level, unsigned flags,
                                        enum btree_node_sibling sib)
 {
        struct btree *b;
 
-       if (iter->uptodate >= BTREE_ITER_NEED_TRAVERSE)
+       if (path->uptodate >= BTREE_ITER_NEED_TRAVERSE)
                return 0;
 
-       if (!bch2_btree_node_relock(trans, iter, level))
+       if (!bch2_btree_node_relock(trans, path, level))
                return 0;
 
-       b = iter->l[level].b;
+       b = path->l[level].b;
        if (b->sib_u64s[sib] > trans->c->btree_foreground_merge_threshold)
                return 0;
 
-       return __bch2_foreground_maybe_merge(trans, iter, level, flags, sib);
+       return __bch2_foreground_maybe_merge(trans, path, level, flags, sib);
 }
 
 static inline int bch2_foreground_maybe_merge(struct btree_trans *trans,
-                                             struct btree_iter *iter,
+                                             struct btree_path *path,
                                              unsigned level,
                                              unsigned flags)
 {
-       return  bch2_foreground_maybe_merge_sibling(trans, iter, level, flags,
+       return  bch2_foreground_maybe_merge_sibling(trans, path, level, flags,
                                                    btree_prev_sib) ?:
-               bch2_foreground_maybe_merge_sibling(trans, iter, level, flags,
+               bch2_foreground_maybe_merge_sibling(trans, path, level, flags,
                                                    btree_next_sib);
 }
 
index 5e57ff5a5ceba279845ea83e37e93d29ac7d9bd5..4fb5a5666e200a8450d32c33e1c609e5f0eb3797 100644 (file)
@@ -29,9 +29,9 @@ static inline int btree_insert_entry_cmp(const struct btree_insert_entry *l,
                 bpos_cmp(l->k->k.p,    r->k->k.p);
 }
 
-static inline struct btree_iter_level *insert_l(struct btree_insert_entry *i)
+static inline struct btree_path_level *insert_l(struct btree_insert_entry *i)
 {
-       return i->iter->l + i->level;
+       return i->path->l + i->level;
 }
 
 static inline bool same_leaf_as_prev(struct btree_trans *trans,
@@ -49,14 +49,14 @@ static inline bool same_leaf_as_next(struct btree_trans *trans,
 }
 
 inline void bch2_btree_node_lock_for_insert(struct btree_trans *trans,
-                                           struct btree_iter *iter,
+                                           struct btree_path *path,
                                            struct btree *b)
 {
        struct bch_fs *c = trans->c;
 
-       bch2_btree_node_lock_write(trans, iter, b);
+       bch2_btree_node_lock_write(trans, path, b);
 
-       if (iter->cached)
+       if (path->cached)
                return;
 
        if (unlikely(btree_node_just_written(b)) &&
@@ -75,7 +75,7 @@ inline void bch2_btree_node_lock_for_insert(struct btree_trans *trans,
 
 /* Handle overwrites and do insert, for non extents: */
 bool bch2_btree_bset_insert_key(struct btree_trans *trans,
-                               struct btree_iter *iter,
+                               struct btree_path *path,
                                struct btree *b,
                                struct btree_node_iter *node_iter,
                                struct bkey_i *insert)
@@ -116,7 +116,7 @@ bool bch2_btree_bset_insert_key(struct btree_trans *trans,
                        bch2_bset_delete(b, k, clobber_u64s);
                        goto fix_iter;
                } else {
-                       bch2_btree_iter_fix_key_modified(trans, b, k);
+                       bch2_btree_path_fix_key_modified(trans, b, k);
                }
 
                return true;
@@ -134,7 +134,7 @@ bool bch2_btree_bset_insert_key(struct btree_trans *trans,
                        clobber_u64s = k->u64s;
                        goto overwrite;
                } else {
-                       bch2_btree_iter_fix_key_modified(trans, b, k);
+                       bch2_btree_path_fix_key_modified(trans, b, k);
                }
        }
 
@@ -144,7 +144,7 @@ overwrite:
        new_u64s = k->u64s;
 fix_iter:
        if (clobber_u64s != new_u64s)
-               bch2_btree_node_iter_fix(trans, iter, b, node_iter, k,
+               bch2_btree_node_iter_fix(trans, path, b, node_iter, k,
                                         clobber_u64s, new_u64s);
        return true;
 }
@@ -201,7 +201,7 @@ static bool btree_insert_key_leaf(struct btree_trans *trans,
        EBUG_ON(!insert->level &&
                !test_bit(BCH_FS_BTREE_INTERIOR_REPLAY_DONE, &c->flags));
 
-       if (unlikely(!bch2_btree_bset_insert_key(trans, insert->iter, b,
+       if (unlikely(!bch2_btree_bset_insert_key(trans, insert->path, b,
                                        &insert_l(insert)->iter, insert->k)))
                return false;
 
@@ -236,9 +236,10 @@ static bool btree_insert_key_leaf(struct btree_trans *trans,
 static inline void btree_insert_entry_checks(struct btree_trans *trans,
                                             struct btree_insert_entry *i)
 {
-       BUG_ON(bpos_cmp(i->k->k.p, i->iter->real_pos));
-       BUG_ON(i->level         != i->iter->level);
-       BUG_ON(i->btree_id      != i->iter->btree_id);
+       BUG_ON(bpos_cmp(i->k->k.p, i->path->pos));
+       BUG_ON(i->cached        != i->path->cached);
+       BUG_ON(i->level         != i->path->level);
+       BUG_ON(i->btree_id      != i->path->btree_id);
 }
 
 static noinline int
@@ -293,14 +294,14 @@ btree_key_can_insert(struct btree_trans *trans,
 
 static enum btree_insert_ret
 btree_key_can_insert_cached(struct btree_trans *trans,
-                           struct btree_iter *iter,
+                           struct btree_path *path,
                            unsigned u64s)
 {
-       struct bkey_cached *ck = (void *) iter->l[0].b;
+       struct bkey_cached *ck = (void *) path->l[0].b;
        unsigned new_u64s;
        struct bkey_i *new_k;
 
-       EBUG_ON(iter->level);
+       EBUG_ON(path->level);
 
        if (!test_bit(BKEY_CACHED_DIRTY, &ck->flags) &&
            bch2_btree_key_cache_must_wait(trans->c) &&
@@ -340,7 +341,7 @@ static inline void do_btree_insert_one(struct btree_trans *trans,
 
        did_work = !i->cached
                ? btree_insert_key_leaf(trans, i)
-               : bch2_btree_insert_key_cached(trans, i->iter, i->k);
+               : bch2_btree_insert_key_cached(trans, i->path, i->k);
        if (!did_work)
                return;
 
@@ -366,11 +367,12 @@ static noinline void bch2_trans_mark_gc(struct btree_trans *trans)
        trans_for_each_update(trans, i) {
                /*
                 * XXX: synchronization of cached update triggers with gc
+                * XXX: synchronization of interior node updates with gc
                 */
                BUG_ON(i->cached || i->level);
 
                if (gc_visited(c, gc_pos_btree_node(insert_l(i)->b)))
-                       bch2_mark_update(trans, i->iter, i->k,
+                       bch2_mark_update(trans, i->path, i->k,
                                         i->flags|BTREE_TRIGGER_GC);
        }
 }
@@ -417,7 +419,7 @@ bch2_trans_commit_write_locked(struct btree_trans *trans,
                u64s += i->k->k.u64s;
                ret = !i->cached
                        ? btree_key_can_insert(trans, insert_l(i)->b, u64s)
-                       : btree_key_can_insert_cached(trans, i->iter, u64s);
+                       : btree_key_can_insert_cached(trans, i->path, u64s);
                if (ret) {
                        *stopped_at = i;
                        return ret;
@@ -476,7 +478,7 @@ bch2_trans_commit_write_locked(struct btree_trans *trans,
 
        trans_for_each_update(trans, i)
                if (BTREE_NODE_TYPE_HAS_MEM_TRIGGERS & (1U << i->bkey_type))
-                       bch2_mark_update(trans, i->iter, i->k, i->flags);
+                       bch2_mark_update(trans, i->path, i->k, i->flags);
 
        if (marking && trans->fs_usage_deltas)
                bch2_trans_fs_usage_apply(trans, trans->fs_usage_deltas);
@@ -503,11 +505,13 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans,
 {
        struct bch_fs *c = trans->c;
        struct btree_insert_entry *i;
-       struct btree_iter *iter;
+       struct btree_path *path;
        struct bkey_s_c old;
        int ret, u64s_delta = 0;
 
        trans_for_each_update(trans, i) {
+               struct bkey u;
+
                /*
                 * peek_slot() doesn't yet work on iterators that point to
                 * interior nodes:
@@ -515,7 +519,7 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans,
                if (i->cached || i->level)
                        continue;
 
-               old = bch2_btree_iter_peek_slot(i->iter);
+               old = bch2_btree_path_peek_slot(i->path, &u);
                ret = bkey_err(old);
                if (unlikely(ret))
                        return ret;
@@ -525,7 +529,7 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans,
 
                if (!same_leaf_as_next(trans, i)) {
                        if (u64s_delta <= 0) {
-                               ret = bch2_foreground_maybe_merge(trans, i->iter,
+                               ret = bch2_foreground_maybe_merge(trans, i->path,
                                                        i->level, trans->flags);
                                if (unlikely(ret))
                                        return ret;
@@ -536,7 +540,7 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans,
        }
 
        trans_for_each_update(trans, i)
-               BUG_ON(!btree_node_intent_locked(i->iter, i->level));
+               BUG_ON(!btree_node_intent_locked(i->path, i->level));
 
        ret = bch2_journal_preres_get(&c->journal,
                        &trans->journal_preres, trans->journal_preres_u64s,
@@ -560,14 +564,12 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans,
         * or anything else that might call bch2_trans_relock(), since that
         * would just retake the read locks:
         */
-       trans_for_each_iter(trans, iter)
-               if (iter->nodes_locked != iter->nodes_intent_locked &&
-                   !bch2_btree_iter_upgrade(trans, iter, 1)) {
+       trans_for_each_path(trans, path)
+               if (path->nodes_locked != path->nodes_intent_locked &&
+                   !bch2_btree_path_upgrade(trans, path, path->level + 1)) {
                        trace_trans_restart_upgrade(trans->ip, trace_ip,
-                                                   iter->btree_id,
-                                                   &iter->real_pos);
-                       trans->restarted = true;
-                       return -EINTR;
+                                                   path->btree_id, &path->pos);
+                       return btree_trans_restart(trans);
                }
 
        trans_for_each_update(trans, i) {
@@ -581,6 +583,7 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans,
                                buf, (void *) trans->ip,
                                (void *) i->ip_allocated, invalid);
                        bch2_fatal_error(c);
+                       return -EINVAL;
                }
                btree_insert_entry_checks(trans, i);
        }
@@ -588,14 +591,14 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans,
 
        trans_for_each_update(trans, i)
                if (!same_leaf_as_prev(trans, i))
-                       bch2_btree_node_lock_for_insert(trans, i->iter,
+                       bch2_btree_node_lock_for_insert(trans, i->path,
                                        insert_l(i)->b);
 
        ret = bch2_trans_commit_write_locked(trans, stopped_at, trace_ip);
 
        trans_for_each_update(trans, i)
                if (!same_leaf_as_prev(trans, i))
-                       bch2_btree_node_unlock_write_inlined(trans, i->iter,
+                       bch2_btree_node_unlock_write_inlined(trans, i->path,
                                                        insert_l(i)->b);
 
        if (!ret && trans->journal_pin)
@@ -635,13 +638,13 @@ int bch2_trans_commit_error(struct btree_trans *trans,
 
        switch (ret) {
        case BTREE_INSERT_BTREE_NODE_FULL:
-               ret = bch2_btree_split_leaf(trans, i->iter, trans->flags);
+               ret = bch2_btree_split_leaf(trans, i->path, trans->flags);
                if (!ret)
                        return 0;
 
                if (ret == -EINTR)
                        trace_trans_restart_btree_node_split(trans->ip, trace_ip,
-                                               i->btree_id, &i->iter->real_pos);
+                                               i->btree_id, &i->path->pos);
                break;
        case BTREE_INSERT_NEED_MARK_REPLICAS:
                bch2_trans_unlock(trans);
@@ -749,6 +752,10 @@ int __bch2_trans_commit(struct btree_trans *trans)
        }
 
 #ifdef CONFIG_BCACHEFS_DEBUG
+       /*
+        * Updates with BTREE_TRIGGER_NORUN set are skipped by the loop below:
+        * the flag means we're probably being called from the key cache flush code.
+        */
        trans_for_each_update(trans, i)
                if (!i->cached &&
                    !(i->flags & BTREE_TRIGGER_NORUN))
@@ -769,13 +776,12 @@ int __bch2_trans_commit(struct btree_trans *trans)
                                i->trans_triggers_run = true;
                                trans_trigger_run = true;
 
-                               ret = bch2_trans_mark_update(trans, i->iter,
+                               ret = bch2_trans_mark_update(trans, i->path,
                                                             i->k, i->flags);
                                if (unlikely(ret)) {
                                        if (ret == -EINTR)
                                                trace_trans_restart_mark(trans->ip, _RET_IP_,
-                                                                        i->btree_id,
-                                                                        &i->iter->pos);
+                                                               i->btree_id, &i->path->pos);
                                        goto out;
                                }
                        }
@@ -783,18 +789,16 @@ int __bch2_trans_commit(struct btree_trans *trans)
        } while (trans_trigger_run);
 
        trans_for_each_update(trans, i) {
-               BUG_ON(!i->iter->should_be_locked);
+               BUG_ON(!i->path->should_be_locked);
 
-               if (unlikely(!bch2_btree_iter_upgrade(trans, i->iter,
-                                                     i->level + 1))) {
+               if (unlikely(!bch2_btree_path_upgrade(trans, i->path, i->level + 1))) {
                        trace_trans_restart_upgrade(trans->ip, _RET_IP_,
-                                               i->btree_id, &i->iter->pos);
-                       trans->restarted = true;
-                       ret = -EINTR;
+                                                   i->btree_id, &i->path->pos);
+                       ret = btree_trans_restart(trans);
                        goto out;
                }
 
-               BUG_ON(!btree_node_intent_locked(i->iter, i->level));
+               BUG_ON(!btree_node_intent_locked(i->path, i->level));
 
                u64s = jset_u64s(i->k->k.u64s);
                if (i->cached &&
@@ -828,6 +832,9 @@ out:
        if (likely(!(trans->flags & BTREE_INSERT_NOCHECK_RW)))
                percpu_ref_put(&trans->c->writes);
 out_reset:
+       trans_for_each_update(trans, i)
+               bch2_path_put(trans, i->path, true);
+
        trans->extra_journal_res        = 0;
        trans->nr_updates               = 0;
        trans->hooks                    = NULL;
@@ -869,11 +876,11 @@ static noinline int extent_front_merge(struct btree_trans *trans,
        bkey_reassemble(update, k);
 
        if (bch2_bkey_merge(c, bkey_i_to_s(update), bkey_i_to_s_c(*insert))) {
-               struct btree_iter *update_iter =
-                       bch2_trans_copy_iter(trans, iter);
+               struct btree_iter update_iter;
 
-               ret = bch2_btree_delete_at(trans, update_iter, flags);
-               bch2_trans_iter_put(trans, update_iter);
+               bch2_trans_copy_iter(&update_iter, iter);
+               ret = bch2_btree_delete_at(trans, &update_iter, flags);
+               bch2_trans_iter_exit(trans, &update_iter);
 
                if (ret)
                        return ret;
@@ -890,18 +897,18 @@ static int bch2_trans_update_extent(struct btree_trans *trans,
                                    enum btree_update_flags flags)
 {
        struct bch_fs *c = trans->c;
-       struct btree_iter *iter, *update_iter;
+       struct btree_iter iter, update_iter;
        struct bpos start = bkey_start_pos(&insert->k);
        struct bkey_i *update;
        struct bkey_s_c k;
        enum btree_id btree_id = orig_iter->btree_id;
        int ret = 0, compressed_sectors;
 
-       iter = bch2_trans_get_iter(trans, btree_id, start,
-                                  BTREE_ITER_INTENT|
-                                  BTREE_ITER_WITH_UPDATES|
-                                  BTREE_ITER_NOT_EXTENTS);
-       k = bch2_btree_iter_peek(iter);
+       bch2_trans_iter_init(trans, &iter, btree_id, start,
+                            BTREE_ITER_INTENT|
+                            BTREE_ITER_WITH_UPDATES|
+                            BTREE_ITER_NOT_EXTENTS);
+       k = bch2_btree_iter_peek(&iter);
        if ((ret = bkey_err(k)))
                goto err;
        if (!k.k)
@@ -909,7 +916,7 @@ static int bch2_trans_update_extent(struct btree_trans *trans,
 
        if (!bkey_cmp(k.k->p, bkey_start_pos(&insert->k))) {
                if (bch2_bkey_maybe_mergable(k.k, &insert->k)) {
-                       ret = extent_front_merge(trans, iter, k, &insert, flags);
+                       ret = extent_front_merge(trans, &iter, k, &insert, flags);
                        if (ret)
                                goto out;
                }
@@ -940,23 +947,22 @@ static int bch2_trans_update_extent(struct btree_trans *trans,
 
                        bch2_cut_back(start, update);
 
-                       update_iter = bch2_trans_get_iter(trans, btree_id, update->k.p,
-                                                         BTREE_ITER_NOT_EXTENTS|
-                                                         BTREE_ITER_INTENT);
-                       ret   = bch2_btree_iter_traverse(update_iter) ?:
-                               bch2_trans_update(trans, update_iter, update,
+                       bch2_trans_iter_init(trans, &update_iter, btree_id, update->k.p,
+                                            BTREE_ITER_NOT_EXTENTS|
+                                            BTREE_ITER_INTENT);
+                       ret   = bch2_btree_iter_traverse(&update_iter) ?:
+                               bch2_trans_update(trans, &update_iter, update,
                                                  BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE|
                                                  flags);
-                       bch2_trans_iter_put(trans, update_iter);
+                       bch2_trans_iter_exit(trans, &update_iter);
                        if (ret)
                                goto err;
                }
 
                if (bkey_cmp(k.k->p, insert->k.p) <= 0) {
-                       update_iter = bch2_trans_copy_iter(trans, iter);
-                       ret = bch2_btree_delete_at(trans, update_iter,
-                                                  flags);
-                       bch2_trans_iter_put(trans, update_iter);
+                       bch2_trans_copy_iter(&update_iter, &iter);
+                       ret = bch2_btree_delete_at(trans, &update_iter, flags);
+                       bch2_trans_iter_exit(trans, &update_iter);
 
                        if (ret)
                                goto err;
@@ -970,13 +976,13 @@ static int bch2_trans_update_extent(struct btree_trans *trans,
                        bkey_reassemble(update, k);
                        bch2_cut_front(insert->k.p, update);
 
-                       update_iter = bch2_trans_copy_iter(trans, iter);
-                       bch2_trans_update(trans, update_iter, update, flags);
-                       bch2_trans_iter_put(trans, update_iter);
+                       bch2_trans_copy_iter(&update_iter, &iter);
+                       bch2_trans_update(trans, &update_iter, update, flags);
+                       bch2_trans_iter_exit(trans, &update_iter);
                        goto out;
                }
 next:
-               k = bch2_btree_iter_next(iter);
+               k = bch2_btree_iter_next(&iter);
                if ((ret = bkey_err(k)))
                        goto err;
                if (!k.k)
@@ -987,14 +993,12 @@ next:
                bch2_bkey_merge(c, bkey_i_to_s(insert), k);
 out:
        if (!bkey_deleted(&insert->k)) {
-               bch2_btree_iter_set_pos(iter, insert->k.p);
-               ret   = bch2_btree_iter_traverse(iter) ?:
-                       bch2_trans_update(trans, iter, insert, flags);
-       } else {
-               set_btree_iter_dontneed(trans, iter);
+               bch2_btree_iter_set_pos(&iter, insert->k.p);
+               ret   = bch2_btree_iter_traverse(&iter) ?:
+                       bch2_trans_update(trans, &iter, insert, flags);
        }
 err:
-       bch2_trans_iter_put(trans, iter);
+       bch2_trans_iter_exit(trans, &iter);
 
        return ret;
 }
@@ -1002,31 +1006,34 @@ err:
 int bch2_trans_update(struct btree_trans *trans, struct btree_iter *iter,
                      struct bkey_i *k, enum btree_update_flags flags)
 {
-       struct btree_insert_entry *i, n = (struct btree_insert_entry) {
+       struct btree_insert_entry *i, n;
+
+       BUG_ON(!iter->path->should_be_locked);
+
+       if (iter->flags & BTREE_ITER_IS_EXTENTS)
+               return bch2_trans_update_extent(trans, iter, k, flags);
+
+       BUG_ON(trans->nr_updates >= BTREE_ITER_MAX);
+       BUG_ON(bpos_cmp(k->k.p, iter->path->pos));
+
+       n = (struct btree_insert_entry) {
                .flags          = flags,
-               .bkey_type      = __btree_node_type(iter->level, iter->btree_id),
+               .bkey_type      = __btree_node_type(iter->path->level, iter->btree_id),
                .btree_id       = iter->btree_id,
-               .level          = iter->level,
-               .cached         = iter->cached,
-               .iter           = iter,
+               .level          = iter->path->level,
+               .cached         = iter->flags & BTREE_ITER_CACHED,
+               .path           = iter->path,
                .k              = k,
                .ip_allocated   = _RET_IP_,
        };
 
-       BUG_ON(!iter->should_be_locked);
-
-       if (iter->flags & BTREE_ITER_IS_EXTENTS)
-               return bch2_trans_update_extent(trans, iter, k, flags);
+       __btree_path_get(n.path, true);
 
 #ifdef CONFIG_BCACHEFS_DEBUG
        trans_for_each_update(trans, i)
                BUG_ON(i != trans->updates &&
                       btree_insert_entry_cmp(i - 1, i) >= 0);
 #endif
-       BUG_ON(trans->nr_updates >= BTREE_ITER_MAX);
-       BUG_ON(bpos_cmp(n.k->k.p, n.iter->real_pos));
-
-       n.iter->flags |= BTREE_ITER_KEEP_UNTIL_COMMIT;
 
        /*
         * Pending updates are kept sorted: first, find position of new update,
@@ -1048,7 +1055,10 @@ int bch2_trans_update(struct btree_trans *trans, struct btree_iter *iter,
                if (n.cached && !i->cached) {
                        i->k = n.k;
                        i->flags = n.flags;
+
+                       __btree_path_get(n.path, false);
                } else {
+                       bch2_path_put(trans, i->path, true);
                        *i = n;
                }
        } else
@@ -1068,15 +1078,15 @@ void bch2_trans_commit_hook(struct btree_trans *trans,
 int __bch2_btree_insert(struct btree_trans *trans,
                        enum btree_id id, struct bkey_i *k)
 {
-       struct btree_iter *iter;
+       struct btree_iter iter;
        int ret;
 
-       iter = bch2_trans_get_iter(trans, id, bkey_start_pos(&k->k),
+       bch2_trans_iter_init(trans, &iter, id, bkey_start_pos(&k->k),
                                   BTREE_ITER_INTENT);
 
-       ret   = bch2_btree_iter_traverse(iter) ?:
-               bch2_trans_update(trans, iter, k, 0);
-       bch2_trans_iter_put(trans, iter);
+       ret   = bch2_btree_iter_traverse(&iter) ?:
+               bch2_trans_update(trans, &iter, k, 0);
+       bch2_trans_iter_exit(trans, &iter);
        return ret;
 }
 
@@ -1114,16 +1124,16 @@ int bch2_btree_delete_range_trans(struct btree_trans *trans, enum btree_id id,
                                  struct bpos start, struct bpos end,
                                  u64 *journal_seq)
 {
-       struct btree_iter *iter;
+       struct btree_iter iter;
        struct bkey_s_c k;
        int ret = 0;
 
-       iter = bch2_trans_get_iter(trans, id, start, BTREE_ITER_INTENT);
+       bch2_trans_iter_init(trans, &iter, id, start, BTREE_ITER_INTENT);
 retry:
        while ((bch2_trans_begin(trans),
-              (k = bch2_btree_iter_peek(iter)).k) &&
+              (k = bch2_btree_iter_peek(&iter)).k) &&
               !(ret = bkey_err(k)) &&
-              bkey_cmp(iter->pos, end) < 0) {
+              bkey_cmp(iter.pos, end) < 0) {
                struct bkey_i delete;
 
                bkey_init(&delete.k);
@@ -1142,9 +1152,9 @@ retry:
                 * (bch2_btree_iter_peek() does guarantee that iter.pos >=
                 * bkey_start_pos(k.k)).
                 */
-               delete.k.p = iter->pos;
+               delete.k.p = iter.pos;
 
-               if (btree_node_type_is_extents(iter->btree_id)) {
+               if (btree_node_type_is_extents(id)) {
                        unsigned max_sectors =
                                KEY_SIZE_MAX & (~0 << trans->c->block_bits);
 
@@ -1152,12 +1162,12 @@ retry:
                        bch2_key_resize(&delete.k, max_sectors);
                        bch2_cut_back(end, &delete);
 
-                       ret = bch2_extent_trim_atomic(trans, iter, &delete);
+                       ret = bch2_extent_trim_atomic(trans, &iter, &delete);
                        if (ret)
                                break;
                }
 
-               ret   = bch2_trans_update(trans, iter, &delete, 0) ?:
+               ret   = bch2_trans_update(trans, &iter, &delete, 0) ?:
                        bch2_trans_commit(trans, NULL, journal_seq,
                                        BTREE_INSERT_NOFAIL);
                if (ret)
@@ -1171,7 +1181,7 @@ retry:
                goto retry;
        }
 
-       bch2_trans_iter_free(trans, iter);
+       bch2_trans_iter_exit(trans, &iter);
        return ret;
 }
 
index a1d4a25bc42c6960a7d790fd9f08b93f2d1b2354..6831c002961d21bb5729fac43ce81e51b69dc24a 100644 (file)
@@ -1222,38 +1222,23 @@ int bch2_mark_key(struct bch_fs *c, struct bkey_s_c new, unsigned flags)
        return ret;
 }
 
-int bch2_mark_update(struct btree_trans *trans, struct btree_iter *iter,
+int bch2_mark_update(struct btree_trans *trans, struct btree_path *path,
                     struct bkey_i *new, unsigned flags)
 {
        struct bch_fs           *c = trans->c;
        struct bkey             _deleted = KEY(0, 0, 0);
        struct bkey_s_c         deleted = (struct bkey_s_c) { &_deleted, NULL };
        struct bkey_s_c         old;
-       int iter_flags, ret;
+       struct bkey             unpacked;
+       int ret;
 
        if (unlikely(flags & BTREE_TRIGGER_NORUN))
                return 0;
 
-       if (!btree_node_type_needs_gc(iter->btree_id))
+       if (!btree_node_type_needs_gc(path->btree_id))
                return 0;
 
-       if (likely(!(iter->flags & BTREE_ITER_CACHED_NOFILL))) {
-               iter_flags = iter->flags & BTREE_ITER_WITH_UPDATES;
-               iter->flags &= ~BTREE_ITER_WITH_UPDATES;
-
-               old = bch2_btree_iter_peek_slot(iter);
-               iter->flags |= iter_flags;
-
-               ret = bkey_err(old);
-               if (ret)
-                       return ret;
-       } else {
-               /*
-                * If BTREE_ITER_CACHED_NOFILL was used, we better not be
-                * running triggers that do anything on removal (alloc btree):
-                */
-               old = deleted;
-       }
+       old = bch2_btree_path_peek_slot(path, &unpacked);
 
        if (old.k->type == new->k.type &&
            ((1U << old.k->type) & BTREE_TRIGGER_WANTS_OLD_AND_NEW)) {
@@ -1291,22 +1276,13 @@ void fs_usage_apply_warn(struct btree_trans *trans,
                pr_err("overlapping with");
 
                if (!i->cached) {
-                       struct btree_iter *copy = bch2_trans_copy_iter(trans, i->iter);
-                       struct bkey_s_c k;
-                       int ret;
-
-                       for_each_btree_key_continue(copy, 0, k, ret) {
-                               if (btree_node_type_is_extents(i->iter->btree_id)
-                                   ? bkey_cmp(i->k->k.p, bkey_start_pos(k.k)) <= 0
-                                   : bkey_cmp(i->k->k.p, k.k->p))
-                                       break;
+                       struct bkey u;
+                       struct bkey_s_c k = bch2_btree_path_peek_slot(i->path, &u);
 
-                               bch2_bkey_val_to_text(&PBUF(buf), c, k);
-                               pr_err("%s", buf);
-                       }
-                       bch2_trans_iter_put(trans, copy);
+                       bch2_bkey_val_to_text(&PBUF(buf), c, k);
+                       pr_err("%s", buf);
                } else {
-                       struct bkey_cached *ck = (void *) i->iter->l[0].b;
+                       struct bkey_cached *ck = (void *) i->path->l[0].b;
 
                        if (ck->valid) {
                                bch2_bkey_val_to_text(&PBUF(buf), c, bkey_i_to_s_c(ck->k));
@@ -1385,31 +1361,8 @@ void bch2_trans_fs_usage_apply(struct btree_trans *trans,
 
 /* trans_mark: */
 
-static struct btree_iter *trans_get_update(struct btree_trans *trans,
-                           enum btree_id btree_id, struct bpos pos,
-                           struct bkey_s_c *k)
-{
-       struct btree_insert_entry *i;
-
-       trans_for_each_update(trans, i)
-               if (i->iter->btree_id == btree_id &&
-                   (btree_node_type_is_extents(btree_id)
-                    ? bkey_cmp(pos, bkey_start_pos(&i->k->k)) >= 0 &&
-                      bkey_cmp(pos, i->k->k.p) < 0
-                    : !bkey_cmp(pos, i->iter->pos))) {
-                       *k = bkey_i_to_s_c(i->k);
-
-                       /* ugly hack.. */
-                       BUG_ON(btree_iter_live(trans, i->iter));
-                       trans->iters_live |= 1ULL << i->iter->idx;
-                       return i->iter;
-               }
-
-       return NULL;
-}
-
 static struct bkey_alloc_buf *
-bch2_trans_start_alloc_update(struct btree_trans *trans, struct btree_iter **_iter,
+bch2_trans_start_alloc_update(struct btree_trans *trans, struct btree_iter *iter,
                              const struct bch_extent_ptr *ptr,
                              struct bkey_alloc_unpacked *u)
 {
@@ -1417,36 +1370,34 @@ bch2_trans_start_alloc_update(struct btree_trans *trans, struct btree_iter **_it
        struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
        struct bpos pos = POS(ptr->dev, PTR_BUCKET_NR(ca, ptr));
        struct bucket *g;
-       struct btree_iter *iter;
-       struct bkey_s_c k;
        struct bkey_alloc_buf *a;
+       struct bkey_i *update;
        int ret;
 
        a = bch2_trans_kmalloc(trans, sizeof(struct bkey_alloc_buf));
        if (IS_ERR(a))
                return a;
 
-       iter = trans_get_update(trans, BTREE_ID_alloc, pos, &k);
-       if (iter) {
-               *u = bch2_alloc_unpack(k);
-       } else {
-               iter = bch2_trans_get_iter(trans, BTREE_ID_alloc, pos,
-                                          BTREE_ITER_CACHED|
-                                          BTREE_ITER_CACHED_NOFILL|
-                                          BTREE_ITER_INTENT);
-               ret = bch2_btree_iter_traverse(iter);
-               if (ret) {
-                       bch2_trans_iter_put(trans, iter);
-                       return ERR_PTR(ret);
-               }
+       bch2_trans_iter_init(trans, iter, BTREE_ID_alloc, pos,
+                            BTREE_ITER_CACHED|
+                            BTREE_ITER_CACHED_NOFILL|
+                            BTREE_ITER_INTENT);
+       ret = bch2_btree_iter_traverse(iter);
+       if (ret) {
+               bch2_trans_iter_exit(trans, iter);
+               return ERR_PTR(ret);
+       }
 
+       update = __bch2_btree_trans_peek_updates(iter);
+       if (update && !bpos_cmp(update->k.p, pos)) {
+               *u = bch2_alloc_unpack(bkey_i_to_s_c(update));
+       } else {
                percpu_down_read(&c->mark_lock);
                g = bucket(ca, pos.offset);
                *u = alloc_mem_to_key(iter, g, READ_ONCE(g->mark));
                percpu_up_read(&c->mark_lock);
        }
 
-       *_iter = iter;
        return a;
 }
 
@@ -1455,7 +1406,7 @@ static int bch2_trans_mark_pointer(struct btree_trans *trans,
                        s64 sectors, enum bch_data_type data_type)
 {
        struct bch_fs *c = trans->c;
-       struct btree_iter *iter;
+       struct btree_iter iter;
        struct bkey_alloc_unpacked u;
        struct bkey_alloc_buf *a;
        int ret;
@@ -1470,9 +1421,9 @@ static int bch2_trans_mark_pointer(struct btree_trans *trans,
                goto out;
 
        bch2_alloc_pack(c, a, u);
-       bch2_trans_update(trans, iter, &a->k, 0);
+       bch2_trans_update(trans, &iter, &a->k, 0);
 out:
-       bch2_trans_iter_put(trans, iter);
+       bch2_trans_iter_exit(trans, &iter);
        return ret;
 }
 
@@ -1481,16 +1432,16 @@ static int bch2_trans_mark_stripe_ptr(struct btree_trans *trans,
                        s64 sectors, enum bch_data_type data_type)
 {
        struct bch_fs *c = trans->c;
-       struct btree_iter *iter;
+       struct btree_iter iter;
        struct bkey_s_c k;
        struct bkey_i_stripe *s;
        struct bch_replicas_padded r;
        int ret = 0;
 
-       iter = bch2_trans_get_iter(trans, BTREE_ID_stripes, POS(0, p.ec.idx),
-                                  BTREE_ITER_INTENT|
-                                  BTREE_ITER_WITH_UPDATES);
-       k = bch2_btree_iter_peek_slot(iter);
+       bch2_trans_iter_init(trans, &iter, BTREE_ID_stripes, POS(0, p.ec.idx),
+                            BTREE_ITER_INTENT|
+                            BTREE_ITER_WITH_UPDATES);
+       k = bch2_btree_iter_peek_slot(&iter);
        ret = bkey_err(k);
        if (ret)
                goto err;
@@ -1521,13 +1472,13 @@ static int bch2_trans_mark_stripe_ptr(struct btree_trans *trans,
        stripe_blockcount_set(&s->v, p.ec.block,
                stripe_blockcount_get(&s->v, p.ec.block) +
                sectors);
-       bch2_trans_update(trans, iter, &s->k_i, 0);
+       bch2_trans_update(trans, &iter, &s->k_i, 0);
 
        bch2_bkey_to_replicas(&r.e, bkey_i_to_s_c(&s->k_i));
        r.e.data_type = data_type;
        update_replicas_list(trans, &r.e, sectors);
 err:
-       bch2_trans_iter_put(trans, iter);
+       bch2_trans_iter_exit(trans, &iter);
        return ret;
 }
 
@@ -1599,7 +1550,7 @@ static int bch2_trans_mark_stripe_alloc_ref(struct btree_trans *trans,
        struct bch_fs *c = trans->c;
        const struct bch_extent_ptr *ptr = &s.v->ptrs[idx];
        struct bkey_alloc_buf *a;
-       struct btree_iter *iter;
+       struct btree_iter iter;
        struct bkey_alloc_unpacked u;
        bool parity = idx >= s.v->nr_blocks - s.v->nr_redundant;
        int ret = 0;
@@ -1623,7 +1574,7 @@ static int bch2_trans_mark_stripe_alloc_ref(struct btree_trans *trans,
        if (!deleting) {
                if (bch2_fs_inconsistent_on(u.stripe && u.stripe != s.k->p.offset, c,
                                "bucket %llu:%llu gen %u: multiple stripes using same bucket (%u, %llu)",
-                               iter->pos.inode, iter->pos.offset, u.gen,
+                               iter.pos.inode, iter.pos.offset, u.gen,
                                u.stripe, s.k->p.offset)) {
                        ret = -EIO;
                        goto err;
@@ -1637,9 +1588,9 @@ static int bch2_trans_mark_stripe_alloc_ref(struct btree_trans *trans,
        }
 
        bch2_alloc_pack(c, a, u);
-       bch2_trans_update(trans, iter, &a->k, 0);
+       bch2_trans_update(trans, &iter, &a->k, 0);
 err:
-       bch2_trans_iter_put(trans, iter);
+       bch2_trans_iter_exit(trans, &iter);
        return ret;
 }
 
@@ -1744,17 +1695,17 @@ static int __bch2_trans_mark_reflink_p(struct btree_trans *trans,
                        u64 idx, unsigned flags)
 {
        struct bch_fs *c = trans->c;
-       struct btree_iter *iter;
+       struct btree_iter iter;
        struct bkey_s_c k;
        struct bkey_i *n;
        __le64 *refcount;
        int add = !(flags & BTREE_TRIGGER_OVERWRITE) ? 1 : -1;
        s64 ret;
 
-       iter = bch2_trans_get_iter(trans, BTREE_ID_reflink, POS(0, idx),
-                                  BTREE_ITER_INTENT|
-                                  BTREE_ITER_WITH_UPDATES);
-       k = bch2_btree_iter_peek_slot(iter);
+       bch2_trans_iter_init(trans, &iter, BTREE_ID_reflink, POS(0, idx),
+                            BTREE_ITER_INTENT|
+                            BTREE_ITER_WITH_UPDATES);
+       k = bch2_btree_iter_peek_slot(&iter);
        ret = bkey_err(k);
        if (ret)
                goto err;
@@ -1784,14 +1735,14 @@ static int __bch2_trans_mark_reflink_p(struct btree_trans *trans,
                set_bkey_val_u64s(&n->k, 0);
        }
 
-       bch2_btree_iter_set_pos_to_extent_start(iter);
-       ret = bch2_trans_update(trans, iter, n, 0);
+       bch2_btree_iter_set_pos_to_extent_start(&iter);
+       ret = bch2_trans_update(trans, &iter, n, 0);
        if (ret)
                goto err;
 
        ret = k.k->p.offset - idx;
 err:
-       bch2_trans_iter_put(trans, iter);
+       bch2_trans_iter_exit(trans, &iter);
        return ret;
 }
 
@@ -1843,39 +1794,23 @@ int bch2_trans_mark_key(struct btree_trans *trans, struct bkey_s_c old,
 }
 
 int bch2_trans_mark_update(struct btree_trans *trans,
-                          struct btree_iter *iter,
+                          struct btree_path *path,
                           struct bkey_i *new,
                           unsigned flags)
 {
        struct bkey             _deleted = KEY(0, 0, 0);
        struct bkey_s_c         deleted = (struct bkey_s_c) { &_deleted, NULL };
        struct bkey_s_c         old;
-       int iter_flags, ret;
+       struct bkey             unpacked;
+       int ret;
 
        if (unlikely(flags & BTREE_TRIGGER_NORUN))
                return 0;
 
-       if (!btree_node_type_needs_gc(iter->btree_id))
+       if (!btree_node_type_needs_gc(path->btree_id))
                return 0;
 
-
-       if (likely(!(iter->flags & BTREE_ITER_CACHED_NOFILL))) {
-               iter_flags = iter->flags & BTREE_ITER_WITH_UPDATES;
-               iter->flags &= ~BTREE_ITER_WITH_UPDATES;
-
-               old = bch2_btree_iter_peek_slot(iter);
-               iter->flags |= iter_flags;
-
-               ret = bkey_err(old);
-               if (ret)
-                       return ret;
-       } else {
-               /*
-                * If BTREE_ITER_CACHED_NOFILL was used, we better not be
-                * running triggers that do anything on removal (alloc btree):
-                */
-               old = deleted;
-       }
+       old = bch2_btree_path_peek_slot(path, &unpacked);
 
        if (old.k->type == new->k.type &&
            ((1U << old.k->type) & BTREE_TRIGGER_WANTS_OLD_AND_NEW)) {
@@ -1897,7 +1832,7 @@ static int __bch2_trans_mark_metadata_bucket(struct btree_trans *trans,
                                    unsigned sectors)
 {
        struct bch_fs *c = trans->c;
-       struct btree_iter *iter;
+       struct btree_iter iter;
        struct bkey_alloc_unpacked u;
        struct bkey_alloc_buf *a;
        struct bch_extent_ptr ptr = {
@@ -1920,7 +1855,7 @@ static int __bch2_trans_mark_metadata_bucket(struct btree_trans *trans,
                bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
                        "bucket %llu:%llu gen %u different types of data in same bucket: %s, %s\n"
                        "while marking %s",
-                       iter->pos.inode, iter->pos.offset, u.gen,
+                       iter.pos.inode, iter.pos.offset, u.gen,
                        bch2_data_types[u.data_type],
                        bch2_data_types[type],
                        bch2_data_types[type]);
@@ -1932,9 +1867,9 @@ static int __bch2_trans_mark_metadata_bucket(struct btree_trans *trans,
        u.dirty_sectors = sectors;
 
        bch2_alloc_pack(c, a, u);
-       bch2_trans_update(trans, iter, &a->k, 0);
+       bch2_trans_update(trans, &iter, &a->k, 0);
 out:
-       bch2_trans_iter_put(trans, iter);
+       bch2_trans_iter_exit(trans, &iter);
        return ret;
 }
 
index 3fb91ef606850016f7dcfaea843d94067d3688ab..4687fba2eed6da880ed79a258a6fbda8611e9e48 100644 (file)
@@ -228,13 +228,13 @@ void bch2_mark_metadata_bucket(struct bch_fs *, struct bch_dev *,
 
 int bch2_mark_key(struct bch_fs *, struct bkey_s_c, unsigned);
 
-int bch2_mark_update(struct btree_trans *, struct btree_iter *,
+int bch2_mark_update(struct btree_trans *, struct btree_path *,
                     struct bkey_i *, unsigned);
 
 int bch2_trans_mark_key(struct btree_trans *, struct bkey_s_c,
                        struct bkey_s_c, unsigned);
-int bch2_trans_mark_update(struct btree_trans *, struct btree_iter *iter,
-                          struct bkey_i *insert, unsigned);
+int bch2_trans_mark_update(struct btree_trans *, struct btree_path *,
+                          struct bkey_i *, unsigned);
 void bch2_trans_fs_usage_apply(struct btree_trans *, struct replicas_delta_list *);
 
 int bch2_trans_mark_metadata_bucket(struct btree_trans *, struct bch_dev *,
index 6a28de30ea3b25f5231f74d31df5277d975688d5..5ffb7f0a3bf6a731dd3c090167ecbde70d63cb5f 100644 (file)
@@ -243,7 +243,7 @@ static ssize_t bch2_read_btree(struct file *file, char __user *buf,
 {
        struct dump_iter *i = file->private_data;
        struct btree_trans trans;
-       struct btree_iter *iter;
+       struct btree_iter iter;
        struct bkey_s_c k;
        int err;
 
@@ -260,10 +260,10 @@ static ssize_t bch2_read_btree(struct file *file, char __user *buf,
 
        bch2_trans_init(&trans, i->c, 0, 0);
 
-       iter = bch2_trans_get_iter(&trans, i->id, i->from,
-                                  BTREE_ITER_PREFETCH|
-                                  BTREE_ITER_ALL_SNAPSHOTS);
-       k = bch2_btree_iter_peek(iter);
+       bch2_trans_iter_init(&trans, &iter, i->id, i->from,
+                            BTREE_ITER_PREFETCH|
+                            BTREE_ITER_ALL_SNAPSHOTS);
+       k = bch2_btree_iter_peek(&iter);
 
        while (k.k && !(err = bkey_err(k))) {
                bch2_bkey_val_to_text(&PBUF(i->buf), i->c, k);
@@ -272,8 +272,8 @@ static ssize_t bch2_read_btree(struct file *file, char __user *buf,
                i->buf[i->bytes] = '\n';
                i->bytes++;
 
-               k = bch2_btree_iter_next(iter);
-               i->from = iter->pos;
+               k = bch2_btree_iter_next(&iter);
+               i->from = iter.pos;
 
                err = flush_buf(i);
                if (err)
@@ -282,7 +282,7 @@ static ssize_t bch2_read_btree(struct file *file, char __user *buf,
                if (!i->size)
                        break;
        }
-       bch2_trans_iter_put(&trans, iter);
+       bch2_trans_iter_exit(&trans, &iter);
 
        bch2_trans_exit(&trans);
 
@@ -301,7 +301,7 @@ static ssize_t bch2_read_btree_formats(struct file *file, char __user *buf,
 {
        struct dump_iter *i = file->private_data;
        struct btree_trans trans;
-       struct btree_iter *iter;
+       struct btree_iter iter;
        struct btree *b;
        int err;
 
@@ -336,7 +336,7 @@ static ssize_t bch2_read_btree_formats(struct file *file, char __user *buf,
                if (!i->size)
                        break;
        }
-       bch2_trans_iter_put(&trans, iter);
+       bch2_trans_iter_exit(&trans, &iter);
 
        bch2_trans_exit(&trans);
 
@@ -355,7 +355,7 @@ static ssize_t bch2_read_bfloat_failed(struct file *file, char __user *buf,
 {
        struct dump_iter *i = file->private_data;
        struct btree_trans trans;
-       struct btree_iter *iter;
+       struct btree_iter iter;
        struct bkey_s_c k;
        struct btree *prev_node = NULL;
        int err;
@@ -373,11 +373,11 @@ static ssize_t bch2_read_bfloat_failed(struct file *file, char __user *buf,
 
        bch2_trans_init(&trans, i->c, 0, 0);
 
-       iter = bch2_trans_get_iter(&trans, i->id, i->from, BTREE_ITER_PREFETCH);
+       bch2_trans_iter_init(&trans, &iter, i->id, i->from, BTREE_ITER_PREFETCH);
 
-       while ((k = bch2_btree_iter_peek(iter)).k &&
+       while ((k = bch2_btree_iter_peek(&iter)).k &&
               !(err = bkey_err(k))) {
-               struct btree_iter_level *l = &iter->l[0];
+               struct btree_path_level *l = &iter.path->l[0];
                struct bkey_packed *_k =
                        bch2_btree_node_iter_peek(&l->iter, l->b);
 
@@ -396,8 +396,8 @@ static ssize_t bch2_read_bfloat_failed(struct file *file, char __user *buf,
                if (err)
                        break;
 
-               bch2_btree_iter_advance(iter);
-               i->from = iter->pos;
+               bch2_btree_iter_advance(&iter);
+               i->from = iter.pos;
 
                err = flush_buf(i);
                if (err)
index 02b29681f695e09c30dcf8d3cc50fbff1c938ce9..1d510f7728b6853bb89f6dd1bc60e25352f6273c 100644 (file)
@@ -183,7 +183,8 @@ int bch2_dirent_rename(struct btree_trans *trans,
                       const struct qstr *dst_name, u64 *dst_inum, u64 *dst_offset,
                       enum bch_rename_mode mode)
 {
-       struct btree_iter *src_iter = NULL, *dst_iter = NULL;
+       struct btree_iter src_iter = { NULL };
+       struct btree_iter dst_iter = { NULL };
        struct bkey_s_c old_src, old_dst;
        struct bkey_i_dirent *new_src = NULL, *new_dst = NULL;
        struct bpos dst_pos =
@@ -199,17 +200,16 @@ int bch2_dirent_rename(struct btree_trans *trans,
         * the target already exists - we're relying on the VFS
         * to do that check for us for correctness:
         */
-       dst_iter = mode == BCH_RENAME
-               ? bch2_hash_hole(trans, bch2_dirent_hash_desc,
+       ret = mode == BCH_RENAME
+               ? bch2_hash_hole(trans, &dst_iter, bch2_dirent_hash_desc,
                                 dst_hash, dst_dir, dst_name)
-               : bch2_hash_lookup(trans, bch2_dirent_hash_desc,
+               : bch2_hash_lookup(trans, &dst_iter, bch2_dirent_hash_desc,
                                   dst_hash, dst_dir, dst_name,
                                   BTREE_ITER_INTENT);
-       ret = PTR_ERR_OR_ZERO(dst_iter);
        if (ret)
                goto out;
 
-       old_dst = bch2_btree_iter_peek_slot(dst_iter);
+       old_dst = bch2_btree_iter_peek_slot(&dst_iter);
        ret = bkey_err(old_dst);
        if (ret)
                goto out;
@@ -217,17 +217,16 @@ int bch2_dirent_rename(struct btree_trans *trans,
        if (mode != BCH_RENAME)
                *dst_inum = le64_to_cpu(bkey_s_c_to_dirent(old_dst).v->d_inum);
        if (mode != BCH_RENAME_EXCHANGE)
-               *src_offset = dst_iter->pos.offset;
+               *src_offset = dst_iter.pos.offset;
 
        /* Lookup src: */
-       src_iter = bch2_hash_lookup(trans, bch2_dirent_hash_desc,
-                                   src_hash, src_dir, src_name,
-                                   BTREE_ITER_INTENT);
-       ret = PTR_ERR_OR_ZERO(src_iter);
+       ret = bch2_hash_lookup(trans, &src_iter, bch2_dirent_hash_desc,
+                              src_hash, src_dir, src_name,
+                              BTREE_ITER_INTENT);
        if (ret)
                goto out;
 
-       old_src = bch2_btree_iter_peek_slot(src_iter);
+       old_src = bch2_btree_iter_peek_slot(&src_iter);
        ret = bkey_err(old_src);
        if (ret)
                goto out;
@@ -241,7 +240,7 @@ int bch2_dirent_rename(struct btree_trans *trans,
                goto out;
 
        dirent_copy_target(new_dst, bkey_s_c_to_dirent(old_src));
-       new_dst->k.p = dst_iter->pos;
+       new_dst->k.p = dst_iter.pos;
 
        /* Create new src key: */
        if (mode == BCH_RENAME_EXCHANGE) {
@@ -251,7 +250,7 @@ int bch2_dirent_rename(struct btree_trans *trans,
                        goto out;
 
                dirent_copy_target(new_src, bkey_s_c_to_dirent(old_dst));
-               new_src->k.p = src_iter->pos;
+               new_src->k.p = src_iter.pos;
        } else {
                new_src = bch2_trans_kmalloc(trans, sizeof(struct bkey_i));
                ret = PTR_ERR_OR_ZERO(new_src);
@@ -259,10 +258,10 @@ int bch2_dirent_rename(struct btree_trans *trans,
                        goto out;
 
                bkey_init(&new_src->k);
-               new_src->k.p = src_iter->pos;
+               new_src->k.p = src_iter.pos;
 
-               if (bkey_cmp(dst_pos, src_iter->pos) <= 0 &&
-                   bkey_cmp(src_iter->pos, dst_iter->pos) < 0) {
+               if (bkey_cmp(dst_pos, src_iter.pos) <= 0 &&
+                   bkey_cmp(src_iter.pos, dst_iter.pos) < 0) {
                        /*
                         * We have a hash collision for the new dst key,
                         * and new_src - the key we're deleting - is between
@@ -275,8 +274,8 @@ int bch2_dirent_rename(struct btree_trans *trans,
                                 * If we're not overwriting, we can just insert
                                 * new_dst at the src position:
                                 */
-                               new_dst->k.p = src_iter->pos;
-                               bch2_trans_update(trans, src_iter,
+                               new_dst->k.p = src_iter.pos;
+                               bch2_trans_update(trans, &src_iter,
                                                  &new_dst->k_i, 0);
                                goto out_set_offset;
                        } else {
@@ -290,7 +289,7 @@ int bch2_dirent_rename(struct btree_trans *trans,
                } else {
                        /* Check if we need a whiteout to delete src: */
                        ret = bch2_hash_needs_whiteout(trans, bch2_dirent_hash_desc,
-                                                      src_hash, src_iter);
+                                                      src_hash, &src_iter);
                        if (ret < 0)
                                goto out;
 
@@ -299,15 +298,15 @@ int bch2_dirent_rename(struct btree_trans *trans,
                }
        }
 
-       bch2_trans_update(trans, src_iter, &new_src->k_i, 0);
-       bch2_trans_update(trans, dst_iter, &new_dst->k_i, 0);
+       bch2_trans_update(trans, &src_iter, &new_src->k_i, 0);
+       bch2_trans_update(trans, &dst_iter, &new_dst->k_i, 0);
 out_set_offset:
        if (mode == BCH_RENAME_EXCHANGE)
                *src_offset = new_src->k.p.offset;
        *dst_offset = new_dst->k.p.offset;
 out:
-       bch2_trans_iter_put(trans, src_iter);
-       bch2_trans_iter_put(trans, dst_iter);
+       bch2_trans_iter_exit(trans, &src_iter);
+       bch2_trans_iter_exit(trans, &dst_iter);
        return ret;
 }
 
@@ -319,12 +318,13 @@ int bch2_dirent_delete_at(struct btree_trans *trans,
                                   hash_info, iter);
 }
 
-struct btree_iter *
-__bch2_dirent_lookup_trans(struct btree_trans *trans, u64 dir_inum,
-                          const struct bch_hash_info *hash_info,
-                          const struct qstr *name, unsigned flags)
+int __bch2_dirent_lookup_trans(struct btree_trans *trans,
+                              struct btree_iter *iter,
+                              u64 dir_inum,
+                              const struct bch_hash_info *hash_info,
+                              const struct qstr *name, unsigned flags)
 {
-       return bch2_hash_lookup(trans, bch2_dirent_hash_desc,
+       return bch2_hash_lookup(trans, iter, bch2_dirent_hash_desc,
                                hash_info, dir_inum, name, flags);
 }
 
@@ -333,26 +333,25 @@ u64 bch2_dirent_lookup(struct bch_fs *c, u64 dir_inum,
                       const struct qstr *name)
 {
        struct btree_trans trans;
-       struct btree_iter *iter;
+       struct btree_iter iter;
        struct bkey_s_c k;
        u64 inum = 0;
        int ret = 0;
 
        bch2_trans_init(&trans, c, 0, 0);
 
-       iter = __bch2_dirent_lookup_trans(&trans, dir_inum,
-                                         hash_info, name, 0);
-       ret = PTR_ERR_OR_ZERO(iter);
+       ret = __bch2_dirent_lookup_trans(&trans, &iter, dir_inum,
+                                        hash_info, name, 0);
        if (ret)
                goto out;
 
-       k = bch2_btree_iter_peek_slot(iter);
+       k = bch2_btree_iter_peek_slot(&iter);
        ret = bkey_err(k);
        if (ret)
                goto out;
 
        inum = le64_to_cpu(bkey_s_c_to_dirent(k).v->d_inum);
-       bch2_trans_iter_put(&trans, iter);
+       bch2_trans_iter_exit(&trans, &iter);
 out:
        BUG_ON(ret == -EINTR);
        bch2_trans_exit(&trans);
@@ -361,7 +360,7 @@ out:
 
 int bch2_empty_dir_trans(struct btree_trans *trans, u64 dir_inum)
 {
-       struct btree_iter *iter;
+       struct btree_iter iter;
        struct bkey_s_c k;
        int ret;
 
@@ -375,7 +374,7 @@ int bch2_empty_dir_trans(struct btree_trans *trans, u64 dir_inum)
                        break;
                }
        }
-       bch2_trans_iter_put(trans, iter);
+       bch2_trans_iter_exit(trans, &iter);
 
        return ret;
 }
@@ -383,7 +382,7 @@ int bch2_empty_dir_trans(struct btree_trans *trans, u64 dir_inum)
 int bch2_readdir(struct bch_fs *c, u64 inum, struct dir_context *ctx)
 {
        struct btree_trans trans;
-       struct btree_iter *iter;
+       struct btree_iter iter;
        struct bkey_s_c k;
        struct bkey_s_c_dirent dirent;
        int ret;
@@ -412,7 +411,7 @@ int bch2_readdir(struct bch_fs *c, u64 inum, struct dir_context *ctx)
                        break;
                ctx->pos = dirent.k->p.offset + 1;
        }
-       bch2_trans_iter_put(&trans, iter);
+       bch2_trans_iter_exit(&trans, &iter);
 
        ret = bch2_trans_exit(&trans) ?: ret;
 
index e1d8ce377d43755cd5584edf1afa05d0ba65495e..c14f6029e1c98b7064440eab94564079f00a11bf 100644 (file)
@@ -50,8 +50,7 @@ int bch2_dirent_rename(struct btree_trans *,
                       const struct qstr *, u64 *, u64 *,
                       enum bch_rename_mode);
 
-struct btree_iter *
-__bch2_dirent_lookup_trans(struct btree_trans *, u64,
+int __bch2_dirent_lookup_trans(struct btree_trans *, struct btree_iter *, u64,
                           const struct bch_hash_info *,
                           const struct qstr *, unsigned);
 u64 bch2_dirent_lookup(struct bch_fs *, u64, const struct bch_hash_info *,
index 7ad74987757f9a1a055be172901cb778d3024693..2c538f9b54f8b1434ac39a20d66797965d8afa52 100644 (file)
@@ -429,13 +429,14 @@ static void ec_block_io(struct bch_fs *c, struct ec_stripe_buf *buf,
 static int get_stripe_key(struct bch_fs *c, u64 idx, struct ec_stripe_buf *stripe)
 {
        struct btree_trans trans;
-       struct btree_iter *iter;
+       struct btree_iter iter;
        struct bkey_s_c k;
        int ret;
 
        bch2_trans_init(&trans, c, 0, 0);
-       iter = bch2_trans_get_iter(&trans, BTREE_ID_stripes, POS(0, idx), BTREE_ITER_SLOTS);
-       k = bch2_btree_iter_peek_slot(iter);
+       bch2_trans_iter_init(&trans, &iter, BTREE_ID_stripes,
+                            POS(0, idx), BTREE_ITER_SLOTS);
+       k = bch2_btree_iter_peek_slot(&iter);
        ret = bkey_err(k);
        if (ret)
                goto err;
@@ -445,6 +446,7 @@ static int get_stripe_key(struct bch_fs *c, u64 idx, struct ec_stripe_buf *strip
        }
        bkey_reassemble(&stripe->key.k_i, k);
 err:
+       bch2_trans_iter_exit(&trans, &iter);
        bch2_trans_exit(&trans);
        return ret;
 }
@@ -704,7 +706,7 @@ static int ec_stripe_bkey_insert(struct bch_fs *c,
                                 struct disk_reservation *res)
 {
        struct btree_trans trans;
-       struct btree_iter *iter;
+       struct btree_iter iter;
        struct bkey_s_c k;
        struct bpos min_pos = POS(0, 1);
        struct bpos start_pos = bpos_max(min_pos, POS(0, c->ec_stripe_hint));
@@ -719,7 +721,7 @@ retry:
                if (bkey_cmp(k.k->p, POS(0, U32_MAX)) > 0) {
                        if (start_pos.offset) {
                                start_pos = min_pos;
-                               bch2_btree_iter_set_pos(iter, start_pos);
+                               bch2_btree_iter_set_pos(&iter, start_pos);
                                continue;
                        }
 
@@ -733,19 +735,19 @@ retry:
 
        goto err;
 found_slot:
-       start_pos = iter->pos;
+       start_pos = iter.pos;
 
-       ret = ec_stripe_mem_alloc(&trans, iter);
+       ret = ec_stripe_mem_alloc(&trans, &iter);
        if (ret)
                goto err;
 
-       stripe->k.p = iter->pos;
+       stripe->k.p = iter.pos;
 
-       ret   = bch2_trans_update(&trans, iter, &stripe->k_i, 0) ?:
+       ret   = bch2_trans_update(&trans, &iter, &stripe->k_i, 0) ?:
                bch2_trans_commit(&trans, res, NULL,
                                BTREE_INSERT_NOFAIL);
 err:
-       bch2_trans_iter_put(&trans, iter);
+       bch2_trans_iter_exit(&trans, &iter);
 
        if (ret == -EINTR)
                goto retry;
@@ -759,15 +761,15 @@ err:
 static int ec_stripe_bkey_update(struct btree_trans *trans,
                                 struct bkey_i_stripe *new)
 {
-       struct btree_iter *iter;
+       struct btree_iter iter;
        struct bkey_s_c k;
        const struct bch_stripe *existing;
        unsigned i;
        int ret;
 
-       iter = bch2_trans_get_iter(trans, BTREE_ID_stripes,
-                                  new->k.p, BTREE_ITER_INTENT);
-       k = bch2_btree_iter_peek_slot(iter);
+       bch2_trans_iter_init(trans, &iter, BTREE_ID_stripes,
+                            new->k.p, BTREE_ITER_INTENT);
+       k = bch2_btree_iter_peek_slot(&iter);
        ret = bkey_err(k);
        if (ret)
                goto err;
@@ -790,9 +792,9 @@ static int ec_stripe_bkey_update(struct btree_trans *trans,
                stripe_blockcount_set(&new->v, i,
                        stripe_blockcount_get(existing, i));
 
-       ret = bch2_trans_update(trans, iter, &new->k_i, 0);
+       ret = bch2_trans_update(trans, &iter, &new->k_i, 0);
 err:
-       bch2_trans_iter_put(trans, iter);
+       bch2_trans_iter_exit(trans, &iter);
        return ret;
 }
 
@@ -820,7 +822,7 @@ static int ec_stripe_update_ptrs(struct bch_fs *c,
                                 struct bkey *pos)
 {
        struct btree_trans trans;
-       struct btree_iter *iter;
+       struct btree_iter iter;
        struct bkey_s_c k;
        struct bkey_s_extent e;
        struct bkey_buf sk;
@@ -832,23 +834,23 @@ static int ec_stripe_update_ptrs(struct bch_fs *c,
 
        /* XXX this doesn't support the reflink btree */
 
-       iter = bch2_trans_get_iter(&trans, BTREE_ID_extents,
-                                  bkey_start_pos(pos),
-                                  BTREE_ITER_INTENT);
+       bch2_trans_iter_init(&trans, &iter, BTREE_ID_extents,
+                            bkey_start_pos(pos),
+                            BTREE_ITER_INTENT);
 
-       while ((k = bch2_btree_iter_peek(iter)).k &&
+       while ((k = bch2_btree_iter_peek(&iter)).k &&
               !(ret = bkey_err(k)) &&
               bkey_cmp(bkey_start_pos(k.k), pos->p) < 0) {
                struct bch_extent_ptr *ptr, *ec_ptr = NULL;
 
                if (extent_has_stripe_ptr(k, s->key.k.p.offset)) {
-                       bch2_btree_iter_advance(iter);
+                       bch2_btree_iter_advance(&iter);
                        continue;
                }
 
                block = bkey_matches_stripe(&s->key.v, k);
                if (block < 0) {
-                       bch2_btree_iter_advance(iter);
+                       bch2_btree_iter_advance(&iter);
                        continue;
                }
 
@@ -863,21 +865,21 @@ static int ec_stripe_update_ptrs(struct bch_fs *c,
 
                extent_stripe_ptr_add(e, s, ec_ptr, block);
 
-               bch2_btree_iter_set_pos(iter, bkey_start_pos(&sk.k->k));
+               bch2_btree_iter_set_pos(&iter, bkey_start_pos(&sk.k->k));
                next_pos = sk.k->k.p;
 
-               ret   = bch2_btree_iter_traverse(iter) ?:
-                       bch2_trans_update(&trans, iter, sk.k, 0) ?:
+               ret   = bch2_btree_iter_traverse(&iter) ?:
+                       bch2_trans_update(&trans, &iter, sk.k, 0) ?:
                        bch2_trans_commit(&trans, NULL, NULL,
                                        BTREE_INSERT_NOFAIL);
                if (!ret)
-                       bch2_btree_iter_set_pos(iter, next_pos);
+                       bch2_btree_iter_set_pos(&iter, next_pos);
                if (ret == -EINTR)
                        ret = 0;
                if (ret)
                        break;
        }
-       bch2_trans_iter_put(&trans, iter);
+       bch2_trans_iter_exit(&trans, &iter);
 
        bch2_trans_exit(&trans);
        bch2_bkey_buf_exit(&sk, c);
@@ -1598,7 +1600,7 @@ write:
 int bch2_stripes_write(struct bch_fs *c, unsigned flags)
 {
        struct btree_trans trans;
-       struct btree_iter *iter;
+       struct btree_iter iter;
        struct genradix_iter giter;
        struct bkey_i_stripe *new_key;
        struct stripe *m;
@@ -1609,8 +1611,8 @@ int bch2_stripes_write(struct bch_fs *c, unsigned flags)
 
        bch2_trans_init(&trans, c, 0, 0);
 
-       iter = bch2_trans_get_iter(&trans, BTREE_ID_stripes, POS_MIN,
-                                  BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
+       bch2_trans_iter_init(&trans, &iter, BTREE_ID_stripes, POS_MIN,
+                            BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
 
        genradix_for_each(&c->stripes[0], giter, m) {
                if (!m->alive)
@@ -1618,13 +1620,13 @@ int bch2_stripes_write(struct bch_fs *c, unsigned flags)
 
                ret = __bch2_trans_do(&trans, NULL, NULL,
                                      BTREE_INSERT_NOFAIL|flags,
-                       __bch2_stripe_write_key(&trans, iter, m,
+                       __bch2_stripe_write_key(&trans, &iter, m,
                                        giter.pos, new_key));
 
                if (ret)
                        break;
        }
-       bch2_trans_iter_put(&trans, iter);
+       bch2_trans_iter_exit(&trans, &iter);
 
        bch2_trans_exit(&trans);
 
@@ -1659,19 +1661,19 @@ int bch2_stripes_read(struct bch_fs *c)
 int bch2_ec_mem_alloc(struct bch_fs *c, bool gc)
 {
        struct btree_trans trans;
-       struct btree_iter *iter;
+       struct btree_iter iter;
        struct bkey_s_c k;
        size_t i, idx = 0;
        int ret = 0;
 
        bch2_trans_init(&trans, c, 0, 0);
-       iter = bch2_trans_get_iter(&trans, BTREE_ID_stripes, POS(0, U64_MAX), 0);
+       bch2_trans_iter_init(&trans, &iter, BTREE_ID_stripes, POS(0, U64_MAX), 0);
 
-       k = bch2_btree_iter_prev(iter);
+       k = bch2_btree_iter_prev(&iter);
        if (!IS_ERR_OR_NULL(k.k))
                idx = k.k->p.offset + 1;
 
-       bch2_trans_iter_put(&trans, iter);
+       bch2_trans_iter_exit(&trans, &iter);
        ret = bch2_trans_exit(&trans);
        if (ret)
                return ret;
index 93d55f46233f0917c734c595e8d2258fe7ebce54..9d959b053defd6c23a43188325db20ad145c3ac3 100644 (file)
@@ -58,7 +58,7 @@ static int count_iters_for_insert(struct btree_trans *trans,
                u64 idx = le64_to_cpu(p.v->idx);
                unsigned sectors = bpos_min(*end, p.k->p).offset -
                        bkey_start_offset(p.k);
-               struct btree_iter *iter;
+               struct btree_iter iter;
                struct bkey_s_c r_k;
 
                for_each_btree_key(trans, iter,
@@ -83,8 +83,8 @@ static int count_iters_for_insert(struct btree_trans *trans,
                                break;
                        }
                }
+               bch2_trans_iter_exit(trans, &iter);
 
-               bch2_trans_iter_put(trans, iter);
                break;
        }
        }
@@ -99,7 +99,7 @@ int bch2_extent_atomic_end(struct btree_trans *trans,
                           struct bkey_i *insert,
                           struct bpos *end)
 {
-       struct btree_iter *copy;
+       struct btree_iter copy;
        struct bkey_s_c k;
        unsigned nr_iters = 0;
        int ret;
@@ -118,7 +118,7 @@ int bch2_extent_atomic_end(struct btree_trans *trans,
        if (ret < 0)
                return ret;
 
-       copy = bch2_trans_copy_iter(trans, iter);
+       bch2_trans_copy_iter(&copy, iter);
 
        for_each_btree_key_continue(copy, 0, k, ret) {
                unsigned offset = 0;
@@ -149,7 +149,7 @@ int bch2_extent_atomic_end(struct btree_trans *trans,
                        break;
        }
 
-       bch2_trans_iter_put(trans, copy);
+       bch2_trans_iter_exit(trans, &copy);
        return ret < 0 ? ret : 0;
 }
 
index 6524703f3da4e4b1820185a4623e2b45692d0169..0190605711e531dcbf2c706408cd9636e8a0a82c 100644 (file)
@@ -615,7 +615,7 @@ bool bch2_check_range_allocated(struct bch_fs *c, struct bpos pos, u64 size,
                                unsigned nr_replicas, bool compressed)
 {
        struct btree_trans trans;
-       struct btree_iter *iter;
+       struct btree_iter iter;
        struct bpos end = pos;
        struct bkey_s_c k;
        bool ret = true;
@@ -636,7 +636,7 @@ bool bch2_check_range_allocated(struct bch_fs *c, struct bpos pos, u64 size,
                        break;
                }
        }
-       bch2_trans_iter_put(&trans, iter);
+       bch2_trans_iter_exit(&trans, &iter);
 
        bch2_trans_exit(&trans);
 
index 2189a11ccad8d42e89a3749f7b19836d76f9a053..a6617455ea1219999ec0a464e81f24e439cc7fdf 100644 (file)
@@ -19,16 +19,15 @@ int bch2_create_trans(struct btree_trans *trans, u64 dir_inum,
                      struct posix_acl *acl)
 {
        struct bch_fs *c = trans->c;
-       struct btree_iter *dir_iter = NULL;
-       struct btree_iter *inode_iter = NULL;
+       struct btree_iter dir_iter = { NULL };
+       struct btree_iter inode_iter = { NULL };
        struct bch_hash_info hash = bch2_hash_info_init(c, new_inode);
        u64 now = bch2_current_time(c);
        u64 cpu = raw_smp_processor_id();
        u64 dir_offset = 0;
        int ret;
 
-       dir_iter = bch2_inode_peek(trans, dir_u, dir_inum, BTREE_ITER_INTENT);
-       ret = PTR_ERR_OR_ZERO(dir_iter);
+       ret = bch2_inode_peek(trans, &dir_iter, dir_u, dir_inum, BTREE_ITER_INTENT);
        if (ret)
                goto err;
 
@@ -37,8 +36,7 @@ int bch2_create_trans(struct btree_trans *trans, u64 dir_inum,
        if (!name)
                new_inode->bi_flags |= BCH_INODE_UNLINKED;
 
-       inode_iter = bch2_inode_create(trans, new_inode, U32_MAX, cpu);
-       ret = PTR_ERR_OR_ZERO(inode_iter);
+       ret = bch2_inode_create(trans, &inode_iter, new_inode, U32_MAX, cpu);
        if (ret)
                goto err;
 
@@ -63,7 +61,7 @@ int bch2_create_trans(struct btree_trans *trans, u64 dir_inum,
                if (S_ISDIR(new_inode->bi_mode))
                        dir_u->bi_nlink++;
 
-               ret = bch2_inode_write(trans, dir_iter, dir_u);
+               ret = bch2_inode_write(trans, &dir_iter, dir_u);
                if (ret)
                        goto err;
 
@@ -82,14 +80,14 @@ int bch2_create_trans(struct btree_trans *trans, u64 dir_inum,
        }
 
        /* XXX use bch2_btree_iter_set_snapshot() */
-       inode_iter->snapshot = U32_MAX;
-       bch2_btree_iter_set_pos(inode_iter, SPOS(0, new_inode->bi_inum, U32_MAX));
+       inode_iter.snapshot = U32_MAX;
+       bch2_btree_iter_set_pos(&inode_iter, SPOS(0, new_inode->bi_inum, U32_MAX));
 
-       ret   = bch2_btree_iter_traverse(inode_iter) ?:
-               bch2_inode_write(trans, inode_iter, new_inode);
+       ret   = bch2_btree_iter_traverse(&inode_iter) ?:
+               bch2_inode_write(trans, &inode_iter, new_inode);
 err:
-       bch2_trans_iter_put(trans, inode_iter);
-       bch2_trans_iter_put(trans, dir_iter);
+       bch2_trans_iter_exit(trans, &inode_iter);
+       bch2_trans_iter_exit(trans, &dir_iter);
        return ret;
 }
 
@@ -98,22 +96,21 @@ int bch2_link_trans(struct btree_trans *trans, u64 dir_inum,
                    struct bch_inode_unpacked *inode_u, const struct qstr *name)
 {
        struct bch_fs *c = trans->c;
-       struct btree_iter *dir_iter = NULL, *inode_iter = NULL;
+       struct btree_iter dir_iter = { NULL };
+       struct btree_iter inode_iter = { NULL };
        struct bch_hash_info dir_hash;
        u64 now = bch2_current_time(c);
        u64 dir_offset = 0;
        int ret;
 
-       inode_iter = bch2_inode_peek(trans, inode_u, inum, BTREE_ITER_INTENT);
-       ret = PTR_ERR_OR_ZERO(inode_iter);
+       ret = bch2_inode_peek(trans, &inode_iter, inode_u, inum, BTREE_ITER_INTENT);
        if (ret)
                goto err;
 
        inode_u->bi_ctime = now;
        bch2_inode_nlink_inc(inode_u);
 
-       dir_iter = bch2_inode_peek(trans, dir_u, dir_inum, 0);
-       ret = PTR_ERR_OR_ZERO(dir_iter);
+       ret = bch2_inode_peek(trans, &dir_iter, dir_u, dir_inum, 0);
        if (ret)
                goto err;
 
@@ -133,11 +130,11 @@ int bch2_link_trans(struct btree_trans *trans, u64 dir_inum,
                inode_u->bi_dir_offset  = dir_offset;
        }
 
-       ret =   bch2_inode_write(trans, dir_iter, dir_u) ?:
-               bch2_inode_write(trans, inode_iter, inode_u);
+       ret =   bch2_inode_write(trans, &dir_iter, dir_u) ?:
+               bch2_inode_write(trans, &inode_iter, inode_u);
 err:
-       bch2_trans_iter_put(trans, dir_iter);
-       bch2_trans_iter_put(trans, inode_iter);
+       bch2_trans_iter_exit(trans, &dir_iter);
+       bch2_trans_iter_exit(trans, &inode_iter);
        return ret;
 }
 
@@ -147,35 +144,33 @@ int bch2_unlink_trans(struct btree_trans *trans,
                      const struct qstr *name)
 {
        struct bch_fs *c = trans->c;
-       struct btree_iter *dir_iter = NULL, *dirent_iter = NULL,
-                         *inode_iter = NULL;
+       struct btree_iter dir_iter = { NULL };
+       struct btree_iter dirent_iter = { NULL };
+       struct btree_iter inode_iter = { NULL };
        struct bch_hash_info dir_hash;
        u64 inum, now = bch2_current_time(c);
        struct bkey_s_c k;
        int ret;
 
-       dir_iter = bch2_inode_peek(trans, dir_u, dir_inum, BTREE_ITER_INTENT);
-       ret = PTR_ERR_OR_ZERO(dir_iter);
+       ret = bch2_inode_peek(trans, &dir_iter, dir_u, dir_inum, BTREE_ITER_INTENT);
        if (ret)
                goto err;
 
        dir_hash = bch2_hash_info_init(c, dir_u);
 
-       dirent_iter = __bch2_dirent_lookup_trans(trans, dir_inum, &dir_hash,
-                                                name, BTREE_ITER_INTENT);
-       ret = PTR_ERR_OR_ZERO(dirent_iter);
+       ret = __bch2_dirent_lookup_trans(trans, &dirent_iter, dir_inum, &dir_hash,
+                                        name, BTREE_ITER_INTENT);
        if (ret)
                goto err;
 
-       k = bch2_btree_iter_peek_slot(dirent_iter);
+       k = bch2_btree_iter_peek_slot(&dirent_iter);
        ret = bkey_err(k);
        if (ret)
                goto err;
 
        inum = le64_to_cpu(bkey_s_c_to_dirent(k).v->d_inum);
 
-       inode_iter = bch2_inode_peek(trans, inode_u, inum, BTREE_ITER_INTENT);
-       ret = PTR_ERR_OR_ZERO(inode_iter);
+       ret = bch2_inode_peek(trans, &inode_iter, inode_u, inum, BTREE_ITER_INTENT);
        if (ret)
                goto err;
 
@@ -192,13 +187,13 @@ int bch2_unlink_trans(struct btree_trans *trans,
        ret =   (S_ISDIR(inode_u->bi_mode)
                 ? bch2_empty_dir_trans(trans, inum)
                 : 0) ?:
-               bch2_dirent_delete_at(trans, &dir_hash, dirent_iter) ?:
-               bch2_inode_write(trans, dir_iter, dir_u) ?:
-               bch2_inode_write(trans, inode_iter, inode_u);
+               bch2_dirent_delete_at(trans, &dir_hash, &dirent_iter) ?:
+               bch2_inode_write(trans, &dir_iter, dir_u) ?:
+               bch2_inode_write(trans, &inode_iter, inode_u);
 err:
-       bch2_trans_iter_put(trans, inode_iter);
-       bch2_trans_iter_put(trans, dirent_iter);
-       bch2_trans_iter_put(trans, dir_iter);
+       bch2_trans_iter_exit(trans, &inode_iter);
+       bch2_trans_iter_exit(trans, &dirent_iter);
+       bch2_trans_iter_exit(trans, &dir_iter);
        return ret;
 }
 
@@ -236,25 +231,25 @@ int bch2_rename_trans(struct btree_trans *trans,
                      enum bch_rename_mode mode)
 {
        struct bch_fs *c = trans->c;
-       struct btree_iter *src_dir_iter = NULL, *dst_dir_iter = NULL;
-       struct btree_iter *src_inode_iter = NULL, *dst_inode_iter = NULL;
+       struct btree_iter src_dir_iter = { NULL };
+       struct btree_iter dst_dir_iter = { NULL };
+       struct btree_iter src_inode_iter = { NULL };
+       struct btree_iter dst_inode_iter = { NULL };
        struct bch_hash_info src_hash, dst_hash;
        u64 src_inode, src_offset, dst_inode, dst_offset;
        u64 now = bch2_current_time(c);
        int ret;
 
-       src_dir_iter = bch2_inode_peek(trans, src_dir_u, src_dir,
-                                      BTREE_ITER_INTENT);
-       ret = PTR_ERR_OR_ZERO(src_dir_iter);
+       ret = bch2_inode_peek(trans, &src_dir_iter, src_dir_u, src_dir,
+                             BTREE_ITER_INTENT);
        if (ret)
                goto err;
 
        src_hash = bch2_hash_info_init(c, src_dir_u);
 
        if (dst_dir != src_dir) {
-               dst_dir_iter = bch2_inode_peek(trans, dst_dir_u, dst_dir,
-                                              BTREE_ITER_INTENT);
-               ret = PTR_ERR_OR_ZERO(dst_dir_iter);
+               ret = bch2_inode_peek(trans, &dst_dir_iter, dst_dir_u, dst_dir,
+                                     BTREE_ITER_INTENT);
                if (ret)
                        goto err;
 
@@ -273,16 +268,14 @@ int bch2_rename_trans(struct btree_trans *trans,
        if (ret)
                goto err;
 
-       src_inode_iter = bch2_inode_peek(trans, src_inode_u, src_inode,
-                                        BTREE_ITER_INTENT);
-       ret = PTR_ERR_OR_ZERO(src_inode_iter);
+       ret = bch2_inode_peek(trans, &src_inode_iter, src_inode_u, src_inode,
+                             BTREE_ITER_INTENT);
        if (ret)
                goto err;
 
        if (dst_inode) {
-               dst_inode_iter = bch2_inode_peek(trans, dst_inode_u, dst_inode,
-                                                BTREE_ITER_INTENT);
-               ret = PTR_ERR_OR_ZERO(dst_inode_iter);
+               ret = bch2_inode_peek(trans, &dst_inode_iter, dst_inode_u, dst_inode,
+                                     BTREE_ITER_INTENT);
                if (ret)
                        goto err;
        }
@@ -357,18 +350,18 @@ int bch2_rename_trans(struct btree_trans *trans,
        if (dst_inode)
                dst_inode_u->bi_ctime   = now;
 
-       ret =   bch2_inode_write(trans, src_dir_iter, src_dir_u) ?:
+       ret =   bch2_inode_write(trans, &src_dir_iter, src_dir_u) ?:
                (src_dir != dst_dir
-                ? bch2_inode_write(trans, dst_dir_iter, dst_dir_u)
+                ? bch2_inode_write(trans, &dst_dir_iter, dst_dir_u)
                 : 0 ) ?:
-               bch2_inode_write(trans, src_inode_iter, src_inode_u) ?:
+               bch2_inode_write(trans, &src_inode_iter, src_inode_u) ?:
                (dst_inode
-                ? bch2_inode_write(trans, dst_inode_iter, dst_inode_u)
+                ? bch2_inode_write(trans, &dst_inode_iter, dst_inode_u)
                 : 0 );
 err:
-       bch2_trans_iter_put(trans, dst_inode_iter);
-       bch2_trans_iter_put(trans, src_inode_iter);
-       bch2_trans_iter_put(trans, dst_dir_iter);
-       bch2_trans_iter_put(trans, src_dir_iter);
+       bch2_trans_iter_exit(trans, &dst_inode_iter);
+       bch2_trans_iter_exit(trans, &src_inode_iter);
+       bch2_trans_iter_exit(trans, &dst_dir_iter);
+       bch2_trans_iter_exit(trans, &src_dir_iter);
        return ret;
 }
index 251029c33164cb822867d5c073003f8ae7527248..909db2f104cdc74dfd382e0fb8b84ce80a8e7dab 100644 (file)
@@ -867,7 +867,7 @@ void bch2_readahead(struct readahead_control *ractl)
        struct bch_fs *c = inode->v.i_sb->s_fs_info;
        struct bch_io_opts opts = io_opts(c, &inode->ei_inode);
        struct btree_trans trans;
-       struct btree_iter *iter;
+       struct btree_iter iter;
        struct page *page;
        struct readpages_iter readpages_iter;
        int ret;
@@ -876,8 +876,8 @@ void bch2_readahead(struct readahead_control *ractl)
        BUG_ON(ret);
 
        bch2_trans_init(&trans, c, 0, 0);
-       iter = bch2_trans_get_iter(&trans, BTREE_ID_extents, POS_MIN,
-                                  BTREE_ITER_SLOTS);
+       bch2_trans_iter_init(&trans, &iter, BTREE_ID_extents, POS_MIN,
+                            BTREE_ITER_SLOTS);
 
        bch2_pagecache_add_get(&inode->ei_pagecache_lock);
 
@@ -898,13 +898,13 @@ void bch2_readahead(struct readahead_control *ractl)
                rbio->bio.bi_end_io = bch2_readpages_end_io;
                BUG_ON(!bio_add_page(&rbio->bio, page, PAGE_SIZE, 0));
 
-               bchfs_read(&trans, iter, rbio, inode->v.i_ino,
+               bchfs_read(&trans, &iter, rbio, inode->v.i_ino,
                           &readpages_iter);
        }
 
        bch2_pagecache_add_put(&inode->ei_pagecache_lock);
 
-       bch2_trans_iter_put(&trans, iter);
+       bch2_trans_iter_exit(&trans, &iter);
        bch2_trans_exit(&trans);
        kfree(readpages_iter.pages);
 }
@@ -913,7 +913,7 @@ static void __bchfs_readpage(struct bch_fs *c, struct bch_read_bio *rbio,
                             u64 inum, struct page *page)
 {
        struct btree_trans trans;
-       struct btree_iter *iter;
+       struct btree_iter iter;
 
        bch2_page_state_create(page, __GFP_NOFAIL);
 
@@ -923,12 +923,12 @@ static void __bchfs_readpage(struct bch_fs *c, struct bch_read_bio *rbio,
        BUG_ON(!bio_add_page(&rbio->bio, page, PAGE_SIZE, 0));
 
        bch2_trans_init(&trans, c, 0, 0);
-       iter = bch2_trans_get_iter(&trans, BTREE_ID_extents, POS_MIN,
-                                  BTREE_ITER_SLOTS);
+       bch2_trans_iter_init(&trans, &iter, BTREE_ID_extents, POS_MIN,
+                            BTREE_ITER_SLOTS);
 
-       bchfs_read(&trans, iter, rbio, inum, NULL);
+       bchfs_read(&trans, &iter, rbio, inum, NULL);
 
-       bch2_trans_iter_put(&trans, iter);
+       bch2_trans_iter_exit(&trans, &iter);
        bch2_trans_exit(&trans);
 }
 
@@ -2146,7 +2146,7 @@ static inline int range_has_data(struct bch_fs *c,
                                  struct bpos end)
 {
        struct btree_trans trans;
-       struct btree_iter *iter;
+       struct btree_iter iter;
        struct bkey_s_c k;
        int ret = 0;
 
@@ -2161,7 +2161,7 @@ static inline int range_has_data(struct bch_fs *c,
                        break;
                }
        }
-       bch2_trans_iter_put(&trans, iter);
+       bch2_trans_iter_exit(&trans, &iter);
 
        return bch2_trans_exit(&trans) ?: ret;
 }
@@ -2471,7 +2471,7 @@ static long bchfs_fcollapse_finsert(struct bch_inode_info *inode,
        struct address_space *mapping = inode->v.i_mapping;
        struct bkey_buf copy;
        struct btree_trans trans;
-       struct btree_iter *src, *dst, *del;
+       struct btree_iter src, dst, del;
        loff_t shift, new_size;
        u64 src_start;
        int ret = 0;
@@ -2536,11 +2536,11 @@ static long bchfs_fcollapse_finsert(struct bch_inode_info *inode,
 
        bch2_bkey_buf_init(&copy);
        bch2_trans_init(&trans, c, BTREE_ITER_MAX, 1024);
-       src = bch2_trans_get_iter(&trans, BTREE_ID_extents,
+       bch2_trans_iter_init(&trans, &src, BTREE_ID_extents,
                        POS(inode->v.i_ino, src_start >> 9),
                        BTREE_ITER_INTENT);
-       dst = bch2_trans_copy_iter(&trans, src);
-       del = bch2_trans_copy_iter(&trans, src);
+       bch2_trans_copy_iter(&dst, &src);
+       bch2_trans_copy_iter(&del, &src);
 
        while (ret == 0 || ret == -EINTR) {
                struct disk_reservation disk_res =
@@ -2555,8 +2555,8 @@ static long bchfs_fcollapse_finsert(struct bch_inode_info *inode,
                bch2_trans_begin(&trans);
 
                k = insert
-                       ? bch2_btree_iter_peek_prev(src)
-                       : bch2_btree_iter_peek(src);
+                       ? bch2_btree_iter_peek_prev(&src)
+                       : bch2_btree_iter_peek(&src);
                if ((ret = bkey_err(k)))
                        continue;
 
@@ -2574,9 +2574,9 @@ reassemble:
                        bch2_cut_front(move_pos, copy.k);
 
                copy.k->k.p.offset += shift >> 9;
-               bch2_btree_iter_set_pos(dst, bkey_start_pos(&copy.k->k));
+               bch2_btree_iter_set_pos(&dst, bkey_start_pos(&copy.k->k));
 
-               ret = bch2_extent_atomic_end(&trans, dst, copy.k, &atomic_end);
+               ret = bch2_extent_atomic_end(&trans, &dst, copy.k, &atomic_end);
                if (ret)
                        continue;
 
@@ -2594,7 +2594,7 @@ reassemble:
                delete.k.p = copy.k->k.p;
                delete.k.size = copy.k->k.size;
                delete.k.p.offset -= shift >> 9;
-               bch2_btree_iter_set_pos(del, bkey_start_pos(&delete.k));
+               bch2_btree_iter_set_pos(&del, bkey_start_pos(&delete.k));
 
                next_pos = insert ? bkey_start_pos(&delete.k) : delete.k.p;
 
@@ -2615,20 +2615,20 @@ reassemble:
                        BUG_ON(ret);
                }
 
-               ret =   bch2_btree_iter_traverse(del) ?:
-                       bch2_trans_update(&trans, del, &delete, trigger_flags) ?:
-                       bch2_trans_update(&trans, dst, copy.k, trigger_flags) ?:
+               ret =   bch2_btree_iter_traverse(&del) ?:
+                       bch2_trans_update(&trans, &del, &delete, trigger_flags) ?:
+                       bch2_trans_update(&trans, &dst, copy.k, trigger_flags) ?:
                        bch2_trans_commit(&trans, &disk_res,
                                          &inode->ei_journal_seq,
                                          BTREE_INSERT_NOFAIL);
                bch2_disk_reservation_put(c, &disk_res);
 
                if (!ret)
-                       bch2_btree_iter_set_pos(src, next_pos);
+                       bch2_btree_iter_set_pos(&src, next_pos);
        }
-       bch2_trans_iter_put(&trans, del);
-       bch2_trans_iter_put(&trans, dst);
-       bch2_trans_iter_put(&trans, src);
+       bch2_trans_iter_exit(&trans, &del);
+       bch2_trans_iter_exit(&trans, &dst);
+       bch2_trans_iter_exit(&trans, &src);
        bch2_trans_exit(&trans);
        bch2_bkey_buf_exit(&copy, c);
 
@@ -2653,18 +2653,18 @@ static int __bchfs_fallocate(struct bch_inode_info *inode, int mode,
 {
        struct bch_fs *c = inode->v.i_sb->s_fs_info;
        struct btree_trans trans;
-       struct btree_iter *iter;
+       struct btree_iter iter;
        struct bpos end_pos = POS(inode->v.i_ino, end_sector);
        unsigned replicas = io_opts(c, &inode->ei_inode).data_replicas;
        int ret = 0;
 
        bch2_trans_init(&trans, c, BTREE_ITER_MAX, 512);
 
-       iter = bch2_trans_get_iter(&trans, BTREE_ID_extents,
+       bch2_trans_iter_init(&trans, &iter, BTREE_ID_extents,
                        POS(inode->v.i_ino, start_sector),
                        BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
 
-       while (!ret && bkey_cmp(iter->pos, end_pos) < 0) {
+       while (!ret && bkey_cmp(iter.pos, end_pos) < 0) {
                s64 i_sectors_delta = 0;
                struct disk_reservation disk_res = { 0 };
                struct quota_res quota_res = { 0 };
@@ -2674,20 +2674,20 @@ static int __bchfs_fallocate(struct bch_inode_info *inode, int mode,
 
                bch2_trans_begin(&trans);
 
-               k = bch2_btree_iter_peek_slot(iter);
+               k = bch2_btree_iter_peek_slot(&iter);
                if ((ret = bkey_err(k)))
                        goto bkey_err;
 
                /* already reserved */
                if (k.k->type == KEY_TYPE_reservation &&
                    bkey_s_c_to_reservation(k).v->nr_replicas >= replicas) {
-                       bch2_btree_iter_advance(iter);
+                       bch2_btree_iter_advance(&iter);
                        continue;
                }
 
                if (bkey_extent_is_data(k.k) &&
                    !(mode & FALLOC_FL_ZERO_RANGE)) {
-                       bch2_btree_iter_advance(iter);
+                       bch2_btree_iter_advance(&iter);
                        continue;
                }
 
@@ -2696,7 +2696,7 @@ static int __bchfs_fallocate(struct bch_inode_info *inode, int mode,
                reservation.k.p         = k.k->p;
                reservation.k.size      = k.k->size;
 
-               bch2_cut_front(iter->pos,       &reservation.k_i);
+               bch2_cut_front(iter.pos,        &reservation.k_i);
                bch2_cut_back(end_pos,          &reservation.k_i);
 
                sectors = reservation.k.size;
@@ -2720,7 +2720,7 @@ static int __bchfs_fallocate(struct bch_inode_info *inode, int mode,
                        reservation.v.nr_replicas = disk_res.nr_replicas;
                }
 
-               ret = bch2_extent_update(&trans, iter, &reservation.k_i,
+               ret = bch2_extent_update(&trans, &iter, &reservation.k_i,
                                &disk_res, &inode->ei_journal_seq,
                                0, &i_sectors_delta, true);
                i_sectors_acct(c, inode, &quota_res, i_sectors_delta);
@@ -2730,7 +2730,7 @@ bkey_err:
                if (ret == -EINTR)
                        ret = 0;
        }
-       bch2_trans_iter_put(&trans, iter);
+       bch2_trans_iter_exit(&trans, &iter);
        bch2_trans_exit(&trans);
        return ret;
 }
@@ -3010,7 +3010,7 @@ static loff_t bch2_seek_data(struct file *file, u64 offset)
        struct bch_inode_info *inode = file_bch_inode(file);
        struct bch_fs *c = inode->v.i_sb->s_fs_info;
        struct btree_trans trans;
-       struct btree_iter *iter;
+       struct btree_iter iter;
        struct bkey_s_c k;
        u64 isize, next_data = MAX_LFS_FILESIZE;
        int ret;
@@ -3031,7 +3031,7 @@ static loff_t bch2_seek_data(struct file *file, u64 offset)
                } else if (k.k->p.offset >> 9 > isize)
                        break;
        }
-       bch2_trans_iter_put(&trans, iter);
+       bch2_trans_iter_exit(&trans, &iter);
 
        ret = bch2_trans_exit(&trans) ?: ret;
        if (ret)
@@ -3106,7 +3106,7 @@ static loff_t bch2_seek_hole(struct file *file, u64 offset)
        struct bch_inode_info *inode = file_bch_inode(file);
        struct bch_fs *c = inode->v.i_sb->s_fs_info;
        struct btree_trans trans;
-       struct btree_iter *iter;
+       struct btree_iter iter;
        struct bkey_s_c k;
        u64 isize, next_hole = MAX_LFS_FILESIZE;
        int ret;
@@ -3135,7 +3135,7 @@ static loff_t bch2_seek_hole(struct file *file, u64 offset)
                        offset = max(offset, bkey_start_offset(k.k) << 9);
                }
        }
-       bch2_trans_iter_put(&trans, iter);
+       bch2_trans_iter_exit(&trans, &iter);
 
        ret = bch2_trans_exit(&trans) ?: ret;
        if (ret)
index f6c058540712f34dda2d53e4320e4eb1f13a6c51..570ae826ebb52eb44bd97c3d2f85fa7b1d68ced6 100644 (file)
@@ -142,7 +142,7 @@ int __must_check bch2_write_inode(struct bch_fs *c,
                                  void *p, unsigned fields)
 {
        struct btree_trans trans;
-       struct btree_iter *iter;
+       struct btree_iter iter = { NULL };
        struct bch_inode_unpacked inode_u;
        int ret;
 
@@ -150,11 +150,10 @@ int __must_check bch2_write_inode(struct bch_fs *c,
 retry:
        bch2_trans_begin(&trans);
 
-       iter = bch2_inode_peek(&trans, &inode_u, inode->v.i_ino,
-                              BTREE_ITER_INTENT);
-       ret   = PTR_ERR_OR_ZERO(iter) ?:
+       ret   = bch2_inode_peek(&trans, &iter, &inode_u, inode->v.i_ino,
+                               BTREE_ITER_INTENT) ?:
                (set ? set(inode, &inode_u, p) : 0) ?:
-               bch2_inode_write(&trans, iter, &inode_u) ?:
+               bch2_inode_write(&trans, &iter, &inode_u) ?:
                bch2_trans_commit(&trans, NULL,
                                  &inode->ei_journal_seq,
                                  BTREE_INSERT_NOFAIL);
@@ -166,7 +165,7 @@ retry:
        if (!ret)
                bch2_inode_update_after_write(c, inode, &inode_u, fields);
 
-       bch2_trans_iter_put(&trans, iter);
+       bch2_trans_iter_exit(&trans, &iter);
 
        if (ret == -EINTR)
                goto retry;
@@ -687,7 +686,7 @@ int bch2_setattr_nonsize(struct mnt_idmap *idmap,
        struct bch_fs *c = inode->v.i_sb->s_fs_info;
        struct bch_qid qid;
        struct btree_trans trans;
-       struct btree_iter *inode_iter;
+       struct btree_iter inode_iter = { NULL };
        struct bch_inode_unpacked inode_u;
        struct posix_acl *acl = NULL;
        int ret;
@@ -713,9 +712,8 @@ retry:
        kfree(acl);
        acl = NULL;
 
-       inode_iter = bch2_inode_peek(&trans, &inode_u, inode->v.i_ino,
-                                    BTREE_ITER_INTENT);
-       ret = PTR_ERR_OR_ZERO(inode_iter);
+       ret = bch2_inode_peek(&trans, &inode_iter, &inode_u, inode->v.i_ino,
+                             BTREE_ITER_INTENT);
        if (ret)
                goto btree_err;
 
@@ -727,12 +725,12 @@ retry:
                        goto btree_err;
        }
 
-       ret =   bch2_inode_write(&trans, inode_iter, &inode_u) ?:
+       ret =   bch2_inode_write(&trans, &inode_iter, &inode_u) ?:
                bch2_trans_commit(&trans, NULL,
                                  &inode->ei_journal_seq,
                                  BTREE_INSERT_NOFAIL);
 btree_err:
-       bch2_trans_iter_put(&trans, inode_iter);
+       bch2_trans_iter_exit(&trans, &inode_iter);
 
        if (ret == -EINTR)
                goto retry;
@@ -883,7 +881,7 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info,
        struct bch_fs *c = vinode->i_sb->s_fs_info;
        struct bch_inode_info *ei = to_bch_ei(vinode);
        struct btree_trans trans;
-       struct btree_iter *iter;
+       struct btree_iter iter;
        struct bkey_s_c k;
        struct bkey_buf cur, prev;
        struct bpos end = POS(ei->v.i_ino, (start + len) >> 9);
@@ -902,23 +900,23 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info,
        bch2_bkey_buf_init(&prev);
        bch2_trans_init(&trans, c, 0, 0);
 
-       iter = bch2_trans_get_iter(&trans, BTREE_ID_extents,
-                                  POS(ei->v.i_ino, start >> 9), 0);
+       bch2_trans_iter_init(&trans, &iter, BTREE_ID_extents,
+                            POS(ei->v.i_ino, start >> 9), 0);
 retry:
        bch2_trans_begin(&trans);
 
-       while ((k = bch2_btree_iter_peek(iter)).k &&
+       while ((k = bch2_btree_iter_peek(&iter)).k &&
               !(ret = bkey_err(k)) &&
-              bkey_cmp(iter->pos, end) < 0) {
+              bkey_cmp(iter.pos, end) < 0) {
                enum btree_id data_btree = BTREE_ID_extents;
 
                if (!bkey_extent_is_data(k.k) &&
                    k.k->type != KEY_TYPE_reservation) {
-                       bch2_btree_iter_advance(iter);
+                       bch2_btree_iter_advance(&iter);
                        continue;
                }
 
-               offset_into_extent      = iter->pos.offset -
+               offset_into_extent      = iter.pos.offset -
                        bkey_start_offset(k.k);
                sectors                 = k.k->size - offset_into_extent;
 
@@ -939,7 +937,7 @@ retry:
                                   offset_into_extent),
                               cur.k);
                bch2_key_resize(&cur.k->k, sectors);
-               cur.k->k.p = iter->pos;
+               cur.k->k.p = iter.pos;
                cur.k->k.p.offset += cur.k->k.size;
 
                if (have_extent) {
@@ -952,8 +950,8 @@ retry:
                bkey_copy(prev.k, cur.k);
                have_extent = true;
 
-               bch2_btree_iter_set_pos(iter,
-                       POS(iter->pos.inode, iter->pos.offset + sectors));
+               bch2_btree_iter_set_pos(&iter,
+                       POS(iter.pos.inode, iter.pos.offset + sectors));
        }
 
        if (ret == -EINTR)
@@ -963,7 +961,7 @@ retry:
                ret = bch2_fill_extent(c, info, bkey_i_to_s_c(prev.k),
                                       FIEMAP_EXTENT_LAST);
 
-       bch2_trans_iter_put(&trans, iter);
+       bch2_trans_iter_exit(&trans, &iter);
        ret = bch2_trans_exit(&trans) ?: ret;
        bch2_bkey_buf_exit(&cur, c);
        bch2_bkey_buf_exit(&prev, c);
index 36eba46d566e351c1b50dec637bc02f1ff2a7e17..eb979e79eaac9c226ff90c70ef912395d4506448 100644 (file)
@@ -19,7 +19,7 @@
 
 static s64 bch2_count_inode_sectors(struct btree_trans *trans, u64 inum)
 {
-       struct btree_iter *iter;
+       struct btree_iter iter;
        struct bkey_s_c k;
        u64 sectors = 0;
        int ret;
@@ -33,7 +33,7 @@ static s64 bch2_count_inode_sectors(struct btree_trans *trans, u64 inum)
                        sectors += k.k->size;
        }
 
-       bch2_trans_iter_free(trans, iter);
+       bch2_trans_iter_exit(trans, &iter);
 
        return ret ?: sectors;
 }
@@ -42,24 +42,24 @@ static int __lookup_inode(struct btree_trans *trans, u64 inode_nr,
                          struct bch_inode_unpacked *inode,
                          u32 *snapshot)
 {
-       struct btree_iter *iter;
+       struct btree_iter iter;
        struct bkey_s_c k;
        int ret;
 
-       iter = bch2_trans_get_iter(trans, BTREE_ID_inodes,
-                       POS(0, inode_nr), 0);
-       k = bch2_btree_iter_peek_slot(iter);
+       bch2_trans_iter_init(trans, &iter, BTREE_ID_inodes,
+                            POS(0, inode_nr), 0);
+       k = bch2_btree_iter_peek_slot(&iter);
        ret = bkey_err(k);
        if (ret)
                goto err;
 
        if (snapshot)
-               *snapshot = iter->pos.snapshot;
+               *snapshot = iter.pos.snapshot;
        ret = k.k->type == KEY_TYPE_inode
                ? bch2_inode_unpack(bkey_s_c_to_inode(k), inode)
                : -ENOENT;
 err:
-       bch2_trans_iter_free(trans, iter);
+       bch2_trans_iter_exit(trans, &iter);
        return ret;
 }
 
@@ -74,13 +74,16 @@ static int __write_inode(struct btree_trans *trans,
                         struct bch_inode_unpacked *inode,
                         u32 snapshot)
 {
-       struct btree_iter *inode_iter =
-               bch2_trans_get_iter(trans, BTREE_ID_inodes,
-                                   SPOS(0, inode->bi_inum, snapshot),
-                                   BTREE_ITER_INTENT);
-       int ret = bch2_btree_iter_traverse(inode_iter) ?:
-               bch2_inode_write(trans, inode_iter, inode);
-       bch2_trans_iter_put(trans, inode_iter);
+       struct btree_iter iter;
+       int ret;
+
+       bch2_trans_iter_init(trans, &iter, BTREE_ID_inodes,
+                           SPOS(0, inode->bi_inum, snapshot),
+                           BTREE_ITER_INTENT);
+
+       ret   = bch2_btree_iter_traverse(&iter) ?:
+               bch2_inode_write(trans, &iter, inode);
+       bch2_trans_iter_exit(trans, &iter);
        return ret;
 }
 
@@ -100,7 +103,7 @@ static int write_inode(struct btree_trans *trans,
 static int __remove_dirent(struct btree_trans *trans, struct bpos pos)
 {
        struct bch_fs *c = trans->c;
-       struct btree_iter *iter;
+       struct btree_iter iter;
        struct bch_inode_unpacked dir_inode;
        struct bch_hash_info dir_hash_info;
        int ret;
@@ -111,11 +114,11 @@ static int __remove_dirent(struct btree_trans *trans, struct bpos pos)
 
        dir_hash_info = bch2_hash_info_init(c, &dir_inode);
 
-       iter = bch2_trans_get_iter(trans, BTREE_ID_dirents, pos, BTREE_ITER_INTENT);
+       bch2_trans_iter_init(trans, &iter, BTREE_ID_dirents, pos, BTREE_ITER_INTENT);
 
        ret = bch2_hash_delete_at(trans, bch2_dirent_hash_desc,
-                                 &dir_hash_info, iter);
-       bch2_trans_iter_put(trans, iter);
+                                 &dir_hash_info, &iter);
+       bch2_trans_iter_exit(trans, &iter);
        return ret;
 }
 
@@ -230,13 +233,13 @@ static int reattach_inode(struct btree_trans *trans,
 static int remove_backpointer(struct btree_trans *trans,
                              struct bch_inode_unpacked *inode)
 {
-       struct btree_iter *iter;
+       struct btree_iter iter;
        struct bkey_s_c k;
        int ret;
 
-       iter = bch2_trans_get_iter(trans, BTREE_ID_dirents,
-                                  POS(inode->bi_dir, inode->bi_dir_offset), 0);
-       k = bch2_btree_iter_peek_slot(iter);
+       bch2_trans_iter_init(trans, &iter, BTREE_ID_dirents,
+                            POS(inode->bi_dir, inode->bi_dir_offset), 0);
+       k = bch2_btree_iter_peek_slot(&iter);
        ret = bkey_err(k);
        if (ret)
                goto out;
@@ -247,7 +250,7 @@ static int remove_backpointer(struct btree_trans *trans,
 
        ret = remove_dirent(trans, k.k->p);
 out:
-       bch2_trans_iter_put(trans, iter);
+       bch2_trans_iter_exit(trans, &iter);
        return ret;
 }
 
@@ -343,7 +346,7 @@ static int hash_check_key(struct btree_trans *trans,
                          struct btree_iter *k_iter, struct bkey_s_c hash_k)
 {
        struct bch_fs *c = trans->c;
-       struct btree_iter *iter = NULL;
+       struct btree_iter iter = { NULL };
        char buf[200];
        struct bkey_s_c k;
        u64 hash;
@@ -378,12 +381,12 @@ static int hash_check_key(struct btree_trans *trans,
                }
 
                if (bkey_deleted(k.k)) {
-                       bch2_trans_iter_free(trans, iter);
+                       bch2_trans_iter_exit(trans, &iter);
                        goto bad_hash;
                }
 
        }
-       bch2_trans_iter_free(trans, iter);
+       bch2_trans_iter_exit(trans, &iter);
        return ret;
 bad_hash:
        if (fsck_err(c, "hash table key at wrong offset: btree %u inode %llu offset %llu, "
@@ -513,7 +516,7 @@ noinline_for_stack
 static int check_inodes(struct bch_fs *c, bool full)
 {
        struct btree_trans trans;
-       struct btree_iter *iter;
+       struct btree_iter iter;
        struct bkey_s_c k;
        struct bkey_s_c_inode inode;
        int ret;
@@ -532,12 +535,12 @@ static int check_inodes(struct bch_fs *c, bool full)
                    (inode.v->bi_flags & (BCH_INODE_I_SIZE_DIRTY|
                                          BCH_INODE_I_SECTORS_DIRTY|
                                          BCH_INODE_UNLINKED))) {
-                       ret = check_inode(&trans, iter, inode);
+                       ret = check_inode(&trans, &iter, inode);
                        if (ret)
                                break;
                }
        }
-       bch2_trans_iter_put(&trans, iter);
+       bch2_trans_iter_exit(&trans, &iter);
 
        BUG_ON(ret == -EINTR);
 
@@ -547,7 +550,7 @@ static int check_inodes(struct bch_fs *c, bool full)
 static int fix_overlapping_extent(struct btree_trans *trans,
                                       struct bkey_s_c k, struct bpos cut_at)
 {
-       struct btree_iter *iter;
+       struct btree_iter iter;
        struct bkey_i *u;
        int ret;
 
@@ -567,29 +570,29 @@ static int fix_overlapping_extent(struct btree_trans *trans,
         * assume things about extent overwrites - we should be running the
         * triggers manually here
         */
-       iter = bch2_trans_get_iter(trans, BTREE_ID_extents, u->k.p,
-                                  BTREE_ITER_INTENT|BTREE_ITER_NOT_EXTENTS);
+       bch2_trans_iter_init(trans, &iter, BTREE_ID_extents, u->k.p,
+                            BTREE_ITER_INTENT|BTREE_ITER_NOT_EXTENTS);
 
-       BUG_ON(iter->flags & BTREE_ITER_IS_EXTENTS);
-       ret   = bch2_btree_iter_traverse(iter) ?:
-               bch2_trans_update(trans, iter, u, BTREE_TRIGGER_NORUN) ?:
+       BUG_ON(iter.flags & BTREE_ITER_IS_EXTENTS);
+       ret   = bch2_btree_iter_traverse(&iter) ?:
+               bch2_trans_update(trans, &iter, u, BTREE_TRIGGER_NORUN) ?:
                bch2_trans_commit(trans, NULL, NULL,
                                  BTREE_INSERT_NOFAIL|
                                  BTREE_INSERT_LAZY_RW);
-       bch2_trans_iter_put(trans, iter);
+       bch2_trans_iter_exit(trans, &iter);
        return ret;
 }
 
 static int inode_backpointer_exists(struct btree_trans *trans,
                                    struct bch_inode_unpacked *inode)
 {
-       struct btree_iter *iter;
+       struct btree_iter iter;
        struct bkey_s_c k;
        int ret;
 
-       iter = bch2_trans_get_iter(trans, BTREE_ID_dirents,
-                                  POS(inode->bi_dir, inode->bi_dir_offset), 0);
-       k = bch2_btree_iter_peek_slot(iter);
+       bch2_trans_iter_init(trans, &iter, BTREE_ID_dirents,
+                            POS(inode->bi_dir, inode->bi_dir_offset), 0);
+       k = bch2_btree_iter_peek_slot(&iter);
        ret = bkey_err(k);
        if (ret)
                goto out;
@@ -598,7 +601,7 @@ static int inode_backpointer_exists(struct btree_trans *trans,
 
        ret = le64_to_cpu(bkey_s_c_to_dirent(k).v->d_inum) == inode->bi_inum;
 out:
-       bch2_trans_iter_free(trans, iter);
+       bch2_trans_iter_exit(trans, &iter);
        return ret;
 }
 
@@ -618,7 +621,7 @@ static int check_extents(struct bch_fs *c)
 {
        struct inode_walker w = inode_walker_init();
        struct btree_trans trans;
-       struct btree_iter *iter;
+       struct btree_iter iter;
        struct bkey_s_c k;
        struct bkey_buf prev;
        u64 i_sectors = 0;
@@ -630,12 +633,12 @@ static int check_extents(struct bch_fs *c)
 
        bch_verbose(c, "checking extents");
 
-       iter = bch2_trans_get_iter(&trans, BTREE_ID_extents,
-                                  POS(BCACHEFS_ROOT_INO, 0),
-                                  BTREE_ITER_INTENT|
-                                  BTREE_ITER_PREFETCH);
+       bch2_trans_iter_init(&trans, &iter, BTREE_ID_extents,
+                            POS(BCACHEFS_ROOT_INO, 0),
+                            BTREE_ITER_INTENT|
+                            BTREE_ITER_PREFETCH);
 retry:
-       while ((k = bch2_btree_iter_peek(iter)).k &&
+       while ((k = bch2_btree_iter_peek(&iter)).k &&
               !(ret = bkey_err(k))) {
                if (w.have_inode &&
                    w.cur_inum != k.k->p.inode &&
@@ -700,12 +703,12 @@ retry:
                        i_sectors += k.k->size;
                bch2_bkey_buf_reassemble(&prev, c, k);
 
-               bch2_btree_iter_advance(iter);
+               bch2_btree_iter_advance(&iter);
        }
 fsck_err:
        if (ret == -EINTR)
                goto retry;
-       bch2_trans_iter_put(&trans, iter);
+       bch2_trans_iter_exit(&trans, &iter);
        bch2_bkey_buf_exit(&prev, c);
        return bch2_trans_exit(&trans) ?: ret;
 }
@@ -890,7 +893,7 @@ static int check_dirents(struct bch_fs *c)
        struct inode_walker w = inode_walker_init();
        struct bch_hash_info hash_info;
        struct btree_trans trans;
-       struct btree_iter *iter;
+       struct btree_iter iter;
        unsigned nr_subdirs = 0;
        int ret = 0;
 
@@ -898,18 +901,18 @@ static int check_dirents(struct bch_fs *c)
 
        bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
 
-       iter = bch2_trans_get_iter(&trans, BTREE_ID_dirents,
-                                  POS(BCACHEFS_ROOT_INO, 0),
-                                  BTREE_ITER_INTENT|
-                                  BTREE_ITER_PREFETCH);
+       bch2_trans_iter_init(&trans, &iter, BTREE_ID_dirents,
+                            POS(BCACHEFS_ROOT_INO, 0),
+                            BTREE_ITER_INTENT|
+                            BTREE_ITER_PREFETCH);
 
        do {
                ret = lockrestart_do(&trans,
-                               check_dirent(&trans, iter, &hash_info, &w, &nr_subdirs));
+                               check_dirent(&trans, &iter, &hash_info, &w, &nr_subdirs));
                if (ret)
                        break;
-       } while (bch2_btree_iter_advance(iter));
-       bch2_trans_iter_put(&trans, iter);
+       } while (bch2_btree_iter_advance(&iter));
+       bch2_trans_iter_exit(&trans, &iter);
 
        return bch2_trans_exit(&trans) ?: ret;
 }
@@ -923,7 +926,7 @@ static int check_xattrs(struct bch_fs *c)
        struct inode_walker w = inode_walker_init();
        struct bch_hash_info hash_info;
        struct btree_trans trans;
-       struct btree_iter *iter;
+       struct btree_iter iter;
        struct bkey_s_c k;
        int ret = 0;
 
@@ -931,12 +934,12 @@ static int check_xattrs(struct bch_fs *c)
 
        bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
 
-       iter = bch2_trans_get_iter(&trans, BTREE_ID_xattrs,
-                                  POS(BCACHEFS_ROOT_INO, 0),
-                                  BTREE_ITER_INTENT|
-                                  BTREE_ITER_PREFETCH);
+       bch2_trans_iter_init(&trans, &iter, BTREE_ID_xattrs,
+                            POS(BCACHEFS_ROOT_INO, 0),
+                            BTREE_ITER_INTENT|
+                            BTREE_ITER_PREFETCH);
 retry:
-       while ((k = bch2_btree_iter_peek(iter)).k &&
+       while ((k = bch2_btree_iter_peek(&iter)).k &&
               !(ret = bkey_err(k))) {
                ret = walk_inode(&trans, &w, k.k->p.inode);
                if (ret)
@@ -945,7 +948,7 @@ retry:
                if (fsck_err_on(!w.have_inode, c,
                                "xattr for missing inode %llu",
                                k.k->p.inode)) {
-                       ret = bch2_btree_delete_at(&trans, iter, 0);
+                       ret = bch2_btree_delete_at(&trans, &iter, 0);
                        if (ret)
                                break;
                        continue;
@@ -955,17 +958,17 @@ retry:
                        hash_info = bch2_hash_info_init(c, &w.inode);
 
                ret = hash_check_key(&trans, bch2_xattr_hash_desc,
-                                    &hash_info, iter, k);
+                                    &hash_info, &iter, k);
                if (ret)
                        break;
 
-               bch2_btree_iter_advance(iter);
+               bch2_btree_iter_advance(&iter);
        }
 fsck_err:
        if (ret == -EINTR)
                goto retry;
 
-       bch2_trans_iter_put(&trans, iter);
+       bch2_trans_iter_exit(&trans, &iter);
        return bch2_trans_exit(&trans) ?: ret;
 }
 
@@ -1114,7 +1117,7 @@ fsck_err:
 static int check_directory_structure(struct bch_fs *c)
 {
        struct btree_trans trans;
-       struct btree_iter *iter;
+       struct btree_iter iter;
        struct bkey_s_c k;
        struct bch_inode_unpacked u;
        struct pathbuf path = { 0, 0, NULL };
@@ -1139,7 +1142,7 @@ static int check_directory_structure(struct bch_fs *c)
                if (ret)
                        break;
        }
-       bch2_trans_iter_put(&trans, iter);
+       bch2_trans_iter_exit(&trans, &iter);
 
        BUG_ON(ret == -EINTR);
 
@@ -1215,7 +1218,7 @@ static int check_nlinks_find_hardlinks(struct bch_fs *c,
                                       u64 start, u64 *end)
 {
        struct btree_trans trans;
-       struct btree_iter *iter;
+       struct btree_iter iter;
        struct bkey_s_c k;
        struct bkey_s_c_inode inode;
        struct bch_inode_unpacked u;
@@ -1253,7 +1256,7 @@ static int check_nlinks_find_hardlinks(struct bch_fs *c,
                }
 
        }
-       bch2_trans_iter_put(&trans, iter);
+       bch2_trans_iter_exit(&trans, &iter);
        bch2_trans_exit(&trans);
 
        if (ret)
@@ -1267,7 +1270,7 @@ static int check_nlinks_walk_dirents(struct bch_fs *c, struct nlink_table *links
                                     u64 range_start, u64 range_end)
 {
        struct btree_trans trans;
-       struct btree_iter *iter;
+       struct btree_iter iter;
        struct bkey_s_c k;
        struct bkey_s_c_dirent d;
        int ret;
@@ -1289,7 +1292,7 @@ static int check_nlinks_walk_dirents(struct bch_fs *c, struct nlink_table *links
 
                bch2_trans_cond_resched(&trans);
        }
-       bch2_trans_iter_put(&trans, iter);
+       bch2_trans_iter_exit(&trans, &iter);
 
        ret = bch2_trans_exit(&trans) ?: ret;
        if (ret)
@@ -1304,7 +1307,7 @@ static int check_nlinks_update_hardlinks(struct bch_fs *c,
                               u64 range_start, u64 range_end)
 {
        struct btree_trans trans;
-       struct btree_iter *iter;
+       struct btree_iter iter;
        struct bkey_s_c k;
        struct bkey_s_c_inode inode;
        struct bch_inode_unpacked u;
@@ -1346,14 +1349,14 @@ static int check_nlinks_update_hardlinks(struct bch_fs *c,
                        ret = __bch2_trans_do(&trans, NULL, NULL,
                                              BTREE_INSERT_NOFAIL|
                                              BTREE_INSERT_LAZY_RW,
-                                             bch2_btree_iter_traverse(iter) ?:
-                                       bch2_inode_write(&trans, iter, &u));
+                                             bch2_btree_iter_traverse(&iter) ?:
+                                       bch2_inode_write(&trans, &iter, &u));
                        if (ret)
                                bch_err(c, "error in fsck: error %i updating inode", ret);
                }
        }
 fsck_err:
-       bch2_trans_iter_put(&trans, iter);
+       bch2_trans_iter_exit(&trans, &iter);
        bch2_trans_exit(&trans);
 
        if (ret)
index 63f50891594cf06b10676eb0aa763f8eeedf0de4..2b653ee03f4fa9be4e6033dc8aa205a395ca4fce 100644 (file)
@@ -292,18 +292,18 @@ int bch2_inode_unpack(struct bkey_s_c_inode inode,
        return 0;
 }
 
-struct btree_iter *bch2_inode_peek(struct btree_trans *trans,
-                                  struct bch_inode_unpacked *inode,
-                                  u64 inum, unsigned flags)
+int bch2_inode_peek(struct btree_trans *trans,
+                   struct btree_iter *iter,
+                   struct bch_inode_unpacked *inode,
+                   u64 inum, unsigned flags)
 {
-       struct btree_iter *iter;
        struct bkey_s_c k;
        int ret;
 
        if (trans->c->opts.inodes_use_key_cache)
                flags |= BTREE_ITER_CACHED;
 
-       iter = bch2_trans_get_iter(trans, BTREE_ID_inodes, POS(0, inum), flags);
+       bch2_trans_iter_init(trans, iter, BTREE_ID_inodes, POS(0, inum), flags);
        k = bch2_btree_iter_peek_slot(iter);
        ret = bkey_err(k);
        if (ret)
@@ -317,10 +317,10 @@ struct btree_iter *bch2_inode_peek(struct btree_trans *trans,
        if (ret)
                goto err;
 
-       return iter;
+       return 0;
 err:
-       bch2_trans_iter_put(trans, iter);
-       return ERR_PTR(ret);
+       bch2_trans_iter_exit(trans, iter);
+       return ret;
 }
 
 int bch2_inode_write(struct btree_trans *trans,
@@ -482,12 +482,12 @@ static inline u32 bkey_generation(struct bkey_s_c k)
        }
 }
 
-struct btree_iter *bch2_inode_create(struct btree_trans *trans,
-                                    struct bch_inode_unpacked *inode_u,
-                                    u32 snapshot, u64 cpu)
+int bch2_inode_create(struct btree_trans *trans,
+                     struct btree_iter *iter,
+                     struct bch_inode_unpacked *inode_u,
+                     u32 snapshot, u64 cpu)
 {
        struct bch_fs *c = trans->c;
-       struct btree_iter *iter = NULL;
        struct bkey_s_c k;
        u64 min, max, start, pos, *hint;
        int ret = 0;
@@ -513,9 +513,9 @@ struct btree_iter *bch2_inode_create(struct btree_trans *trans,
                start = min;
 
        pos = start;
-       iter = bch2_trans_get_iter(trans, BTREE_ID_inodes, POS(0, pos),
-                                  BTREE_ITER_ALL_SNAPSHOTS|
-                                  BTREE_ITER_INTENT);
+       bch2_trans_iter_init(trans, iter, BTREE_ID_inodes, POS(0, pos),
+                            BTREE_ITER_ALL_SNAPSHOTS|
+                            BTREE_ITER_INTENT);
 again:
        while ((k = bch2_btree_iter_peek(iter)).k &&
               !(ret = bkey_err(k)) &&
@@ -553,8 +553,8 @@ again:
                ret = -ENOSPC;
 
        if (ret) {
-               bch2_trans_iter_put(trans, iter);
-               return ERR_PTR(ret);
+               bch2_trans_iter_exit(trans, iter);
+               return ret;
        }
 
        /* Retry from start */
@@ -566,8 +566,8 @@ found_slot:
        k = bch2_btree_iter_peek_slot(iter);
        ret = bkey_err(k);
        if (ret) {
-               bch2_trans_iter_put(trans, iter);
-               return ERR_PTR(ret);
+               bch2_trans_iter_exit(trans, iter);
+               return ret;
        }
 
        /* We may have raced while the iterator wasn't pointing at pos: */
@@ -578,13 +578,13 @@ found_slot:
        *hint                   = k.k->p.offset;
        inode_u->bi_inum        = k.k->p.offset;
        inode_u->bi_generation  = bkey_generation(k);
-       return iter;
+       return 0;
 }
 
 int bch2_inode_rm(struct bch_fs *c, u64 inode_nr, bool cached)
 {
        struct btree_trans trans;
-       struct btree_iter *iter = NULL;
+       struct btree_iter iter = { NULL };
        struct bkey_i_inode_generation delete;
        struct bpos start = POS(inode_nr, 0);
        struct bpos end = POS(inode_nr + 1, 0);
@@ -617,9 +617,9 @@ int bch2_inode_rm(struct bch_fs *c, u64 inode_nr, bool cached)
 retry:
        bch2_trans_begin(&trans);
 
-       iter = bch2_trans_get_iter(&trans, BTREE_ID_inodes,
-                                  POS(0, inode_nr), iter_flags);
-       k = bch2_btree_iter_peek_slot(iter);
+       bch2_trans_iter_init(&trans, &iter, BTREE_ID_inodes,
+                            POS(0, inode_nr), iter_flags);
+       k = bch2_btree_iter_peek_slot(&iter);
 
        ret = bkey_err(k);
        if (ret)
@@ -636,14 +636,14 @@ retry:
        bch2_inode_unpack(bkey_s_c_to_inode(k), &inode_u);
 
        bkey_inode_generation_init(&delete.k_i);
-       delete.k.p = iter->pos;
+       delete.k.p = iter.pos;
        delete.v.bi_generation = cpu_to_le32(inode_u.bi_generation + 1);
 
-       ret   = bch2_trans_update(&trans, iter, &delete.k_i, 0) ?:
+       ret   = bch2_trans_update(&trans, &iter, &delete.k_i, 0) ?:
                bch2_trans_commit(&trans, NULL, NULL,
                                BTREE_INSERT_NOFAIL);
 err:
-       bch2_trans_iter_put(&trans, iter);
+       bch2_trans_iter_exit(&trans, &iter);
        if (ret == -EINTR)
                goto retry;
 
@@ -654,12 +654,11 @@ err:
 static int bch2_inode_find_by_inum_trans(struct btree_trans *trans, u64 inode_nr,
                                         struct bch_inode_unpacked *inode)
 {
-       struct btree_iter *iter;
+       struct btree_iter iter = { NULL };
        int ret;
 
-       iter = bch2_inode_peek(trans, inode, inode_nr, 0);
-       ret = PTR_ERR_OR_ZERO(iter);
-       bch2_trans_iter_put(trans, iter);
+       ret = bch2_inode_peek(trans, &iter, inode, inode_nr, 0);
+       bch2_trans_iter_exit(trans, &iter);
        return ret;
 }
 
index d67af4f56f05b1f7e366e5f4bb036efd54751ffe..25bef104ebcc5a692a6dcc54c2b105697fadb294 100644 (file)
@@ -57,8 +57,8 @@ int bch2_inode_unpack(struct bkey_s_c_inode, struct bch_inode_unpacked *);
 
 void bch2_inode_unpacked_to_text(struct printbuf *, struct bch_inode_unpacked *);
 
-struct btree_iter *bch2_inode_peek(struct btree_trans *,
-                       struct bch_inode_unpacked *, u64, unsigned);
+int bch2_inode_peek(struct btree_trans *, struct btree_iter *,
+                   struct bch_inode_unpacked *, u64, unsigned);
 int bch2_inode_write(struct btree_trans *, struct btree_iter *,
                     struct bch_inode_unpacked *);
 
@@ -71,8 +71,8 @@ void bch2_inode_init(struct bch_fs *, struct bch_inode_unpacked *,
                     uid_t, gid_t, umode_t, dev_t,
                     struct bch_inode_unpacked *);
 
-struct btree_iter *bch2_inode_create(struct btree_trans *,
-                                    struct bch_inode_unpacked *, u32, u64);
+int bch2_inode_create(struct btree_trans *, struct btree_iter *,
+                     struct bch_inode_unpacked *, u32, u64);
 
 int bch2_inode_rm(struct bch_fs *, u64, bool);
 
index 34295419190dc0c9e75a5b49dbff34746407e2f2..bee33258c0d80e8ccda7c3c086ce5d49d7f3ac7a 100644 (file)
@@ -202,7 +202,7 @@ int bch2_sum_sector_overwrites(struct btree_trans *trans,
                               s64 *disk_sectors_delta)
 {
        struct bch_fs *c = trans->c;
-       struct btree_iter *iter;
+       struct btree_iter iter;
        struct bkey_s_c old;
        unsigned new_replicas = bch2_bkey_replicas(c, bkey_i_to_s_c(new));
        bool new_compressed = bch2_bkey_sectors_compressed(bkey_i_to_s_c(new));
@@ -213,7 +213,7 @@ int bch2_sum_sector_overwrites(struct btree_trans *trans,
        *i_sectors_delta        = 0;
        *disk_sectors_delta     = 0;
 
-       iter = bch2_trans_copy_iter(trans, extent_iter);
+       bch2_trans_copy_iter(&iter, extent_iter);
 
        for_each_btree_key_continue(iter, BTREE_ITER_SLOTS, old, ret) {
                s64 sectors = min(new->k.p.offset, old.k->p.offset) -
@@ -246,7 +246,7 @@ int bch2_sum_sector_overwrites(struct btree_trans *trans,
                         * less:
                         */
                        if (!bkey_cmp(old.k->p, new->k.p)) {
-                               old = bch2_btree_iter_next(iter);
+                               old = bch2_btree_iter_next(&iter);
                                ret = bkey_err(old);
                                if (ret)
                                        break;
@@ -261,7 +261,7 @@ int bch2_sum_sector_overwrites(struct btree_trans *trans,
                }
        }
 
-       bch2_trans_iter_put(trans, iter);
+       bch2_trans_iter_exit(trans, &iter);
        return ret;
 }
 
@@ -311,12 +311,11 @@ int bch2_extent_update(struct btree_trans *trans,
                : 0;
 
        if (i_sectors_delta || new_i_size) {
-               struct btree_iter *inode_iter;
+               struct btree_iter inode_iter;
                struct bch_inode_unpacked inode_u;
 
-               inode_iter = bch2_inode_peek(trans, &inode_u,
+               ret = bch2_inode_peek(trans, &inode_iter, &inode_u,
                                k->k.p.inode, BTREE_ITER_INTENT);
-               ret = PTR_ERR_OR_ZERO(inode_iter);
                if (ret)
                        return ret;
 
@@ -345,11 +344,11 @@ int bch2_extent_update(struct btree_trans *trans,
 
                        inode_p.inode.k.p.snapshot = iter->snapshot;
 
-                       ret = bch2_trans_update(trans, inode_iter,
+                       ret = bch2_trans_update(trans, &inode_iter,
                                          &inode_p.inode.k_i, 0);
                }
 
-               bch2_trans_iter_put(trans, inode_iter);
+               bch2_trans_iter_exit(trans, &inode_iter);
 
                if (ret)
                        return ret;
@@ -424,18 +423,18 @@ int bch2_fpunch(struct bch_fs *c, u64 inum, u64 start, u64 end,
                u64 *journal_seq, s64 *i_sectors_delta)
 {
        struct btree_trans trans;
-       struct btree_iter *iter;
+       struct btree_iter iter;
        int ret = 0;
 
        bch2_trans_init(&trans, c, BTREE_ITER_MAX, 1024);
-       iter = bch2_trans_get_iter(&trans, BTREE_ID_extents,
+       bch2_trans_iter_init(&trans, &iter, BTREE_ID_extents,
                                   POS(inum, start),
                                   BTREE_ITER_INTENT);
 
-       ret = bch2_fpunch_at(&trans, iter, POS(inum, end),
+       ret = bch2_fpunch_at(&trans, &iter, POS(inum, end),
                             journal_seq, i_sectors_delta);
 
-       bch2_trans_iter_put(&trans, iter);
+       bch2_trans_iter_exit(&trans, &iter);
        bch2_trans_exit(&trans);
 
        if (ret == -EINTR)
@@ -451,28 +450,28 @@ static int bch2_write_index_default(struct bch_write_op *op)
        struct keylist *keys = &op->insert_keys;
        struct bkey_i *k = bch2_keylist_front(keys);
        struct btree_trans trans;
-       struct btree_iter *iter;
+       struct btree_iter iter;
        int ret;
 
        bch2_bkey_buf_init(&sk);
        bch2_trans_init(&trans, c, BTREE_ITER_MAX, 1024);
 
-       iter = bch2_trans_get_iter(&trans, BTREE_ID_extents,
-                                  bkey_start_pos(&k->k),
-                                  BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
+       bch2_trans_iter_init(&trans, &iter, BTREE_ID_extents,
+                            bkey_start_pos(&k->k),
+                            BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
 
        do {
                bch2_trans_begin(&trans);
 
                k = bch2_keylist_front(keys);
 
-               k->k.p.snapshot = iter->snapshot;
+               k->k.p.snapshot = iter.snapshot;
 
                bch2_bkey_buf_realloc(&sk, c, k->k.u64s);
                bkey_copy(sk.k, k);
-               bch2_cut_front(iter->pos, sk.k);
+               bch2_cut_front(iter.pos, sk.k);
 
-               ret = bch2_extent_update(&trans, iter, sk.k,
+               ret = bch2_extent_update(&trans, &iter, sk.k,
                                         &op->res, op_journal_seq(op),
                                         op->new_i_size, &op->i_sectors_delta,
                                         op->flags & BCH_WRITE_CHECK_ENOSPC);
@@ -481,11 +480,11 @@ static int bch2_write_index_default(struct bch_write_op *op)
                if (ret)
                        break;
 
-               if (bkey_cmp(iter->pos, k->k.p) >= 0)
+               if (bkey_cmp(iter.pos, k->k.p) >= 0)
                        bch2_keylist_pop_front(keys);
        } while (!bch2_keylist_empty(keys));
 
-       bch2_trans_iter_put(&trans, iter);
+       bch2_trans_iter_exit(&trans, &iter);
        bch2_trans_exit(&trans);
        bch2_bkey_buf_exit(&sk, c);
 
@@ -1638,7 +1637,7 @@ static void bch2_read_retry_nodecode(struct bch_fs *c, struct bch_read_bio *rbio
                                     unsigned flags)
 {
        struct btree_trans trans;
-       struct btree_iter *iter;
+       struct btree_iter iter;
        struct bkey_buf sk;
        struct bkey_s_c k;
        int ret;
@@ -1649,12 +1648,12 @@ static void bch2_read_retry_nodecode(struct bch_fs *c, struct bch_read_bio *rbio
        bch2_bkey_buf_init(&sk);
        bch2_trans_init(&trans, c, 0, 0);
 
-       iter = bch2_trans_get_iter(&trans, rbio->data_btree,
-                                  rbio->read_pos, BTREE_ITER_SLOTS);
+       bch2_trans_iter_init(&trans, &iter, rbio->data_btree,
+                            rbio->read_pos, BTREE_ITER_SLOTS);
 retry:
        rbio->bio.bi_status = 0;
 
-       k = bch2_btree_iter_peek_slot(iter);
+       k = bch2_btree_iter_peek_slot(&iter);
        if (bkey_err(k))
                goto err;
 
@@ -1681,7 +1680,7 @@ retry:
                goto err;
 out:
        bch2_rbio_done(rbio);
-       bch2_trans_iter_put(&trans, iter);
+       bch2_trans_iter_exit(&trans, &iter);
        bch2_trans_exit(&trans);
        bch2_bkey_buf_exit(&sk, c);
        return;
@@ -1747,7 +1746,7 @@ static int __bch2_rbio_narrow_crcs(struct btree_trans *trans,
        struct bch_fs *c = rbio->c;
        u64 data_offset = rbio->data_pos.offset - rbio->pick.crc.offset;
        struct bch_extent_crc_unpacked new_crc;
-       struct btree_iter *iter = NULL;
+       struct btree_iter iter;
        struct bkey_i *new;
        struct bkey_s_c k;
        int ret = 0;
@@ -1755,9 +1754,9 @@ static int __bch2_rbio_narrow_crcs(struct btree_trans *trans,
        if (crc_is_compressed(rbio->pick.crc))
                return 0;
 
-       iter = bch2_trans_get_iter(trans, rbio->data_btree, rbio->data_pos,
-                                  BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
-       k = bch2_btree_iter_peek_slot(iter);
+       bch2_trans_iter_init(trans, &iter, rbio->data_btree, rbio->data_pos,
+                            BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
+       k = bch2_btree_iter_peek_slot(&iter);
        if ((ret = bkey_err(k)))
                goto out;
 
@@ -1792,9 +1791,9 @@ static int __bch2_rbio_narrow_crcs(struct btree_trans *trans,
        if (!bch2_bkey_narrow_crcs(new, new_crc))
                goto out;
 
-       ret = bch2_trans_update(trans, iter, new, 0);
+       ret = bch2_trans_update(trans, &iter, new, 0);
 out:
-       bch2_trans_iter_put(trans, iter);
+       bch2_trans_iter_exit(trans, &iter);
        return ret;
 }
 
@@ -1965,7 +1964,7 @@ int __bch2_read_indirect_extent(struct btree_trans *trans,
                                unsigned *offset_into_extent,
                                struct bkey_buf *orig_k)
 {
-       struct btree_iter *iter;
+       struct btree_iter iter;
        struct bkey_s_c k;
        u64 reflink_offset;
        int ret;
@@ -1973,10 +1972,10 @@ int __bch2_read_indirect_extent(struct btree_trans *trans,
        reflink_offset = le64_to_cpu(bkey_i_to_reflink_p(orig_k->k)->v.idx) +
                *offset_into_extent;
 
-       iter = bch2_trans_get_iter(trans, BTREE_ID_reflink,
-                                  POS(0, reflink_offset),
-                                  BTREE_ITER_SLOTS);
-       k = bch2_btree_iter_peek_slot(iter);
+       bch2_trans_iter_init(trans, &iter, BTREE_ID_reflink,
+                            POS(0, reflink_offset),
+                            BTREE_ITER_SLOTS);
+       k = bch2_btree_iter_peek_slot(&iter);
        ret = bkey_err(k);
        if (ret)
                goto err;
@@ -1993,10 +1992,10 @@ int __bch2_read_indirect_extent(struct btree_trans *trans,
                goto err;
        }
 
-       *offset_into_extent = iter->pos.offset - bkey_start_offset(k.k);
+       *offset_into_extent = iter.pos.offset - bkey_start_offset(k.k);
        bch2_bkey_buf_reassemble(orig_k, trans->c, k);
 err:
-       bch2_trans_iter_put(trans, iter);
+       bch2_trans_iter_exit(trans, &iter);
        return ret;
 }
 
@@ -2273,7 +2272,7 @@ void __bch2_read(struct bch_fs *c, struct bch_read_bio *rbio,
                 struct bch_io_failures *failed, unsigned flags)
 {
        struct btree_trans trans;
-       struct btree_iter *iter;
+       struct btree_iter iter;
        struct bkey_buf sk;
        struct bkey_s_c k;
        int ret;
@@ -2282,10 +2281,9 @@ void __bch2_read(struct bch_fs *c, struct bch_read_bio *rbio,
 
        bch2_bkey_buf_init(&sk);
        bch2_trans_init(&trans, c, 0, 0);
-
-       iter = bch2_trans_get_iter(&trans, BTREE_ID_extents,
-                                  POS(inode, bvec_iter.bi_sector),
-                                  BTREE_ITER_SLOTS);
+       bch2_trans_iter_init(&trans, &iter, BTREE_ID_extents,
+                            POS(inode, bvec_iter.bi_sector),
+                            BTREE_ITER_SLOTS);
 retry:
        bch2_trans_begin(&trans);
 
@@ -2302,15 +2300,15 @@ retry:
                        break;
                }
 
-               bch2_btree_iter_set_pos(iter,
+               bch2_btree_iter_set_pos(&iter,
                                POS(inode, bvec_iter.bi_sector));
 
-               k = bch2_btree_iter_peek_slot(iter);
+               k = bch2_btree_iter_peek_slot(&iter);
                ret = bkey_err(k);
                if (ret)
                        break;
 
-               offset_into_extent = iter->pos.offset -
+               offset_into_extent = iter.pos.offset -
                        bkey_start_offset(k.k);
                sectors = k.k->size - offset_into_extent;
 
@@ -2341,7 +2339,7 @@ retry:
                if (bvec_iter.bi_size == bytes)
                        flags |= BCH_READ_LAST_FRAGMENT;
 
-               ret = __bch2_read_extent(&trans, rbio, bvec_iter, iter->pos,
+               ret = __bch2_read_extent(&trans, rbio, bvec_iter, iter.pos,
                                         data_btree, k,
                                         offset_into_extent, failed, flags);
                if (ret)
@@ -2357,7 +2355,7 @@ retry:
        if (ret == -EINTR || ret == READ_RETRY || ret == READ_RETRY_AVOID)
                goto retry;
 
-       bch2_trans_iter_put(&trans, iter);
+       bch2_trans_iter_exit(&trans, &iter);
        bch2_trans_exit(&trans);
        bch2_bkey_buf_exit(&sk, c);
 
index f2060f903cbcf90489de1712511c2925b69d7198..68fb2ebd91ac14dc5988d972e99d213e7ba5c2bc 100644 (file)
@@ -250,7 +250,7 @@ void bch2_blacklist_entries_gc(struct work_struct *work)
        bch2_trans_init(&trans, c, 0, 0);
 
        for (i = 0; i < BTREE_ID_NR; i++) {
-               struct btree_iter *iter;
+               struct btree_iter iter;
                struct btree *b;
 
                for_each_btree_node(&trans, iter, i, POS_MIN,
@@ -259,7 +259,7 @@ void bch2_blacklist_entries_gc(struct work_struct *work)
                                bch2_trans_exit(&trans);
                                return;
                        }
-               bch2_trans_iter_free(&trans, iter);
+               bch2_trans_iter_exit(&trans, &iter);
        }
 
        ret = bch2_trans_exit(&trans);
index 1f65eca48c6ef48d20c033d119a04f0215f7a607..1899326d9754eeebdf9c850ace262b2eb25fe16a 100644 (file)
@@ -39,7 +39,7 @@ static int __bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags
                                   enum btree_id btree_id)
 {
        struct btree_trans trans;
-       struct btree_iter *iter;
+       struct btree_iter iter;
        struct bkey_s_c k;
        struct bkey_buf sk;
        int ret = 0;
@@ -47,13 +47,13 @@ static int __bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags
        bch2_bkey_buf_init(&sk);
        bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
 
-       iter = bch2_trans_get_iter(&trans, btree_id, POS_MIN,
-                                  BTREE_ITER_PREFETCH);
+       bch2_trans_iter_init(&trans, &iter, btree_id, POS_MIN,
+                            BTREE_ITER_PREFETCH);
 
-       while ((k = bch2_btree_iter_peek(iter)).k &&
+       while ((k = bch2_btree_iter_peek(&iter)).k &&
               !(ret = bkey_err(k))) {
                if (!bch2_bkey_has_device(k, dev_idx)) {
-                       bch2_btree_iter_advance(iter);
+                       bch2_btree_iter_advance(&iter);
                        continue;
                }
 
@@ -71,10 +71,10 @@ static int __bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags
                 */
                bch2_extent_normalize(c, bkey_i_to_s(sk.k));
 
-               bch2_btree_iter_set_pos(iter, bkey_start_pos(&sk.k->k));
+               bch2_btree_iter_set_pos(&iter, bkey_start_pos(&sk.k->k));
 
-               ret   = bch2_btree_iter_traverse(iter) ?:
-                       bch2_trans_update(&trans, iter, sk.k, 0) ?:
+               ret   = bch2_btree_iter_traverse(&iter) ?:
+                       bch2_trans_update(&trans, &iter, sk.k, 0) ?:
                        bch2_trans_commit(&trans, NULL, NULL,
                                        BTREE_INSERT_NOFAIL);
 
@@ -88,7 +88,7 @@ static int __bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags
                if (ret)
                        break;
        }
-       bch2_trans_iter_put(&trans, iter);
+       bch2_trans_iter_exit(&trans, &iter);
 
        ret = bch2_trans_exit(&trans) ?: ret;
        bch2_bkey_buf_exit(&sk, c);
@@ -107,7 +107,7 @@ static int bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags)
 static int bch2_dev_metadata_drop(struct bch_fs *c, unsigned dev_idx, int flags)
 {
        struct btree_trans trans;
-       struct btree_iter *iter;
+       struct btree_iter iter;
        struct closure cl;
        struct btree *b;
        struct bkey_buf k;
@@ -139,9 +139,9 @@ retry:
                                break;
                        }
 
-                       ret = bch2_btree_node_update_key(&trans, iter, b, k.k, false);
+                       ret = bch2_btree_node_update_key(&trans, &iter, b, k.k, false);
                        if (ret == -EINTR) {
-                               b = bch2_btree_iter_peek_node(iter);
+                               b = bch2_btree_iter_peek_node(&iter);
                                ret = 0;
                                goto retry;
                        }
@@ -150,7 +150,7 @@ retry:
                                break;
                        }
                }
-               bch2_trans_iter_free(&trans, iter);
+               bch2_trans_iter_exit(&trans, &iter);
 
                if (ret)
                        goto err;
index 3c2e566beb2dd769a68fbe4f3663b1ddcf318bcb..eb2b91f7e68222205b6f28cfe7869df2282478e8 100644 (file)
@@ -56,7 +56,7 @@ int bch2_migrate_index_update(struct bch_write_op *op)
 {
        struct bch_fs *c = op->c;
        struct btree_trans trans;
-       struct btree_iter *iter;
+       struct btree_iter iter;
        struct migrate_write *m =
                container_of(op, struct migrate_write, op);
        struct keylist *keys = &op->insert_keys;
@@ -69,9 +69,9 @@ int bch2_migrate_index_update(struct bch_write_op *op)
 
        bch2_trans_init(&trans, c, BTREE_ITER_MAX, 1024);
 
-       iter = bch2_trans_get_iter(&trans, m->btree_id,
-                                  bkey_start_pos(&bch2_keylist_front(keys)->k),
-                                  BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
+       bch2_trans_iter_init(&trans, &iter, m->btree_id,
+                            bkey_start_pos(&bch2_keylist_front(keys)->k),
+                            BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
 
        while (1) {
                struct bkey_s_c k;
@@ -86,7 +86,7 @@ int bch2_migrate_index_update(struct bch_write_op *op)
 
                bch2_trans_begin(&trans);
 
-               k = bch2_btree_iter_peek_slot(iter);
+               k = bch2_btree_iter_peek_slot(&iter);
                ret = bkey_err(k);
                if (ret)
                        goto err;
@@ -102,9 +102,9 @@ int bch2_migrate_index_update(struct bch_write_op *op)
 
                bch2_bkey_buf_copy(&_new, c, bch2_keylist_front(keys));
                new = bkey_i_to_extent(_new.k);
-               bch2_cut_front(iter->pos, &new->k_i);
+               bch2_cut_front(iter.pos, &new->k_i);
 
-               bch2_cut_front(iter->pos,       insert);
+               bch2_cut_front(iter.pos,        insert);
                bch2_cut_back(new->k.p,         insert);
                bch2_cut_back(insert->k.p,      &new->k_i);
 
@@ -146,7 +146,7 @@ int bch2_migrate_index_update(struct bch_write_op *op)
                                               op->opts.background_target,
                                               op->opts.data_replicas);
 
-               ret = bch2_sum_sector_overwrites(&trans, iter, insert,
+               ret = bch2_sum_sector_overwrites(&trans, &iter, insert,
                                                 &extending,
                                                 &should_check_enospc,
                                                 &i_sectors_delta,
@@ -165,13 +165,13 @@ int bch2_migrate_index_update(struct bch_write_op *op)
 
                next_pos = insert->k.p;
 
-               ret   = bch2_trans_update(&trans, iter, insert, 0) ?:
+               ret   = bch2_trans_update(&trans, &iter, insert, 0) ?:
                        bch2_trans_commit(&trans, &op->res,
                                op_journal_seq(op),
                                BTREE_INSERT_NOFAIL|
                                m->data_opts.btree_insert_flags);
                if (!ret) {
-                       bch2_btree_iter_set_pos(iter, next_pos);
+                       bch2_btree_iter_set_pos(&iter, next_pos);
                        atomic_long_inc(&c->extent_migrate_done);
                }
 err:
@@ -180,7 +180,7 @@ err:
                if (ret)
                        break;
 next:
-               while (bkey_cmp(iter->pos, bch2_keylist_front(keys)->k.p) >= 0) {
+               while (bkey_cmp(iter.pos, bch2_keylist_front(keys)->k.p) >= 0) {
                        bch2_keylist_pop_front(keys);
                        if (bch2_keylist_empty(keys))
                                goto out;
@@ -188,18 +188,18 @@ next:
                continue;
 nomatch:
                if (m->ctxt) {
-                       BUG_ON(k.k->p.offset <= iter->pos.offset);
+                       BUG_ON(k.k->p.offset <= iter.pos.offset);
                        atomic64_inc(&m->ctxt->stats->keys_raced);
-                       atomic64_add(k.k->p.offset - iter->pos.offset,
+                       atomic64_add(k.k->p.offset - iter.pos.offset,
                                     &m->ctxt->stats->sectors_raced);
                }
                atomic_long_inc(&c->extent_migrate_raced);
                trace_move_race(&new->k);
-               bch2_btree_iter_advance(iter);
+               bch2_btree_iter_advance(&iter);
                goto next;
        }
 out:
-       bch2_trans_iter_put(&trans, iter);
+       bch2_trans_iter_exit(&trans, &iter);
        bch2_trans_exit(&trans);
        bch2_bkey_buf_exit(&_insert, c);
        bch2_bkey_buf_exit(&_new, c);
@@ -524,13 +524,13 @@ err:
 static int lookup_inode(struct btree_trans *trans, struct bpos pos,
                        struct bch_inode_unpacked *inode)
 {
-       struct btree_iter *iter;
+       struct btree_iter iter;
        struct bkey_s_c k;
        int ret;
 
-       iter = bch2_trans_get_iter(trans, BTREE_ID_inodes, pos,
-                                  BTREE_ITER_ALL_SNAPSHOTS);
-       k = bch2_btree_iter_peek(iter);
+       bch2_trans_iter_init(trans, &iter, BTREE_ID_inodes, pos,
+                            BTREE_ITER_ALL_SNAPSHOTS);
+       k = bch2_btree_iter_peek(&iter);
        ret = bkey_err(k);
        if (ret)
                goto err;
@@ -548,7 +548,7 @@ static int lookup_inode(struct btree_trans *trans, struct bpos pos,
        if (ret)
                goto err;
 err:
-       bch2_trans_iter_put(trans, iter);
+       bch2_trans_iter_exit(trans, &iter);
        return ret;
 }
 
@@ -566,7 +566,7 @@ static int __bch2_move_data(struct bch_fs *c,
        struct bch_io_opts io_opts = bch2_opts_to_inode_opts(c->opts);
        struct bkey_buf sk;
        struct btree_trans trans;
-       struct btree_iter *iter;
+       struct btree_iter iter;
        struct bkey_s_c k;
        struct data_opts data_opts;
        enum data_cmd data_cmd;
@@ -580,8 +580,8 @@ static int __bch2_move_data(struct bch_fs *c,
        stats->btree_id = btree_id;
        stats->pos      = start;
 
-       iter = bch2_trans_get_iter(&trans, btree_id, start,
-                                  BTREE_ITER_PREFETCH);
+       bch2_trans_iter_init(&trans, &iter, btree_id, start,
+                            BTREE_ITER_PREFETCH);
 
        if (rate)
                bch2_ratelimit_reset(rate);
@@ -612,9 +612,9 @@ static int __bch2_move_data(struct bch_fs *c,
 
                bch2_trans_begin(&trans);
 
-               k = bch2_btree_iter_peek(iter);
+               k = bch2_btree_iter_peek(&iter);
 
-               stats->pos = iter->pos;
+               stats->pos = iter.pos;
 
                if (!k.k)
                        break;
@@ -687,12 +687,12 @@ next:
                atomic64_add(k.k->size * bch2_bkey_nr_ptrs_allocated(k),
                             &stats->sectors_seen);
 next_nondata:
-               bch2_btree_iter_advance(iter);
+               bch2_btree_iter_advance(&iter);
                bch2_trans_cond_resched(&trans);
        }
 out:
 
-       bch2_trans_iter_put(&trans, iter);
+       bch2_trans_iter_exit(&trans, &iter);
        ret = bch2_trans_exit(&trans) ?: ret;
        bch2_bkey_buf_exit(&sk, c);
 
@@ -786,7 +786,7 @@ static int bch2_move_btree(struct bch_fs *c,
        bool kthread = (current->flags & PF_KTHREAD) != 0;
        struct bch_io_opts io_opts = bch2_opts_to_inode_opts(c->opts);
        struct btree_trans trans;
-       struct btree_iter *iter;
+       struct btree_iter iter;
        struct btree *b;
        enum btree_id id;
        struct data_opts data_opts;
@@ -813,7 +813,7 @@ static int bch2_move_btree(struct bch_fs *c,
                             bpos_cmp(b->key.k.p, end_pos)) > 0)
                                break;
 
-                       stats->pos = iter->pos;
+                       stats->pos = iter.pos;
 
                        switch ((cmd = pred(c, arg, b, &io_opts, &data_opts))) {
                        case DATA_SKIP:
@@ -827,13 +827,13 @@ static int bch2_move_btree(struct bch_fs *c,
                                BUG();
                        }
 
-                       ret = bch2_btree_node_rewrite(&trans, iter,
+                       ret = bch2_btree_node_rewrite(&trans, &iter,
                                        b->data->keys.seq, 0) ?: ret;
 next:
                        bch2_trans_cond_resched(&trans);
                }
+               bch2_trans_iter_exit(&trans, &iter);
 
-               ret = bch2_trans_iter_free(&trans, iter) ?: ret;
                if (kthread && kthread_should_stop())
                        break;
        }
index 7861781a4a7fea4de99f209070be22f61ec71b84..9b0f4d3f176d5fcbd7c0e0e189679c40416abefd 100644 (file)
@@ -357,7 +357,7 @@ static int __bch2_quota_set(struct bch_fs *c, struct bkey_s_c k)
 static int bch2_quota_init_type(struct bch_fs *c, enum quota_types type)
 {
        struct btree_trans trans;
-       struct btree_iter *iter;
+       struct btree_iter iter;
        struct bkey_s_c k;
        int ret = 0;
 
@@ -372,7 +372,7 @@ static int bch2_quota_init_type(struct bch_fs *c, enum quota_types type)
                if (ret)
                        break;
        }
-       bch2_trans_iter_put(&trans, iter);
+       bch2_trans_iter_exit(&trans, &iter);
 
        return bch2_trans_exit(&trans) ?: ret;
 }
@@ -419,7 +419,7 @@ int bch2_fs_quota_read(struct bch_fs *c)
        unsigned i, qtypes = enabled_qtypes(c);
        struct bch_memquota_type *q;
        struct btree_trans trans;
-       struct btree_iter *iter;
+       struct btree_iter iter;
        struct bch_inode_unpacked u;
        struct bkey_s_c k;
        int ret;
@@ -450,7 +450,7 @@ int bch2_fs_quota_read(struct bch_fs *c)
                                        KEY_TYPE_QUOTA_NOCHECK);
                }
        }
-       bch2_trans_iter_put(&trans, iter);
+       bch2_trans_iter_exit(&trans, &iter);
 
        return bch2_trans_exit(&trans) ?: ret;
 }
@@ -717,13 +717,13 @@ static int bch2_set_quota_trans(struct btree_trans *trans,
                                struct bkey_i_quota *new_quota,
                                struct qc_dqblk *qdq)
 {
-       struct btree_iter *iter;
+       struct btree_iter iter;
        struct bkey_s_c k;
        int ret;
 
-       iter = bch2_trans_get_iter(trans, BTREE_ID_quotas, new_quota->k.p,
-                                  BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
-       k = bch2_btree_iter_peek_slot(iter);
+       bch2_trans_iter_init(trans, &iter, BTREE_ID_quotas, new_quota->k.p,
+                            BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
+       k = bch2_btree_iter_peek_slot(&iter);
 
        ret = bkey_err(k);
        if (unlikely(ret))
@@ -742,8 +742,8 @@ static int bch2_set_quota_trans(struct btree_trans *trans,
        if (qdq->d_fieldmask & QC_INO_HARD)
                new_quota->v.c[Q_INO].hardlimit = cpu_to_le64(qdq->d_ino_hardlimit);
 
-       ret = bch2_trans_update(trans, iter, &new_quota->k_i, 0);
-       bch2_trans_iter_put(trans, iter);
+       ret = bch2_trans_update(trans, &iter, &new_quota->k_i, 0);
+       bch2_trans_iter_exit(trans, &iter);
        return ret;
 }
 
index 71b0f14f41f386db46fce646c2d3ef3f2efb6540..11208e83fabee044669a6bcc592a1e802d41dd40 100644 (file)
@@ -327,7 +327,7 @@ static void btree_and_journal_iter_prefetch(struct bch_fs *c, struct btree *b,
                bch2_bkey_buf_reassemble(&tmp, c, k);
 
                bch2_btree_node_prefetch(c, NULL, NULL, tmp.k,
-                                        b->c.btree_id, b->c.level - 1);
+                                       b->c.btree_id, b->c.level - 1);
 
                bch2_btree_and_journal_iter_advance(&iter);
                i++;
@@ -518,16 +518,16 @@ static int __bch2_journal_replay_key(struct btree_trans *trans,
                                     enum btree_id id, unsigned level,
                                     struct bkey_i *k)
 {
-       struct btree_iter *iter;
+       struct btree_iter iter;
        int ret;
 
-       iter = bch2_trans_get_node_iter(trans, id, k->k.p,
-                                       BTREE_MAX_DEPTH, level,
-                                       BTREE_ITER_INTENT|
-                                       BTREE_ITER_NOT_EXTENTS);
-       ret   = bch2_btree_iter_traverse(iter) ?:
-               bch2_trans_update(trans, iter, k, BTREE_TRIGGER_NORUN);
-       bch2_trans_iter_put(trans, iter);
+       bch2_trans_node_iter_init(trans, &iter, id, k->k.p,
+                                 BTREE_MAX_DEPTH, level,
+                                 BTREE_ITER_INTENT|
+                                 BTREE_ITER_NOT_EXTENTS);
+       ret   = bch2_btree_iter_traverse(&iter) ?:
+               bch2_trans_update(trans, &iter, k, BTREE_TRIGGER_NORUN);
+       bch2_trans_iter_exit(trans, &iter);
        return ret;
 }
 
@@ -545,16 +545,16 @@ static int bch2_journal_replay_key(struct bch_fs *c, struct journal_key *k)
 
 static int __bch2_alloc_replay_key(struct btree_trans *trans, struct bkey_i *k)
 {
-       struct btree_iter *iter;
+       struct btree_iter iter;
        int ret;
 
-       iter = bch2_trans_get_iter(trans, BTREE_ID_alloc, k->k.p,
-                                  BTREE_ITER_CACHED|
-                                  BTREE_ITER_CACHED_NOFILL|
-                                  BTREE_ITER_INTENT);
-       ret   = bch2_btree_iter_traverse(iter) ?:
-               bch2_trans_update(trans, iter, k, BTREE_TRIGGER_NORUN);
-       bch2_trans_iter_put(trans, iter);
+       bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc, k->k.p,
+                            BTREE_ITER_CACHED|
+                            BTREE_ITER_CACHED_NOFILL|
+                            BTREE_ITER_INTENT);
+       ret   = bch2_btree_iter_traverse(&iter) ?:
+               bch2_trans_update(trans, &iter, k, BTREE_TRIGGER_NORUN);
+       bch2_trans_iter_exit(trans, &iter);
        return ret;
 }
 
index 3d9c5c5b0eba75a7e548e6b1e48f539bddbf4365..576cfbccf5b537b2d000935d739476b278fe58b7 100644 (file)
@@ -116,7 +116,7 @@ static int bch2_make_extent_indirect(struct btree_trans *trans,
                                     struct bkey_i *orig)
 {
        struct bch_fs *c = trans->c;
-       struct btree_iter *reflink_iter;
+       struct btree_iter reflink_iter = { NULL };
        struct bkey_s_c k;
        struct bkey_i *r_v;
        struct bkey_i_reflink_p *r_p;
@@ -129,8 +129,8 @@ static int bch2_make_extent_indirect(struct btree_trans *trans,
        for_each_btree_key(trans, reflink_iter, BTREE_ID_reflink,
                           POS(0, c->reflink_hint),
                           BTREE_ITER_INTENT|BTREE_ITER_SLOTS, k, ret) {
-               if (reflink_iter->pos.inode) {
-                       bch2_btree_iter_set_pos(reflink_iter, POS_MIN);
+               if (reflink_iter.pos.inode) {
+                       bch2_btree_iter_set_pos(&reflink_iter, POS_MIN);
                        continue;
                }
 
@@ -142,7 +142,7 @@ static int bch2_make_extent_indirect(struct btree_trans *trans,
                goto err;
 
        /* rewind iter to start of hole, if necessary: */
-       bch2_btree_iter_set_pos_to_extent_start(reflink_iter);
+       bch2_btree_iter_set_pos_to_extent_start(&reflink_iter);
 
        r_v = bch2_trans_kmalloc(trans, sizeof(__le64) + bkey_bytes(&orig->k));
        ret = PTR_ERR_OR_ZERO(r_v);
@@ -151,7 +151,7 @@ static int bch2_make_extent_indirect(struct btree_trans *trans,
 
        bkey_init(&r_v->k);
        r_v->k.type     = bkey_type_to_indirect(&orig->k);
-       r_v->k.p        = reflink_iter->pos;
+       r_v->k.p        = reflink_iter.pos;
        bch2_key_resize(&r_v->k, orig->k.size);
        r_v->k.version  = orig->k.version;
 
@@ -161,7 +161,7 @@ static int bch2_make_extent_indirect(struct btree_trans *trans,
        *refcount       = 0;
        memcpy(refcount + 1, &orig->v, bkey_val_bytes(&orig->k));
 
-       ret = bch2_trans_update(trans, reflink_iter, r_v, 0);
+       ret = bch2_trans_update(trans, &reflink_iter, r_v, 0);
        if (ret)
                goto err;
 
@@ -172,9 +172,8 @@ static int bch2_make_extent_indirect(struct btree_trans *trans,
 
        ret = bch2_trans_update(trans, extent_iter, &r_p->k_i, 0);
 err:
-       if (!IS_ERR(reflink_iter))
-               c->reflink_hint = reflink_iter->pos.offset;
-       bch2_trans_iter_put(trans, reflink_iter);
+       c->reflink_hint = reflink_iter.pos.offset;
+       bch2_trans_iter_exit(trans, &reflink_iter);
 
        return ret;
 }
@@ -184,7 +183,7 @@ static struct bkey_s_c get_next_src(struct btree_iter *iter, struct bpos end)
        struct bkey_s_c k;
        int ret;
 
-       for_each_btree_key_continue(iter, 0, k, ret) {
+       for_each_btree_key_continue(*iter, 0, k, ret) {
                if (bkey_cmp(iter->pos, end) >= 0)
                        break;
 
@@ -203,7 +202,7 @@ s64 bch2_remap_range(struct bch_fs *c,
                     u64 new_i_size, s64 *i_sectors_delta)
 {
        struct btree_trans trans;
-       struct btree_iter *dst_iter, *src_iter;
+       struct btree_iter dst_iter, src_iter;
        struct bkey_s_c src_k;
        struct bkey_buf new_dst, new_src;
        struct bpos dst_end = dst_start, src_end = src_start;
@@ -223,13 +222,13 @@ s64 bch2_remap_range(struct bch_fs *c,
        bch2_bkey_buf_init(&new_src);
        bch2_trans_init(&trans, c, BTREE_ITER_MAX, 4096);
 
-       src_iter = bch2_trans_get_iter(&trans, BTREE_ID_extents, src_start,
-                                      BTREE_ITER_INTENT);
-       dst_iter = bch2_trans_get_iter(&trans, BTREE_ID_extents, dst_start,
-                                      BTREE_ITER_INTENT);
+       bch2_trans_iter_init(&trans, &src_iter, BTREE_ID_extents, src_start,
+                            BTREE_ITER_INTENT);
+       bch2_trans_iter_init(&trans, &dst_iter, BTREE_ID_extents, dst_start,
+                            BTREE_ITER_INTENT);
 
        while ((ret == 0 || ret == -EINTR) &&
-              bkey_cmp(dst_iter->pos, dst_end) < 0) {
+              bkey_cmp(dst_iter.pos, dst_end) < 0) {
                struct disk_reservation disk_res = { 0 };
 
                bch2_trans_begin(&trans);
@@ -239,31 +238,31 @@ s64 bch2_remap_range(struct bch_fs *c,
                        break;
                }
 
-               dst_done = dst_iter->pos.offset - dst_start.offset;
+               dst_done = dst_iter.pos.offset - dst_start.offset;
                src_want = POS(src_start.inode, src_start.offset + dst_done);
-               bch2_btree_iter_set_pos(src_iter, src_want);
+               bch2_btree_iter_set_pos(&src_iter, src_want);
 
-               src_k = get_next_src(src_iter, src_end);
+               src_k = get_next_src(&src_iter, src_end);
                ret = bkey_err(src_k);
                if (ret)
                        continue;
 
-               if (bkey_cmp(src_want, src_iter->pos) < 0) {
-                       ret = bch2_fpunch_at(&trans, dst_iter,
+               if (bkey_cmp(src_want, src_iter.pos) < 0) {
+                       ret = bch2_fpunch_at(&trans, &dst_iter,
                                        bpos_min(dst_end,
-                                                POS(dst_iter->pos.inode, dst_iter->pos.offset +
-                                                    src_iter->pos.offset - src_want.offset)),
+                                                POS(dst_iter.pos.inode, dst_iter.pos.offset +
+                                                    src_iter.pos.offset - src_want.offset)),
                                                 journal_seq, i_sectors_delta);
                        continue;
                }
 
                if (src_k.k->type != KEY_TYPE_reflink_p) {
-                       bch2_btree_iter_set_pos_to_extent_start(src_iter);
+                       bch2_btree_iter_set_pos_to_extent_start(&src_iter);
 
                        bch2_bkey_buf_reassemble(&new_src, c, src_k);
                        src_k = bkey_i_to_s_c(new_src.k);
 
-                       ret = bch2_make_extent_indirect(&trans, src_iter,
+                       ret = bch2_make_extent_indirect(&trans, &src_iter,
                                                new_src.k);
                        if (ret)
                                continue;
@@ -286,43 +285,42 @@ s64 bch2_remap_range(struct bch_fs *c,
                        BUG();
                }
 
-               new_dst.k->k.p = dst_iter->pos;
+               new_dst.k->k.p = dst_iter.pos;
                bch2_key_resize(&new_dst.k->k,
                                min(src_k.k->p.offset - src_want.offset,
-                                   dst_end.offset - dst_iter->pos.offset));
-               ret = bch2_extent_update(&trans, dst_iter, new_dst.k,
+                                   dst_end.offset - dst_iter.pos.offset));
+               ret = bch2_extent_update(&trans, &dst_iter, new_dst.k,
                                         &disk_res, journal_seq,
                                         new_i_size, i_sectors_delta,
                                         true);
                bch2_disk_reservation_put(c, &disk_res);
        }
-       bch2_trans_iter_put(&trans, dst_iter);
-       bch2_trans_iter_put(&trans, src_iter);
+       bch2_trans_iter_exit(&trans, &dst_iter);
+       bch2_trans_iter_exit(&trans, &src_iter);
 
-       BUG_ON(!ret && bkey_cmp(dst_iter->pos, dst_end));
-       BUG_ON(bkey_cmp(dst_iter->pos, dst_end) > 0);
+       BUG_ON(!ret && bkey_cmp(dst_iter.pos, dst_end));
+       BUG_ON(bkey_cmp(dst_iter.pos, dst_end) > 0);
 
-       dst_done = dst_iter->pos.offset - dst_start.offset;
-       new_i_size = min(dst_iter->pos.offset << 9, new_i_size);
+       dst_done = dst_iter.pos.offset - dst_start.offset;
+       new_i_size = min(dst_iter.pos.offset << 9, new_i_size);
 
        do {
                struct bch_inode_unpacked inode_u;
-               struct btree_iter *inode_iter;
+               struct btree_iter inode_iter = { NULL };
 
                bch2_trans_begin(&trans);
 
-               inode_iter = bch2_inode_peek(&trans, &inode_u,
+               ret2 = bch2_inode_peek(&trans, &inode_iter, &inode_u,
                                dst_start.inode, BTREE_ITER_INTENT);
-               ret2 = PTR_ERR_OR_ZERO(inode_iter);
 
                if (!ret2 &&
                    inode_u.bi_size < new_i_size) {
                        inode_u.bi_size = new_i_size;
-                       ret2  = bch2_inode_write(&trans, inode_iter, &inode_u) ?:
+                       ret2  = bch2_inode_write(&trans, &inode_iter, &inode_u) ?:
                                bch2_trans_commit(&trans, NULL, journal_seq, 0);
                }
 
-               bch2_trans_iter_put(&trans, inode_iter);
+               bch2_trans_iter_exit(&trans, &inode_iter);
        } while (ret2 == -EINTR);
 
        ret = bch2_trans_exit(&trans) ?: ret;
index 23602349419161d7e7bd422f931a81dad44ba4e4..c6a132b3c5bb2eb24cf112f11fd23d254e01b499 100644 (file)
@@ -139,18 +139,18 @@ struct bch_hash_desc {
        bool            (*cmp_bkey)(struct bkey_s_c, struct bkey_s_c);
 };
 
-static __always_inline struct btree_iter *
+static __always_inline int
 bch2_hash_lookup(struct btree_trans *trans,
+                struct btree_iter *iter,
                 const struct bch_hash_desc desc,
                 const struct bch_hash_info *info,
                 u64 inode, const void *key,
                 unsigned flags)
 {
-       struct btree_iter *iter;
        struct bkey_s_c k;
        int ret;
 
-       for_each_btree_key(trans, iter, desc.btree_id,
+       for_each_btree_key(trans, *iter, desc.btree_id,
                           POS(inode, desc.hash_key(info, key)),
                           BTREE_ITER_SLOTS|flags, k, ret) {
                if (iter->pos.inode != inode)
@@ -158,7 +158,7 @@ bch2_hash_lookup(struct btree_trans *trans,
 
                if (k.k->type == desc.key_type) {
                        if (!desc.cmp_key(k, key))
-                               return iter;
+                               return 0;
                } else if (k.k->type == KEY_TYPE_hash_whiteout) {
                        ;
                } else {
@@ -166,35 +166,33 @@ bch2_hash_lookup(struct btree_trans *trans,
                        break;
                }
        }
-       bch2_trans_iter_put(trans, iter);
+       bch2_trans_iter_exit(trans, iter);
 
-       return ERR_PTR(ret ?: -ENOENT);
+       return ret ?: -ENOENT;
 }
 
-static __always_inline struct btree_iter *
+static __always_inline int
 bch2_hash_hole(struct btree_trans *trans,
+              struct btree_iter *iter,
               const struct bch_hash_desc desc,
               const struct bch_hash_info *info,
               u64 inode, const void *key)
 {
-       struct btree_iter *iter;
        struct bkey_s_c k;
        int ret;
 
-       for_each_btree_key(trans, iter, desc.btree_id,
+       for_each_btree_key(trans, *iter, desc.btree_id,
                           POS(inode, desc.hash_key(info, key)),
                           BTREE_ITER_SLOTS|BTREE_ITER_INTENT, k, ret) {
                if (iter->pos.inode != inode)
                        break;
 
                if (k.k->type != desc.key_type)
-                       return iter;
+                       return 0;
        }
+       bch2_trans_iter_exit(trans, iter);
 
-       iter->flags |= BTREE_ITER_KEEP_UNTIL_COMMIT;
-       bch2_trans_iter_put(trans, iter);
-
-       return ERR_PTR(ret ?: -ENOSPC);
+       return ret ?: -ENOSPC;
 }
 
 static __always_inline
@@ -203,13 +201,13 @@ int bch2_hash_needs_whiteout(struct btree_trans *trans,
                             const struct bch_hash_info *info,
                             struct btree_iter *start)
 {
-       struct btree_iter *iter;
+       struct btree_iter iter;
        struct bkey_s_c k;
        int ret;
 
-       iter = bch2_trans_copy_iter(trans, start);
+       bch2_trans_copy_iter(&iter, start);
 
-       bch2_btree_iter_advance(iter);
+       bch2_btree_iter_advance(&iter);
 
        for_each_btree_key_continue(iter, BTREE_ITER_SLOTS, k, ret) {
                if (k.k->type != desc.key_type &&
@@ -218,13 +216,12 @@ int bch2_hash_needs_whiteout(struct btree_trans *trans,
 
                if (k.k->type == desc.key_type &&
                    desc.hash_bkey(info, k) <= start->pos.offset) {
-                       iter->flags |= BTREE_ITER_KEEP_UNTIL_COMMIT;
                        ret = 1;
                        break;
                }
        }
 
-       bch2_trans_iter_put(trans, iter);
+       bch2_trans_iter_exit(trans, &iter);
        return ret;
 }
 
@@ -234,7 +231,7 @@ int bch2_hash_set(struct btree_trans *trans,
                  const struct bch_hash_info *info,
                  u64 inode, struct bkey_i *insert, int flags)
 {
-       struct btree_iter *iter, *slot = NULL;
+       struct btree_iter iter, slot = { NULL };
        struct bkey_s_c k;
        bool found = false;
        int ret;
@@ -242,7 +239,7 @@ int bch2_hash_set(struct btree_trans *trans,
        for_each_btree_key(trans, iter, desc.btree_id,
                           POS(inode, desc.hash_bkey(info, bkey_i_to_s_c(insert))),
                           BTREE_ITER_SLOTS|BTREE_ITER_INTENT, k, ret) {
-               if (iter->pos.inode != inode)
+               if (iter.pos.inode != inode)
                        break;
 
                if (k.k->type == desc.key_type) {
@@ -253,9 +250,9 @@ int bch2_hash_set(struct btree_trans *trans,
                        continue;
                }
 
-               if (!slot &&
+               if (!slot.path &&
                    !(flags & BCH_HASH_SET_MUST_REPLACE))
-                       slot = bch2_trans_copy_iter(trans, iter);
+                       bch2_trans_copy_iter(&slot, &iter);
 
                if (k.k->type != KEY_TYPE_hash_whiteout)
                        goto not_found;
@@ -264,8 +261,8 @@ int bch2_hash_set(struct btree_trans *trans,
        if (!ret)
                ret = -ENOSPC;
 out:
-       bch2_trans_iter_put(trans, slot);
-       bch2_trans_iter_put(trans, iter);
+       bch2_trans_iter_exit(trans, &slot);
+       bch2_trans_iter_exit(trans, &iter);
 
        return ret;
 found:
@@ -277,11 +274,11 @@ not_found:
        } else if (found && (flags & BCH_HASH_SET_MUST_CREATE)) {
                ret = -EEXIST;
        } else {
-               if (!found && slot)
+               if (!found && slot.path)
                        swap(iter, slot);
 
-               insert->k.p = iter->pos;
-               ret = bch2_trans_update(trans, iter, insert, 0);
+               insert->k.p = iter.pos;
+               ret = bch2_trans_update(trans, &iter, insert, 0);
        }
 
        goto out;
@@ -318,16 +315,16 @@ int bch2_hash_delete(struct btree_trans *trans,
                     const struct bch_hash_info *info,
                     u64 inode, const void *key)
 {
-       struct btree_iter *iter;
+       struct btree_iter iter;
        int ret;
 
-       iter = bch2_hash_lookup(trans, desc, info, inode, key,
+       ret = bch2_hash_lookup(trans, &iter, desc, info, inode, key,
                                BTREE_ITER_INTENT);
-       if (IS_ERR(iter))
-               return PTR_ERR(iter);
+       if (ret)
+               return ret;
 
-       ret = bch2_hash_delete_at(trans, desc, info, iter);
-       bch2_trans_iter_put(trans, iter);
+       ret = bch2_hash_delete_at(trans, desc, info, &iter);
+       bch2_trans_iter_exit(trans, &iter);
        return ret;
 }
 
index 1d793e554084533eb072ed8015431ed3cd1511a1..b18ca3947ac8710cb3502930063bd8aaa3d3402d 100644 (file)
@@ -494,11 +494,11 @@ static void __bch2_fs_free(struct bch_fs *c)
        percpu_free_rwsem(&c->mark_lock);
        free_percpu(c->online_reserved);
 
-       if (c->btree_iters_bufs)
+       if (c->btree_paths_bufs)
                for_each_possible_cpu(cpu)
-                       kfree(per_cpu_ptr(c->btree_iters_bufs, cpu)->iter);
+                       kfree(per_cpu_ptr(c->btree_paths_bufs, cpu)->path);
 
-       free_percpu(c->btree_iters_bufs);
+       free_percpu(c->btree_paths_bufs);
        free_percpu(c->pcpu);
        mempool_exit(&c->large_bkey_pool);
        mempool_exit(&c->btree_bounce_pool);
@@ -783,7 +783,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
                        BIOSET_NEED_BVECS) ||
            !(c->pcpu = alloc_percpu(struct bch_fs_pcpu)) ||
            !(c->online_reserved = alloc_percpu(u64)) ||
-           !(c->btree_iters_bufs = alloc_percpu(struct btree_iter_buf)) ||
+           !(c->btree_paths_bufs = alloc_percpu(struct btree_path_buf)) ||
            mempool_init_kvpmalloc_pool(&c->btree_bounce_pool, 1,
                                        btree_bytes(c)) ||
            mempool_init_kmalloc_pool(&c->large_bkey_pool, 1, 2048) ||
index b5ce336f00ca06382d6f7c99afbe5936b666a678..92e58f5c6bbfabb078cefd292b214cff5883d0e8 100644 (file)
@@ -290,7 +290,7 @@ static int fs_alloc_debug_to_text(struct printbuf *out, struct bch_fs *c)
 static int bch2_compression_stats_to_text(struct printbuf *out, struct bch_fs *c)
 {
        struct btree_trans trans;
-       struct btree_iter *iter;
+       struct btree_iter iter;
        struct bkey_s_c k;
        u64 nr_uncompressed_extents = 0, uncompressed_sectors = 0,
            nr_compressed_extents = 0,
@@ -325,6 +325,7 @@ static int bch2_compression_stats_to_text(struct printbuf *out, struct bch_fs *c
                                break;
                        }
                }
+       bch2_trans_iter_exit(&trans, &iter);
 
        ret = bch2_trans_exit(&trans) ?: ret;
        if (ret)
index 43b514974d91b7dcd7f2eef2b779eaf9e97e27a0..1b583b134853bbbff9c9d4579d89f295aa45587d 100644 (file)
@@ -29,7 +29,7 @@ static void delete_test_keys(struct bch_fs *c)
 static int test_delete(struct bch_fs *c, u64 nr)
 {
        struct btree_trans trans;
-       struct btree_iter *iter;
+       struct btree_iter iter;
        struct bkey_i_cookie k;
        int ret;
 
@@ -37,13 +37,12 @@ static int test_delete(struct bch_fs *c, u64 nr)
        k.k.p.snapshot = U32_MAX;
 
        bch2_trans_init(&trans, c, 0, 0);
-
-       iter = bch2_trans_get_iter(&trans, BTREE_ID_xattrs, k.k.p,
-                                  BTREE_ITER_INTENT);
+       bch2_trans_iter_init(&trans, &iter, BTREE_ID_xattrs, k.k.p,
+                            BTREE_ITER_INTENT);
 
        ret = __bch2_trans_do(&trans, NULL, NULL, 0,
-               bch2_btree_iter_traverse(iter) ?:
-               bch2_trans_update(&trans, iter, &k.k_i, 0));
+               bch2_btree_iter_traverse(&iter) ?:
+               bch2_trans_update(&trans, &iter, &k.k_i, 0));
        if (ret) {
                bch_err(c, "update error in test_delete: %i", ret);
                goto err;
@@ -51,8 +50,8 @@ static int test_delete(struct bch_fs *c, u64 nr)
 
        pr_info("deleting once");
        ret = __bch2_trans_do(&trans, NULL, NULL, 0,
-               bch2_btree_iter_traverse(iter) ?:
-               bch2_btree_delete_at(&trans, iter, 0));
+               bch2_btree_iter_traverse(&iter) ?:
+               bch2_btree_delete_at(&trans, &iter, 0));
        if (ret) {
                bch_err(c, "delete error (first) in test_delete: %i", ret);
                goto err;
@@ -60,14 +59,14 @@ static int test_delete(struct bch_fs *c, u64 nr)
 
        pr_info("deleting twice");
        ret = __bch2_trans_do(&trans, NULL, NULL, 0,
-               bch2_btree_iter_traverse(iter) ?:
-               bch2_btree_delete_at(&trans, iter, 0));
+               bch2_btree_iter_traverse(&iter) ?:
+               bch2_btree_delete_at(&trans, &iter, 0));
        if (ret) {
                bch_err(c, "delete error (second) in test_delete: %i", ret);
                goto err;
        }
 err:
-       bch2_trans_iter_put(&trans, iter);
+       bch2_trans_iter_exit(&trans, &iter);
        bch2_trans_exit(&trans);
        return ret;
 }
@@ -75,7 +74,7 @@ err:
 static int test_delete_written(struct bch_fs *c, u64 nr)
 {
        struct btree_trans trans;
-       struct btree_iter *iter;
+       struct btree_iter iter;
        struct bkey_i_cookie k;
        int ret;
 
@@ -84,12 +83,12 @@ static int test_delete_written(struct bch_fs *c, u64 nr)
 
        bch2_trans_init(&trans, c, 0, 0);
 
-       iter = bch2_trans_get_iter(&trans, BTREE_ID_xattrs, k.k.p,
-                                  BTREE_ITER_INTENT);
+       bch2_trans_iter_init(&trans, &iter, BTREE_ID_xattrs, k.k.p,
+                            BTREE_ITER_INTENT);
 
        ret = __bch2_trans_do(&trans, NULL, NULL, 0,
-               bch2_btree_iter_traverse(iter) ?:
-               bch2_trans_update(&trans, iter, &k.k_i, 0));
+               bch2_btree_iter_traverse(&iter) ?:
+               bch2_trans_update(&trans, &iter, &k.k_i, 0));
        if (ret) {
                bch_err(c, "update error in test_delete_written: %i", ret);
                goto err;
@@ -99,14 +98,14 @@ static int test_delete_written(struct bch_fs *c, u64 nr)
        bch2_journal_flush_all_pins(&c->journal);
 
        ret = __bch2_trans_do(&trans, NULL, NULL, 0,
-               bch2_btree_iter_traverse(iter) ?:
-               bch2_btree_delete_at(&trans, iter, 0));
+               bch2_btree_iter_traverse(&iter) ?:
+               bch2_btree_delete_at(&trans, &iter, 0));
        if (ret) {
                bch_err(c, "delete error in test_delete_written: %i", ret);
                goto err;
        }
 err:
-       bch2_trans_iter_put(&trans, iter);
+       bch2_trans_iter_exit(&trans, &iter);
        bch2_trans_exit(&trans);
        return ret;
 }
@@ -114,7 +113,7 @@ err:
 static int test_iterate(struct bch_fs *c, u64 nr)
 {
        struct btree_trans trans;
-       struct btree_iter *iter = NULL;
+       struct btree_iter iter = { NULL };
        struct bkey_s_c k;
        u64 i;
        int ret = 0;
@@ -156,12 +155,12 @@ static int test_iterate(struct bch_fs *c, u64 nr)
 
        pr_info("iterating backwards");
 
-       while (!IS_ERR_OR_NULL((k = bch2_btree_iter_prev(iter)).k))
+       while (!IS_ERR_OR_NULL((k = bch2_btree_iter_prev(&iter)).k))
                BUG_ON(k.k->p.offset != --i);
 
        BUG_ON(i);
 err:
-       bch2_trans_iter_put(&trans, iter);
+       bch2_trans_iter_exit(&trans, &iter);
        bch2_trans_exit(&trans);
        return ret;
 }
@@ -169,7 +168,7 @@ err:
 static int test_iterate_extents(struct bch_fs *c, u64 nr)
 {
        struct btree_trans trans;
-       struct btree_iter *iter = NULL;
+       struct btree_iter iter = { NULL };
        struct bkey_s_c k;
        u64 i;
        int ret = 0;
@@ -210,14 +209,14 @@ static int test_iterate_extents(struct bch_fs *c, u64 nr)
 
        pr_info("iterating backwards");
 
-       while (!IS_ERR_OR_NULL((k = bch2_btree_iter_prev(iter)).k)) {
+       while (!IS_ERR_OR_NULL((k = bch2_btree_iter_prev(&iter)).k)) {
                BUG_ON(k.k->p.offset != i);
                i = bkey_start_offset(k.k);
        }
 
        BUG_ON(i);
 err:
-       bch2_trans_iter_put(&trans, iter);
+       bch2_trans_iter_exit(&trans, &iter);
        bch2_trans_exit(&trans);
        return ret;
 }
@@ -225,7 +224,7 @@ err:
 static int test_iterate_slots(struct bch_fs *c, u64 nr)
 {
        struct btree_trans trans;
-       struct btree_iter *iter;
+       struct btree_iter iter = { NULL };
        struct bkey_s_c k;
        u64 i;
        int ret = 0;
@@ -263,7 +262,7 @@ static int test_iterate_slots(struct bch_fs *c, u64 nr)
                BUG_ON(k.k->p.offset != i);
                i += 2;
        }
-       bch2_trans_iter_put(&trans, iter);
+       bch2_trans_iter_exit(&trans, &iter);
 
        BUG_ON(i != nr * 2);
 
@@ -280,7 +279,7 @@ static int test_iterate_slots(struct bch_fs *c, u64 nr)
                if (i == nr * 2)
                        break;
        }
-       bch2_trans_iter_put(&trans, iter);
+       bch2_trans_iter_exit(&trans, &iter);
 err:
        bch2_trans_exit(&trans);
        return ret;
@@ -289,7 +288,7 @@ err:
 static int test_iterate_slots_extents(struct bch_fs *c, u64 nr)
 {
        struct btree_trans trans;
-       struct btree_iter *iter;
+       struct btree_iter iter = { NULL };
        struct bkey_s_c k;
        u64 i;
        int ret = 0;
@@ -326,7 +325,7 @@ static int test_iterate_slots_extents(struct bch_fs *c, u64 nr)
                BUG_ON(k.k->size != 8);
                i += 16;
        }
-       bch2_trans_iter_put(&trans, iter);
+       bch2_trans_iter_exit(&trans, &iter);
 
        BUG_ON(i != nr);
 
@@ -345,7 +344,7 @@ static int test_iterate_slots_extents(struct bch_fs *c, u64 nr)
                if (i == nr)
                        break;
        }
-       bch2_trans_iter_put(&trans, iter);
+       bch2_trans_iter_exit(&trans, &iter);
 err:
        bch2_trans_exit(&trans);
        return 0;
@@ -358,21 +357,19 @@ err:
 static int test_peek_end(struct bch_fs *c, u64 nr)
 {
        struct btree_trans trans;
-       struct btree_iter *iter;
+       struct btree_iter iter;
        struct bkey_s_c k;
 
        bch2_trans_init(&trans, c, 0, 0);
+       bch2_trans_iter_init(&trans, &iter, BTREE_ID_xattrs, POS_MIN, 0);
 
-       iter = bch2_trans_get_iter(&trans, BTREE_ID_xattrs, POS_MIN, 0);
-
-       k = bch2_btree_iter_peek(iter);
+       k = bch2_btree_iter_peek(&iter);
        BUG_ON(k.k);
 
-       k = bch2_btree_iter_peek(iter);
+       k = bch2_btree_iter_peek(&iter);
        BUG_ON(k.k);
 
-       bch2_trans_iter_put(&trans, iter);
-
+       bch2_trans_iter_exit(&trans, &iter);
        bch2_trans_exit(&trans);
        return 0;
 }
@@ -380,21 +377,19 @@ static int test_peek_end(struct bch_fs *c, u64 nr)
 static int test_peek_end_extents(struct bch_fs *c, u64 nr)
 {
        struct btree_trans trans;
-       struct btree_iter *iter;
+       struct btree_iter iter;
        struct bkey_s_c k;
 
        bch2_trans_init(&trans, c, 0, 0);
+       bch2_trans_iter_init(&trans, &iter, BTREE_ID_extents, POS_MIN, 0);
 
-       iter = bch2_trans_get_iter(&trans, BTREE_ID_extents, POS_MIN, 0);
-
-       k = bch2_btree_iter_peek(iter);
+       k = bch2_btree_iter_peek(&iter);
        BUG_ON(k.k);
 
-       k = bch2_btree_iter_peek(iter);
+       k = bch2_btree_iter_peek(&iter);
        BUG_ON(k.k);
 
-       bch2_trans_iter_put(&trans, iter);
-
+       bch2_trans_iter_exit(&trans, &iter);
        bch2_trans_exit(&trans);
        return 0;
 }
@@ -540,18 +535,18 @@ static int rand_insert_multi(struct bch_fs *c, u64 nr)
 static int rand_lookup(struct bch_fs *c, u64 nr)
 {
        struct btree_trans trans;
-       struct btree_iter *iter;
+       struct btree_iter iter;
        struct bkey_s_c k;
        int ret = 0;
        u64 i;
 
        bch2_trans_init(&trans, c, 0, 0);
-       iter = bch2_trans_get_iter(&trans, BTREE_ID_xattrs, POS_MIN, 0);
+       bch2_trans_iter_init(&trans, &iter, BTREE_ID_xattrs, POS_MIN, 0);
 
        for (i = 0; i < nr; i++) {
-               bch2_btree_iter_set_pos(iter, POS(0, test_rand()));
+               bch2_btree_iter_set_pos(&iter, POS(0, test_rand()));
 
-               k = bch2_btree_iter_peek(iter);
+               k = bch2_btree_iter_peek(&iter);
                ret = bkey_err(k);
                if (ret) {
                        bch_err(c, "error in rand_lookup: %i", ret);
@@ -559,63 +554,73 @@ static int rand_lookup(struct bch_fs *c, u64 nr)
                }
        }
 
-       bch2_trans_iter_put(&trans, iter);
+       bch2_trans_iter_exit(&trans, &iter);
        bch2_trans_exit(&trans);
        return ret;
 }
 
+static int rand_mixed_trans(struct btree_trans *trans,
+                           struct btree_iter *iter,
+                           struct bkey_i_cookie *cookie,
+                           u64 i, u64 pos)
+{
+       struct bkey_s_c k;
+       int ret;
+
+       bch2_btree_iter_set_pos(iter, POS(0, pos));
+
+       k = bch2_btree_iter_peek(iter);
+       ret = bkey_err(k);
+       if (ret && ret != -EINTR)
+               bch_err(trans->c, "lookup error in rand_mixed: %i", ret);
+       if (ret)
+               return ret;
+
+       if (!(i & 3) && k.k) {
+               bkey_cookie_init(&cookie->k_i);
+               cookie->k.p = iter->pos;
+               bch2_trans_update(trans, iter, &cookie->k_i, 0);
+       }
+
+       return 0;
+}
+
 static int rand_mixed(struct bch_fs *c, u64 nr)
 {
        struct btree_trans trans;
-       struct btree_iter *iter;
-       struct bkey_s_c k;
+       struct btree_iter iter;
+       struct bkey_i_cookie cookie;
        int ret = 0;
-       u64 i;
+       u64 i, rand;
 
        bch2_trans_init(&trans, c, 0, 0);
-       iter = bch2_trans_get_iter(&trans, BTREE_ID_xattrs, POS_MIN, 0);
+       bch2_trans_iter_init(&trans, &iter, BTREE_ID_xattrs, POS_MIN, 0);
 
        for (i = 0; i < nr; i++) {
-               bch2_btree_iter_set_pos(iter, POS(0, test_rand()));
-
-               k = bch2_btree_iter_peek(iter);
-               ret = bkey_err(k);
+               rand = test_rand();
+               ret = __bch2_trans_do(&trans, NULL, NULL, 0,
+                       rand_mixed_trans(&trans, &iter, &cookie, i, rand));
                if (ret) {
-                       bch_err(c, "lookup error in rand_mixed: %i", ret);
+                       bch_err(c, "update error in rand_mixed: %i", ret);
                        break;
                }
-
-               if (!(i & 3) && k.k) {
-                       struct bkey_i_cookie k;
-
-                       bkey_cookie_init(&k.k_i);
-                       k.k.p = iter->pos;
-
-                       ret = __bch2_trans_do(&trans, NULL, NULL, 0,
-                               bch2_btree_iter_traverse(iter) ?:
-                               bch2_trans_update(&trans, iter, &k.k_i, 0));
-                       if (ret) {
-                               bch_err(c, "update error in rand_mixed: %i", ret);
-                               break;
-                       }
-               }
        }
 
-       bch2_trans_iter_put(&trans, iter);
+       bch2_trans_iter_exit(&trans, &iter);
        bch2_trans_exit(&trans);
        return ret;
 }
 
 static int __do_delete(struct btree_trans *trans, struct bpos pos)
 {
-       struct btree_iter *iter;
+       struct btree_iter iter;
        struct bkey_i delete;
        struct bkey_s_c k;
        int ret = 0;
 
-       iter = bch2_trans_get_iter(trans, BTREE_ID_xattrs, pos,
-                                  BTREE_ITER_INTENT);
-       k = bch2_btree_iter_peek(iter);
+       bch2_trans_iter_init(trans, &iter, BTREE_ID_xattrs, pos,
+                            BTREE_ITER_INTENT);
+       k = bch2_btree_iter_peek(&iter);
        ret = bkey_err(k);
        if (ret)
                goto err;
@@ -626,9 +631,9 @@ static int __do_delete(struct btree_trans *trans, struct bpos pos)
        bkey_init(&delete.k);
        delete.k.p = k.k->p;
 
-       ret = bch2_trans_update(trans, iter, &delete, 0);
+       ret = bch2_trans_update(trans, &iter, &delete, 0);
 err:
-       bch2_trans_iter_put(trans, iter);
+       bch2_trans_iter_exit(trans, &iter);
        return ret;
 }
 
@@ -658,7 +663,7 @@ static int rand_delete(struct bch_fs *c, u64 nr)
 static int seq_insert(struct bch_fs *c, u64 nr)
 {
        struct btree_trans trans;
-       struct btree_iter *iter;
+       struct btree_iter iter;
        struct bkey_s_c k;
        struct bkey_i_cookie insert;
        int ret = 0;
@@ -670,11 +675,11 @@ static int seq_insert(struct bch_fs *c, u64 nr)
 
        for_each_btree_key(&trans, iter, BTREE_ID_xattrs, POS_MIN,
                           BTREE_ITER_SLOTS|BTREE_ITER_INTENT, k, ret) {
-               insert.k.p = iter->pos;
+               insert.k.p = iter.pos;
 
                ret = __bch2_trans_do(&trans, NULL, NULL, 0,
-                       bch2_btree_iter_traverse(iter) ?:
-                       bch2_trans_update(&trans, iter, &insert.k_i, 0));
+                       bch2_btree_iter_traverse(&iter) ?:
+                       bch2_trans_update(&trans, &iter, &insert.k_i, 0));
                if (ret) {
                        bch_err(c, "error in seq_insert: %i", ret);
                        break;
@@ -683,7 +688,7 @@ static int seq_insert(struct bch_fs *c, u64 nr)
                if (++i == nr)
                        break;
        }
-       bch2_trans_iter_put(&trans, iter);
+       bch2_trans_iter_exit(&trans, &iter);
 
        bch2_trans_exit(&trans);
        return ret;
@@ -692,7 +697,7 @@ static int seq_insert(struct bch_fs *c, u64 nr)
 static int seq_lookup(struct bch_fs *c, u64 nr)
 {
        struct btree_trans trans;
-       struct btree_iter *iter;
+       struct btree_iter iter;
        struct bkey_s_c k;
        int ret = 0;
 
@@ -700,7 +705,7 @@ static int seq_lookup(struct bch_fs *c, u64 nr)
 
        for_each_btree_key(&trans, iter, BTREE_ID_xattrs, POS_MIN, 0, k, ret)
                ;
-       bch2_trans_iter_put(&trans, iter);
+       bch2_trans_iter_exit(&trans, &iter);
 
        bch2_trans_exit(&trans);
        return ret;
@@ -709,7 +714,7 @@ static int seq_lookup(struct bch_fs *c, u64 nr)
 static int seq_overwrite(struct bch_fs *c, u64 nr)
 {
        struct btree_trans trans;
-       struct btree_iter *iter;
+       struct btree_iter iter;
        struct bkey_s_c k;
        int ret = 0;
 
@@ -722,14 +727,14 @@ static int seq_overwrite(struct bch_fs *c, u64 nr)
                bkey_reassemble(&u.k_i, k);
 
                ret = __bch2_trans_do(&trans, NULL, NULL, 0,
-                       bch2_btree_iter_traverse(iter) ?:
-                       bch2_trans_update(&trans, iter, &u.k_i, 0));
+                       bch2_btree_iter_traverse(&iter) ?:
+                       bch2_trans_update(&trans, &iter, &u.k_i, 0));
                if (ret) {
                        bch_err(c, "error in seq_overwrite: %i", ret);
                        break;
                }
        }
-       bch2_trans_iter_put(&trans, iter);
+       bch2_trans_iter_exit(&trans, &iter);
 
        bch2_trans_exit(&trans);
        return ret;
index af1f415fb5e7e9a6f3566bf0e54d10608e5d7ef0..44a556518d4a30a574a365932effad9f86dcbf7b 100644 (file)
@@ -540,7 +540,7 @@ TRACE_EVENT(copygc_wait,
                  __entry->wait_amount, __entry->until)
 );
 
-TRACE_EVENT(trans_get_iter,
+TRACE_EVENT(trans_get_path,
        TP_PROTO(unsigned long trans_ip,
                 unsigned long caller_ip,
                 enum btree_id btree_id,
@@ -814,7 +814,7 @@ TRACE_EVENT(iter_traverse,
                  __entry->ret)
 );
 
-TRACE_EVENT(iter_set_search_pos,
+TRACE_EVENT(path_set_pos,
        TP_PROTO(unsigned long  trans_ip,
                 unsigned long  caller_ip,
                 enum btree_id  btree_id,
index bf4164f987435f7b1775e26d0bacb9af2140c357..babbfaadeb3f328d7dc7ae973c6365fb75e8af1d 100644 (file)
@@ -122,23 +122,22 @@ static int bch2_xattr_get_trans(struct btree_trans *trans, struct bch_inode_info
                                const char *name, void *buffer, size_t size, int type)
 {
        struct bch_hash_info hash = bch2_hash_info_init(trans->c, &inode->ei_inode);
-       struct btree_iter *iter;
+       struct btree_iter iter;
        struct bkey_s_c_xattr xattr;
        struct bkey_s_c k;
        int ret;
 
-       iter = bch2_hash_lookup(trans, bch2_xattr_hash_desc, &hash,
-                               inode->v.i_ino,
-                               &X_SEARCH(type, name, strlen(name)),
-                               0);
-       ret = PTR_ERR_OR_ZERO(iter);
+       ret = bch2_hash_lookup(trans, &iter, bch2_xattr_hash_desc, &hash,
+                              inode->v.i_ino,
+                              &X_SEARCH(type, name, strlen(name)),
+                              0);
        if (ret)
-               goto err;
+               goto err1;
 
-       k = bch2_btree_iter_peek_slot(iter);
+       k = bch2_btree_iter_peek_slot(&iter);
        ret = bkey_err(k);
        if (ret)
-               goto err;
+               goto err2;
 
        xattr = bkey_s_c_to_xattr(k);
        ret = le16_to_cpu(xattr.v->x_val_len);
@@ -148,8 +147,9 @@ static int bch2_xattr_get_trans(struct btree_trans *trans, struct bch_inode_info
                else
                        memcpy(buffer, xattr_val(xattr.v), ret);
        }
-       bch2_trans_iter_put(trans, iter);
-err:
+err2:
+       bch2_trans_iter_exit(trans, &iter);
+err1:
        return ret == -ENOENT ? -ENODATA : ret;
 }
 
@@ -279,7 +279,7 @@ ssize_t bch2_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size)
        struct bch_fs *c = dentry->d_sb->s_fs_info;
        struct bch_inode_info *inode = to_bch_ei(dentry->d_inode);
        struct btree_trans trans;
-       struct btree_iter *iter;
+       struct btree_iter iter;
        struct bkey_s_c k;
        struct xattr_buf buf = { .buf = buffer, .len = buffer_size };
        u64 inum = dentry->d_inode->i_ino;
@@ -301,7 +301,7 @@ ssize_t bch2_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size)
                if (ret)
                        break;
        }
-       bch2_trans_iter_put(&trans, iter);
+       bch2_trans_iter_exit(&trans, &iter);
 
        ret = bch2_trans_exit(&trans) ?: ret;