bch2_btree_iter_set_pos(iter, POS(ca->dev_idx, b));
 retry:
        k = bch2_btree_iter_peek_slot(iter);
-       ret = btree_iter_err(k);
+       ret = bkey_err(k);
        if (ret)
                return ret;
 
 
                 * We might have got -EINTR because trylock failed, and we're
                 * holding other locks that would cause us to deadlock:
                 */
-               for_each_linked_btree_iter(iter, linked)
+               trans_for_each_iter(iter->trans, linked)
                        if (btree_iter_cmp(iter, linked) < 0)
                                __bch2_btree_iter_unlock(linked);
 
                        }
                }
 
-               bch2_btree_iter_relock(iter);
+               bch2_btree_trans_relock(iter->trans);
        }
 out:
        if (btree_lock_want(iter, level + 1) == BTREE_NODE_UNLOCKED)
                btree_node_unlock(iter, level + 1);
 
-       bch2_btree_iter_verify_locks(iter);
+       bch2_btree_trans_verify_locks(iter->trans);
 
        BUG_ON((!may_drop_locks || !IS_ERR(ret)) &&
               (iter->uptodate >= BTREE_ITER_NEED_RELOCK ||
 
        EBUG_ON(iter->l[b->level].b != b);
        EBUG_ON(iter->l[b->level].lock_seq + 1 != b->lock.state.seq);
 
-       for_each_btree_iter_with_node(iter, b, linked)
+       trans_for_each_iter_with_node(iter->trans, b, linked)
                linked->l[b->level].lock_seq += 2;
 
        six_unlock_write(&b->lock);
 
        EBUG_ON(btree_node_read_locked(iter, b->level));
 
-       for_each_linked_btree_iter(iter, linked)
+       trans_for_each_iter(iter->trans, linked)
                if (linked->l[b->level].b == b &&
                    btree_node_read_locked(linked, b->level))
                        readers++;
        if (iter->uptodate == BTREE_ITER_NEED_RELOCK)
                iter->uptodate = BTREE_ITER_NEED_PEEK;
 
-       bch2_btree_iter_verify_locks(iter);
+       bch2_btree_trans_verify_locks(iter->trans);
+
        return iter->uptodate < BTREE_ITER_NEED_RELOCK;
 }
 
        bool ret = true;
 
        /* Check if it's safe to block: */
-       for_each_btree_iter(iter, linked) {
+       trans_for_each_iter(iter->trans, linked) {
                if (!linked->nodes_locked)
                        continue;
 
 /* Btree iterator locking: */
 
 #ifdef CONFIG_BCACHEFS_DEBUG
-void __bch2_btree_iter_verify_locks(struct btree_iter *iter)
+void bch2_btree_iter_verify_locks(struct btree_iter *iter)
 {
        unsigned l;
 
        }
 }
 
-void bch2_btree_iter_verify_locks(struct btree_iter *iter)
+void bch2_btree_trans_verify_locks(struct btree_trans *trans)
 {
-       struct btree_iter *linked;
-
-       for_each_btree_iter(iter, linked)
-               __bch2_btree_iter_verify_locks(linked);
+       struct btree_iter *iter;
 
+       trans_for_each_iter(trans, iter)
+               bch2_btree_iter_verify_locks(iter);
 }
 #endif
 
 __flatten
-static bool __bch2_btree_iter_relock(struct btree_iter *iter)
+static bool bch2_btree_iter_relock(struct btree_iter *iter)
 {
        return iter->uptodate >= BTREE_ITER_NEED_RELOCK
                ? btree_iter_get_locks(iter, false)
                : true;
 }
 
-bool bch2_btree_iter_relock(struct btree_iter *iter)
-{
-       struct btree_iter *linked;
-       bool ret = true;
-
-       for_each_btree_iter(iter, linked)
-               ret &= __bch2_btree_iter_relock(linked);
-
-       return ret;
-}
-
 bool __bch2_btree_iter_upgrade(struct btree_iter *iter,
                               unsigned new_locks_want)
 {
         * on iterators that might lock ancestors before us to avoid getting
         * -EINTR later:
         */
-       for_each_linked_btree_iter(iter, linked)
-               if (linked->btree_id == iter->btree_id &&
+       trans_for_each_iter(iter->trans, linked)
+               if (linked != iter &&
+                   linked->btree_id == iter->btree_id &&
                    btree_iter_cmp(linked, iter) <= 0 &&
                    linked->locks_want < new_locks_want) {
                        linked->locks_want = new_locks_want;
         * might have had to modify locks_want on linked iterators due to lock
         * ordering:
         */
-       for_each_btree_iter(iter, linked) {
+       trans_for_each_iter(iter->trans, linked) {
                unsigned new_locks_want = downgrade_to ?:
                        (linked->flags & BTREE_ITER_INTENT ? 1 : 0);
 
                }
        }
 
-       bch2_btree_iter_verify_locks(iter);
+       bch2_btree_trans_verify_locks(iter->trans);
 }
 
 int bch2_btree_iter_unlock(struct btree_iter *iter)
 {
        struct btree_iter *linked;
 
-       for_each_btree_iter(iter, linked)
+       trans_for_each_iter(iter->trans, linked)
                __bch2_btree_iter_unlock(linked);
 
-       return iter->flags & BTREE_ITER_ERROR ? -EIO : 0;
+       return btree_iter_err(iter);
 }
 
+/* Btree transaction locking: */
+
+bool bch2_btree_trans_relock(struct btree_trans *trans)
+{
+       struct btree_iter *iter;
+       bool ret = true;
+
+       trans_for_each_iter(trans, iter)
+               ret &= bch2_btree_iter_relock(iter);
+
+       return ret;
+}
+
+void bch2_btree_trans_unlock(struct btree_trans *trans)
+{
+       struct btree_iter *iter;
+
+       trans_for_each_iter(trans, iter)
+               __bch2_btree_iter_unlock(iter);
+}
+
 /* Btree iterator: */
 
 #ifdef CONFIG_BCACHEFS_DEBUG
 {
        struct btree_iter *linked;
 
-       for_each_btree_iter_with_node(iter, b, linked)
+       trans_for_each_iter_with_node(iter->trans, b, linked)
                __bch2_btree_iter_verify(linked, b);
 }
 
                __bch2_btree_node_iter_fix(iter, b, node_iter, t,
                                          where, clobber_u64s, new_u64s);
 
-       for_each_btree_iter_with_node(iter, b, linked)
+       trans_for_each_iter_with_node(iter->trans, b, linked)
                __bch2_btree_node_iter_fix(linked, b,
                                          &linked->l[b->level].iter, t,
                                          where, clobber_u64s, new_u64s);
        enum btree_node_locked_type t;
        struct btree_iter *linked;
 
-       for_each_btree_iter(iter, linked)
+       trans_for_each_iter(iter->trans, linked)
                if (btree_iter_pos_in_node(linked, b)) {
                        /*
                         * bch2_btree_iter_node_drop() has already been called -
        iter->l[level].b = BTREE_ITER_NOT_END;
        mark_btree_node_unlocked(iter, level);
 
-       for_each_btree_iter(iter, linked)
+       trans_for_each_iter(iter->trans, linked)
                if (linked->l[level].b == b) {
                        __btree_node_unlock(linked, level);
                        linked->l[level].b = BTREE_ITER_NOT_END;
 {
        struct btree_iter *linked;
 
-       for_each_btree_iter_with_node(iter, b, linked)
+       trans_for_each_iter_with_node(iter->trans, b, linked)
                __btree_iter_init(linked, b->level);
 }
 
                iter = iter->next;
        } while (iter != sorted_iters);
 
-       ret = btree_iter_linked(iter) ? -EINTR : 0;
+       ret = btree_trans_has_multiple_iters(iter->trans) ? -EINTR : 0;
 out:
        bch2_btree_cache_cannibalize_unlock(c);
        return ret;
        if (unlikely(iter->level >= BTREE_MAX_DEPTH))
                return 0;
 
-       if (__bch2_btree_iter_relock(iter))
+       if (bch2_btree_iter_relock(iter))
                return 0;
 
        /*
 
        iter->uptodate = BTREE_ITER_NEED_PEEK;
 
-       bch2_btree_iter_verify_locks(iter);
+       bch2_btree_trans_verify_locks(iter->trans);
        __bch2_btree_iter_verify(iter, iter->l[iter->level].b);
        return 0;
 }
        if (unlikely(ret))
                ret = btree_iter_traverse_error(iter, ret);
 
-       BUG_ON(ret == -EINTR && !btree_iter_linked(iter));
+       BUG_ON(ret == -EINTR && !btree_trans_has_multiple_iters(iter->trans));
 
        return ret;
 }
                (iter->btree_id == BTREE_ID_EXTENTS &&
                 type != BTREE_ITER_NODES));
 
-       bch2_btree_iter_verify_locks(iter);
+       bch2_btree_trans_verify_locks(iter->trans);
 }
 
 /* Iterate across nodes (leaf and interior nodes) */
        if (!btree_iter_linked(iter))
                return;
 
-       for_each_linked_btree_iter(iter, linked)
+       trans_for_each_iter(iter->trans, linked)
                if (linked->next == iter) {
                        linked->next = iter->next;
                        iter->next = iter;
                        struct btree_iter *iter)
 {
        ssize_t idx = btree_trans_iter_idx(trans, iter);
-       int ret = (iter->flags & BTREE_ITER_ERROR) ? -EIO : 0;
+       int ret = btree_iter_err(iter);
 
        trans->iters_live       &= ~(1ULL << idx);
        return ret;
 int bch2_trans_iter_free(struct btree_trans *trans,
                         struct btree_iter *iter)
 {
-       int ret = (iter->flags & BTREE_ITER_ERROR) ? -EIO : 0;
+       int ret = btree_iter_err(iter);
 
        __bch2_trans_iter_free(trans, btree_trans_iter_idx(trans, iter));
        return ret;
 int bch2_trans_iter_free_on_commit(struct btree_trans *trans,
                                   struct btree_iter *iter)
 {
-       int ret = (iter->flags & BTREE_ITER_ERROR) ? -EIO : 0;
+       int ret = btree_iter_err(iter);
 
        trans->iters_unlink_on_commit |=
                1ULL << btree_trans_iter_idx(trans, iter);
                unsigned idx = __ffs(iters);
                struct btree_iter *iter = &trans->iters[idx];
 
-               if (iter->flags & BTREE_ITER_ERROR)
-                       ret = -EIO;
+               ret = ret ?: btree_iter_err(iter);
 
                __bch2_btree_iter_unlock(iter);
                iters ^= 1ULL << idx;
 
        return btree_iter_node(iter, b->level + 1);
 }
 
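+/*
+ * More than one iterator linked into the transaction - i.e. -EINTR lock
+ * restarts are a possibility:
+ */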
+static inline bool
+btree_trans_has_multiple_iters(const struct btree_trans *trans)
+{
+       return hweight64(trans->iters_linked) > 1;
+}
+
 static inline bool btree_iter_linked(const struct btree_iter *iter)
 {
        return iter->next != iter;
 }
 
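+/* -EIO if a btree node read error was flagged on this iterator: */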
+static inline int btree_iter_err(const struct btree_iter *iter)
+{
+       return iter->flags & BTREE_ITER_ERROR ? -EIO : 0;
+}
+
+/* Iterate over iters within a transaction: */
+
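+/*
+ * Returns the next iterator linked into @trans at or after @iter, or
+ * NULL if there are no more:
+ */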
+static inline struct btree_iter *
+__trans_next_iter(struct btree_trans *trans, struct btree_iter *iter)
+{
+       unsigned idx;
+
+       /* XXX expensive pointer subtraction: */
+
+       for (idx = iter - trans->iters;
+            idx < trans->nr_iters;
+            idx++)
+               if (trans->iters_linked & (1ULL << idx))
+                       return &trans->iters[idx];
+
+       return NULL;
+}
+
+#define trans_for_each_iter(_trans, _iter)                             \
+       for (_iter = (_trans)->iters;                                   \
+            (_iter = __trans_next_iter((_trans), _iter));              \
+            _iter++)
+
 static inline bool __iter_has_node(const struct btree_iter *iter,
                                   const struct btree *b)
 {
 }
 
 static inline struct btree_iter *
-__next_linked_iter(struct btree_iter *iter, struct btree_iter *linked)
+__trans_next_iter_with_node(struct btree_trans *trans, struct btree *b,
+                           struct btree_iter *iter)
 {
-       return linked->next != iter ? linked->next : NULL;
-}
+       unsigned idx;
 
-static inline struct btree_iter *
-__next_iter_with_node(struct btree_iter *iter, struct btree *b,
-                     struct btree_iter *linked)
-{
-       while (linked && !__iter_has_node(linked, b))
-               linked = __next_linked_iter(iter, linked);
+       /* XXX expensive pointer subtraction: */
+
+       for (idx = iter - trans->iters;
+            idx < trans->nr_iters;
+            idx++) {
+               if (!(trans->iters_linked & (1ULL << idx)))
+                       continue;
 
-       return linked;
+               iter = &trans->iters[idx];
+               if (__iter_has_node(iter, b))
+                       return iter;
+       }
+
+       return NULL;
 }
 
-/**
- * for_each_btree_iter - iterate over all iterators linked with @_iter,
- * including @_iter
- */
-#define for_each_btree_iter(_iter, _linked)                            \
-       for ((_linked) = (_iter); (_linked);                            \
-            (_linked) = __next_linked_iter(_iter, _linked))
-
-/**
- * for_each_btree_iter_with_node - iterate over all iterators linked with @_iter
- * that also point to @_b
- *
- * @_b is assumed to be locked by @_iter
- *
- * Filters out iterators that don't have a valid btree_node iterator for @_b -
- * i.e. iterators for which bch2_btree_node_relock() would not succeed.
- */
-#define for_each_btree_iter_with_node(_iter, _b, _linked)              \
-       for ((_linked) = (_iter);                                       \
-            ((_linked) = __next_iter_with_node(_iter, _b, _linked));   \
-            (_linked) = __next_linked_iter(_iter, _linked))
-
-/**
- * for_each_linked_btree_iter - iterate over all iterators linked with @_iter,
- * _not_ including @_iter
- */
-#define for_each_linked_btree_iter(_iter, _linked)                     \
-       for ((_linked) = (_iter)->next;                                 \
-            (_linked) != (_iter);                                      \
-            (_linked) = (_linked)->next)
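+/**
+ * trans_for_each_iter_with_node - iterate over all iterators within a
+ * transaction that point to @_b
+ *
+ * Filters out iterators that don't have a valid btree_node iterator for
+ * @_b - i.e. iterators for which bch2_btree_node_relock() would not
+ * succeed.
+ */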
+#define trans_for_each_iter_with_node(_trans, _b, _iter)               \
+       for (_iter = (_trans)->iters;                                   \
+            (_iter = __trans_next_iter_with_node((_trans), (_b), _iter));\
+            _iter++)
 
 #ifdef CONFIG_BCACHEFS_DEBUG
 void bch2_btree_iter_verify(struct btree_iter *, struct btree *);
-void bch2_btree_iter_verify_locks(struct btree_iter *);
+void bch2_btree_trans_verify_locks(struct btree_trans *);
 #else
 static inline void bch2_btree_iter_verify(struct btree_iter *iter,
                                          struct btree *b) {}
-static inline void bch2_btree_iter_verify_locks(struct btree_iter *iter) {}
+static inline void bch2_btree_trans_verify_locks(struct btree_trans *trans) {}
 #endif
 
 void bch2_btree_node_iter_fix(struct btree_iter *, struct btree *,
                              unsigned, unsigned);
 
 int bch2_btree_iter_unlock(struct btree_iter *);
-bool bch2_btree_iter_relock(struct btree_iter *);
+
+bool bch2_btree_trans_relock(struct btree_trans *);
+void bch2_btree_trans_unlock(struct btree_trans *);
 
 bool __bch2_btree_iter_upgrade(struct btree_iter *, unsigned);
 bool __bch2_btree_iter_upgrade_nounlock(struct btree_iter *, unsigned);
             !IS_ERR_OR_NULL((_k).k);                                   \
             (_k) = __bch2_btree_iter_next(_iter, _flags))
 
-static inline int btree_iter_err(struct bkey_s_c k)
+static inline int bkey_err(struct bkey_s_c k)
 {
        return PTR_ERR_OR_ZERO(k.k);
 }
 
 {
        struct btree_iter *linked;
 
-       for_each_linked_btree_iter(iter, linked)
-               if (linked->l[level].b == b &&
+       trans_for_each_iter(iter->trans, linked)
+               if (linked != iter &&
+                   linked->l[level].b == b &&
                    btree_node_locked_type(linked, level) >= want) {
                        six_lock_increment(&b->lock, want);
                        return true;
 
 {
        struct btree_iter *linked;
 
-       for_each_btree_iter(iter, linked)
+       trans_for_each_iter(iter->trans, linked)
                BUG_ON(linked->l[b->level].b == b);
 
        /*
 
        bch2_btree_node_free_inmem(c, b, iter);
 
-       bch2_btree_iter_verify_locks(iter);
+       bch2_btree_trans_verify_locks(iter->trans);
 
        bch2_time_stats_update(&c->times[BCH_TIME_btree_node_split],
                               start_time);
 
        btree_update_updated_node(as, b);
 
-       for_each_btree_iter_with_node(iter, b, linked)
+       trans_for_each_iter_with_node(iter->trans, b, linked)
                bch2_btree_node_iter_peek(&linked->l[b->level].iter, b);
 
        bch2_btree_iter_verify(iter, b);
         * We already have a disk reservation and open buckets pinned; this
         * allocation must not block:
         */
-       for_each_btree_iter(iter, linked)
+       trans_for_each_iter(iter->trans, linked)
                if (linked->btree_id == BTREE_ID_EXTENTS)
                        flags |= BTREE_INSERT_USE_RESERVE;
 
                if (flags & BTREE_INSERT_NOUNLOCK)
                        return -EINTR;
 
-               bch2_btree_iter_unlock(iter);
+               bch2_btree_trans_unlock(iter->trans);
                down_read(&c->gc_lock);
 
-               if (btree_iter_linked(iter))
+               if (!bch2_btree_trans_relock(iter->trans))
                        ret = -EINTR;
        }
 
        if (!(flags & BTREE_INSERT_GC_LOCK_HELD))
                up_read(&c->gc_lock);
 out:
-       bch2_btree_iter_verify_locks(iter);
+       bch2_btree_trans_verify_locks(iter->trans);
 
        /*
         * Don't downgrade locks here: we're called after successful insert,
                return -EINTR;
 
        if (!down_read_trylock(&c->gc_lock)) {
-               bch2_btree_iter_unlock(iter);
+               bch2_btree_trans_unlock(iter->trans);
                down_read(&c->gc_lock);
 
-               if (!bch2_btree_iter_relock(iter)) {
+               if (!bch2_btree_trans_relock(iter->trans)) {
                        ret = -EINTR;
                        goto err;
                }
                /* bch2_btree_reserve_get will unlock */
                ret = bch2_btree_cache_cannibalize_lock(c, &cl);
                if (ret) {
-                       ret = -EINTR;
-
-                       bch2_btree_iter_unlock(iter);
+                       bch2_btree_trans_unlock(iter->trans);
                        up_read(&c->gc_lock);
                        closure_sync(&cl);
                        down_read(&c->gc_lock);
 
-                       if (!bch2_btree_iter_relock(iter))
+                       if (!bch2_btree_trans_relock(iter->trans)) {
+                               ret = -EINTR;
                                goto err;
+                       }
                }
 
                new_hash = bch2_btree_node_mem_alloc(c);
                if (ret != -EINTR)
                        goto err;
 
-               bch2_btree_iter_unlock(iter);
+               bch2_btree_trans_unlock(iter->trans);
                up_read(&c->gc_lock);
                closure_sync(&cl);
                down_read(&c->gc_lock);
 
-               if (!bch2_btree_iter_relock(iter))
+               if (!bch2_btree_trans_relock(iter->trans))
                        goto err;
        }
 
 
                bch2_btree_node_unlock_write(i->iter->l[0].b, i->iter);
 }
 
-static bool btree_trans_relock(struct btree_trans *trans)
-{
-       struct btree_insert_entry *i;
-
-       trans_for_each_update_iter(trans, i)
-               return bch2_btree_iter_relock(i->iter);
-       return true;
-}
-
-static void btree_trans_unlock(struct btree_trans *trans)
-{
-       struct btree_insert_entry *i;
-
-       trans_for_each_update_iter(trans, i) {
-               bch2_btree_iter_unlock(i->iter);
-               break;
-       }
-}
-
 static inline int btree_trans_cmp(struct btree_insert_entry l,
                                  struct btree_insert_entry r)
 {
 
                EBUG_ON((i->iter->flags & BTREE_ITER_IS_EXTENTS) &&
                        !(trans->flags & BTREE_INSERT_ATOMIC));
-
-               bch2_btree_iter_verify_locks(i->iter);
        }
 
        BUG_ON(debug_check_bkeys(c) &&
        if (ret != -EAGAIN)
                return ret;
 
-       btree_trans_unlock(trans);
+       bch2_btree_trans_unlock(trans);
 
        ret = bch2_journal_preres_get(&c->journal,
                        &trans->journal_preres, u64s, 0);
        if (ret)
                return ret;
 
-       if (!btree_trans_relock(trans)) {
+       if (!bch2_btree_trans_relock(trans)) {
                trans_restart(" (iter relock after journal preres get blocked)");
                return -EINTR;
        }
                 * have been traversed/locked, depending on what the caller was
                 * doing:
                 */
-               trans_for_each_update_iter(trans, i) {
-                       for_each_btree_iter(i->iter, linked)
-                               if (linked->uptodate < BTREE_ITER_NEED_RELOCK)
-                                       linked->flags |= BTREE_ITER_NOUNLOCK;
-                       break;
-               }
+               trans_for_each_iter(trans, linked)
+                       if (linked->uptodate < BTREE_ITER_NEED_RELOCK)
+                               linked->flags |= BTREE_ITER_NOUNLOCK;
        }
 
        trans_for_each_update_iter(trans, i)
                                return ret;
                }
 
-               if (btree_trans_relock(trans))
+               if (bch2_btree_trans_relock(trans))
                        return 0;
 
                trans_restart(" (iter relock after marking replicas)");
                ret = -EINTR;
                break;
        case BTREE_INSERT_NEED_JOURNAL_RES:
-               btree_trans_unlock(trans);
+               bch2_btree_trans_unlock(trans);
 
                ret = bch2_trans_journal_res_get(trans, JOURNAL_RES_GET_CHECK);
                if (ret)
                        return ret;
 
-               if (btree_trans_relock(trans))
+               if (bch2_btree_trans_relock(trans))
                        return 0;
 
                trans_restart(" (iter relock after journal res get blocked)");
                        goto err;
                }
 
-               if (i->iter->flags & BTREE_ITER_ERROR) {
-                       ret = -EIO;
+               ret = btree_iter_err(i->iter);
+               if (ret)
                        goto err;
-               }
        }
 
        ret = do_btree_insert_at(trans, stopped_at);
                bch2_btree_iter_downgrade(i->iter);
 err:
        /* make sure we didn't drop or screw up locks: */
-       trans_for_each_update_iter(trans, i) {
-               bch2_btree_iter_verify_locks(i->iter);
-               break;
-       }
+       bch2_btree_trans_verify_locks(trans);
 
-       trans_for_each_update_iter(trans, i) {
-               for_each_btree_iter(i->iter, linked)
-                       linked->flags &= ~BTREE_ITER_NOUNLOCK;
-               break;
-       }
+       trans_for_each_iter(trans, linked)
+               linked->flags &= ~BTREE_ITER_NOUNLOCK;
 
        return ret;
 }
 
        trans_for_each_update(trans, i)
                btree_insert_entry_checks(trans, i);
+       bch2_btree_trans_verify_locks(trans);
 
        if (unlikely(!(trans->flags & BTREE_INSERT_NOCHECK_RW) &&
                     !percpu_ref_tryget(&c->writes))) {
                if (likely(!(trans->flags & BTREE_INSERT_LAZY_RW)))
                        return -EROFS;
 
-               btree_trans_unlock(trans);
+               bch2_btree_trans_unlock(trans);
 
                ret = bch2_fs_read_write_early(c);
                if (ret)
 
                percpu_ref_get(&c->writes);
 
-               if (!btree_trans_relock(trans)) {
+               if (!bch2_btree_trans_relock(trans)) {
                        ret = -EINTR;
                        goto err;
                }
        iter = bch2_trans_get_iter(&trans, id, start, BTREE_ITER_INTENT);
 
        while ((k = bch2_btree_iter_peek(iter)).k &&
-              !(ret = btree_iter_err(k)) &&
+              !(ret = bkey_err(k)) &&
               bkey_cmp(iter->pos, end) < 0) {
                unsigned max_sectors = KEY_SIZE_MAX & (~0 << c->block_bits);
                /* really shouldn't be using a bare, unpadded bkey_i */
 
        iter = bch2_trans_get_iter(&trans, i->id, i->from, BTREE_ITER_PREFETCH);
        k = bch2_btree_iter_peek(iter);
 
-       while (k.k && !(err = btree_iter_err(k))) {
+       while (k.k && !(err = bkey_err(k))) {
                bch2_bkey_val_to_text(&PBUF(i->buf), i->c, k);
                i->bytes = strlen(i->buf);
                BUG_ON(i->bytes >= PAGE_SIZE);
        iter = bch2_trans_get_iter(&trans, i->id, i->from, BTREE_ITER_PREFETCH);
 
        while ((k = bch2_btree_iter_peek(iter)).k &&
-              !(err = btree_iter_err(k))) {
+              !(err = bkey_err(k))) {
                struct btree_iter_level *l = &iter->l[0];
                struct bkey_packed *_k =
                        bch2_btree_node_iter_peek(&l->iter, l->b);
 
                                   POS(0, stripe_idx),
                                   BTREE_ITER_SLOTS);
        k = bch2_btree_iter_peek_slot(iter);
-       if (btree_iter_err(k) || k.k->type != KEY_TYPE_stripe) {
+       if (bkey_err(k) || k.k->type != KEY_TYPE_stripe) {
                __bcache_io_error(c,
                        "error doing reconstruct read: stripe not found");
                kfree(buf);
        if (!__ec_stripe_mem_alloc(c, idx, GFP_NOWAIT|__GFP_NOWARN))
                return 0;
 
-       bch2_btree_iter_unlock(iter);
+       bch2_btree_trans_unlock(iter->trans);
 
        if (!__ec_stripe_mem_alloc(c, idx, GFP_KERNEL))
                return -EINTR;
                                   BTREE_ITER_INTENT);
 
        while ((k = bch2_btree_iter_peek(iter)).k &&
-              !(ret = btree_iter_err(k)) &&
+              !(ret = bkey_err(k)) &&
               bkey_cmp(bkey_start_pos(k.k), pos->p) < 0) {
                idx = extent_matches_stripe(c, &s->key.v, k);
                if (idx < 0) {
        bch2_btree_iter_set_pos(iter, POS(0, idx));
 
        k = bch2_btree_iter_peek_slot(iter);
-       ret = btree_iter_err(k);
+       ret = bkey_err(k);
        if (ret)
                return ret;
 
 
                 * carefully not advancing past @new and thus whatever leaf node
                 * @_iter currently points to:
                 */
-               BUG_ON(btree_iter_err(old));
+               BUG_ON(bkey_err(old));
 
                if (allocating &&
                    !*allocating &&
        if (i_sectors_delta ||
            new_i_size > inode->ei_inode.bi_size) {
                if (c->opts.new_inode_updates) {
-                       bch2_btree_iter_unlock(extent_iter);
+                       bch2_btree_trans_unlock(trans);
                        mutex_lock(&inode->ei_update_lock);
 
-                       if (!bch2_btree_iter_relock(extent_iter)) {
+                       if (!bch2_btree_trans_relock(trans)) {
                                mutex_unlock(&inode->ei_update_lock);
                                return -EINTR;
                        }
        }
 }
 
-static void bchfs_read(struct bch_fs *c, struct btree_iter *iter,
+static void bchfs_read(struct btree_trans *trans, struct btree_iter *iter,
                       struct bch_read_bio *rbio, u64 inum,
                       struct readpages_iter *readpages_iter)
 {
+       struct bch_fs *c = trans->c;
        struct bio *bio = &rbio->bio;
        int flags = BCH_READ_RETRY_IF_STALE|
                BCH_READ_MAY_PROMOTE;
                BUG_ON(!k.k);
 
                if (IS_ERR(k.k)) {
-                       int ret = bch2_btree_iter_unlock(iter);
+                       int ret = btree_iter_err(iter);
                        BUG_ON(!ret);
                        bcache_io_error(c, bio, "btree IO error %i", ret);
                        bio_endio(bio);
                }
 
                bkey_reassemble(&tmp.k, k);
-               bch2_btree_iter_unlock(iter);
+               bch2_btree_trans_unlock(trans);
                k = bkey_i_to_s_c(&tmp.k);
 
                if (readpages_iter) {
                rbio->bio.bi_end_io = bch2_readpages_end_io;
                __bio_add_page(&rbio->bio, page, PAGE_SIZE, 0);
 
-               bchfs_read(c, iter, rbio, inode->v.i_ino, &readpages_iter);
+               bchfs_read(&trans, iter, rbio, inode->v.i_ino,
+                          &readpages_iter);
        }
 
        bch2_pagecache_add_put(&inode->ei_pagecache_lock);
        iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, POS_MIN,
                                   BTREE_ITER_SLOTS);
 
-       bchfs_read(c, iter, rbio, inum, NULL);
+       bchfs_read(&trans, iter, rbio, inum, NULL);
 
        bch2_trans_exit(&trans);
 }
                                   BTREE_ITER_INTENT);
 
        while ((k = bch2_btree_iter_peek(iter)).k &&
-              !(ret = btree_iter_err(k)) &&
+              !(ret = bkey_err(k)) &&
               bkey_cmp(iter->pos, end) < 0) {
                struct disk_reservation disk_res =
                        bch2_disk_reservation_init(c, 0);
 
                ret = bch2_btree_iter_traverse(dst);
                if (ret)
-                       goto btree_iter_err;
+                       goto bkey_err;
 
                bch2_btree_iter_set_pos(src,
                        POS(dst->pos.inode, dst->pos.offset + (len >> 9)));
 
                k = bch2_btree_iter_peek_slot(src);
-               if ((ret = btree_iter_err(k)))
-                       goto btree_iter_err;
+               if ((ret = bkey_err(k)))
+                       goto bkey_err;
 
                bkey_reassemble(&copy.k, k);
 
                                dst, &copy.k,
                                0, true, true, NULL);
                bch2_disk_reservation_put(c, &disk_res);
-btree_iter_err:
+bkey_err:
                if (ret == -EINTR)
                        ret = 0;
                if (ret)
                struct bkey_s_c k;
 
                k = bch2_btree_iter_peek_slot(iter);
-               if ((ret = btree_iter_err(k)))
-                       goto btree_iter_err;
+               if ((ret = bkey_err(k)))
+                       goto bkey_err;
 
                /* already reserved */
                if (k.k->type == KEY_TYPE_reservation &&
                                        "a_res,
                                        sectors, true);
                        if (unlikely(ret))
-                               goto btree_iter_err;
+                               goto bkey_err;
                }
 
                if (reservation.v.nr_replicas < replicas ||
                        ret = bch2_disk_reservation_get(c, &disk_res, sectors,
                                                        replicas, 0);
                        if (unlikely(ret))
-                               goto btree_iter_err;
+                               goto bkey_err;
 
                        reservation.v.nr_replicas = disk_res.nr_replicas;
                }
                                &disk_res, "a_res,
                                iter, &reservation.k_i,
                                0, true, true, NULL);
-btree_iter_err:
+bkey_err:
                bch2_quota_reservation_put(c, inode, "a_res);
                bch2_disk_reservation_put(c, &disk_res);
                if (ret == -EINTR)
 
        return bch2_trans_iter_free(trans, iter) ?: sectors;
 }
 
-static int remove_dirent(struct bch_fs *c, struct btree_iter *iter,
+static int remove_dirent(struct btree_trans *trans,
                         struct bkey_s_c_dirent dirent)
 {
+       struct bch_fs *c = trans->c;
        struct qstr name;
        struct bch_inode_unpacked dir_inode;
        struct bch_hash_info dir_hash_info;
        buf[name.len] = '\0';
        name.name = buf;
 
-       /* Unlock iter so we don't deadlock, after copying name: */
-       bch2_btree_iter_unlock(iter);
+       /* Unlock so we don't deadlock, after copying name: */
+       bch2_btree_trans_unlock(trans);
 
        ret = bch2_inode_find_by_inum(c, dir_inum, &dir_inode);
        if (ret) {
 
 struct hash_check {
        struct bch_hash_info    info;
-       struct btree_trans      *trans;
 
        /* start of current chain of hash collisions: */
        struct btree_iter       *chain;
 
-       /* next offset in current chain of hash collisions: */
-       u64                     next;
+       /* offset of the last key seen in the current chain: */
+       u64                     chain_end;
 };
 
-static void hash_check_init(const struct bch_hash_desc desc,
-                           struct btree_trans *trans,
+static void hash_check_init(struct hash_check *h)
+{
+       h->chain = NULL;
+}
+
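+/* Free the iterator pinning the start of the current hash chain: */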
+static void hash_stop_chain(struct btree_trans *trans,
                            struct hash_check *h)
 {
-       h->trans = trans;
-       h->chain = bch2_trans_get_iter(trans, desc.btree_id, POS_MIN, 0);
-       h->next = -1;
+       if (h->chain)
+               bch2_trans_iter_free(trans, h->chain);
+       h->chain = NULL;
 }
 
-static void hash_check_set_inode(struct hash_check *h, struct bch_fs *c,
+static void hash_check_set_inode(struct btree_trans *trans,
+                                struct hash_check *h,
                                 const struct bch_inode_unpacked *bi)
 {
-       h->info = bch2_hash_info_init(c, bi);
-       h->next = -1;
+       h->info = bch2_hash_info_init(trans->c, bi);
+       hash_stop_chain(trans, h);
 }
 
 static int hash_redo_key(const struct bch_hash_desc desc,
        if (ret)
                goto err;
 
-       bch2_btree_iter_unlock(k_iter);
-
        bch2_hash_set(trans, desc, &h->info, k_iter->pos.inode,
                      tmp, BCH_HASH_SET_MUST_CREATE);
        ret = bch2_trans_commit(trans, NULL, NULL,
        if (!bkey_cmp(h->chain->pos, k_iter->pos))
                return 0;
 
-       iter = bch2_trans_copy_iter(h->trans, h->chain);
+       iter = bch2_trans_copy_iter(trans, h->chain);
        BUG_ON(IS_ERR(iter));
 
        for_each_btree_key_continue(iter, 0, k2) {
                }
        }
 fsck_err:
-       bch2_trans_iter_free(h->trans, iter);
+       bch2_trans_iter_free(trans, iter);
        return ret;
 }
 
-static bool key_has_correct_hash(const struct bch_hash_desc desc,
-                                struct hash_check *h, struct bch_fs *c,
-                                struct btree_iter *k_iter, struct bkey_s_c k)
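+/*
+ * A chain is a run of keys in consecutive slots (hash type keys and
+ * whiteouts); h->chain pins the iterator at the start of the current
+ * chain, h->chain_end is the offset of the last key seen:
+ */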
+static void hash_set_chain_start(struct btree_trans *trans,
+                       const struct bch_hash_desc desc,
+                       struct hash_check *h,
+                       struct btree_iter *k_iter, struct bkey_s_c k)
 {
-       u64 hash;
+       bool hole = (k.k->type != KEY_TYPE_whiteout &&
+                    k.k->type != desc.key_type);
 
-       if (k.k->type != KEY_TYPE_whiteout &&
-           k.k->type != desc.key_type)
-               return true;
+       if (hole || k.k->p.offset > h->chain_end + 1)
+               hash_stop_chain(trans, h);
+
+       if (!hole) {
+               if (!h->chain) {
+                       h->chain = bch2_trans_copy_iter(trans, k_iter);
+                       BUG_ON(IS_ERR(h->chain));
+               }
+
+               h->chain_end = k.k->p.offset;
+       }
+}
+
+static bool key_has_correct_hash(struct btree_trans *trans,
+                       const struct bch_hash_desc desc,
+                       struct hash_check *h,
+                       struct btree_iter *k_iter, struct bkey_s_c k)
+{
+       u64 hash;
 
-       if (k.k->p.offset != h->next)
-               bch2_btree_iter_copy(h->chain, k_iter);
-       h->next = k.k->p.offset + 1;
+       hash_set_chain_start(trans, desc, h, k_iter, k);
 
        if (k.k->type != desc.key_type)
                return true;
        u64 hashed;
        int ret = 0;
 
-       if (k.k->type != KEY_TYPE_whiteout &&
-           k.k->type != desc.key_type)
-               return 0;
-
-       if (k.k->p.offset != h->next)
-               bch2_btree_iter_copy(h->chain, k_iter);
-       h->next = k.k->p.offset + 1;
+       hash_set_chain_start(trans, desc, h, k_iter, k);
 
        if (k.k->type != desc.key_type)
                return 0;
        unsigned len;
        u64 hash;
 
-       if (key_has_correct_hash(bch2_dirent_hash_desc, h, c, iter, *k))
+       if (key_has_correct_hash(trans, bch2_dirent_hash_desc, h, iter, *k))
                return 0;
 
        len = bch2_dirent_name_bytes(bkey_s_c_to_dirent(*k));
        iter = bch2_trans_get_iter(&trans, BTREE_ID_DIRENTS,
                                   POS(BCACHEFS_ROOT_INO, 0), 0);
 
-       hash_check_init(bch2_dirent_hash_desc, &trans, &h);
+       hash_check_init(&h);
 
        for_each_btree_key_continue(iter, 0, k) {
                struct bkey_s_c_dirent d;
                }
 
                if (w.first_this_inode && w.have_inode)
-                       hash_check_set_inode(&h, c, &w.inode);
+                       hash_check_set_inode(&trans, &h, &w.inode);
 
                ret = check_dirent_hash(&trans, &h, iter, &k);
                if (ret > 0) {
                                ".. dirent") ||
                    fsck_err_on(memchr(d.v->d_name, '/', name_len), c,
                                "dirent name has invalid chars")) {
-                       ret = remove_dirent(c, iter, d);
+                       ret = remove_dirent(&trans, d);
                        if (ret)
                                goto err;
                        continue;
                                "dirent points to own directory:\n%s",
                                (bch2_bkey_val_to_text(&PBUF(buf), c,
                                                       k), buf))) {
-                       ret = remove_dirent(c, iter, d);
+                       ret = remove_dirent(&trans, d);
                        if (ret)
                                goto err;
                        continue;
                                "dirent points to missing inode:\n%s",
                                (bch2_bkey_val_to_text(&PBUF(buf), c,
                                                       k), buf))) {
-                       ret = remove_dirent(c, iter, d);
+                       ret = remove_dirent(&trans, d);
                        if (ret)
                                goto err;
                        continue;
 
                }
        }
+
+       hash_stop_chain(&trans, &h);
 err:
 fsck_err:
        return bch2_trans_exit(&trans) ?: ret;
        iter = bch2_trans_get_iter(&trans, BTREE_ID_XATTRS,
                                   POS(BCACHEFS_ROOT_INO, 0), 0);
 
-       hash_check_init(bch2_xattr_hash_desc, &trans, &h);
+       hash_check_init(&h);
 
        for_each_btree_key_continue(iter, 0, k) {
                ret = walk_inode(c, &w, k.k->p.inode);
                }
 
                if (w.first_this_inode && w.have_inode)
-                       hash_check_set_inode(&h, c, &w.inode);
+                       hash_check_set_inode(&trans, &h, &w.inode);
 
                ret = hash_check_key(&trans, bch2_xattr_hash_desc,
                                     &h, iter, k);
                        if (fsck_err_on(inode_bitmap_test(&dirs_done, d_inum), c,
                                        "directory %llu has multiple hardlinks",
                                        d_inum)) {
-                               ret = remove_dirent(c, iter, dirent);
+                               ret = remove_dirent(&trans, dirent);
                                if (ret)
                                        goto err;
                                continue;
                if (fsck_err_on(!inode_bitmap_test(&dirs_done, k.k->p.inode), c,
                                "unreachable directory found (inum %llu)",
                                k.k->p.inode)) {
-                       bch2_btree_iter_unlock(iter);
+                       bch2_btree_trans_unlock(&trans);
 
                        ret = reattach_inode(c, lostfound_inode, k.k->p.inode);
                        if (ret) {
        int ret = 0;
 
        ret = bch2_inode_unpack(inode, &u);
+
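+       /* Avoid potential deadlocks with iter for truncate/rm/etc.: */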
+       bch2_btree_trans_unlock(trans);
+
        if (bch2_fs_inconsistent_on(ret, c,
                         "error unpacking inode %llu in fsck",
                         inode.k->p.inode))
        nlinks_iter = genradix_iter_init(links, 0);
 
        while ((k = bch2_btree_iter_peek(iter)).k &&
-              !(ret2 = btree_iter_err(k))) {
+              !(ret2 = bkey_err(k))) {
 peek_nlinks:   link = genradix_iter_peek(&nlinks_iter, links);
 
                if (!link && (!k.k || iter->pos.inode >= range_end))
                        link = &zero_links;
 
                if (k.k && k.k->type == KEY_TYPE_inode) {
-                       /*
-                        * Avoid potential deadlocks with iter for
-                        * truncate/rm/etc.:
-                        */
-                       bch2_btree_iter_unlock(iter);
-
                        ret = check_inode(&trans, lostfound_inode, iter,
                                          bkey_s_c_to_inode(k), link);
                        BUG_ON(ret == -EINTR);
        struct btree_iter *iter;
        struct bkey_s_c k;
        struct bkey_s_c_inode inode;
-       int ret = 0;
+       int ret = 0, ret2;
 
        bch2_trans_init(&trans, c);
 
                }
        }
 
-       if (!ret)
-               ret = bch2_btree_iter_unlock(iter);
+       ret2 = bch2_trans_exit(&trans);
 
-       bch2_trans_exit(&trans);
-
-       return ret;
+       return ret ?: ret2;
 }
 
 /*
 
        while (1) {
                struct bkey_s_c k = bch2_btree_iter_peek_slot(iter);
 
-               ret = btree_iter_err(k);
+               ret = bkey_err(k);
                if (ret)
                        return ret;
 
                struct bkey_s_c k = bch2_btree_iter_peek_slot(iter);
                u32 bi_generation = 0;
 
-               ret = btree_iter_err(k);
+               ret = bkey_err(k);
                if (ret)
                        break;
 
 
        rbio->bio.bi_status = 0;
 
        k = bch2_btree_iter_peek_slot(iter);
-       if (btree_iter_err(k))
+       if (bkey_err(k))
                goto err;
 
        bkey_reassemble(&tmp.k, k);
 
                bkey_reassemble(&tmp.k, k);
                k = bkey_i_to_s_c(&tmp.k);
-               bch2_btree_iter_unlock(iter);
+               bch2_btree_trans_unlock(&trans);
 
                bytes = min_t(unsigned, bvec_iter.bi_size,
                              (k.k->p.offset - bvec_iter.bi_sector) << 9);
         * If we get here, it better have been because there was an error
         * reading a btree node
         */
-       BUG_ON(!(iter->flags & BTREE_ITER_ERROR));
+       BUG_ON(!btree_iter_err(iter));
        __bcache_io_error(c, "btree IO error");
 err:
        rbio->bio.bi_status = BLK_STS_IOERR;
                 */
                bkey_reassemble(&tmp.k, k);
                k = bkey_i_to_s_c(&tmp.k);
-               bch2_btree_iter_unlock(iter);
+               bch2_btree_trans_unlock(&trans);
 
                bytes = min_t(unsigned, rbio->bio.bi_iter.bi_size,
                              (k.k->p.offset - rbio->bio.bi_iter.bi_sector) << 9);
         * If we get here, it better have been because there was an error
         * reading a btree node
         */
-       BUG_ON(!(iter->flags & BTREE_ITER_ERROR));
+       BUG_ON(!btree_iter_err(iter));
        bcache_io_error(c, &rbio->bio, "btree IO error");
 
        bch2_trans_exit(&trans);
 
 
 
        while ((k = bch2_btree_iter_peek(iter)).k &&
-              !(ret = btree_iter_err(k))) {
+              !(ret = bkey_err(k))) {
                if (!bkey_extent_is_data(k.k) ||
                    !bch2_extent_has_device(bkey_s_c_to_extent(k), dev_idx)) {
                        ret = bch2_mark_bkey_replicas(c, k);
 
                bool did_work = false;
                int nr;
 
-               ret = btree_iter_err(k);
+               ret = bkey_err(k);
                if (ret)
                        break;
 
 
                if (!k.k)
                        break;
-               ret = btree_iter_err(k);
+               ret = bkey_err(k);
                if (ret)
                        break;
                if (bkey_cmp(bkey_start_pos(k.k), end) >= 0)
 
                                   BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
        k = bch2_btree_iter_peek_slot(iter);
 
-       ret = btree_iter_err(k);
+       ret = bkey_err(k);
        if (unlikely(ret))
                return ret;