bcachefs: trans->restarted
authorKent Overstreet <kent.overstreet@gmail.com>
Sun, 25 Jul 2021 21:19:52 +0000 (17:19 -0400)
committerKent Overstreet <kent.overstreet@linux.dev>
Sun, 22 Oct 2023 21:09:09 +0000 (17:09 -0400)
Start tracking when btree transactions have been restarted - and assert
that we're always calling bch2_trans_begin() immediately after
transaction restart.

Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
fs/bcachefs/btree_cache.c
fs/bcachefs/btree_gc.c
fs/bcachefs/btree_iter.c
fs/bcachefs/btree_iter.h
fs/bcachefs/btree_key_cache.c
fs/bcachefs/btree_types.h
fs/bcachefs/btree_update_interior.c
fs/bcachefs/btree_update_leaf.c

index 252801dee0288b4d0e80388918a1d97b07e188d7..5c12897964b6a138886215827944e291fa6b8179 100644 (file)
@@ -655,8 +655,10 @@ static noinline struct btree *bch2_btree_node_fill(struct bch_fs *c,
         * Parent node must be locked, else we could read in a btree node that's
         * been freed:
         */
-       if (iter && !bch2_btree_node_relock(iter, level + 1))
+       if (iter && !bch2_btree_node_relock(iter, level + 1)) {
+               btree_trans_restart(iter->trans);
                return ERR_PTR(-EINTR);
+       }
 
        b = bch2_btree_node_mem_alloc(c);
        if (IS_ERR(b))
@@ -695,11 +697,15 @@ static noinline struct btree *bch2_btree_node_fill(struct bch_fs *c,
 
        if (iter &&
            (!bch2_trans_relock(iter->trans) ||
-            !bch2_btree_iter_relock_intent(iter)))
+            !bch2_btree_iter_relock_intent(iter))) {
+               BUG_ON(!iter->trans->restarted);
                return ERR_PTR(-EINTR);
+       }
 
-       if (!six_relock_type(&b->c.lock, lock_type, seq))
+       if (!six_relock_type(&b->c.lock, lock_type, seq)) {
+               btree_trans_restart(iter->trans);
                return ERR_PTR(-EINTR);
+       }
 
        return b;
 }
@@ -824,7 +830,7 @@ lock_node:
 
                if (!btree_node_lock(b, k->k.p, level, iter, lock_type,
                                     lock_node_check_fn, (void *) k, trace_ip)) {
-                       if (b->hash_val != btree_ptr_hash_val(k))
+                       if (!trans->restarted)
                                goto retry;
                        return ERR_PTR(-EINTR);
                }
@@ -840,6 +846,7 @@ lock_node:
                                                              trace_ip,
                                                              iter->btree_id,
                                                              &iter->real_pos);
+                       btree_trans_restart(trans);
                        return ERR_PTR(-EINTR);
                }
        }
@@ -858,8 +865,10 @@ lock_node:
                 */
                if (iter &&
                    (!bch2_trans_relock(trans) ||
-                    !bch2_btree_iter_relock_intent(iter)))
+                    !bch2_btree_iter_relock_intent(iter))) {
+                       BUG_ON(!trans->restarted);
                        return ERR_PTR(-EINTR);
+               }
 
                if (!six_relock_type(&b->c.lock, lock_type, seq))
                        goto retry;
index f0a5b6b2b18949204113e8b38618de4282aadbd7..2a84685f4e6088b4eb778b6cd2349906c50f4bce 100644 (file)
@@ -1735,7 +1735,8 @@ static int bch2_gc_btree_gens(struct bch_fs *c, enum btree_id btree_id)
                                   BTREE_ITER_NOT_EXTENTS|
                                   BTREE_ITER_ALL_SNAPSHOTS);
 
-       while ((k = bch2_btree_iter_peek(iter)).k &&
+       while ((bch2_trans_begin(&trans),
+               k = bch2_btree_iter_peek(iter)).k &&
               !(ret = bkey_err(k))) {
                c->gc_gens_pos = iter->pos;
 
index df3aba54526c449890f1f741769534241cb31ce1..816b9369c833ed77d74c046d34326c238c20e9f8 100644 (file)
@@ -316,7 +316,7 @@ bool __bch2_btree_node_lock(struct btree *b, struct bpos pos,
        }
 
        if (unlikely(deadlock_iter)) {
-               trace_trans_restart_would_deadlock(iter->trans->ip, ip,
+               trace_trans_restart_would_deadlock(trans->ip, ip,
                                trans->in_traverse_all, reason,
                                deadlock_iter->btree_id,
                                btree_iter_type(deadlock_iter),
@@ -324,6 +324,7 @@ bool __bch2_btree_node_lock(struct btree *b, struct bpos pos,
                                iter->btree_id,
                                btree_iter_type(iter),
                                &pos);
+               btree_trans_restart(trans);
                return false;
        }
 
@@ -404,6 +405,7 @@ bool bch2_btree_iter_relock_intent(struct btree_iter *iter)
                                        ? iter->l[l].b->c.lock.state.seq
                                        : 0);
                        btree_iter_set_dirty(iter, BTREE_ITER_NEED_TRAVERSE);
+                       btree_trans_restart(iter->trans);
                        return false;
                }
        }
@@ -414,7 +416,11 @@ bool bch2_btree_iter_relock_intent(struct btree_iter *iter)
 __flatten
 bool bch2_btree_iter_relock(struct btree_iter *iter, unsigned long trace_ip)
 {
-       return btree_iter_get_locks(iter, false, trace_ip);
+       bool ret = btree_iter_get_locks(iter, false, trace_ip);
+
+       if (!ret)
+               btree_trans_restart(iter->trans);
+       return ret;
 }
 
 bool __bch2_btree_iter_upgrade(struct btree_iter *iter,
@@ -457,6 +463,8 @@ bool __bch2_btree_iter_upgrade(struct btree_iter *iter,
                        btree_iter_get_locks(linked, true, _THIS_IP_);
                }
 
+       if (iter->should_be_locked)
+               btree_trans_restart(iter->trans);
        return false;
 }
 
@@ -505,11 +513,15 @@ bool bch2_trans_relock(struct btree_trans *trans)
 {
        struct btree_iter *iter;
 
+       if (unlikely(trans->restarted))
+               return false;
+
        trans_for_each_iter(trans, iter)
                if (btree_iter_should_be_locked(iter) &&
                    !bch2_btree_iter_relock(iter, _RET_IP_)) {
                        trace_trans_restart_relock(trans->ip, _RET_IP_,
                                        iter->btree_id, &iter->real_pos);
+                       BUG_ON(!trans->restarted);
                        return false;
                }
        return true;
@@ -1088,11 +1100,12 @@ static int lock_root_check_fn(struct six_lock *lock, void *p)
        return b == *rootp ? 0 : -1;
 }
 
-static inline int btree_iter_lock_root(struct btree_iter *iter,
+static inline int btree_iter_lock_root(struct btree_trans *trans,
+                                      struct btree_iter *iter,
                                       unsigned depth_want,
                                       unsigned long trace_ip)
 {
-       struct bch_fs *c = iter->trans->c;
+       struct bch_fs *c = trans->c;
        struct btree *b, **rootp = &c->btree_roots[iter->btree_id].b;
        enum six_lock_type lock_type;
        unsigned i;
@@ -1120,8 +1133,11 @@ static inline int btree_iter_lock_root(struct btree_iter *iter,
                if (unlikely(!btree_node_lock(b, SPOS_MAX, iter->level,
                                              iter, lock_type,
                                              lock_root_check_fn, rootp,
-                                             trace_ip)))
-                       return -EINTR;
+                                             trace_ip))) {
+                       if (trans->restarted)
+                               return -EINTR;
+                       continue;
+               }
 
                if (likely(b == READ_ONCE(*rootp) &&
                           b->c.level == iter->level &&
@@ -1199,10 +1215,10 @@ static noinline void btree_node_mem_ptr_set(struct btree_iter *iter,
                btree_node_unlock(iter, plevel);
 }
 
-static __always_inline int btree_iter_down(struct btree_iter *iter,
+static __always_inline int btree_iter_down(struct btree_trans *trans,
+                                          struct btree_iter *iter,
                                           unsigned long trace_ip)
 {
-       struct btree_trans *trans  = iter->trans;
        struct bch_fs *c = trans->c;
        struct btree_iter_level *l = &iter->l[iter->level];
        struct btree *b;
@@ -1257,6 +1273,8 @@ static int __btree_iter_traverse_all(struct btree_trans *trans, int ret,
 
        trans->in_traverse_all = true;
 retry_all:
+       trans->restarted = false;
+
        nr_sorted = 0;
 
        trans_for_each_iter(trans, iter) {
@@ -1319,11 +1337,11 @@ retry_all:
        }
 
        if (hweight64(trans->iters_live) > 1)
-               ret = -EINTR;
+               ret = btree_trans_restart(trans);
        else
                trans_for_each_iter(trans, iter)
                        if (iter->flags & BTREE_ITER_KEEP_UNTIL_COMMIT) {
-                               ret = -EINTR;
+                               ret = btree_trans_restart(trans);
                                break;
                        }
 out:
@@ -1414,8 +1432,8 @@ static int btree_iter_traverse_one(struct btree_iter *iter,
         */
        while (iter->level > depth_want) {
                ret = btree_iter_node(iter, iter->level)
-                       ? btree_iter_down(iter, trace_ip)
-                       : btree_iter_lock_root(iter, depth_want, trace_ip);
+                       ? btree_iter_down(trans, iter, trace_ip)
+                       : btree_iter_lock_root(trans, iter, depth_want, trace_ip);
                if (unlikely(ret)) {
                        if (ret == 1) {
                                /*
@@ -1443,6 +1461,7 @@ static int btree_iter_traverse_one(struct btree_iter *iter,
 
        iter->uptodate = BTREE_ITER_NEED_PEEK;
 out:
+       BUG_ON((ret == -EINTR) != !!trans->restarted);
        trace_iter_traverse(trans->ip, trace_ip,
                            btree_iter_type(iter) == BTREE_ITER_CACHED,
                            iter->btree_id, &iter->real_pos, ret);
@@ -1589,6 +1608,8 @@ static void btree_iter_set_search_pos(struct btree_iter *iter, struct bpos new_p
        int cmp = bpos_cmp(new_pos, iter->real_pos);
        unsigned l = iter->level;
 
+       EBUG_ON(iter->trans->restarted);
+
        if (!cmp)
                goto out;
 
@@ -2158,6 +2179,8 @@ struct btree_iter *__bch2_trans_get_iter(struct btree_trans *trans,
        struct btree_iter *iter, *best = NULL;
        struct bpos real_pos, pos_min = POS_MIN;
 
+       EBUG_ON(trans->restarted);
+
        if ((flags & BTREE_ITER_TYPE) != BTREE_ITER_NODES &&
            btree_node_type_is_extents(btree_id) &&
            !(flags & BTREE_ITER_NOT_EXTENTS) &&
@@ -2322,6 +2345,7 @@ void *bch2_trans_kmalloc(struct btree_trans *trans, size_t size)
 
                if (old_bytes) {
                        trace_trans_restart_mem_realloced(trans->ip, _RET_IP_, new_bytes);
+                       btree_trans_restart(trans);
                        return ERR_PTR(-EINTR);
                }
        }
@@ -2396,6 +2420,8 @@ void bch2_trans_reset(struct btree_trans *trans, unsigned flags)
        if (!(flags & TRANS_RESET_NOTRAVERSE) &&
            trans->iters_linked)
                bch2_btree_iter_traverse_all(trans);
+
+       trans->restarted = false;
 }
 
 static void bch2_trans_alloc_iters(struct btree_trans *trans, struct bch_fs *c)
index bcb8f0ebbdf4cc5984d8d8e06e62bf83347c6b2a..243f65f0b7addf69371dc0531e358073db7ed7cd 100644 (file)
@@ -117,6 +117,14 @@ bool bch2_btree_iter_relock(struct btree_iter *, unsigned long);
 bool bch2_trans_relock(struct btree_trans *);
 void bch2_trans_unlock(struct btree_trans *);
 
+__always_inline
+static inline int btree_trans_restart(struct btree_trans *trans)
+{
+       trans->restarted = true;
+       bch2_trans_unlock(trans);
+       return -EINTR;
+}
+
 bool __bch2_btree_iter_upgrade(struct btree_iter *, unsigned);
 
 static inline bool bch2_btree_iter_upgrade(struct btree_iter *iter,
index cb71fe0dd742f712c2b3feeccf554fb3e91bf581..8fb18ad2e1ae7b3a20a59bd42991fd194a92b5e2 100644 (file)
@@ -215,7 +215,7 @@ static int btree_key_cache_fill(struct btree_trans *trans,
 
        if (!bch2_btree_node_relock(ck_iter, 0)) {
                trace_transaction_restart_ip(trans->ip, _THIS_IP_);
-               ret = -EINTR;
+               ret = btree_trans_restart(trans);
                goto err;
        }
 
@@ -234,6 +234,10 @@ static int btree_key_cache_fill(struct btree_trans *trans,
                }
        }
 
+       /*
+        * XXX: not allowed to be holding read locks when we take a write lock,
+        * currently
+        */
        bch2_btree_node_lock_write(ck_iter->l[0].b, ck_iter);
        if (new_k) {
                kfree(ck->k);
@@ -300,10 +304,8 @@ retry:
 
                if (!btree_node_lock((void *) ck, iter->pos, 0, iter, lock_want,
                                     bkey_cached_check_fn, iter, _THIS_IP_)) {
-                       if (ck->key.btree_id != iter->btree_id ||
-                           bpos_cmp(ck->key.pos, iter->pos)) {
+                       if (!trans->restarted)
                                goto retry;
-                       }
 
                        trace_transaction_restart_ip(trans->ip, _THIS_IP_);
                        ret = -EINTR;
@@ -323,10 +325,10 @@ retry:
        iter->l[0].b            = (void *) ck;
 fill:
        if (!ck->valid && !(iter->flags & BTREE_ITER_CACHED_NOFILL)) {
-               if (!btree_node_intent_locked(iter, 0))
-                       bch2_btree_iter_upgrade(iter, 1);
-               if (!btree_node_intent_locked(iter, 0)) {
+               if (!iter->locks_want &&
+                   !!__bch2_btree_iter_upgrade(iter, 1)) {
                        trace_transaction_restart_ip(trans->ip, _THIS_IP_);
+                       BUG_ON(!trans->restarted);
                        ret = -EINTR;
                        goto err;
                }
@@ -342,9 +344,12 @@ fill:
        iter->uptodate = BTREE_ITER_NEED_PEEK;
 
        if ((iter->flags & BTREE_ITER_INTENT) &&
-           !iter->locks_want &&
-           __bch2_btree_iter_upgrade(iter, 1))
+           !bch2_btree_iter_upgrade(iter, 1)) {
+               BUG_ON(!trans->restarted);
                ret = -EINTR;
+       }
+
+       BUG_ON(!ret && !btree_node_locked(iter, 0));
 
        return ret;
 err:
index 78b312e5bcf35476948abf346a778f58505fd360..4fa37fbc41fad6a17e7543f29ac156b26c2b92ff 100644 (file)
@@ -380,9 +380,10 @@ struct btree_trans {
        int                     srcu_idx;
 
        u8                      nr_updates;
-       unsigned                used_mempool:1;
-       unsigned                error:1;
-       unsigned                in_traverse_all:1;
+       bool                    used_mempool:1;
+       bool                    error:1;
+       bool                    in_traverse_all:1;
+       bool                    restarted:1;
        /*
         * For when bch2_trans_update notices we'll be splitting a compressed
         * extent:
index 23a5a4941df0de3a746f62aefdc6957572499871..2e8697196ac95c5e0e2244c3720bfa42b4d03bce 100644 (file)
@@ -1006,6 +1006,7 @@ retry:
 
                if (flags & BTREE_INSERT_JOURNAL_RECLAIM) {
                        bch2_btree_update_free(as);
+                       btree_trans_restart(trans);
                        return ERR_PTR(ret);
                }
 
index b354624133a173086ed81de8bca9c93492242246..3fbdf3e5fe01896c1cc6ebb81f71ee3ae736c30a 100644 (file)
@@ -384,6 +384,7 @@ bch2_trans_commit_write_locked(struct btree_trans *trans,
 
        if (race_fault()) {
                trace_trans_restart_fault_inject(trans->ip, trace_ip);
+               trans->restarted = true;
                return -EINTR;
        }
 
@@ -520,10 +521,17 @@ static noinline int maybe_do_btree_merge(struct btree_trans *trans, struct btree
                u64s_delta -= !bkey_deleted(old.k) ? old.k->u64s : 0;
        }
 
-       return u64s_delta <= 0
-               ? (bch2_foreground_maybe_merge(trans, iter, iter->level,
-                               trans->flags & ~BTREE_INSERT_NOUNLOCK) ?: -EINTR)
-               : 0;
+       if (u64s_delta > 0)
+               return 0;
+
+       ret = bch2_foreground_maybe_merge(trans, iter, iter->level,
+                               trans->flags & ~BTREE_INSERT_NOUNLOCK);
+       if (!ret) {
+               ret = -EINTR;
+               trans->restarted = true;
+       }
+
+       return ret;
 }
 
 /*
@@ -587,6 +595,7 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans,
                                        trace_trans_restart_upgrade(trans->ip, trace_ip,
                                                                    iter->btree_id,
                                                                    &iter->real_pos);
+                                       trans->restarted = true;
                                        return -EINTR;
                                }
                        } else {
@@ -696,6 +705,7 @@ int bch2_trans_commit_error(struct btree_trans *trans,
                        trace_trans_restart_btree_node_split(trans->ip, trace_ip,
                                                             i->iter->btree_id,
                                                             &i->iter->real_pos);
+                       trans->restarted = true;
                        ret = -EINTR;
                }
                break;
@@ -704,7 +714,7 @@ int bch2_trans_commit_error(struct btree_trans *trans,
 
                ret = bch2_replicas_delta_list_mark(c, trans->fs_usage_deltas);
                if (ret)
-                       return ret;
+                       break;
 
                if (bch2_trans_relock(trans))
                        return 0;
@@ -716,12 +726,15 @@ int bch2_trans_commit_error(struct btree_trans *trans,
                bch2_trans_unlock(trans);
 
                if ((trans->flags & BTREE_INSERT_JOURNAL_RECLAIM) &&
-                   !(trans->flags & BTREE_INSERT_JOURNAL_RESERVED))
-                       return -EAGAIN;
+                   !(trans->flags & BTREE_INSERT_JOURNAL_RESERVED)) {
+                       trans->restarted = true;
+                       ret = -EAGAIN;
+                       break;
+               }
 
                ret = bch2_trans_journal_res_get(trans, JOURNAL_RES_GET_CHECK);
                if (ret)
-                       return ret;
+                       break;
 
                if (bch2_trans_relock(trans))
                        return 0;
@@ -737,7 +750,7 @@ int bch2_trans_commit_error(struct btree_trans *trans,
                wait_event_freezable(c->journal.reclaim_wait,
                                     (ret = journal_reclaim_wait_done(c)));
                if (ret < 0)
-                       return ret;
+                       break;
 
                if (bch2_trans_relock(trans))
                        return 0;
@@ -750,6 +763,7 @@ int bch2_trans_commit_error(struct btree_trans *trans,
                break;
        }
 
+       BUG_ON((ret == EINTR || ret == -EAGAIN) && !trans->restarted);
        BUG_ON(ret == -ENOSPC && (flags & BTREE_INSERT_NOFAIL));
 
        return ret;
@@ -972,6 +986,7 @@ int __bch2_trans_commit(struct btree_trans *trans)
                        trace_trans_restart_upgrade(trans->ip, _RET_IP_,
                                                    i->iter->btree_id,
                                                    &i->iter->pos);
+                       trans->restarted = true;
                        ret = -EINTR;
                        goto out;
                }
@@ -994,6 +1009,7 @@ int __bch2_trans_commit(struct btree_trans *trans)
                        goto err;
        }
 retry:
+       BUG_ON(trans->restarted);
        memset(&trans->journal_res, 0, sizeof(trans->journal_res));
 
        ret = do_bch2_trans_commit(trans, &i, _RET_IP_);