bcachefs: Don't reexecute triggers when retrying transaction commit
authorKent Overstreet <kent.overstreet@gmail.com>
Tue, 24 Dec 2019 23:03:53 +0000 (18:03 -0500)
committerKent Overstreet <kent.overstreet@linux.dev>
Sun, 22 Oct 2023 21:08:33 +0000 (17:08 -0400)
This was causing a bug with transaction iterators overflowing; now, if
triggers have to be reexecuted we always return -EINTR and retry from
the start of the transaction.

Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
fs/bcachefs/btree_types.h
fs/bcachefs/btree_update.h
fs/bcachefs/btree_update_leaf.c
fs/bcachefs/recovery.c

index 5f0b55c98f861aceeb7cc8db38ed3e75ef5c6535..98451b3dd1a5a3e6b1acbf2ed0e7c43ff009769e 100644 (file)
@@ -281,6 +281,7 @@ struct btree_trans {
        struct disk_reservation *disk_res;
        unsigned                flags;
        unsigned                journal_u64s;
+       unsigned                journal_preres_u64s;
        struct replicas_delta_list *fs_usage_deltas;
 
        struct btree_iter       iters_onstack[2];
index aa87477b51e1c1619c76275acc7087762bc2dd99..1534e937a95d7161f2910efa819ed79e6ced187d 100644 (file)
@@ -26,7 +26,6 @@ enum {
        __BTREE_INSERT_JOURNAL_RESERVED,
        __BTREE_INSERT_NOMARK_OVERWRITES,
        __BTREE_INSERT_NOMARK,
-       __BTREE_INSERT_NO_CLEAR_REPLICAS,
        __BTREE_INSERT_BUCKET_INVALIDATE,
        __BTREE_INSERT_NOWAIT,
        __BTREE_INSERT_GC_LOCK_HELD,
@@ -60,8 +59,6 @@ enum {
 /* Don't call mark new key at all: */
 #define BTREE_INSERT_NOMARK            (1 << __BTREE_INSERT_NOMARK)
 
-#define BTREE_INSERT_NO_CLEAR_REPLICAS (1 << __BTREE_INSERT_NO_CLEAR_REPLICAS)
-
 #define BTREE_INSERT_BUCKET_INVALIDATE (1 << __BTREE_INSERT_BUCKET_INVALIDATE)
 
 /* Don't block on allocation failure (for new btree nodes: */
index 94c1e1e2118a59f9229bacedd52bc301ac54cf17..09f5cd6493f440defe7b4a3f3e903247426ad048 100644 (file)
@@ -515,44 +515,18 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans,
 {
        struct btree_insert_entry *i;
        struct btree_iter *iter;
-       unsigned idx, u64s, journal_preres_u64s = 0;
+       unsigned idx;
        int ret;
 
-       /*
-        * note: running triggers will append more updates to the list of
-        * updates as we're walking it:
-        */
-       trans_for_each_update(trans, i) {
-               /* we know trans->nounlock won't be set here: */
-               if (unlikely(!(i->iter->locks_want < 1
-                              ? __bch2_btree_iter_upgrade(i->iter, 1)
-                              : i->iter->uptodate <= BTREE_ITER_NEED_PEEK))) {
-                       trace_trans_restart_upgrade(trans->ip);
-                       return -EINTR;
-               }
-
-               if (likely(!(trans->flags & BTREE_INSERT_NOMARK)) &&
-                   update_has_trans_triggers(i)) {
-                       ret = bch2_trans_mark_update(trans, i->iter, i->k);
-                       if (unlikely(ret)) {
-                               if (ret == -EINTR)
-                                       trace_trans_restart_mark(trans->ip);
-                               return ret;
-                       }
-               }
-
-               u64s = jset_u64s(i->k->k.u64s);
-               if (0)
-                       journal_preres_u64s += u64s;
-               trans->journal_u64s += u64s;
-       }
+       trans_for_each_update(trans, i)
+               BUG_ON(!btree_node_intent_locked(i->iter, 0));
 
        ret = bch2_journal_preres_get(&trans->c->journal,
-                       &trans->journal_preres, journal_preres_u64s,
+                       &trans->journal_preres, trans->journal_preres_u64s,
                        JOURNAL_RES_GET_NONBLOCK);
        if (unlikely(ret == -EAGAIN))
                ret = bch2_trans_journal_preres_get_cold(trans,
-                                               journal_preres_u64s);
+                                               trans->journal_preres_u64s);
        if (unlikely(ret))
                return ret;
 
@@ -740,8 +714,7 @@ int __bch2_trans_commit(struct btree_trans *trans)
 {
        struct btree_insert_entry *i = NULL;
        struct btree_iter *iter;
-       unsigned orig_nr_updates        = trans->nr_updates;
-       unsigned orig_mem_top           = trans->mem_top;
+       unsigned u64s;
        int ret = 0;
 
        if (!trans->nr_updates)
@@ -752,26 +725,50 @@ int __bch2_trans_commit(struct btree_trans *trans)
 
        memset(&trans->journal_preres, 0, sizeof(trans->journal_preres));
 
+       trans->journal_u64s             = 0;
+       trans->journal_preres_u64s      = 0;
+
        if (!(trans->flags & BTREE_INSERT_NOCHECK_RW) &&
            unlikely(!percpu_ref_tryget(&trans->c->writes))) {
                ret = bch2_trans_commit_get_rw_cold(trans);
                if (ret)
                        return ret;
        }
+
+       /*
+        * note: running triggers will append more updates to the list of
+        * updates as we're walking it:
+        */
+       trans_for_each_update(trans, i) {
+               /* we know trans->nounlock won't be set here: */
+               if (unlikely(!(i->iter->locks_want < 1
+                              ? __bch2_btree_iter_upgrade(i->iter, 1)
+                              : i->iter->uptodate <= BTREE_ITER_NEED_PEEK))) {
+                       trace_trans_restart_upgrade(trans->ip);
+                       ret = -EINTR;
+                       goto out;
+               }
+
+               if (likely(!(trans->flags & BTREE_INSERT_NOMARK)) &&
+                   update_has_trans_triggers(i)) {
+                       ret = bch2_trans_mark_update(trans, i->iter, i->k);
+                       if (unlikely(ret)) {
+                               if (ret == -EINTR)
+                                       trace_trans_restart_mark(trans->ip);
+                               goto out;
+                       }
+               }
+
+               u64s = jset_u64s(i->k->k.u64s);
+               if (0)
+                       trans->journal_preres_u64s += u64s;
+               trans->journal_u64s += u64s;
+       }
 retry:
        memset(&trans->journal_res, 0, sizeof(trans->journal_res));
-       trans->journal_u64s     = 0;
 
        ret = do_bch2_trans_commit(trans, &i);
 
-       if (trans->fs_usage_deltas) {
-               trans->fs_usage_deltas->used = 0;
-               memset((void *) trans->fs_usage_deltas +
-                      offsetof(struct replicas_delta_list, memset_start), 0,
-                      (void *) &trans->fs_usage_deltas->memset_end -
-                      (void *) &trans->fs_usage_deltas->memset_start);
-       }
-
        /* make sure we didn't drop or screw up locks: */
        bch2_btree_trans_verify_locks(trans);
 
@@ -793,19 +790,20 @@ out_noupdates:
        trans->nr_updates       = 0;
        trans->mem_top          = 0;
 
+       if (trans->fs_usage_deltas) {
+               trans->fs_usage_deltas->used = 0;
+               memset((void *) trans->fs_usage_deltas +
+                      offsetof(struct replicas_delta_list, memset_start), 0,
+                      (void *) &trans->fs_usage_deltas->memset_end -
+                      (void *) &trans->fs_usage_deltas->memset_start);
+       }
+
        return ret;
 err:
        ret = bch2_trans_commit_error(trans, i, ret);
-
-       /* can't loop if it was passed in and we changed it: */
-       if (unlikely(trans->flags & BTREE_INSERT_NO_CLEAR_REPLICAS) && !ret)
-               ret = -EINTR;
        if (ret)
                goto out;
 
-       /* free updates and memory used by triggers, they'll be reexecuted: */
-       trans->nr_updates       = orig_nr_updates;
-       trans->mem_top          = orig_mem_top;
        goto retry;
 }
 
index 44a1dcdb135d4b0942b97b388365fb968aa2edad..c366050d572ce0185bde390027c8882f509cebc2 100644 (file)
@@ -311,8 +311,7 @@ retry:
                      bch2_trans_commit(&trans, &disk_res, NULL,
                                        BTREE_INSERT_NOFAIL|
                                        BTREE_INSERT_LAZY_RW|
-                                       BTREE_INSERT_NOMARK_OVERWRITES|
-                                       BTREE_INSERT_NO_CLEAR_REPLICAS);
+                                       BTREE_INSERT_NOMARK_OVERWRITES);
        } else {
                ret = bch2_trans_commit(&trans, &disk_res, NULL,
                                        BTREE_INSERT_NOFAIL|