bcachefs: bch2_trans_revalidate_updates_in_node()
author	Kent Overstreet <kent.overstreet@linux.dev>
Wed, 23 Nov 2022 23:46:03 +0000 (18:46 -0500)
committer	Kent Overstreet <kent.overstreet@linux.dev>
Sun, 22 Oct 2023 21:09:48 +0000 (17:09 -0400)
When we started stashing the key being overwritten in
btree_insert_entry, we introduced a typical iterator invalidation
problem, triggered by btree node splits or resorts.

Previously, we dealt with this by unconditionally re-validating those
stashed pointers in the transaction commit path. This patch gets rid of
that by instead revalidating them only when needed, in
bch2_trans_node_add() or bch2_trans_node_reinit_iter().
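
For readers outside the bcachefs tree, the failure mode is the classic
stale-pointer pattern. The sketch below is a minimal standalone C
analogy, not bcachefs code (struct node, peek_slot() and the index are
illustrative): a pointer stashed into a container's storage goes stale
once that storage is rewritten or reallocated, and must be recomputed
from stable state afterwards.

    #include <stdio.h>
    #include <stdlib.h>

    struct node {
            int     *vals;
            size_t  nr;
    };

    /* Recompute the pointer from stable state (here, an index): */
    static int *peek_slot(struct node *n, size_t idx)
    {
            return &n->vals[idx];
    }

    int main(void)
    {
            struct node n = { calloc(4, sizeof(int)), 4 };
            size_t idx = 2;
            int *old_v = peek_slot(&n, idx);  /* stashed pointer */

            n.vals[idx] = 42;

            /* A "split/resort": storage moves, old_v is now stale: */
            n.vals = realloc(n.vals, 8 * sizeof(int));
            n.nr = 8;

            old_v = peek_slot(&n, idx);       /* revalidate */
            printf("%d\n", *old_v);           /* prints 42 */

            free(n.vals);
            return 0;
    }

In the patch, the stable state is the btree_path and
bch2_btree_path_peek_slot() plays the role of the recompute step; the
fix moves that recompute from every commit to just the events that can
actually move the node's storage.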

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
fs/bcachefs/btree_iter.c
fs/bcachefs/btree_update_leaf.c

fs/bcachefs/btree_iter.c
index 8c951cfa74ae0ab5d75166acaf15a869d47f7164..c6ccf3add7338e7247726dece7f15102c0700056 100644
@@ -652,6 +652,32 @@ void bch2_btree_path_level_init(struct btree_trans *trans,
 
 /* Btree path: fixups after btree node updates: */
 
+static void bch2_trans_revalidate_updates_in_node(struct btree_trans *trans, struct btree *b)
+{
+       struct bch_fs *c = trans->c;
+       struct btree_insert_entry *i;
+
+       trans_for_each_update(trans, i)
+               if (!i->cached &&
+                   i->level    == b->c.level &&
+                   i->btree_id == b->c.btree_id &&
+                   bpos_cmp(i->k->k.p, b->data->min_key) >= 0 &&
+                   bpos_cmp(i->k->k.p, b->data->max_key) <= 0) {
+                       i->old_v = bch2_btree_path_peek_slot(i->path, &i->old_k).v;
+
+                       if (unlikely(trans->journal_replay_not_finished)) {
+                               struct bkey_i *j_k =
+                                       bch2_journal_keys_peek_slot(c, i->btree_id, i->level,
+                                                                   i->k->k.p);
+
+                               if (j_k) {
+                                       i->old_k = j_k->k;
+                                       i->old_v = &j_k->v;
+                               }
+                       }
+               }
+}
+
 /*
  * A btree node is being replaced - update the iterator to point to the new
  * node:
@@ -675,6 +701,8 @@ void bch2_trans_node_add(struct btree_trans *trans, struct btree *b)
 
                        bch2_btree_path_level_init(trans, path, b);
                }
+
+       bch2_trans_revalidate_updates_in_node(trans, b);
 }
 
 /*
@@ -687,6 +715,8 @@ void bch2_trans_node_reinit_iter(struct btree_trans *trans, struct btree *b)
 
        trans_for_each_path_with_node(trans, b, path)
                __btree_path_level_init(path, b->c.level);
+
+       bch2_trans_revalidate_updates_in_node(trans, b);
 }
 
 /* Btree path: traverse, set_pos: */
fs/bcachefs/btree_update_leaf.c
index c7d8d2a5555153f9826d4d8f589b9e50fb6200f8..bdd703289ecb0ba3a870eeb40ea3186e3e470ed8 100644
@@ -40,6 +40,28 @@ struct bkey_s_c bch2_btree_path_peek_slot_exact(struct btree_path *path, struct
        return (struct bkey_s_c) { u, NULL };
 }
 
+static void verify_update_old_key(struct btree_trans *trans, struct btree_insert_entry *i)
+{
+#ifdef CONFIG_BCACHEFS_DEBUG
+       struct bch_fs *c = trans->c;
+       struct bkey u;
+       struct bkey_s_c k = bch2_btree_path_peek_slot_exact(i->path, &u);
+
+       if (unlikely(trans->journal_replay_not_finished)) {
+               struct bkey_i *j_k =
+                       bch2_journal_keys_peek_slot(c, i->btree_id, i->level, i->k->k.p);
+
+               if (j_k)
+                       k = bkey_i_to_s_c(j_k);
+       }
+
+       i->old_k.needs_whiteout = k.k->needs_whiteout;
+
+       BUG_ON(memcmp(&i->old_k, k.k, sizeof(struct bkey)));
+       BUG_ON(i->old_v != k.v);
+#endif
+}
+
 static int __must_check
 bch2_trans_update_by_path(struct btree_trans *, struct btree_path *,
                          struct bkey_i *, enum btree_update_flags);
@@ -354,6 +376,7 @@ static int btree_key_can_insert_cached(struct btree_trans *trans,
 {
        struct bch_fs *c = trans->c;
        struct bkey_cached *ck = (void *) path->l[0].b;
+       struct btree_insert_entry *i;
        unsigned new_u64s;
        struct bkey_i *new_k;
 
@@ -381,6 +404,10 @@ static int btree_key_can_insert_cached(struct btree_trans *trans,
                return -ENOMEM;
        }
 
+       trans_for_each_update(trans, i)
+               if (i->old_v == &ck->k->v)
+                       i->old_v = &new_k->v;
+
        ck->u64s        = new_u64s;
        ck->k           = new_k;
        return 0;
@@ -396,6 +423,8 @@ static int run_one_mem_trigger(struct btree_trans *trans,
        struct bkey_i *new = i->k;
        int ret;
 
+       verify_update_old_key(trans, i);
+
        if (unlikely(flags & BTREE_TRIGGER_NORUN))
                return 0;
 
@@ -433,6 +462,8 @@ static int run_one_trans_trigger(struct btree_trans *trans, struct btree_insert_
        struct bkey old_k = i->old_k;
        struct bkey_s_c old = { &old_k, i->old_v };
 
+       verify_update_old_key(trans, i);
+
        if ((i->flags & BTREE_TRIGGER_NORUN) ||
            !(BTREE_NODE_TYPE_HAS_TRANS_TRIGGERS & (1U << i->bkey_type)))
                return 0;
@@ -611,33 +642,6 @@ bch2_trans_commit_write_locked(struct btree_trans *trans,
 
                if (btree_node_type_needs_gc(i->bkey_type))
                        marking = true;
-
-               /*
-                * Revalidate before calling mem triggers - XXX, ugly:
-                *
-                * - successful btree node splits don't cause transaction
-                *   restarts and will have invalidated the pointer to the bkey
-                *   value
-                * - btree_node_lock_for_insert() -> btree_node_prep_for_write()
-                *   when it has to resort
-                * - btree_key_can_insert_cached() when it has to reallocate
-                *
-                *   Ugly because we currently have no way to tell if the
- *   pointer's been invalidated, which means it's debatable
-                *   whether we should be stashing the old key at all.
-                */
-               i->old_v = bch2_btree_path_peek_slot(i->path, &i->old_k).v;
-
-               if (unlikely(trans->journal_replay_not_finished)) {
-                       struct bkey_i *j_k =
-                               bch2_journal_keys_peek_slot(c, i->btree_id, i->level,
-                                                           i->k->k.p);
-
-                       if (j_k) {
-                               i->old_k = j_k->k;
-                               i->old_v = &j_k->v;
-                       }
-               }
        }
 
        /*
@@ -707,6 +711,8 @@ bch2_trans_commit_write_locked(struct btree_trans *trans,
                        if (i->flags & BTREE_UPDATE_NOJOURNAL)
                                continue;
 
+                       verify_update_old_key(trans, i);
+
                        if (trans->journal_transaction_names) {
                                entry = bch2_journal_add_entry(j, &trans->journal_res,
                                                       BCH_JSET_ENTRY_overwrite,