bcachefs: All held locks must be in a btree path
authorKent Overstreet <kent.overstreet@linux.dev>
Fri, 16 Sep 2022 18:42:38 +0000 (14:42 -0400)
committerKent Overstreet <kent.overstreet@linux.dev>
Sun, 22 Oct 2023 21:09:40 +0000 (17:09 -0400)
With the new deadlock cycle detector, it's critical that all held locks
be marked in a btree_path, because that's what the cycle detector
traverses - any locks that aren't correctly marked will cause deadlocks.

This changes the btree_path to allocate some btree_paths for the new
nodes, since until the final update is done we otherwise don't have a
path referencing them.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
fs/bcachefs/btree_iter.c
fs/bcachefs/btree_iter.h
fs/bcachefs/btree_update_interior.c

index b1c81278ad7500312d098338d92169d590ca2267..e65c300ffe40acc3885f40674d04f6e1104fc6da 100644 (file)
@@ -619,61 +619,6 @@ static inline bool btree_path_advance_to_pos(struct btree_path *path,
        return true;
 }
 
-/*
- * Verify that iterator for parent node points to child node:
- */
-static void btree_path_verify_new_node(struct btree_trans *trans,
-                                      struct btree_path *path, struct btree *b)
-{
-       struct bch_fs *c = trans->c;
-       struct btree_path_level *l;
-       unsigned plevel;
-       bool parent_locked;
-       struct bkey_packed *k;
-
-       if (!IS_ENABLED(CONFIG_BCACHEFS_DEBUG))
-               return;
-
-       if (trans->journal_replay_not_finished)
-               return;
-
-       plevel = b->c.level + 1;
-       if (!btree_path_node(path, plevel))
-               return;
-
-       parent_locked = btree_node_locked(path, plevel);
-
-       if (!bch2_btree_node_relock(trans, path, plevel))
-               return;
-
-       l = &path->l[plevel];
-       k = bch2_btree_node_iter_peek_all(&l->iter, l->b);
-       if (!k ||
-           bkey_deleted(k) ||
-           bkey_cmp_left_packed(l->b, k, &b->key.k.p)) {
-               struct printbuf buf1 = PRINTBUF;
-               struct printbuf buf2 = PRINTBUF;
-               struct printbuf buf3 = PRINTBUF;
-               struct printbuf buf4 = PRINTBUF;
-               struct bkey uk = bkey_unpack_key(b, k);
-
-               bch2_dump_btree_node(c, l->b);
-               bch2_bpos_to_text(&buf1, path->pos);
-               bch2_bkey_to_text(&buf2, &uk);
-               bch2_bpos_to_text(&buf3, b->data->min_key);
-               bch2_bpos_to_text(&buf3, b->data->max_key);
-               panic("parent iter doesn't point to new node:\n"
-                     "iter pos %s %s\n"
-                     "iter key %s\n"
-                     "new node %s-%s\n",
-                     bch2_btree_ids[path->btree_id],
-                     buf1.buf, buf2.buf, buf3.buf, buf4.buf);
-       }
-
-       if (!parent_locked)
-               btree_node_unlock(trans, path, plevel);
-}
-
 static inline void __btree_path_level_init(struct btree_path *path,
                                           unsigned level)
 {
@@ -689,14 +634,12 @@ static inline void __btree_path_level_init(struct btree_path *path,
                bch2_btree_node_iter_peek(&l->iter, l->b);
 }
 
-static inline void btree_path_level_init(struct btree_trans *trans,
-                                        struct btree_path *path,
-                                        struct btree *b)
+inline void bch2_btree_path_level_init(struct btree_trans *trans,
+                                      struct btree_path *path,
+                                      struct btree *b)
 {
        BUG_ON(path->cached);
 
-       btree_path_verify_new_node(trans, path, b);
-
        EBUG_ON(!btree_path_pos_in_node(path, b));
        EBUG_ON(b->c.lock.state.seq & 1);
 
@@ -728,7 +671,7 @@ void bch2_trans_node_add(struct btree_trans *trans, struct btree *b)
                                mark_btree_node_locked(trans, path, b->c.level, (enum six_lock_type) t);
                        }
 
-                       btree_path_level_init(trans, path, b);
+                       bch2_btree_path_level_init(trans, path, b);
                }
 }
 
@@ -809,7 +752,7 @@ static inline int btree_path_lock_root(struct btree_trans *trans,
                                path->l[i].b = NULL;
 
                        mark_btree_node_locked(trans, path, path->level, lock_type);
-                       btree_path_level_init(trans, path, b);
+                       bch2_btree_path_level_init(trans, path, b);
                        return 0;
                }
 
@@ -982,7 +925,7 @@ static __always_inline int btree_path_down(struct btree_trans *trans,
 
        mark_btree_node_locked(trans, path, level, lock_type);
        path->level = level;
-       btree_path_level_init(trans, path, b);
+       bch2_btree_path_level_init(trans, path, b);
 
        bch2_btree_path_verify_locks(path);
 err:
index bdc703324b9af6e58fddbc6b22c081627134e8a9..2f47889c688a2a52af6c0f73344e58da5c047a5e 100644 (file)
@@ -179,6 +179,9 @@ inline struct bkey_s_c bch2_btree_path_peek_slot(struct btree_path *, struct bke
 struct bkey_i *bch2_btree_journal_peek_slot(struct btree_trans *,
                                        struct btree_iter *, struct bpos);
 
+inline void bch2_btree_path_level_init(struct btree_trans *,
+                                      struct btree_path *, struct btree *);
+
 #ifdef CONFIG_BCACHEFS_DEBUG
 void bch2_trans_verify_paths(struct btree_trans *);
 void bch2_assert_pos_locked(struct btree_trans *, enum btree_id,
index 783b63bcce2f8adc080e9dfa5e81ed5596911029..7028597358d5f9a753874e2c5193157d8aac98c1 100644 (file)
@@ -28,6 +28,22 @@ static void bch2_btree_insert_node(struct btree_update *, struct btree_trans *,
                                   struct keylist *, unsigned);
 static void bch2_btree_update_add_new_node(struct btree_update *, struct btree *);
 
+static struct btree_path *get_unlocked_mut_path(struct btree_trans *trans,
+                                               enum btree_id btree_id,
+                                               unsigned level,
+                                               struct bpos pos)
+{
+       struct btree_path *path;
+
+       path = bch2_path_get(trans, btree_id, pos, level + 1, level,
+                            BTREE_ITER_NOPRESERVE|
+                            BTREE_ITER_INTENT);
+       path = bch2_btree_path_make_mut(trans, path, true);
+       bch2_btree_path_downgrade(trans, path);
+       __bch2_btree_path_unlock(trans, path);
+       return path;
+}
+
 /* Debug code: */
 
 /*
@@ -618,7 +634,10 @@ static void btree_update_nodes_written(struct btree_update *as)
                             "error %i in btree_update_nodes_written()", ret);
 err:
        if (as->b) {
+               struct btree_path *path;
+
                b = as->b;
+               path = get_unlocked_mut_path(&trans, as->btree_id, b->c.level, b->key.k.p);
                /*
                 * @b is the node we did the final insert into:
                 *
@@ -632,7 +651,12 @@ err:
                 */
 
                btree_node_lock_nopath_nofail(&trans, &b->c, SIX_LOCK_intent);
-               btree_node_lock_nopath_nofail(&trans, &b->c, SIX_LOCK_write);
+               mark_btree_node_locked(&trans, path, b->c.level, SIX_LOCK_intent);
+               path->l[b->c.level].lock_seq = b->c.lock.state.seq;
+               path->l[b->c.level].b = b;
+
+               bch2_btree_node_lock_write_nofail(&trans, path, &b->c);
+
                mutex_lock(&c->btree_interior_update_lock);
 
                list_del(&as->write_blocked_list);
@@ -666,10 +690,13 @@ err:
                }
 
                mutex_unlock(&c->btree_interior_update_lock);
+
+               mark_btree_node_locked_noreset(path, b->c.level, SIX_LOCK_intent);
                six_unlock_write(&b->c.lock);
 
                btree_node_write_if_need(c, b, SIX_LOCK_intent);
-               six_unlock_intent(&b->c.lock);
+               btree_node_unlock(&trans, path, b->c.level);
+               bch2_path_put(&trans, path, true);
        }
 
        bch2_journal_pin_drop(&c->journal, &as->journal);
@@ -1428,6 +1455,7 @@ static void btree_split(struct btree_update *as, struct btree_trans *trans,
        struct bch_fs *c = as->c;
        struct btree *parent = btree_node_parent(path, b);
        struct btree *n1, *n2 = NULL, *n3 = NULL;
+       struct btree_path *path1 = NULL, *path2 = NULL;
        u64 start_time = local_clock();
 
        BUG_ON(!parent && (b != btree_node_root(c, b)));
@@ -1450,6 +1478,16 @@ static void btree_split(struct btree_update *as, struct btree_trans *trans,
                six_unlock_write(&n2->c.lock);
                six_unlock_write(&n1->c.lock);
 
+               path1 = get_unlocked_mut_path(trans, path->btree_id, n1->c.level, n1->key.k.p);
+               six_lock_increment(&n1->c.lock, SIX_LOCK_intent);
+               mark_btree_node_locked(trans, path1, n1->c.level, SIX_LOCK_intent);
+               bch2_btree_path_level_init(trans, path1, n1);
+
+               path2 = get_unlocked_mut_path(trans, path->btree_id, n2->c.level, n2->key.k.p);
+               six_lock_increment(&n2->c.lock, SIX_LOCK_intent);
+               mark_btree_node_locked(trans, path2, n2->c.level, SIX_LOCK_intent);
+               bch2_btree_path_level_init(trans, path2, n2);
+
                bch2_btree_update_add_new_node(as, n1);
 
                bch2_btree_node_write(c, n1, SIX_LOCK_intent, 0);
@@ -1467,6 +1505,12 @@ static void btree_split(struct btree_update *as, struct btree_trans *trans,
                        /* Depth increases, make a new root */
                        n3 = __btree_root_alloc(as, trans, b->c.level + 1);
 
+                       path2->locks_want++;
+                       BUG_ON(btree_node_locked(path2, n3->c.level));
+                       six_lock_increment(&n3->c.lock, SIX_LOCK_intent);
+                       mark_btree_node_locked(trans, path2, n3->c.level, SIX_LOCK_intent);
+                       bch2_btree_path_level_init(trans, path2, n3);
+
                        n3->sib_u64s[0] = U16_MAX;
                        n3->sib_u64s[1] = U16_MAX;
 
@@ -1480,6 +1524,11 @@ static void btree_split(struct btree_update *as, struct btree_trans *trans,
                bch2_btree_build_aux_trees(n1);
                six_unlock_write(&n1->c.lock);
 
+               path1 = get_unlocked_mut_path(trans, path->btree_id, n1->c.level, n1->key.k.p);
+               six_lock_increment(&n1->c.lock, SIX_LOCK_intent);
+               mark_btree_node_locked(trans, path1, n1->c.level, SIX_LOCK_intent);
+               bch2_btree_path_level_init(trans, path1, n1);
+
                bch2_btree_update_add_new_node(as, n1);
 
                bch2_btree_node_write(c, n1, SIX_LOCK_intent, 0);
@@ -1526,6 +1575,15 @@ static void btree_split(struct btree_update *as, struct btree_trans *trans,
                six_unlock_intent(&n2->c.lock);
        six_unlock_intent(&n1->c.lock);
 
+       if (path2) {
+               __bch2_btree_path_unlock(trans, path2);
+               bch2_path_put(trans, path2, true);
+       }
+       if (path1) {
+               __bch2_btree_path_unlock(trans, path1);
+               bch2_path_put(trans, path1, true);
+       }
+
        bch2_trans_verify_locks(trans);
 
        bch2_time_stats_update(&c->times[n2
@@ -1642,7 +1700,7 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans,
                                  enum btree_node_sibling sib)
 {
        struct bch_fs *c = trans->c;
-       struct btree_path *sib_path = NULL;
+       struct btree_path *sib_path = NULL, *new_path = NULL;
        struct btree_update *as;
        struct bkey_format_state new_s;
        struct bkey_format new_f;
@@ -1766,6 +1824,11 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans,
        bch2_btree_build_aux_trees(n);
        six_unlock_write(&n->c.lock);
 
+       new_path = get_unlocked_mut_path(trans, path->btree_id, n->c.level, n->key.k.p);
+       six_lock_increment(&n->c.lock, SIX_LOCK_intent);
+       mark_btree_node_locked(trans, new_path, n->c.level, SIX_LOCK_intent);
+       bch2_btree_path_level_init(trans, new_path, n);
+
        bch2_btree_node_write(c, n, SIX_LOCK_intent, 0);
 
        bkey_init(&delete.k);
@@ -1795,6 +1858,8 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans,
        bch2_time_stats_update(&c->times[BCH_TIME_btree_node_merge], start_time);
 out:
 err:
+       if (new_path)
+               bch2_path_put(trans, new_path, true);
        bch2_path_put(trans, sib_path, true);
        bch2_trans_verify_locks(trans);
        return ret;
@@ -1809,6 +1874,7 @@ int bch2_btree_node_rewrite(struct btree_trans *trans,
                            unsigned flags)
 {
        struct bch_fs *c = trans->c;
+       struct btree_path *new_path = NULL;
        struct btree *n, *parent;
        struct btree_update *as;
        int ret;
@@ -1830,6 +1896,11 @@ int bch2_btree_node_rewrite(struct btree_trans *trans,
        bch2_btree_build_aux_trees(n);
        six_unlock_write(&n->c.lock);
 
+       new_path = get_unlocked_mut_path(trans, iter->btree_id, n->c.level, n->key.k.p);
+       six_lock_increment(&n->c.lock, SIX_LOCK_intent);
+       mark_btree_node_locked(trans, new_path, n->c.level, SIX_LOCK_intent);
+       bch2_btree_path_level_init(trans, new_path, n);
+
        trace_and_count(c, btree_node_rewrite, c, b);
 
        bch2_btree_node_write(c, n, SIX_LOCK_intent, 0);
@@ -1850,6 +1921,7 @@ int bch2_btree_node_rewrite(struct btree_trans *trans,
        six_unlock_intent(&n->c.lock);
 
        bch2_btree_update_done(as, trans);
+       bch2_path_put(trans, new_path, true);
 out:
        bch2_btree_path_downgrade(trans, iter->path);
        return ret;