bcachefs: Fix a deadlock in bch2_btree_node_get_sibling()
author     Kent Overstreet <kent.overstreet@gmail.com>
           Tue, 2 Jun 2020 23:41:47 +0000 (19:41 -0400)
committer  Kent Overstreet <kent.overstreet@linux.dev>
           Sun, 22 Oct 2023 21:08:40 +0000 (17:08 -0400)
There was a bad interaction with bch2_btree_iter_set_pos_same_leaf(),
which can leave a btree node locked that lies just outside iter->pos,
breaking the lock-ordering checks in __bch2_btree_node_lock(). Ideally
we should get rid of this corner case, but for now fix it locally, with
verbose comments.
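
As an illustration only, not part of the patch: below is a stripped-down,
self-contained sketch of the ordering rule __bch2_btree_node_lock()
relies on. The types and helpers here (struct bpos, pos_cmp(), struct
iter, may_lock()) are simplified stand-ins for the real bcachefs ones,
not the actual code; the point is just that the rule is evaluated
against the other iterator's pos, not against the node it actually has
locked, so a node left locked just past that pos isn't accounted for:

  #include <stdbool.h>
  #include <stdio.h>

  /* simplified stand-in for the real struct bpos */
  struct bpos { unsigned long long inode, offset; };

  /* simplified stand-in for bkey_cmp(): lexicographic position compare */
  static int pos_cmp(struct bpos l, struct bpos r)
  {
          if (l.inode != r.inode)
                  return l.inode < r.inode ? -1 : 1;
          if (l.offset != r.offset)
                  return l.offset < r.offset ? -1 : 1;
          return 0;
  }

  /* simplified stand-in for struct btree_iter */
  struct iter {
          int             btree_id;
          struct bpos     pos;
          bool            nodes_locked;
  };

  /*
   * The ordering rule: a lock at position `pos` may only be taken if it
   * does not sort before another held iterator's pos (btree id first,
   * then position).  Note this looks at linked->pos, not at the node
   * linked actually has locked - which is why an iterator left holding
   * a node just outside its pos can defeat the check.
   */
  static bool may_lock(int btree_id, struct bpos pos, const struct iter *linked)
  {
          int c;

          if (!linked->nodes_locked)
                  return true;

          c = (btree_id > linked->btree_id) - (btree_id < linked->btree_id);
          if (!c)
                  c = pos_cmp(pos, linked->pos);

          return c >= 0;
  }

  int main(void)
  {
          struct iter linked = {
                  .btree_id       = 0,
                  .pos            = { .inode = 1, .offset = 100 },
                  .nodes_locked   = true,
          };

          /* locking at or after linked->pos is fine */
          printf("%s\n", may_lock(0, (struct bpos){ 1, 200 }, &linked)
                 ? "ok" : "back off");
          /* locking before linked->pos must fail; the caller backs off */
          printf("%s\n", may_lock(0, (struct bpos){ 1, 50 }, &linked)
                 ? "ok" : "back off");
          return 0;
  }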

Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
fs/bcachefs/btree_cache.c
fs/bcachefs/btree_iter.c
fs/bcachefs/btree_iter.h
fs/bcachefs/btree_types.h

diff --git a/fs/bcachefs/btree_cache.c b/fs/bcachefs/btree_cache.c
index ef7ca552a0a38dc605ae55506b42502029ad33f4..fa55bab5944e3c395a586abae05525278e41fb9f 100644
--- a/fs/bcachefs/btree_cache.c
+++ b/fs/bcachefs/btree_cache.c
@@ -851,6 +851,18 @@ struct btree *bch2_btree_node_get_sibling(struct bch_fs *c,
        if (!parent)
                return NULL;
 
+       /*
+        * There's a corner case where a btree_iter might have a node locked
+        * that is just outside its current pos - when
+        * bch2_btree_iter_set_pos_same_leaf() gets to the end of the node.
+        *
+        * But the lock ordering checks in __bch2_btree_node_lock() go off of
+        * iter->pos, not the node's key: so if the iterator is marked as
+        * needing to be traversed, we risk deadlock if we don't bail out here:
+        */
+       if (iter->uptodate >= BTREE_ITER_NEED_TRAVERSE)
+               return ERR_PTR(-EINTR);
+
        if (!bch2_btree_node_relock(iter, level + 1)) {
                ret = ERR_PTR(-EINTR);
                goto out;
diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c
index f7de2def58df44d7a8e43273d14a2af3afdf3d2c..43ea3ceafcf22e2927dd208cb485149e50125365 100644
--- a/fs/bcachefs/btree_iter.c
+++ b/fs/bcachefs/btree_iter.c
@@ -205,8 +205,9 @@ bool __bch2_btree_node_lock(struct btree *b, struct bpos pos,
                if (!linked->nodes_locked)
                        continue;
 
-               /* * Must lock btree nodes in key order: */
-               if (__btree_iter_cmp(iter->btree_id, pos, linked) < 0)
+               /* Must lock btree nodes in key order: */
+               if ((cmp_int(iter->btree_id, linked->btree_id) ?:
+                    bkey_cmp(pos, linked->pos)) < 0)
                        ret = false;
 
                /*
@@ -1320,6 +1321,16 @@ void bch2_btree_iter_set_pos_same_leaf(struct btree_iter *iter, struct bpos new_
 
        btree_iter_advance_to_pos(iter, l, -1);
 
+       /*
+        * XXX:
+        * keeping a node locked that's outside (even just outside) iter->pos
+        * breaks __bch2_btree_node_lock(). This seems to only affect
+        * bch2_btree_node_get_sibling so for now it's fixed there, but we
+        * should try to get rid of this corner case.
+        *
+        * (this behaviour is currently needed for BTREE_INSERT_NOUNLOCK)
+        */
+
        if (bch2_btree_node_iter_end(&l->iter) &&
            btree_iter_pos_after_node(iter, l->b))
                btree_iter_set_dirty(iter, BTREE_ITER_NEED_TRAVERSE);
@@ -2195,6 +2206,7 @@ void bch2_trans_init(struct btree_trans *trans, struct bch_fs *c,
                bch2_trans_preload_mem(trans, expected_mem_bytes);
 
 #ifdef CONFIG_BCACHEFS_DEBUG
+       trans->pid = current->pid;
        mutex_lock(&c->btree_trans_lock);
        list_add(&trans->list, &c->btree_trans_list);
        mutex_unlock(&c->btree_trans_lock);
@@ -2233,7 +2245,7 @@ void bch2_btree_trans_to_text(struct printbuf *out, struct bch_fs *c)
 
        mutex_lock(&c->btree_trans_lock);
        list_for_each_entry(trans, &c->btree_trans_list, list) {
-               pr_buf(out, "%ps\n", (void *) trans->ip);
+               pr_buf(out, "%i %ps\n", trans->pid, (void *) trans->ip);
 
                trans_for_each_iter(trans, iter) {
                        if (!iter->nodes_locked)
diff --git a/fs/bcachefs/btree_iter.h b/fs/bcachefs/btree_iter.h
index 80577853a04eff6f861c78a6458e491c5129f07b..f9dcbdc9ab5259ba4c670adc97ccfb6dd25dc657 100644
--- a/fs/bcachefs/btree_iter.h
+++ b/fs/bcachefs/btree_iter.h
@@ -172,17 +172,10 @@ void bch2_btree_iter_set_pos_same_leaf(struct btree_iter *, struct bpos);
 void __bch2_btree_iter_set_pos(struct btree_iter *, struct bpos, bool);
 void bch2_btree_iter_set_pos(struct btree_iter *, struct bpos);
 
-static inline int __btree_iter_cmp(enum btree_id id,
-                                  struct bpos pos,
-                                  const struct btree_iter *r)
-{
-       return cmp_int(id, r->btree_id) ?: bkey_cmp(pos, r->pos);
-}
-
 static inline int btree_iter_cmp(const struct btree_iter *l,
                                 const struct btree_iter *r)
 {
-       return __btree_iter_cmp(l->btree_id, l->pos, r);
+       return cmp_int(l->btree_id, r->btree_id) ?: bkey_cmp(l->pos, r->pos);
 }
 
 /*
diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h
index 316461e2e01682202a9e371329c5e851c5de97c4..78fbf922341eb2a02d46cf9b6e9e0d87e8aec904 100644
--- a/fs/bcachefs/btree_types.h
+++ b/fs/bcachefs/btree_types.h
@@ -287,6 +287,7 @@ struct btree_trans {
 #ifdef CONFIG_BCACHEFS_DEBUG
        struct list_head        list;
        struct btree            *locking;
+       pid_t                   pid;
 #endif
        unsigned long           ip;