bcachefs: Fix a subtle race in the btree split path
authorKent Overstreet <kent.overstreet@gmail.com>
Fri, 11 Oct 2019 18:45:22 +0000 (14:45 -0400)
committerKent Overstreet <kent.overstreet@linux.dev>
Sun, 22 Oct 2023 21:08:29 +0000 (17:08 -0400)
We have to free the old (in memory) btree node _before_ unlocking the
new nodes - else, some other thread with a read lock on the old node
could see stale data after another thread has already updated the new
node.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
fs/bcachefs/btree_gc.c
fs/bcachefs/btree_iter.c
fs/bcachefs/btree_update_interior.c

index f7d9abfdb3dea1c2435c290716c742fae159ee05..4a66c44764f6403e1c02605a03529c5086b7ae65 100644 (file)
@@ -1042,11 +1042,12 @@ next:
                        old_nodes[i] = new_nodes[i];
                } else {
                        old_nodes[i] = NULL;
-                       if (new_nodes[i])
-                               six_unlock_intent(&new_nodes[i]->c.lock);
                }
        }
 
+       for (i = 0; i < nr_new_nodes; i++)
+               six_unlock_intent(&new_nodes[i]->c.lock);
+
        bch2_btree_update_done(as);
        bch2_keylist_free(&keylist, NULL);
 }
index 8c6d3193c3fe9acd9a13c63e165a3a09eec8a833..a91cee79770304e0cc177d90a8330a200c1e04a8 100644 (file)
@@ -833,8 +833,6 @@ void bch2_btree_iter_node_replace(struct btree_iter *iter, struct btree *b)
 
                        btree_iter_node_set(linked, b);
                }
-
-       six_unlock_intent(&b->c.lock);
 }
 
 void bch2_btree_iter_node_drop(struct btree_iter *iter, struct btree *b)
index 946254c51a69c5aa7b70bb814392091c97589174..3b134d3a998453b819d58082c3cc5d47df51df79 100644 (file)
@@ -1446,8 +1446,20 @@ static void btree_split(struct btree_update *as, struct btree *b,
                bch2_btree_iter_node_replace(iter, n2);
        bch2_btree_iter_node_replace(iter, n1);
 
+       /*
+        * The old node must be freed (in memory) _before_ unlocking the new
+        * nodes - else another thread could re-acquire a read lock on the old
+        * node after another thread has locked and updated the new node, thus
+        * seeing stale data:
+        */
        bch2_btree_node_free_inmem(c, b, iter);
 
+       if (n3)
+               six_unlock_intent(&n3->c.lock);
+       if (n2)
+               six_unlock_intent(&n2->c.lock);
+       six_unlock_intent(&n1->c.lock);
+
        bch2_btree_trans_verify_locks(iter->trans);
 
        bch2_time_stats_update(&c->times[BCH_TIME_btree_node_split],
@@ -1761,6 +1773,8 @@ retry:
        bch2_btree_node_free_inmem(c, b, iter);
        bch2_btree_node_free_inmem(c, m, iter);
 
+       six_unlock_intent(&n->c.lock);
+
        bch2_btree_update_done(as);
 
        if (!(flags & BTREE_INSERT_GC_LOCK_HELD))
@@ -1855,6 +1869,7 @@ static int __btree_node_rewrite(struct bch_fs *c, struct btree_iter *iter,
        bch2_btree_iter_node_drop(iter, b);
        bch2_btree_iter_node_replace(iter, n);
        bch2_btree_node_free_inmem(c, b, iter);
+       six_unlock_intent(&n->c.lock);
 
        bch2_btree_update_done(as);
        return 0;