From bf3efff5e4fc2dcd6e6c15578d3f08c301a13229 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 27 Feb 2022 09:56:33 -0500 Subject: [PATCH] bcachefs: Fix race leading to btree node write getting stuck Checking btree_node_may_write() isn't atomic with the other btree flags, dirty and need_write in particular. There was a rare race where we'd unblock a node from writing while __btree_node_flush() was setting need_write, and no thread would notice that the node was now both able to write and needed to be written. Fix this by adding btree node flags for will_make_reachable and write_blocked that can be checked in the cmpxchg loop in __bch2_btree_node_write. Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_cache.c | 7 +++---- fs/bcachefs/btree_io.c | 10 +++++++--- fs/bcachefs/btree_io.h | 6 ------ fs/bcachefs/btree_types.h | 2 ++ fs/bcachefs/btree_update_interior.c | 7 +++++++ 5 files changed, 19 insertions(+), 13 deletions(-) diff --git a/fs/bcachefs/btree_cache.c b/fs/bcachefs/btree_cache.c index 7b264619c2765..5f96c5d1a064a 100644 --- a/fs/bcachefs/btree_cache.c +++ b/fs/bcachefs/btree_cache.c @@ -223,10 +223,9 @@ wait_on_io: goto wait_on_io; } - if (btree_node_noevict(b)) - goto out_unlock; - - if (!btree_node_may_write(b)) + if (btree_node_noevict(b) || + btree_node_write_blocked(b) || + btree_node_will_make_reachable(b)) goto out_unlock; if (btree_node_dirty(b)) { diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c index 540bfe07c1284..53f83340f69a2 100644 --- a/fs/bcachefs/btree_io.c +++ b/fs/bcachefs/btree_io.c @@ -1606,7 +1606,8 @@ static void __btree_node_write_done(struct bch_fs *c, struct btree *b) if ((old & (1U << BTREE_NODE_dirty)) && (old & (1U << BTREE_NODE_need_write)) && !(old & (1U << BTREE_NODE_never_write)) && - btree_node_may_write(b)) { + !(old & (1U << BTREE_NODE_write_blocked)) && + !(old & (1U << BTREE_NODE_will_make_reachable))) { new &= ~(1U << BTREE_NODE_dirty); new &= ~(1U << BTREE_NODE_need_write); new |= (1U << BTREE_NODE_write_in_flight); @@ -1778,10 +1779,13 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b, unsigned flags) !(old & (1 << BTREE_NODE_need_write))) return; - if (!btree_node_may_write(b)) + if (old & + ((1 << BTREE_NODE_never_write)| + (1 << BTREE_NODE_write_blocked))) return; - if (old & (1 << BTREE_NODE_never_write)) + if (b->written && + (old & (1 << BTREE_NODE_will_make_reachable))) return; if (old & (1 << BTREE_NODE_write_in_flight)) diff --git a/fs/bcachefs/btree_io.h b/fs/bcachefs/btree_io.h index 7ed88089f6f94..d818d87661e86 100644 --- a/fs/bcachefs/btree_io.h +++ b/fs/bcachefs/btree_io.h @@ -62,12 +62,6 @@ void __bch2_btree_node_wait_on_write(struct btree *); void bch2_btree_node_wait_on_read(struct btree *); void bch2_btree_node_wait_on_write(struct btree *); -static inline bool btree_node_may_write(struct btree *b) -{ - return list_empty_careful(&b->write_blocked) && - (!b->written || !b->will_make_reachable); -} - enum compact_mode { COMPACT_LAZY, COMPACT_ALL, diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h index 165466db222df..561406b4b7c26 100644 --- a/fs/bcachefs/btree_types.h +++ b/fs/bcachefs/btree_types.h @@ -434,6 +434,8 @@ struct btree_trans { x(read_error) \ x(dirty) \ x(need_write) \ + x(write_blocked) \ + x(will_make_reachable) \ x(noevict) \ x(write_idx) \ x(accessed) \ diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c index fe0fc5ff15495..17d65c9e2bd49 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -606,6 +606,8 @@ err: mutex_lock(&c->btree_interior_update_lock); list_del(&as->write_blocked_list); + if (list_empty(&b->write_blocked)) + clear_btree_node_write_blocked(b); /* * Node might have been freed, recheck under @@ -650,6 +652,7 @@ err: BUG_ON(b->will_make_reachable != (unsigned long) as); b->will_make_reachable = 0; + clear_btree_node_will_make_reachable(b); } mutex_unlock(&c->btree_interior_update_lock); @@ -716,6 +719,8 @@ static void btree_update_updated_node(struct btree_update *as, struct btree *b) as->mode = BTREE_INTERIOR_UPDATING_NODE; as->b = b; + + set_btree_node_write_blocked(b); list_add(&as->write_blocked_list, &b->write_blocked); mutex_unlock(&c->btree_interior_update_lock); @@ -781,6 +786,7 @@ static void bch2_btree_update_add_new_node(struct btree_update *as, struct btree as->new_nodes[as->nr_new_nodes++] = b; b->will_make_reachable = 1UL|(unsigned long) as; + set_btree_node_will_make_reachable(b); mutex_unlock(&c->btree_interior_update_lock); @@ -803,6 +809,7 @@ static void btree_update_drop_new_node(struct bch_fs *c, struct btree *b) * xchg() is for synchronization with bch2_btree_complete_write: */ v = xchg(&b->will_make_reachable, 0); + clear_btree_node_will_make_reachable(b); as = (struct btree_update *) (v & ~1UL); if (!as) { -- 2.30.2