From 9f6e1f7bb0e136871159a71d2bcd31c5dd8cbe81 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 13 Jul 2021 16:12:00 -0400 Subject: [PATCH] bcachefs: Fix an allocator shutdown deadlock On fstest generic/388, we were seeing sporadic deadlocks in the emergency shutdown, where we'd get stuck shutting down the allocator because bch2_btree_update_start() -> bch2_btree_reserve_get() allocated and then deallocated some btree nodes, putting them back on the btree_reserve_cache, after the allocator shutdown code had already cleared out that cache. Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_update_interior.c | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c index 37dadbae41e54..0b78fb9d35610 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -948,13 +948,6 @@ bch2_btree_update_start(struct btree_iter *iter, unsigned level, closure_init_stack(&cl); retry: - /* - * This check isn't necessary for correctness - it's just to potentially - * prevent us from doing a lot of work that'll end up being wasted: - */ - ret = bch2_journal_error(&c->journal); - if (ret) - return ERR_PTR(ret); /* * XXX: figure out how far we might need to split, @@ -995,6 +988,22 @@ retry: bch2_keylist_init(&as->new_keys, as->_new_keys); bch2_keylist_init(&as->parent_keys, as->inline_keys); + mutex_lock(&c->btree_interior_update_lock); + list_add_tail(&as->list, &c->btree_interior_update_list); + mutex_unlock(&c->btree_interior_update_lock); + + /* + * We don't want to allocate if we're in an error state; that can cause + * deadlock on emergency shutdown due to open buckets getting stuck in + * the btree_reserve_cache after allocator shutdown has cleared it out. 
+ * This check needs to come after adding us to the btree_interior_update + * list but before calling bch2_btree_reserve_get, to synchronize with + * __bch2_fs_read_only(). + */ + ret = bch2_journal_error(&c->journal); + if (ret) + goto err; + ret = bch2_journal_preres_get(&c->journal, &as->journal_preres, BTREE_UPDATE_JOURNAL_RES, journal_flags|JOURNAL_RES_GET_NONBLOCK); @@ -1046,10 +1055,6 @@ retry: atomic64_read(&c->journal.seq), &as->journal, NULL); - mutex_lock(&c->btree_interior_update_lock); - list_add_tail(&as->list, &c->btree_interior_update_list); - mutex_unlock(&c->btree_interior_update_lock); - return as; err: bch2_btree_update_free(as); -- 2.30.2