bcachefs: Fix an allocator shutdown deadlock
author: Kent Overstreet <kent.overstreet@gmail.com>
Tue, 13 Jul 2021 20:12:00 +0000 (16:12 -0400)
committer: Kent Overstreet <kent.overstreet@linux.dev>
Sun, 22 Oct 2023 21:09:08 +0000 (17:09 -0400)
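On fstest generic/388, we were seeing sporadic deadlocks in emergency
shutdown: we'd get stuck shutting down the allocator because
bch2_btree_update_start() -> bch2_btree_reserve_get() allocated and then
deallocated some btree nodes, putting them back on the
btree_reserve_cache after the allocator shutdown code had already
cleared out that cache.

Fix this by moving the journal error check in bch2_btree_update_start()
so that it runs after the new update has been added to the
btree_interior_update list but before bch2_btree_reserve_get() is
called, which synchronizes with __bch2_fs_read_only().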

Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
fs/bcachefs/btree_update_interior.c

index 37dadbae41e5462a213c2200994afa88647e4509..0b78fb9d356109b7efa1e694f89eb105d9d5fd96 100644 (file)
@@ -948,13 +948,6 @@ bch2_btree_update_start(struct btree_iter *iter, unsigned level,
 
        closure_init_stack(&cl);
 retry:
-       /*
-        * This check isn't necessary for correctness - it's just to potentially
-        * prevent us from doing a lot of work that'll end up being wasted:
-        */
-       ret = bch2_journal_error(&c->journal);
-       if (ret)
-               return ERR_PTR(ret);
 
        /*
         * XXX: figure out how far we might need to split,
@@ -995,6 +988,22 @@ retry:
        bch2_keylist_init(&as->new_keys, as->_new_keys);
        bch2_keylist_init(&as->parent_keys, as->inline_keys);
 
+       mutex_lock(&c->btree_interior_update_lock);
+       list_add_tail(&as->list, &c->btree_interior_update_list);
+       mutex_unlock(&c->btree_interior_update_lock);
+
+       /*
+        * We don't want to allocate if we're in an error state, that can cause
+        * deadlock on emergency shutdown due to open buckets getting stuck in
+        * the btree_reserve_cache after allocator shutdown has cleared it out.
+        * This check needs to come after adding us to the btree_interior_update
+        * list but before calling bch2_btree_reserve_get, to synchronize with
+        * __bch2_fs_read_only().
+        */
+       ret = bch2_journal_error(&c->journal);
+       if (ret)
+               goto err;
+
        ret = bch2_journal_preres_get(&c->journal, &as->journal_preres,
                                      BTREE_UPDATE_JOURNAL_RES,
                                      journal_flags|JOURNAL_RES_GET_NONBLOCK);
@@ -1046,10 +1055,6 @@ retry:
                             atomic64_read(&c->journal.seq),
                             &as->journal, NULL);
 
-       mutex_lock(&c->btree_interior_update_lock);
-       list_add_tail(&as->list, &c->btree_interior_update_list);
-       mutex_unlock(&c->btree_interior_update_lock);
-
        return as;
 err:
        bch2_btree_update_free(as);