bcachefs: Fix a workqueue deadlock
author: Kent Overstreet <kent.overstreet@gmail.com>
Wed, 13 May 2020 04:15:28 +0000 (00:15 -0400)
committer: Kent Overstreet <kent.overstreet@linux.dev>
Sun, 22 Oct 2023 21:08:39 +0000 (17:08 -0400)
Writes running out of a workqueue (via the dio path) could block, preventing
other writes from calling bch2_write_index() and completing.

Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
fs/bcachefs/io.c
fs/bcachefs/io.h

index 3dcb166afa231c72af2e27ae3573ee5ebcabd482..7df2b6c3f1680181ae25e15562abef31be0b0f82 100644 (file)
@@ -604,7 +604,9 @@ static void bch2_write_index(struct closure *cl)
 
        __bch2_write_index(op);
 
-       if (!op->error && (op->flags & BCH_WRITE_FLUSH)) {
+       if (!(op->flags & BCH_WRITE_DONE)) {
+               continue_at(cl, __bch2_write, index_update_wq(op));
+       } else if (!op->error && (op->flags & BCH_WRITE_FLUSH)) {
                bch2_journal_flush_seq_async(&c->journal,
                                             *op_journal_seq(op),
                                             cl);
@@ -1104,8 +1106,15 @@ again:
                if (ret < 0)
                        goto err;
 
-               if (ret)
+               if (ret) {
                        skip_put = false;
+               } else {
+                       /*
+                        * for the skip_put optimization this has to be set
+                        * before we submit the bio:
+                        */
+                       op->flags |= BCH_WRITE_DONE;
+               }
 
                bio->bi_end_io  = bch2_write_endio;
                bio->bi_private = &op->cl;
@@ -1128,16 +1137,30 @@ again:
        return;
 err:
        op->error = ret;
+       op->flags |= BCH_WRITE_DONE;
 
        continue_at(cl, bch2_write_index, index_update_wq(op));
        return;
 flush_io:
+       /*
+        * If the write can't all be submitted at once, we generally want to
+        * block synchronously as that signals backpressure to the caller.
+        *
+        * However, if we're running out of a workqueue, we can't block here
+        * because we'll be blocking other work items from completing:
+        */
+       if (current->flags & PF_WQ_WORKER) {
+               continue_at(cl, bch2_write_index, index_update_wq(op));
+               return;
+       }
+
        closure_sync(cl);
 
        if (!bch2_keylist_empty(&op->insert_keys)) {
                __bch2_write_index(op);
 
                if (op->error) {
+                       op->flags |= BCH_WRITE_DONE;
                        continue_at_nobarrier(cl, bch2_write_done, NULL);
                        return;
                }
@@ -1183,6 +1206,8 @@ static void bch2_write_data_inline(struct bch_write_op *op, unsigned data_len)
        bch2_keylist_push(&op->insert_keys);
 
        op->flags |= BCH_WRITE_WROTE_DATA_INLINE;
+       op->flags |= BCH_WRITE_DONE;
+
        continue_at_nobarrier(cl, bch2_write_index, NULL);
        return;
 err:
index c250bceb77ea3db0af62f25f4c9166017c32e347..0a049cc14e42da6852daf2fab9f4c134b61d2648 100644 (file)
@@ -40,6 +40,7 @@ enum bch_write_flags {
        /* Internal: */
        BCH_WRITE_JOURNAL_SEQ_PTR       = (1 << 10),
        BCH_WRITE_SKIP_CLOSURE_PUT      = (1 << 11),
+       BCH_WRITE_DONE                  = (1 << 12),
 };
 
 static inline u64 *op_journal_seq(struct bch_write_op *op)