mirror: Don't call job_pause_point() under graph lock
authorKevin Wolf <kwolf@redhat.com>
Wed, 13 Mar 2024 15:30:00 +0000 (16:30 +0100)
committerKevin Wolf <kwolf@redhat.com>
Mon, 18 Mar 2024 11:03:04 +0000 (12:03 +0100)
Calling job_pause_point() while holding the graph reader lock
potentially results in a deadlock: bdrv_graph_wrlock() first drains
everything, including the mirror job, which pauses it. The job is only
unpaused at the end of the drain section, which is when the graph writer
lock has been successfully taken. However, if the job happens to be
paused at a pause point where it still holds the reader lock, the writer
lock can't be taken as long as the job is still paused.

Mark job_pause_point() as GRAPH_UNLOCKED and fix mirror accordingly.

Cc: qemu-stable@nongnu.org
Buglink: https://issues.redhat.com/browse/RHEL-28125
Fixes: 004915a96a7a ("block: Protect bs->backing with graph_lock")
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Message-ID: <20240313153000.33121-1-kwolf@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
block/mirror.c
include/qemu/job.h

index 5145eb53e1027c487a6cd42864e375af1b88a49f..1bdce3b657dd87759350ca1b25bb1597a9f1fa0f 100644 (file)
@@ -479,9 +479,9 @@ static unsigned mirror_perform(MirrorBlockJob *s, int64_t offset,
     return bytes_handled;
 }
 
-static void coroutine_fn GRAPH_RDLOCK mirror_iteration(MirrorBlockJob *s)
+static void coroutine_fn GRAPH_UNLOCKED mirror_iteration(MirrorBlockJob *s)
 {
-    BlockDriverState *source = s->mirror_top_bs->backing->bs;
+    BlockDriverState *source;
     MirrorOp *pseudo_op;
     int64_t offset;
     /* At least the first dirty chunk is mirrored in one iteration. */
@@ -489,6 +489,10 @@ static void coroutine_fn GRAPH_RDLOCK mirror_iteration(MirrorBlockJob *s)
     bool write_zeroes_ok = bdrv_can_write_zeroes_with_unmap(blk_bs(s->target));
     int max_io_bytes = MAX(s->buf_size / MAX_IN_FLIGHT, MAX_IO_BYTES);
 
+    bdrv_graph_co_rdlock();
+    source = s->mirror_top_bs->backing->bs;
+    bdrv_graph_co_rdunlock();
+
     bdrv_dirty_bitmap_lock(s->dirty_bitmap);
     offset = bdrv_dirty_iter_next(s->dbi);
     if (offset < 0) {
@@ -1066,9 +1070,7 @@ static int coroutine_fn mirror_run(Job *job, Error **errp)
                 mirror_wait_for_free_in_flight_slot(s);
                 continue;
             } else if (cnt != 0) {
-                bdrv_graph_co_rdlock();
                 mirror_iteration(s);
-                bdrv_graph_co_rdunlock();
             }
         }
 
index 9ea98b5927738d58dcf8ab635e4c8ee3e6eef8f4..2b873f2576893eb934f3f0514cdf276388664b7b 100644 (file)
@@ -483,7 +483,7 @@ void job_enter(Job *job);
  *
  * Called with job_mutex *not* held.
  */
-void coroutine_fn job_pause_point(Job *job);
+void coroutine_fn GRAPH_UNLOCKED job_pause_point(Job *job);
 
 /**
  * @job: The job that calls the function.