block: Really pause block jobs on drain
author Kevin Wolf <kwolf@redhat.com>
Thu, 22 Mar 2018 13:11:20 +0000 (14:11 +0100)
committer Kevin Wolf <kwolf@redhat.com>
Mon, 18 Jun 2018 13:03:25 +0000 (15:03 +0200)
We already requested that block jobs be paused in .drained_begin,
but no guarantee was made that the job was actually inactive at the
point where bdrv_drained_begin() returned.

This introduces a new callback BdrvChildRole.drained_poll() and
uses it to make bdrv_drain_poll() consider block jobs using the node to
be drained.

For the test case to work as expected, we have to switch from
job_sleep_ns() to qemu_co_sleep_ns() so that the test job is even
considered active and must be waited for when draining the node.
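
In other words, drain can now ask each parent whether it still has
activity pending for the child. A minimal sketch of the new callback's
contract follows; MyParent and in_flight are illustrative names, not
part of the patch:

    static bool my_parent_drained_poll(BdrvChild *c)
    {
        MyParent *p = c->opaque;  /* made-up parent state */
        /* true means "still busy": bdrv_drained_begin() keeps
         * polling until this returns false. */
        return p->in_flight > 0;
    }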

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
block.c
block/io.c
block/mirror.c
blockjob.c
include/block/block.h
include/block/block_int.h
include/block/blockjob_int.h
tests/test-bdrv-drain.c

diff --git a/block.c b/block.c
index afe30caac3df8e009cb28889a4dc2d2663ea09f1..8cf9cd885508c5475a41373dd0dc574285cfc056 100644
--- a/block.c
+++ b/block.c
@@ -821,6 +821,12 @@ static void bdrv_child_cb_drained_begin(BdrvChild *child)
     bdrv_drained_begin(bs);
 }
 
+static bool bdrv_child_cb_drained_poll(BdrvChild *child)
+{
+    BlockDriverState *bs = child->opaque;
+    return bdrv_drain_poll(bs, NULL);
+}
+
 static void bdrv_child_cb_drained_end(BdrvChild *child)
 {
     BlockDriverState *bs = child->opaque;
@@ -905,6 +911,7 @@ const BdrvChildRole child_file = {
     .get_parent_desc = bdrv_child_get_parent_desc,
     .inherit_options = bdrv_inherited_options,
     .drained_begin   = bdrv_child_cb_drained_begin,
+    .drained_poll    = bdrv_child_cb_drained_poll,
     .drained_end     = bdrv_child_cb_drained_end,
     .attach          = bdrv_child_cb_attach,
     .detach          = bdrv_child_cb_detach,
@@ -929,6 +936,7 @@ const BdrvChildRole child_format = {
     .get_parent_desc = bdrv_child_get_parent_desc,
     .inherit_options = bdrv_inherited_fmt_options,
     .drained_begin   = bdrv_child_cb_drained_begin,
+    .drained_poll    = bdrv_child_cb_drained_poll,
     .drained_end     = bdrv_child_cb_drained_end,
     .attach          = bdrv_child_cb_attach,
     .detach          = bdrv_child_cb_detach,
@@ -1048,6 +1056,7 @@ const BdrvChildRole child_backing = {
     .detach          = bdrv_backing_detach,
     .inherit_options = bdrv_backing_options,
     .drained_begin   = bdrv_child_cb_drained_begin,
+    .drained_poll    = bdrv_child_cb_drained_poll,
     .drained_end     = bdrv_child_cb_drained_end,
     .inactivate      = bdrv_child_cb_inactivate,
     .update_filename = bdrv_backing_update_filename,
diff --git a/block/io.c b/block/io.c
index bc7a2d78b89a224f2772c55e2b0e7dadb74a7ede..5820e73bb25dcb44b5ba35c93a7b23a4b185059f 100644
--- a/block/io.c
+++ b/block/io.c
@@ -69,6 +69,23 @@ void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore)
     }
 }
 
+static bool bdrv_parent_drained_poll(BlockDriverState *bs, BdrvChild *ignore)
+{
+    BdrvChild *c, *next;
+    bool busy = false;
+
+    QLIST_FOREACH_SAFE(c, &bs->parents, next_parent, next) {
+        if (c == ignore) {
+            continue;
+        }
+        if (c->role->drained_poll) {
+            busy |= c->role->drained_poll(c);
+        }
+    }
+
+    return busy;
+}
+
 static void bdrv_merge_limits(BlockLimits *dst, const BlockLimits *src)
 {
     dst->opt_transfer = MAX(dst->opt_transfer, src->opt_transfer);
@@ -183,21 +200,32 @@ static void bdrv_drain_invoke(BlockDriverState *bs, bool begin)
 }
 
 /* Returns true if BDRV_POLL_WHILE() should go into a blocking aio_poll() */
-static bool bdrv_drain_poll(BlockDriverState *bs)
+bool bdrv_drain_poll(BlockDriverState *bs, BdrvChild *ignore_parent)
+{
+    if (bdrv_parent_drained_poll(bs, ignore_parent)) {
+        return true;
+    }
+
+    return atomic_read(&bs->in_flight);
+}
+
+static bool bdrv_drain_poll_top_level(BlockDriverState *bs,
+                                      BdrvChild *ignore_parent)
 {
     /* Execute pending BHs first and check everything else only after the BHs
      * have executed. */
     while (aio_poll(bs->aio_context, false));
-    return atomic_read(&bs->in_flight);
+
+    return bdrv_drain_poll(bs, ignore_parent);
 }
 
-static bool bdrv_drain_recurse(BlockDriverState *bs)
+static bool bdrv_drain_recurse(BlockDriverState *bs, BdrvChild *parent)
 {
     BdrvChild *child, *tmp;
     bool waited;
 
     /* Wait for drained requests to finish */
-    waited = BDRV_POLL_WHILE(bs, bdrv_drain_poll(bs));
+    waited = BDRV_POLL_WHILE(bs, bdrv_drain_poll_top_level(bs, parent));
 
     QLIST_FOREACH_SAFE(child, &bs->children, next, tmp) {
         BlockDriverState *bs = child->bs;
@@ -214,7 +242,7 @@ static bool bdrv_drain_recurse(BlockDriverState *bs)
              */
             bdrv_ref(bs);
         }
-        waited |= bdrv_drain_recurse(bs);
+        waited |= bdrv_drain_recurse(bs, child);
         if (in_main_loop) {
             bdrv_unref(bs);
         }
@@ -290,7 +318,7 @@ void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive,
 
     bdrv_parent_drained_begin(bs, parent);
     bdrv_drain_invoke(bs, true);
-    bdrv_drain_recurse(bs);
+    bdrv_drain_recurse(bs, parent);
 
     if (recursive) {
         bs->recursive_quiesce_counter++;
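
Taken together, the io.c changes mean that bdrv_drained_begin() keeps
iterating the event loop as long as the node itself or any parent
reports activity through .drained_poll. Conceptually, the wait looks
like the loop below; this is a sketch of the control flow, not the
literal expansion of BDRV_POLL_WHILE():

    while (bdrv_drain_poll_top_level(bs, parent)) {
        aio_poll(bdrv_get_aio_context(bs), true /* blocking */);
    }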
diff --git a/block/mirror.c b/block/mirror.c
index 435268bbbfc938404bb9345492183ac42ec27471..c2146c1ab39189693979eaf171ce0beead2a623f 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -964,6 +964,12 @@ static void mirror_pause(Job *job)
     mirror_wait_for_all_io(s);
 }
 
+static bool mirror_drained_poll(BlockJob *job)
+{
+    MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
+    return !!s->in_flight;
+}
+
 static void mirror_attached_aio_context(BlockJob *job, AioContext *new_context)
 {
     MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
@@ -997,6 +1003,7 @@ static const BlockJobDriver mirror_job_driver = {
         .pause                  = mirror_pause,
         .complete               = mirror_complete,
     },
+    .drained_poll           = mirror_drained_poll,
     .attached_aio_context   = mirror_attached_aio_context,
     .drain                  = mirror_drain,
 };
@@ -1012,6 +1019,7 @@ static const BlockJobDriver commit_active_job_driver = {
         .pause                  = mirror_pause,
         .complete               = mirror_complete,
     },
+    .drained_poll           = mirror_drained_poll,
     .attached_aio_context   = mirror_attached_aio_context,
     .drain                  = mirror_drain,
 };
diff --git a/blockjob.c b/blockjob.c
index 0306533a2e009dee812a0e3fd1d65beacd4aa93b..be5903aa9604158a752f2260c6a3111240a605b1 100644
--- a/blockjob.c
+++ b/blockjob.c
@@ -155,6 +155,28 @@ static void child_job_drained_begin(BdrvChild *c)
     job_pause(&job->job);
 }
 
+static bool child_job_drained_poll(BdrvChild *c)
+{
+    BlockJob *bjob = c->opaque;
+    Job *job = &bjob->job;
+    const BlockJobDriver *drv = block_job_driver(bjob);
+
+    /* An inactive or completed job doesn't have any pending requests. Jobs
+     * with !job->busy are either already paused or have a pause point after
+     * being reentered, so no job driver code will run before they pause. */
+    if (!job->busy || job_is_completed(job) || job->deferred_to_main_loop) {
+        return false;
+    }
+
+    /* Otherwise, assume that it isn't fully stopped yet, but allow the job to
+     * override this assumption. */
+    if (drv->drained_poll) {
+        return drv->drained_poll(bjob);
+    } else {
+        return true;
+    }
+}
+
 static void child_job_drained_end(BdrvChild *c)
 {
     BlockJob *job = c->opaque;
@@ -164,6 +186,7 @@ static void child_job_drained_end(BdrvChild *c)
 static const BdrvChildRole child_job = {
     .get_parent_desc    = child_job_get_parent_desc,
     .drained_begin      = child_job_drained_begin,
+    .drained_poll       = child_job_drained_poll,
     .drained_end        = child_job_drained_end,
     .stay_at_node       = true,
 };
diff --git a/include/block/block.h b/include/block/block.h
index e677080c4e839e2952f2008c272a32f92b8f6d03..cebbb39c6c729967c1938a8f41a754167eee82db 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -567,6 +567,14 @@ void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore);
  */
 void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore);
 
+/**
+ * bdrv_drain_poll:
+ *
+ * Poll for pending requests in @bs and its parents (except for
+ * @ignore_parent). This is part of bdrv_drained_begin.
+ */
+bool bdrv_drain_poll(BlockDriverState *bs, BdrvChild *ignore_parent);
+
 /**
  * bdrv_drained_begin:
  *
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 327e478a73e69c4d04a9feebd1e8364660662ea1..1b811db8ecf88134072c1bcf1b1e386d75faadc5 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -605,6 +605,13 @@ struct BdrvChildRole {
     void (*drained_begin)(BdrvChild *child);
     void (*drained_end)(BdrvChild *child);
 
+    /*
+     * Returns whether the parent has pending requests for the child. This
+     * callback is polled after .drained_begin() has been called until all
+     * activity on the child has stopped.
+     */
+    bool (*drained_poll)(BdrvChild *child);
+
     /* Notifies the parent that the child has been activated/inactivated (e.g.
      * when migration is completing) and it can start/stop requesting
      * permissions and doing I/O on it. */
diff --git a/include/block/blockjob_int.h b/include/block/blockjob_int.h
index 5cd50c6639d0d721fb5d3163f790c68a1a0e1c96..e4a318dd1505970492a589c306a09a4fe0e82146 100644
--- a/include/block/blockjob_int.h
+++ b/include/block/blockjob_int.h
@@ -38,6 +38,14 @@ struct BlockJobDriver {
     /** Generic JobDriver callbacks and settings */
     JobDriver job_driver;
 
+    /*
+     * Returns whether the job has pending requests for the child or will
+     * submit new requests before the next pause point. This callback is polled
+     * in the context of draining a job node after requesting that the job be
+     * paused, until all activity on the child has stopped.
+     */
+    bool (*drained_poll)(BlockJob *job);
+
     /*
      * If the callback is not NULL, it will be invoked before the job is
      * resumed in a new AioContext.  This is the place to move any resources
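
As an illustration of the new hook, not part of the patch: a job driver
that tracks its own in-flight requests can report them through
.drained_poll, exactly as mirror_drained_poll() does above. MyJob and
my_in_flight are made-up names:

    static bool my_job_drained_poll(BlockJob *bjob)
    {
        MyJob *s = container_of(bjob, MyJob, common); /* hypothetical type */
        return s->my_in_flight > 0; /* still busy: keep draining */
    }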
diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c
index cc03bc171b2f730941c41040e99ccf5f2d8c0a4e..22d31c953eb4c0e406d06d30b23e1da519bd3624 100644
--- a/tests/test-bdrv-drain.c
+++ b/tests/test-bdrv-drain.c
@@ -686,7 +686,11 @@ static void coroutine_fn test_job_start(void *opaque)
 
     job_transition_to_ready(&s->common.job);
     while (!s->should_complete) {
-        job_sleep_ns(&s->common.job, 100000);
+        /* Avoid job_sleep_ns() because it marks the job as !busy. We
+         * want to emulate some actual activity (probably some I/O) here so
+         * that drain has to wait for this activity to stop. */
+        qemu_co_sleep_ns(QEMU_CLOCK_REALTIME, 100000);
+        job_pause_point(&s->common.job);
     }
 
     job_defer_to_main_loop(&s->common.job, test_job_completed, NULL);
@@ -733,7 +737,7 @@ static void test_blockjob_common(enum drain_type drain_type)
 
     g_assert_cmpint(job->job.pause_count, ==, 0);
     g_assert_false(job->job.paused);
-    g_assert_false(job->job.busy); /* We're in job_sleep_ns() */
+    g_assert_true(job->job.busy); /* We're in qemu_co_sleep_ns() */
 
     do_drain_begin(drain_type, src);
 
@@ -743,15 +747,14 @@ static void test_blockjob_common(enum drain_type drain_type)
     } else {
         g_assert_cmpint(job->job.pause_count, ==, 1);
     }
-    /* XXX We don't wait until the job is actually paused. Is this okay? */
-    /* g_assert_true(job->job.paused); */
+    g_assert_true(job->job.paused);
     g_assert_false(job->job.busy); /* The job is paused */
 
     do_drain_end(drain_type, src);
 
     g_assert_cmpint(job->job.pause_count, ==, 0);
     g_assert_false(job->job.paused);
-    g_assert_false(job->job.busy); /* We're in job_sleep_ns() */
+    g_assert_true(job->job.busy); /* We're in qemu_co_sleep_ns() */
 
     do_drain_begin(drain_type, target);
 
@@ -761,15 +764,14 @@ static void test_blockjob_common(enum drain_type drain_type)
     } else {
         g_assert_cmpint(job->job.pause_count, ==, 1);
     }
-    /* XXX We don't wait until the job is actually paused. Is this okay? */
-    /* g_assert_true(job->job.paused); */
+    g_assert_true(job->job.paused);
     g_assert_false(job->job.busy); /* The job is paused */
 
     do_drain_end(drain_type, target);
 
     g_assert_cmpint(job->job.pause_count, ==, 0);
     g_assert_false(job->job.paused);
-    g_assert_false(job->job.busy); /* We're in job_sleep_ns() */
+    g_assert_true(job->job.busy); /* We're in qemu_co_sleep_ns() */
 
     ret = job_complete_sync(&job->job, &error_abort);
     g_assert_cmpint(ret, ==, 0);