migration: Always set DEVICE state
authorPeter Xu <peterx@redhat.com>
Tue, 14 Jan 2025 23:07:44 +0000 (18:07 -0500)
committerFabiano Rosas <farosas@suse.de>
Wed, 29 Jan 2025 14:56:41 +0000 (11:56 -0300)
DEVICE state was introduced back in 2017:

https://lore.kernel.org/qemu-devel/20171020090556.18631-1-dgilbert@redhat.com/

Quote from Dave's cover letter, when the pre-switchover phase was enabled,
the state transition looks like this:

  The precopy flow is:
  active->pre-switchover->device->completed

  The postcopy flow is:
  active->pre-switchover->postcopy-active->completed

To supplement above, when the cap is not enabled:

  The precopy flow is:
  active->completed

  The postcopy flow is:
  active->postcopy-active->completed

It works for us, though we have some code just to special case these state
transitions, so the DEVICE state currently is special only to precopy, and
only conditionally.

I had a quick discussion with Libvirt developers, it turns out that this
may not be necessary. IOW, it seems okay we can have DEVICE state to be
generic, so that we don't have over-complicated state machines.  It not
only helps align all the migration state machine, help cleanup the code
path especially on pre-switchover handling (see the patch itself), another
side benefit is we can unconditionally have a specific state to mark the
switchover phase, which might be helpful for debugging too.

This patch makes the DEVICE state to be present always, marking that source
QEMU is switching over.  Then the state machine will be always as simple
as:

  active-> [pre-switchover->] -> device -> [postcopy-active->] -> complete

After the change, no matter whether pre-switchover or postcopy is enabled
or not, we always have DEVICE state showing the switchover phase.  When
pre-switchover enabled, we'll have an extra stage before that.  When
postcopy is enabled, we'll have an extra stage after that.

A few qtests need touch up in QEMU tree for this change:

  - A few iotest outputs (194, 203, 234, 262, 280)
  - Teach libqos's migrate() on "device" state

Cc: Jiri Denemark <jdenemar@redhat.com>
Cc: Daniel P. Berrangé <berrange@redhat.com>
Cc: Dr. David Alan Gilbert <dave@treblig.org>
Signed-off-by: Peter Xu <peterx@redhat.com>
Tested-by: Jiri Denemark <jdenemar@redhat.com>
Reviewed-by: Juraj Marcin <jmarcin@redhat.com>
Link: https://lore.kernel.org/r/20250114230746.3268797-15-peterx@redhat.com
Signed-off-by: Fabiano Rosas <farosas@suse.de>
migration/migration.c
qapi/migration.json
tests/qemu-iotests/194.out
tests/qemu-iotests/203.out
tests/qemu-iotests/234.out
tests/qemu-iotests/262.out
tests/qemu-iotests/280.out
tests/qtest/libqos/libqos.c

index d29f7448bdb044c0ca0588ac14564a92218edd14..5302b7b91bdb1225bc48428ddfda21f2e109d8ca 100644 (file)
@@ -105,7 +105,7 @@ static MigrationIncomingState *current_incoming;
 static GSList *migration_blockers[MIG_MODE__MAX];
 
 static bool migration_object_check(MigrationState *ms, Error **errp);
-static int migration_maybe_pause(MigrationState *s, int new_state);
+static bool migration_switchover_start(MigrationState *s);
 static void migrate_fd_cancel(MigrationState *s);
 static bool close_return_path_on_source(MigrationState *s);
 static void migration_completion_end(MigrationState *s);
@@ -2657,11 +2657,6 @@ static int postcopy_start(MigrationState *ms, Error **errp)
         }
     }
 
-    if (!migrate_pause_before_switchover()) {
-        migrate_set_state(&ms->state, MIGRATION_STATUS_ACTIVE,
-                          MIGRATION_STATUS_POSTCOPY_ACTIVE);
-    }
-
     trace_postcopy_start();
     bql_lock();
     trace_postcopy_start_set_run();
@@ -2672,10 +2667,8 @@ static int postcopy_start(MigrationState *ms, Error **errp)
         goto fail;
     }
 
-    ret = migration_maybe_pause(ms, MIGRATION_STATUS_POSTCOPY_ACTIVE);
-    if (ret < 0) {
-        error_setg_errno(errp, -ret, "%s: Failed in migration_maybe_pause()",
-                         __func__);
+    if (!migration_switchover_start(ms)) {
+        error_setg(errp, "migration_switchover_start() failed");
         goto fail;
     }
 
@@ -2800,6 +2793,10 @@ static int postcopy_start(MigrationState *ms, Error **errp)
      */
     migration_rate_set(migrate_max_postcopy_bandwidth());
 
+    /* Now, switchover looks all fine, switching to postcopy-active */
+    migrate_set_state(&ms->state, MIGRATION_STATUS_DEVICE,
+                      MIGRATION_STATUS_POSTCOPY_ACTIVE);
+
     bql_unlock();
 
     return ret;
@@ -2816,14 +2813,39 @@ fail:
 }
 
 /**
- * migration_maybe_pause: Pause if required to by
- * migrate_pause_before_switchover called with the BQL locked
- * Returns: 0 on success
+ * @migration_switchover_start: Start VM switchover procedure
+ *
+ * @s: The migration state object pointer
+ *
+ * Prepares for the switchover, depending on "pause-before-switchover"
+ * capability.
+ *
+ * If cap set, state machine goes like:
+ *   [postcopy-]active -> pre-switchover -> device
+ *
+ * If cap not set:
+ *   [postcopy-]active -> device
+ *
+ * Returns: true on success, false on interruptions.
  */
-static int migration_maybe_pause(MigrationState *s, int new_state)
+static bool migration_switchover_start(MigrationState *s)
 {
+    /* Concurrent cancellation?  Quit */
+    if (s->state == MIGRATION_STATUS_CANCELLING) {
+        return false;
+    }
+
+    /*
+     * No matter precopy or postcopy, since we still hold BQL it must not
+     * change concurrently to CANCELLING, so it must be either ACTIVE or
+     * POSTCOPY_ACTIVE.
+     */
+    assert(migration_is_active());
+
+    /* If the pre stage not requested, directly switch to DEVICE */
     if (!migrate_pause_before_switchover()) {
-        return 0;
+        migrate_set_state(&s->state, s->state, MIGRATION_STATUS_DEVICE);
+        return true;
     }
 
     /* Since leaving this state is not atomic with posting the semaphore
@@ -2836,23 +2858,22 @@ static int migration_maybe_pause(MigrationState *s, int new_state)
         /* This block intentionally left blank */
     }
 
+    /* Update [POSTCOPY_]ACTIVE to PRE_SWITCHOVER */
+    migrate_set_state(&s->state, s->state, MIGRATION_STATUS_PRE_SWITCHOVER);
+    bql_unlock();
+
+    qemu_sem_wait(&s->pause_sem);
+
+    bql_lock();
     /*
-     * If the migration is cancelled when it is in the completion phase,
-     * the migration state is set to MIGRATION_STATUS_CANCELLING.
-     * So we don't need to wait a semaphore, otherwise we would always
-     * wait for the 'pause_sem' semaphore.
+     * After BQL released and retaken, the state can be CANCELLING if it
+     * happend during sem_wait().. Only change the state if it's still
+     * pre-switchover.
      */
-    if (s->state != MIGRATION_STATUS_CANCELLING) {
-        migrate_set_state(&s->state, s->state,
-                          MIGRATION_STATUS_PRE_SWITCHOVER);
-        bql_unlock();
-        qemu_sem_wait(&s->pause_sem);
-        bql_lock();
-        migrate_set_state(&s->state, MIGRATION_STATUS_PRE_SWITCHOVER,
-                          new_state);
-    }
+    migrate_set_state(&s->state, MIGRATION_STATUS_PRE_SWITCHOVER,
+                      MIGRATION_STATUS_DEVICE);
 
-    return s->state == new_state ? 0 : -EINVAL;
+    return s->state == MIGRATION_STATUS_DEVICE;
 }
 
 static int migration_completion_precopy(MigrationState *s)
@@ -2868,8 +2889,7 @@ static int migration_completion_precopy(MigrationState *s)
         }
     }
 
-    ret = migration_maybe_pause(s, MIGRATION_STATUS_DEVICE);
-    if (ret < 0) {
+    if (!migration_switchover_start(s)) {
         goto out_unlock;
     }
 
index 4679ce9f2a532f26254b86ec3e01cc6b1111081a..43babd1df417c78b8b1a1c9670f868f7ab7d8001 100644 (file)
 #
 # @pre-switchover: Paused before device serialisation.  (since 2.11)
 #
-# @device: During device serialisation when pause-before-switchover is
-#     enabled (since 2.11)
+# @device: During device serialisation (also known as switchover phase).
+#     Before 9.2, this is only used when (1) in precopy, and (2) when
+#     pre-switchover capability is enabled.  After 10.0, this state will
+#     always be present for every migration procedure as the switchover
+#     phase.  (since 2.11)
 #
 # @wait-unplug: wait for device unplug request by guest OS to be
 #     completed.  (since 4.2)
index 376ed1d2e6346d8eb6ae921f9b1ba5d74a5d121d..6940e809cde6d149a076d44c1b0a103ca8dd94bf 100644 (file)
@@ -14,6 +14,7 @@ Starting migration...
 {"return": {}}
 {"data": {"status": "setup"}, "event": "MIGRATION", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
 {"data": {"status": "active"}, "event": "MIGRATION", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
+{"data": {"status": "device"}, "event": "MIGRATION", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
 {"data": {"status": "completed"}, "event": "MIGRATION", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
 Gracefully ending the `drive-mirror` job on source...
 {"return": {}}
index 9d4abba8c5d8e9eb62640c1a94e0ede3659052d1..8e58705e515e039cd25b0a9562fad9e7966a7a21 100644 (file)
@@ -8,4 +8,5 @@ Starting migration...
 {"return": {}}
 {"data": {"status": "setup"}, "event": "MIGRATION", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
 {"data": {"status": "active"}, "event": "MIGRATION", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
+{"data": {"status": "device"}, "event": "MIGRATION", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
 {"data": {"status": "completed"}, "event": "MIGRATION", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
index ac8b64350c334efff601a5ed5a9de203ac920391..be3e138b5850f4de448a9cf843111cdc82f113af 100644 (file)
@@ -10,6 +10,7 @@ Starting migration to B...
 {"return": {}}
 {"data": {"status": "setup"}, "event": "MIGRATION", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
 {"data": {"status": "active"}, "event": "MIGRATION", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
+{"data": {"status": "device"}, "event": "MIGRATION", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
 {"data": {"status": "completed"}, "event": "MIGRATION", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
 {"data": {"status": "active"}, "event": "MIGRATION", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
 {"data": {"status": "completed"}, "event": "MIGRATION", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
@@ -27,6 +28,7 @@ Starting migration back to A...
 {"return": {}}
 {"data": {"status": "setup"}, "event": "MIGRATION", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
 {"data": {"status": "active"}, "event": "MIGRATION", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
+{"data": {"status": "device"}, "event": "MIGRATION", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
 {"data": {"status": "completed"}, "event": "MIGRATION", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
 {"data": {"status": "active"}, "event": "MIGRATION", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
 {"data": {"status": "completed"}, "event": "MIGRATION", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
index b8a2d3598d88dd85f7260f9d5e779e4f3e718ff6..bd7706b84b15fa2ec941d88ce008870af598afaf 100644 (file)
@@ -8,6 +8,7 @@ Starting migration to B...
 {"return": {}}
 {"data": {"status": "setup"}, "event": "MIGRATION", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
 {"data": {"status": "active"}, "event": "MIGRATION", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
+{"data": {"status": "device"}, "event": "MIGRATION", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
 {"data": {"status": "completed"}, "event": "MIGRATION", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
 {"data": {"status": "active"}, "event": "MIGRATION", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
 {"data": {"status": "completed"}, "event": "MIGRATION", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
index 546dbb4a68a2dd715cd7305bffdbed14ff639f6c..37411144cafe7b61d5e2ab7c2cf11170dcb80a41 100644 (file)
@@ -7,6 +7,7 @@ Enabling migration QMP events on VM...
 {"return": {}}
 {"data": {"status": "setup"}, "event": "MIGRATION", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
 {"data": {"status": "active"}, "event": "MIGRATION", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
+{"data": {"status": "device"}, "event": "MIGRATION", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
 {"data": {"status": "completed"}, "event": "MIGRATION", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
 
 VM is now stopped:
index 5c0fa1f7c57965e5e516d3dd649696baa65a6919..28a0901a0a6f632a6d44bf20707de9d0f25baa02 100644 (file)
@@ -117,13 +117,14 @@ void migrate(QOSState *from, QOSState *to, const char *uri)
         g_assert(qdict_haskey(sub, "status"));
         st = qdict_get_str(sub, "status");
 
-        /* "setup", "active", "completed", "failed", "cancelled" */
+        /* "setup", "active", "device", "completed", "failed", "cancelled" */
         if (strcmp(st, "completed") == 0) {
             qobject_unref(rsp);
             break;
         }
 
         if ((strcmp(st, "setup") == 0) || (strcmp(st, "active") == 0)
+            || (strcmp(st, "device") == 0)
             || (strcmp(st, "wait-unplug") == 0)) {
             qobject_unref(rsp);
             g_usleep(5000);