KVM: x86: Add an emulation type to handle completion of user exits
author Hou Wenlong <houwenlong93@linux.alibaba.com>
Tue, 2 Nov 2021 09:15:30 +0000 (17:15 +0800)
committer Paolo Bonzini <pbonzini@redhat.com>
Wed, 8 Dec 2021 09:25:15 +0000 (04:25 -0500)
The next patch will use kvm_emulate_instruction() with
EMULTYPE_SKIP in the complete_userspace_io callback to fix a
problem in MSR access emulation. However, EMULTYPE_SKIP only
updates RIP; the remaining work, such as updating interruptibility
state and injecting single-step #DBs, would have to be done in the
callback itself. Since the emulator already does those things after
x86_emulate_insn(), add a new emulation type that pairs with
EMULTYPE_SKIP so that they are handled within the emulator when
completing user exits.

Suggested-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Hou Wenlong <houwenlong93@linux.alibaba.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <8f8c8e268b65f31d55c2881a4b30670946ecfa0d.1635842679.git.houwenlong93@linux.alibaba.com>

arch/x86/include/asm/kvm_host.h
arch/x86/kvm/x86.c

index 361fc52d1c0af1979b105c9c4b62bee417143b98..d5fede05eb5fc41867852444a3c32ad8e8037e94 100644 (file)
@@ -1643,7 +1643,8 @@ extern u64 kvm_mce_cap_supported;
  *
  * EMULTYPE_SKIP - Set when emulating solely to skip an instruction, i.e. to
  *                decode the instruction length.  For use *only* by
- *                kvm_x86_ops.skip_emulated_instruction() implementations.
+ *                kvm_x86_ops.skip_emulated_instruction() implementations if
+ *                EMULTYPE_COMPLETE_USER_EXIT is not set.
  *
  * EMULTYPE_ALLOW_RETRY_PF - Set when the emulator should resume the guest to
  *                          retry native execution under certain conditions,
@@ -1663,6 +1664,10 @@ extern u64 kvm_mce_cap_supported;
  *
  * EMULTYPE_PF - Set when emulating MMIO by way of an intercepted #PF, in which
  *              case the CR2/GPA value pass on the stack is valid.
+ *
+ * EMULTYPE_COMPLETE_USER_EXIT - Set when the emulator should update interruptibility
+ *                              state and inject single-step #DBs after skipping
+ *                              an instruction (after completing userspace I/O).
  */
 #define EMULTYPE_NO_DECODE         (1 << 0)
 #define EMULTYPE_TRAP_UD           (1 << 1)
@@ -1671,6 +1676,7 @@ extern u64 kvm_mce_cap_supported;
 #define EMULTYPE_TRAP_UD_FORCED            (1 << 4)
 #define EMULTYPE_VMWARE_GP         (1 << 5)
 #define EMULTYPE_PF                (1 << 6)
+#define EMULTYPE_COMPLETE_USER_EXIT (1 << 7)
 
 int kvm_emulate_instruction(struct kvm_vcpu *vcpu, int emulation_type);
 int kvm_emulate_instruction_from_buffer(struct kvm_vcpu *vcpu,
index d0cc4051ee26dece5a31eb4828500c5d94283958..4464aa7931cd1a48183d8a5b7fa967b2941d49fa 100644 (file)
@@ -8134,9 +8134,10 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
        }
 
        /*
-        * Note, EMULTYPE_SKIP is intended for use *only* by vendor callbacks
-        * for kvm_skip_emulated_instruction().  The caller is responsible for
-        * updating interruptibility state and injecting single-step #DBs.
+        * EMULTYPE_SKIP without EMULTYPE_COMPLETE_USER_EXIT is intended for
+        * use *only* by vendor callbacks for kvm_skip_emulated_instruction().
+        * The caller is responsible for updating interruptibility state and
+        * injecting single-step #DBs.
         */
        if (emulation_type & EMULTYPE_SKIP) {
                if (ctxt->mode != X86EMUL_MODE_PROT64)
@@ -8144,6 +8145,11 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
                else
                        ctxt->eip = ctxt->_eip;
 
+               if (emulation_type & EMULTYPE_COMPLETE_USER_EXIT) {
+                       r = 1;
+                       goto writeback;
+               }
+
                kvm_rip_write(vcpu, ctxt->eip);
                if (ctxt->eflags & X86_EFLAGS_RF)
                        kvm_set_rflags(vcpu, ctxt->eflags & ~X86_EFLAGS_RF);
@@ -8213,6 +8219,7 @@ restart:
        else
                r = 1;
 
+writeback:
        if (writeback) {
                unsigned long rflags = static_call(kvm_x86_get_rflags)(vcpu);
                toggle_interruptibility(vcpu, ctxt->interruptibility);