KVM: x86: Forcibly leave nested virt when SMM state is toggled
authorSean Christopherson <seanjc@google.com>
Tue, 25 Jan 2022 22:03:58 +0000 (22:03 +0000)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Tue, 1 Feb 2022 16:27:02 +0000 (17:27 +0100)
commit f7e570780efc5cec9b2ed1e0472a7da14e864fdb upstream.

Forcibly leave nested virtualization operation if userspace toggles SMM
state via KVM_SET_VCPU_EVENTS or KVM_SYNC_X86_EVENTS.  If userspace
forces the vCPU out of SMM while it's post-VMXON and then injects an SMI,
vmx_enter_smm() will overwrite vmx->nested.smm.vmxon and end up with both
vmxon=false and smm.vmxon=false, but all other nVMX state allocated.

Don't attempt to gracefully handle the transition as (a) most transitions
are nonsencial, e.g. forcing SMM while L2 is running, (b) there isn't
sufficient information to handle all transitions, e.g. SVM wants access
to the SMRAM save state, and (c) KVM_SET_VCPU_EVENTS must precede
KVM_SET_NESTED_STATE during state restore as the latter disallows putting
the vCPU into L2 if SMM is active, and disallows tagging the vCPU as
being post-VMXON in SMM if SMM is not active.

Abuse of KVM_SET_VCPU_EVENTS manifests as a WARN and memory leak in nVMX
due to failure to free vmcs01's shadow VMCS, but the bug goes far beyond
just a memory leak, e.g. toggling SMM on while L2 is active puts the vCPU
in an architecturally impossible state.

  WARNING: CPU: 0 PID: 3606 at free_loaded_vmcs arch/x86/kvm/vmx/vmx.c:2665 [inline]
  WARNING: CPU: 0 PID: 3606 at free_loaded_vmcs+0x158/0x1a0 arch/x86/kvm/vmx/vmx.c:2656
  Modules linked in:
  CPU: 1 PID: 3606 Comm: syz-executor725 Not tainted 5.17.0-rc1-syzkaller #0
  Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
  RIP: 0010:free_loaded_vmcs arch/x86/kvm/vmx/vmx.c:2665 [inline]
  RIP: 0010:free_loaded_vmcs+0x158/0x1a0 arch/x86/kvm/vmx/vmx.c:2656
  Code: <0f> 0b eb b3 e8 8f 4d 9f 00 e9 f7 fe ff ff 48 89 df e8 92 4d 9f 00
  Call Trace:
   <TASK>
   kvm_arch_vcpu_destroy+0x72/0x2f0 arch/x86/kvm/x86.c:11123
   kvm_vcpu_destroy arch/x86/kvm/../../../virt/kvm/kvm_main.c:441 [inline]
   kvm_destroy_vcpus+0x11f/0x290 arch/x86/kvm/../../../virt/kvm/kvm_main.c:460
   kvm_free_vcpus arch/x86/kvm/x86.c:11564 [inline]
   kvm_arch_destroy_vm+0x2e8/0x470 arch/x86/kvm/x86.c:11676
   kvm_destroy_vm arch/x86/kvm/../../../virt/kvm/kvm_main.c:1217 [inline]
   kvm_put_kvm+0x4fa/0xb00 arch/x86/kvm/../../../virt/kvm/kvm_main.c:1250
   kvm_vm_release+0x3f/0x50 arch/x86/kvm/../../../virt/kvm/kvm_main.c:1273
   __fput+0x286/0x9f0 fs/file_table.c:311
   task_work_run+0xdd/0x1a0 kernel/task_work.c:164
   exit_task_work include/linux/task_work.h:32 [inline]
   do_exit+0xb29/0x2a30 kernel/exit.c:806
   do_group_exit+0xd2/0x2f0 kernel/exit.c:935
   get_signal+0x4b0/0x28c0 kernel/signal.c:2862
   arch_do_signal_or_restart+0x2a9/0x1c40 arch/x86/kernel/signal.c:868
   handle_signal_work kernel/entry/common.c:148 [inline]
   exit_to_user_mode_loop kernel/entry/common.c:172 [inline]
   exit_to_user_mode_prepare+0x17d/0x290 kernel/entry/common.c:207
   __syscall_exit_to_user_mode_work kernel/entry/common.c:289 [inline]
   syscall_exit_to_user_mode+0x19/0x60 kernel/entry/common.c:300
   do_syscall_64+0x42/0xb0 arch/x86/entry/common.c:86
   entry_SYSCALL_64_after_hwframe+0x44/0xae
   </TASK>

Cc: stable@vger.kernel.org
Reported-by: syzbot+8112db3ab20e70d50c31@syzkaller.appspotmail.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20220125220358.2091737-1-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
arch/x86/include/asm/kvm_host.h
arch/x86/kvm/svm/nested.c
arch/x86/kvm/svm/svm.c
arch/x86/kvm/svm/svm.h
arch/x86/kvm/vmx/nested.c
arch/x86/kvm/x86.c

index 77ab14bcd47779f610a38f1685e8c4830dd5ba1a..01759199d72387697c97637b6093538e55de428e 100644 (file)
@@ -1487,6 +1487,7 @@ struct kvm_x86_ops {
 };
 
 struct kvm_x86_nested_ops {
+       void (*leave_nested)(struct kvm_vcpu *vcpu);
        int (*check_events)(struct kvm_vcpu *vcpu);
        bool (*hv_timer_pending)(struct kvm_vcpu *vcpu);
        void (*triple_fault)(struct kvm_vcpu *vcpu);
index 510b833cbd39915c021db25d357cf0bedf777a43..de80ae42d044c2831bcc735c9fed710f19a578c0 100644 (file)
@@ -942,9 +942,9 @@ void svm_free_nested(struct vcpu_svm *svm)
 /*
  * Forcibly leave nested mode in order to be able to reset the VCPU later on.
  */
-void svm_leave_nested(struct vcpu_svm *svm)
+void svm_leave_nested(struct kvm_vcpu *vcpu)
 {
-       struct kvm_vcpu *vcpu = &svm->vcpu;
+       struct vcpu_svm *svm = to_svm(vcpu);
 
        if (is_guest_mode(vcpu)) {
                svm->nested.nested_run_pending = 0;
@@ -1313,7 +1313,7 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu,
                return -EINVAL;
 
        if (!(kvm_state->flags & KVM_STATE_NESTED_GUEST_MODE)) {
-               svm_leave_nested(svm);
+               svm_leave_nested(vcpu);
                svm_set_gif(svm, !!(kvm_state->flags & KVM_STATE_NESTED_GIF_SET));
                return 0;
        }
@@ -1378,7 +1378,7 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu,
         */
 
        if (is_guest_mode(vcpu))
-               svm_leave_nested(svm);
+               svm_leave_nested(vcpu);
        else
                svm->nested.vmcb02.ptr->save = svm->vmcb01.ptr->save;
 
@@ -1432,6 +1432,7 @@ static bool svm_get_nested_state_pages(struct kvm_vcpu *vcpu)
 }
 
 struct kvm_x86_nested_ops svm_nested_ops = {
+       .leave_nested = svm_leave_nested,
        .check_events = svm_check_nested_events,
        .triple_fault = nested_svm_triple_fault,
        .get_nested_state_pages = svm_get_nested_state_pages,
index da33ec9bb26e82a0a9cc793ac5c0215f2f5d256f..980abc437cdaaad5348d58f82d738f2f1f58df9c 100644 (file)
@@ -281,7 +281,7 @@ int svm_set_efer(struct kvm_vcpu *vcpu, u64 efer)
 
        if ((old_efer & EFER_SVME) != (efer & EFER_SVME)) {
                if (!(efer & EFER_SVME)) {
-                       svm_leave_nested(svm);
+                       svm_leave_nested(vcpu);
                        svm_set_gif(svm, true);
                        /* #GP intercept is still needed for vmware backdoor */
                        if (!enable_vmware_backdoor)
index 5d30db599e10ddfcc76abe10f78ebf781e5b692e..ff0855c03c917fac21ca112352f2273806c6a9a2 100644 (file)
@@ -461,7 +461,7 @@ static inline bool nested_exit_on_nmi(struct vcpu_svm *svm)
 
 int enter_svm_guest_mode(struct kvm_vcpu *vcpu,
                         u64 vmcb_gpa, struct vmcb *vmcb12, bool from_vmrun);
-void svm_leave_nested(struct vcpu_svm *svm);
+void svm_leave_nested(struct kvm_vcpu *vcpu);
 void svm_free_nested(struct vcpu_svm *svm);
 int svm_allocate_nested(struct vcpu_svm *svm);
 int nested_svm_vmrun(struct kvm_vcpu *vcpu);
index e97a11abc1d85e24a927a56b25cb94b4e86fda31..a0193b11c381db9761aa6de2cefe7bb1d6d6b709 100644 (file)
@@ -6748,6 +6748,7 @@ __init int nested_vmx_hardware_setup(int (*exit_handlers[])(struct kvm_vcpu *))
 }
 
 struct kvm_x86_nested_ops vmx_nested_ops = {
+       .leave_nested = vmx_leave_nested,
        .check_events = vmx_check_nested_events,
        .hv_timer_pending = nested_vmx_preemption_timer_pending,
        .triple_fault = nested_vmx_triple_fault,
index 2b80edffe02cd114c5e6371af45daca8b3b551c8..c89fc57065f1c89e62c99513a7a2e8361ec08c8c 100644 (file)
@@ -4727,8 +4727,10 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
                vcpu->arch.apic->sipi_vector = events->sipi_vector;
 
        if (events->flags & KVM_VCPUEVENT_VALID_SMM) {
-               if (!!(vcpu->arch.hflags & HF_SMM_MASK) != events->smi.smm)
+               if (!!(vcpu->arch.hflags & HF_SMM_MASK) != events->smi.smm) {
+                       kvm_x86_ops.nested_ops->leave_nested(vcpu);
                        kvm_smm_changed(vcpu, events->smi.smm);
+               }
 
                vcpu->arch.smi_pending = events->smi.pending;