KVM: x86: Introduce KVM_REQ_TLB_FLUSH_CURRENT to flush current ASID
author: Sean Christopherson <sean.j.christopherson@intel.com>
Fri, 20 Mar 2020 21:28:20 +0000 (14:28 -0700)
committer: Paolo Bonzini <pbonzini@redhat.com>
Tue, 21 Apr 2020 13:12:53 +0000 (09:12 -0400)
Add KVM_REQ_TLB_FLUSH_CURRENT to allow optimized TLB flushing of VMX's
EPTP/VPID contexts[*] from the KVM MMU and/or in a deferred manner, e.g.
to flush L2's context during nested VM-Enter.

Convert KVM_REQ_TLB_FLUSH to KVM_REQ_TLB_FLUSH_CURRENT in flows where
the flush is directly associated with vCPU-scoped instruction emulation,
i.e. MOV CR3 and INVPCID.

Add a comment in vmx_vcpu_load_vmcs() above its KVM_REQ_TLB_FLUSH to
make it clear that it deliberately requests a flush of all contexts.

Service any pending flush request on nested VM-Exit as it's possible a
nested VM-Exit could occur after requesting a flush for L2.  Add the
same logic for nested VM-Enter; it's _extremely_ unlikely for a flush
to be pending on nested VM-Enter, but it is theoretically possible
(in the future) due to RSM (SMM) emulation.

[*] Intel also has an Address Space Identifier (ASID) concept, e.g.
    EPTP+VPID+PCID == ASID, it's just not documented in the SDM because
    the rules of invalidation are different based on which piece of the
    ASID is being changed, i.e. whether the EPTP, VPID, or PCID context
    must be invalidated.

Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
Message-Id: <20200320212833.3507-25-sean.j.christopherson@intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
arch/x86/include/asm/kvm_host.h
arch/x86/kvm/svm/svm.c
arch/x86/kvm/vmx/nested.c
arch/x86/kvm/vmx/vmx.c
arch/x86/kvm/x86.c
arch/x86/kvm/x86.h

index e6305d5e28fa97b71249f88f708751fc8fc0e5b7..72e9c4492f47638febe9a3fd525c96438c3a3077 100644 (file)
@@ -83,8 +83,9 @@
 #define KVM_REQ_GET_VMCS12_PAGES       KVM_ARCH_REQ(24)
 #define KVM_REQ_APICV_UPDATE \
        KVM_ARCH_REQ_FLAGS(25, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
+#define KVM_REQ_TLB_FLUSH_CURRENT      KVM_ARCH_REQ(26)
 #define KVM_REQ_HV_TLB_FLUSH \
-       KVM_ARCH_REQ_FLAGS(26, KVM_REQUEST_NO_WAKEUP)
+       KVM_ARCH_REQ_FLAGS(27, KVM_REQUEST_NO_WAKEUP)
 
 #define CR0_RESERVED_BITS                                               \
        (~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \
@@ -1104,6 +1105,7 @@ struct kvm_x86_ops {
        void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags);
 
        void (*tlb_flush_all)(struct kvm_vcpu *vcpu);
+       void (*tlb_flush_current)(struct kvm_vcpu *vcpu);
        int  (*tlb_remote_flush)(struct kvm *kvm);
        int  (*tlb_remote_flush_with_range)(struct kvm *kvm,
                        struct kvm_tlb_range *range);
index 72b976e64df9de37e0d9183c60a443a261d338e8..66123848448da18d828d154b52c773188a6962df 100644 (file)
@@ -3945,6 +3945,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
        .set_rflags = svm_set_rflags,
 
        .tlb_flush_all = svm_flush_tlb,
+       .tlb_flush_current = svm_flush_tlb,
        .tlb_flush_gva = svm_flush_tlb_gva,
        .tlb_flush_guest = svm_flush_tlb,
 
index ba1aedb535efd1532bbb526c8fbc61c89da1c8fe..567a7a0f30e0590cae1653db4f660e960c8ceb16 100644 (file)
@@ -3208,6 +3208,9 @@ enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu,
        u32 exit_reason = EXIT_REASON_INVALID_STATE;
        u32 exit_qual;
 
+       if (kvm_check_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu))
+               kvm_vcpu_flush_tlb_current(vcpu);
+
        evaluate_pending_interrupts = exec_controls_get(vmx) &
                (CPU_BASED_INTR_WINDOW_EXITING | CPU_BASED_NMI_WINDOW_EXITING);
        if (likely(!evaluate_pending_interrupts) && kvm_vcpu_apicv_active(vcpu))
@@ -4274,6 +4277,10 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
        /* trying to cancel vmlaunch/vmresume is a bug */
        WARN_ON_ONCE(vmx->nested.nested_run_pending);
 
+       /* Service the TLB flush request for L2 before switching to L1. */
+       if (kvm_check_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu))
+               kvm_vcpu_flush_tlb_current(vcpu);
+
        leave_guest_mode(vcpu);
 
        if (nested_cpu_has_preemption_timer(vmcs12))
index da868846d96d38ccb06fa7be631afb37f853c58a..d958cee52f411e7832a5bb973c3f3f713e2b01dd 100644 (file)
@@ -1338,6 +1338,10 @@ void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu)
                void *gdt = get_current_gdt_ro();
                unsigned long sysenter_esp;
 
+               /*
+                * Flush all EPTP/VPID contexts, the new pCPU may have stale
+                * TLB entries from its previous association with the vCPU.
+                */
                kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
 
                /*
@@ -5468,7 +5472,7 @@ static int handle_invpcid(struct kvm_vcpu *vcpu)
 
                if (kvm_get_active_pcid(vcpu) == operand.pcid) {
                        kvm_mmu_sync_roots(vcpu);
-                       kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
+                       kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
                }
 
                for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
@@ -7764,6 +7768,7 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
        .set_rflags = vmx_set_rflags,
 
        .tlb_flush_all = vmx_flush_tlb_all,
+       .tlb_flush_current = vmx_flush_tlb_current,
        .tlb_flush_gva = vmx_flush_tlb_gva,
        .tlb_flush_guest = vmx_flush_tlb_guest,
 
index cd2a3d01bffb567a33d1ecf45b8ebc91667212ca..ebbe34d89469688f3f2ded1b949b57a50bb97f45 100644 (file)
@@ -1019,7 +1019,7 @@ int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
        if (cr3 == kvm_read_cr3(vcpu) && !pdptrs_changed(vcpu)) {
                if (!skip_tlb_flush) {
                        kvm_mmu_sync_roots(vcpu);
-                       kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
+                       kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
                }
                return 0;
        }
@@ -8222,10 +8222,17 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
                        kvm_mmu_sync_roots(vcpu);
                if (kvm_check_request(KVM_REQ_LOAD_MMU_PGD, vcpu))
                        kvm_mmu_load_pgd(vcpu);
-               if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu))
+               if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
                        kvm_vcpu_flush_tlb_all(vcpu);
+
+                       /* Flushing all ASIDs flushes the current ASID... */
+                       kvm_clear_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
+               }
+               if (kvm_check_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu))
+                       kvm_vcpu_flush_tlb_current(vcpu);
                if (kvm_check_request(KVM_REQ_HV_TLB_FLUSH, vcpu))
                        kvm_vcpu_flush_tlb_guest(vcpu);
+
                if (kvm_check_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu)) {
                        vcpu->run->exit_reason = KVM_EXIT_TPR_ACCESS;
                        r = 0;
index b968acc0516fe41876b104a2d2ea4815b1c71691..7b5ed8ed628e281b7b2f0595e96a13bd4f1519cc 100644 (file)
@@ -125,6 +125,12 @@ static inline bool mmu_is_nested(struct kvm_vcpu *vcpu)
        return vcpu->arch.walk_mmu == &vcpu->arch.nested_mmu;
 }
 
+static inline void kvm_vcpu_flush_tlb_current(struct kvm_vcpu *vcpu)
+{
+       ++vcpu->stat.tlb_flush;
+       kvm_x86_ops.tlb_flush_current(vcpu);
+}
+
 static inline int is_pae(struct kvm_vcpu *vcpu)
 {
        return kvm_read_cr4_bits(vcpu, X86_CR4_PAE);