nested_svm_vmexit(svm);
}
-static bool nested_svm_handle_page_fault_workaround(struct kvm_vcpu *vcpu,
- struct x86_exception *fault)
-{
- struct vcpu_svm *svm = to_svm(vcpu);
- struct vmcb *vmcb = svm->vmcb;
-
- WARN_ON(!is_guest_mode(vcpu));
-
- if (vmcb12_is_intercept(&svm->nested.ctl,
- INTERCEPT_EXCEPTION_OFFSET + PF_VECTOR) &&
- !WARN_ON_ONCE(svm->nested.nested_run_pending)) {
- vmcb->control.exit_code = SVM_EXIT_EXCP_BASE + PF_VECTOR;
- vmcb->control.exit_code_hi = 0;
- vmcb->control.exit_info_1 = fault->error_code;
- vmcb->control.exit_info_2 = fault->address;
- nested_svm_vmexit(svm);
- return true;
- }
-
- return false;
-}
-
static u64 nested_svm_get_tdp_pdptr(struct kvm_vcpu *vcpu, int index)
{
struct vcpu_svm *svm = to_svm(vcpu);
return 0;
}
-static bool nested_exit_on_exception(struct vcpu_svm *svm)
+static bool nested_svm_is_exception_vmexit(struct kvm_vcpu *vcpu, u8 vector,
+ u32 error_code)
{
- unsigned int vector = svm->vcpu.arch.exception.vector;
+ struct vcpu_svm *svm = to_svm(vcpu);
return (svm->nested.ctl.intercepts[INTERCEPT_EXCEPTION] & BIT(vector));
}
static void nested_svm_inject_exception_vmexit(struct kvm_vcpu *vcpu)
{
- struct kvm_queued_exception *ex = &vcpu->arch.exception;
+ struct kvm_queued_exception *ex = &vcpu->arch.exception_vmexit;
struct vcpu_svm *svm = to_svm(vcpu);
struct vmcb *vmcb = svm->vmcb;
* than #PF.
*/
if (ex->vector == PF_VECTOR) {
- if (ex->nested_apf)
- vmcb->control.exit_info_2 = vcpu->arch.apf.nested_apf_token;
- else if (ex->has_payload)
+ if (ex->has_payload)
vmcb->control.exit_info_2 = ex->payload;
else
vmcb->control.exit_info_2 = vcpu->arch.cr2;
return 0;
}
- if (vcpu->arch.exception.pending) {
+ if (vcpu->arch.exception_vmexit.pending) {
if (block_nested_exceptions)
return -EBUSY;
- if (!nested_exit_on_exception(svm))
- return 0;
nested_svm_inject_exception_vmexit(vcpu);
return 0;
}
+ if (vcpu->arch.exception.pending) {
+ if (block_nested_exceptions)
+ return -EBUSY;
+ return 0;
+ }
+
if (vcpu->arch.smi_pending && !svm_smi_blocked(vcpu)) {
if (block_nested_events)
return -EBUSY;
struct kvm_x86_nested_ops svm_nested_ops = {
.leave_nested = svm_leave_nested,
+ .is_exception_vmexit = nested_svm_is_exception_vmexit,
.check_events = svm_check_nested_events,
- .handle_page_fault_workaround = nested_svm_handle_page_fault_workaround,
.triple_fault = nested_svm_triple_fault,
.get_nested_state_pages = svm_get_nested_state_pages,
.get_state = svm_get_nested_state,
return inequality ^ bit;
}
-
-/*
- * KVM wants to inject page-faults which it got to the guest. This function
- * checks whether in a nested guest, we need to inject them to L1 or L2.
- */
-static int nested_vmx_check_exception(struct kvm_vcpu *vcpu, unsigned long *exit_qual)
-{
- struct kvm_queued_exception *ex = &vcpu->arch.exception;
- struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
-
- if (ex->vector == PF_VECTOR) {
- if (ex->nested_apf) {
- *exit_qual = vcpu->arch.apf.nested_apf_token;
- return 1;
- }
- if (nested_vmx_is_page_fault_vmexit(vmcs12, ex->error_code)) {
- *exit_qual = ex->has_payload ? ex->payload : vcpu->arch.cr2;
- return 1;
- }
- } else if (vmcs12->exception_bitmap & (1u << ex->vector)) {
- if (ex->vector == DB_VECTOR) {
- if (ex->has_payload) {
- *exit_qual = ex->payload;
- } else {
- *exit_qual = vcpu->arch.dr6;
- *exit_qual &= ~DR6_BT;
- *exit_qual ^= DR6_ACTIVE_LOW;
- }
- } else
- *exit_qual = 0;
- return 1;
- }
-
- return 0;
-}
-
-static bool nested_vmx_handle_page_fault_workaround(struct kvm_vcpu *vcpu,
- struct x86_exception *fault)
+static bool nested_vmx_is_exception_vmexit(struct kvm_vcpu *vcpu, u8 vector,
+ u32 error_code)
{
struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
- WARN_ON(!is_guest_mode(vcpu));
+ /*
+ * Drop bits 31:16 of the error code when performing the #PF mask+match
+ * check. All VMCS fields involved are 32 bits, but Intel CPUs never
+ * set bits 31:16 and VMX disallows setting bits 31:16 in the injected
+ * error code. Including the to-be-dropped bits in the check might
+ * result in an "impossible" or missed exit from L1's perspective.
+ */
+ if (vector == PF_VECTOR)
+ return nested_vmx_is_page_fault_vmexit(vmcs12, (u16)error_code);
- if (nested_vmx_is_page_fault_vmexit(vmcs12, fault->error_code) &&
- !WARN_ON_ONCE(to_vmx(vcpu)->nested.nested_run_pending)) {
- vmcs12->vm_exit_intr_error_code = fault->error_code;
- nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI,
- PF_VECTOR | INTR_TYPE_HARD_EXCEPTION |
- INTR_INFO_DELIVER_CODE_MASK | INTR_INFO_VALID_MASK,
- fault->address);
- return true;
- }
- return false;
+ return (vmcs12->exception_bitmap & (1u << vector));
}
static int nested_vmx_check_io_bitmap_controls(struct kvm_vcpu *vcpu,
return -ENXIO;
}
-static void nested_vmx_inject_exception_vmexit(struct kvm_vcpu *vcpu,
- unsigned long exit_qual)
+static void nested_vmx_inject_exception_vmexit(struct kvm_vcpu *vcpu)
{
- struct kvm_queued_exception *ex = &vcpu->arch.exception;
+ struct kvm_queued_exception *ex = &vcpu->arch.exception_vmexit;
u32 intr_info = ex->vector | INTR_INFO_VALID_MASK;
struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
+ unsigned long exit_qual;
+
+ if (ex->has_payload) {
+ exit_qual = ex->payload;
+ } else if (ex->vector == PF_VECTOR) {
+ exit_qual = vcpu->arch.cr2;
+ } else if (ex->vector == DB_VECTOR) {
+ exit_qual = vcpu->arch.dr6;
+ exit_qual &= ~DR6_BT;
+ exit_qual ^= DR6_ACTIVE_LOW;
+ } else {
+ exit_qual = 0;
+ }
if (ex->has_error_code) {
/*
{
struct kvm_lapic *apic = vcpu->arch.apic;
struct vcpu_vmx *vmx = to_vmx(vcpu);
- unsigned long exit_qual;
/*
* Only a pending nested run blocks a pending exception. If there is a
* previously injected event, the pending exception occurred while said
* across SMI/RSM as it should; that needs to be addressed in order to
* prioritize SMI over MTF and trap-like #DBs.
*/
+ if (vcpu->arch.exception_vmexit.pending &&
+ !vmx_is_low_priority_db_trap(&vcpu->arch.exception_vmexit)) {
+ if (block_nested_exceptions)
+ return -EBUSY;
+
+ nested_vmx_inject_exception_vmexit(vcpu);
+ return 0;
+ }
+
if (vcpu->arch.exception.pending &&
!vmx_is_low_priority_db_trap(&vcpu->arch.exception)) {
if (block_nested_exceptions)
return -EBUSY;
- if (!nested_vmx_check_exception(vcpu, &exit_qual))
- goto no_vmexit;
- nested_vmx_inject_exception_vmexit(vcpu, exit_qual);
- return 0;
+ goto no_vmexit;
}
if (vmx->nested.mtf_pending) {
return 0;
}
- if (vcpu->arch.exception.pending) {
+ if (vcpu->arch.exception_vmexit.pending) {
if (block_nested_exceptions)
return -EBUSY;
- if (!nested_vmx_check_exception(vcpu, &exit_qual))
- goto no_vmexit;
- nested_vmx_inject_exception_vmexit(vcpu, exit_qual);
+
+ nested_vmx_inject_exception_vmexit(vcpu);
return 0;
}
+ if (vcpu->arch.exception.pending) {
+ if (block_nested_exceptions)
+ return -EBUSY;
+ goto no_vmexit;
+ }
+
if (nested_vmx_preemption_timer_pending(vcpu)) {
if (block_nested_events)
return -EBUSY;
struct kvm_x86_nested_ops vmx_nested_ops = {
.leave_nested = vmx_leave_nested,
+ .is_exception_vmexit = nested_vmx_is_exception_vmexit,
.check_events = vmx_check_nested_events,
- .handle_page_fault_workaround = nested_vmx_handle_page_fault_workaround,
.hv_timer_pending = nested_vmx_preemption_timer_pending,
.triple_fault = nested_vmx_triple_fault,
.get_state = vmx_get_nested_state,
}
EXPORT_SYMBOL_GPL(kvm_deliver_exception_payload);
+static void kvm_queue_exception_vmexit(struct kvm_vcpu *vcpu, unsigned int vector,
+ bool has_error_code, u32 error_code,
+ bool has_payload, unsigned long payload)
+{
+ struct kvm_queued_exception *ex = &vcpu->arch.exception_vmexit;
+
+ ex->vector = vector;
+ ex->injected = false;
+ ex->pending = true;
+ ex->has_error_code = has_error_code;
+ ex->error_code = error_code;
+ ex->has_payload = has_payload;
+ ex->payload = payload;
+}
+
static void kvm_multiple_exception(struct kvm_vcpu *vcpu,
unsigned nr, bool has_error, u32 error_code,
bool has_payload, unsigned long payload, bool reinject)
kvm_make_request(KVM_REQ_EVENT, vcpu);
+ /*
+ * If the exception is destined for L2 and isn't being reinjected,
+ * morph it to a VM-Exit if L1 wants to intercept the exception. A
+ * previously injected exception is not checked because it was checked
+ * when it was originally queued, and re-checking is incorrect if _L1_
+ * injected the exception, in which case it's exempt from interception.
+ */
+ if (!reinject && is_guest_mode(vcpu) &&
+ kvm_x86_ops.nested_ops->is_exception_vmexit(vcpu, nr, error_code)) {
+ kvm_queue_exception_vmexit(vcpu, nr, has_error, error_code,
+ has_payload, payload);
+ return;
+ }
+
if (!vcpu->arch.exception.pending && !vcpu->arch.exception.injected) {
queue:
if (reinject) {
/*
- * On vmentry, vcpu->arch.exception.pending is only
- * true if an event injection was blocked by
- * nested_run_pending. In that case, however,
- * vcpu_enter_guest requests an immediate exit,
- * and the guest shouldn't proceed far enough to
- * need reinjection.
+ * On VM-Entry, an exception can be pending if and only
+ * if event injection was blocked by nested_run_pending.
+ * In that case, however, vcpu_enter_guest() requests an
+ * immediate exit, and the guest shouldn't proceed far
+ * enough to need reinjection.
*/
- WARN_ON_ONCE(vcpu->arch.exception.pending);
+ WARN_ON_ONCE(kvm_is_exception_pending(vcpu));
vcpu->arch.exception.injected = true;
if (WARN_ON_ONCE(has_payload)) {
/*
void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault)
{
++vcpu->stat.pf_guest;
- vcpu->arch.exception.nested_apf =
- is_guest_mode(vcpu) && fault->async_page_fault;
- if (vcpu->arch.exception.nested_apf) {
- vcpu->arch.apf.nested_apf_token = fault->address;
- kvm_queue_exception_e(vcpu, PF_VECTOR, fault->error_code);
- } else {
+
+ /*
+ * Async #PF in L2 is always forwarded to L1 as a VM-Exit regardless of
+ * whether or not L1 wants to intercept "regular" #PF.
+ */
+ if (is_guest_mode(vcpu) && fault->async_page_fault)
+ kvm_queue_exception_vmexit(vcpu, PF_VECTOR,
+ true, fault->error_code,
+ true, fault->address);
+ else
kvm_queue_exception_e_p(vcpu, PF_VECTOR, fault->error_code,
fault->address);
- }
}
EXPORT_SYMBOL_GPL(kvm_inject_page_fault);
-/* Returns true if the page fault was immediately morphed into a VM-Exit. */
-bool kvm_inject_emulated_page_fault(struct kvm_vcpu *vcpu,
+void kvm_inject_emulated_page_fault(struct kvm_vcpu *vcpu,
struct x86_exception *fault)
{
struct kvm_mmu *fault_mmu;
kvm_mmu_invalidate_gva(vcpu, fault_mmu, fault->address,
fault_mmu->root.hpa);
- /*
- * A workaround for KVM's bad exception handling. If KVM injected an
- * exception into L2, and L2 encountered a #PF while vectoring the
- * injected exception, manually check to see if L1 wants to intercept
- * #PF, otherwise queuing the #PF will lead to #DF or a lost exception.
- * In all other cases, defer the check to nested_ops->check_events(),
- * which will correctly handle priority (this does not). Note, other
- * exceptions, e.g. #GP, are theoretically affected, #PF is simply the
- * most problematic, e.g. when L0 and L1 are both intercepting #PF for
- * shadow paging.
- *
- * TODO: Rewrite exception handling to track injected and pending
- * (VM-Exit) exceptions separately.
- */
- if (unlikely(vcpu->arch.exception.injected && is_guest_mode(vcpu)) &&
- kvm_x86_ops.nested_ops->handle_page_fault_workaround(vcpu, fault))
- return true;
-
fault_mmu->inject_page_fault(vcpu, fault);
- return false;
}
EXPORT_SYMBOL_GPL(kvm_inject_emulated_page_fault);
return (kvm_arch_interrupt_allowed(vcpu) &&
kvm_cpu_accept_dm_intr(vcpu) &&
!kvm_event_needs_reinjection(vcpu) &&
- !vcpu->arch.exception.pending);
+ !kvm_is_exception_pending(vcpu));
}
static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu,
static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
struct kvm_vcpu_events *events)
{
- struct kvm_queued_exception *ex = &vcpu->arch.exception;
+ struct kvm_queued_exception *ex;
process_nmi(vcpu);
if (kvm_check_request(KVM_REQ_SMI, vcpu))
process_smi(vcpu);
+ /*
+ * KVM's ABI only allows for one exception to be migrated. Luckily,
+ * the only time there can be two queued exceptions is if there's a
+ * non-exiting _injected_ exception, and a pending exiting exception.
+ * In that case, ignore the VM-Exiting exception as it's an extension
+ * of the injected exception.
+ */
+ if (vcpu->arch.exception_vmexit.pending &&
+ !vcpu->arch.exception.pending &&
+ !vcpu->arch.exception.injected)
+ ex = &vcpu->arch.exception_vmexit;
+ else
+ ex = &vcpu->arch.exception;
+
/*
* In guest mode, payload delivery should be deferred if the exception
* will be intercepted by L1, e.g. KVM should not modifying CR2 if L1
return -EINVAL;
process_nmi(vcpu);
+
+ /*
+ * Flag that userspace is stuffing an exception, the next KVM_RUN will
+ * morph the exception to a VM-Exit if appropriate. Do this only for
+ * pending exceptions, already-injected exceptions are not subject to
+ * interception. Note, userspace that conflates pending and injected
+ * is hosed, and will incorrectly convert an injected exception into a
+ * pending exception, which in turn may cause a spurious VM-Exit.
+ */
+ vcpu->arch.exception_from_userspace = events->exception.pending;
+
+ vcpu->arch.exception_vmexit.pending = false;
+
vcpu->arch.exception.injected = events->exception.injected;
vcpu->arch.exception.pending = events->exception.pending;
vcpu->arch.exception.vector = events->exception.nr;
}
}
-static bool inject_emulated_exception(struct kvm_vcpu *vcpu)
+static void inject_emulated_exception(struct kvm_vcpu *vcpu)
{
struct x86_emulate_ctxt *ctxt = vcpu->arch.emulate_ctxt;
- if (ctxt->exception.vector == PF_VECTOR)
- return kvm_inject_emulated_page_fault(vcpu, &ctxt->exception);
- if (ctxt->exception.error_code_valid)
+ if (ctxt->exception.vector == PF_VECTOR)
+ kvm_inject_emulated_page_fault(vcpu, &ctxt->exception);
+ else if (ctxt->exception.error_code_valid)
kvm_queue_exception_e(vcpu, ctxt->exception.vector,
ctxt->exception.error_code);
else
kvm_queue_exception(vcpu, ctxt->exception.vector);
- return false;
}
static struct x86_emulate_ctxt *alloc_emulate_ctxt(struct kvm_vcpu *vcpu)
if (ctxt->have_exception) {
r = 1;
- if (inject_emulated_exception(vcpu))
- return r;
+ inject_emulated_exception(vcpu);
} else if (vcpu->arch.pio.count) {
if (!vcpu->arch.pio.in) {
/* FIXME: return into emulator if single-stepping. */
*/
if (vcpu->arch.exception.injected)
kvm_inject_exception(vcpu);
- else if (vcpu->arch.exception.pending)
+ else if (kvm_is_exception_pending(vcpu))
; /* see above */
else if (vcpu->arch.nmi_injected)
static_call(kvm_x86_inject_nmi)(vcpu);
if (r < 0)
goto out;
+ /*
+ * A pending exception VM-Exit should either result in nested VM-Exit
+ * or force an immediate re-entry and exit to/from L2, and exception
+ * VM-Exits cannot be injected (flag should _never_ be set).
+ */
+ WARN_ON_ONCE(vcpu->arch.exception_vmexit.injected ||
+ vcpu->arch.exception_vmexit.pending);
+
/*
* New events, other than exceptions, cannot be injected if KVM needs
* to re-inject a previous event. See above comments on re-injecting
kvm_x86_ops.nested_ops->hv_timer_pending(vcpu))
*req_immediate_exit = true;
- WARN_ON(vcpu->arch.exception.pending);
+ WARN_ON(kvm_is_exception_pending(vcpu));
return 0;
out:
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
{
+ struct kvm_queued_exception *ex = &vcpu->arch.exception;
struct kvm_run *kvm_run = vcpu->run;
int r;
}
}
+ /*
+ * If userspace set a pending exception and L2 is active, convert it to
+ * a pending VM-Exit if L1 wants to intercept the exception.
+ */
+ if (vcpu->arch.exception_from_userspace && is_guest_mode(vcpu) &&
+ kvm_x86_ops.nested_ops->is_exception_vmexit(vcpu, ex->vector,
+ ex->error_code)) {
+ kvm_queue_exception_vmexit(vcpu, ex->vector,
+ ex->has_error_code, ex->error_code,
+ ex->has_payload, ex->payload);
+ ex->injected = false;
+ ex->pending = false;
+ }
+ vcpu->arch.exception_from_userspace = false;
+
if (unlikely(vcpu->arch.complete_userspace_io)) {
int (*cui)(struct kvm_vcpu *) = vcpu->arch.complete_userspace_io;
vcpu->arch.complete_userspace_io = NULL;
kvm_set_rflags(vcpu, regs->rflags | X86_EFLAGS_FIXED);
vcpu->arch.exception.pending = false;
+ vcpu->arch.exception_vmexit.pending = false;
kvm_make_request(KVM_REQ_EVENT, vcpu);
}
if (dbg->control & (KVM_GUESTDBG_INJECT_DB | KVM_GUESTDBG_INJECT_BP)) {
r = -EBUSY;
- if (vcpu->arch.exception.pending)
+ if (kvm_is_exception_pending(vcpu))
goto out;
if (dbg->control & KVM_GUESTDBG_INJECT_DB)
kvm_queue_exception(vcpu, DB_VECTOR);
if (vcpu->arch.pv.pv_unhalted)
return true;
- if (vcpu->arch.exception.pending)
+ if (kvm_is_exception_pending(vcpu))
return true;
if (kvm_test_request(KVM_REQ_NMI, vcpu) ||
{
if (unlikely(!lapic_in_kernel(vcpu) ||
kvm_event_needs_reinjection(vcpu) ||
- vcpu->arch.exception.pending))
+ kvm_is_exception_pending(vcpu)))
return false;
if (kvm_hlt_in_guest(vcpu->kvm) && !kvm_can_deliver_async_pf(vcpu))