KVM: PPC: Book3S HV: Exit guest upon MCE when FWNMI capability is enabled

author Aravinda Prasad <aravinda@linux.vnet.ibm.com>

Thu, 11 May 2017 11:03:37 +0000 (16:33 +0530)

committer Paul Mackerras <paulus@ozlabs.org>

Thu, 22 Jun 2017 01:24:57 +0000 (11:24 +1000)
author Aravinda Prasad <aravinda@linux.vnet.ibm.com>
Thu, 11 May 2017 11:03:37 +0000 (16:33 +0530)
committer Paul Mackerras <paulus@ozlabs.org>
Thu, 22 Jun 2017 01:24:57 +0000 (11:24 +1000)
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h

index 05866391f406d2ac6d5b172aeae3bc40360b9507..7d64f99ea3b86f645cff792daebae9a5f56cd88e 100644 (file)
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -35,6 +35,7 @@
  #include <asm/page.h>
  #include <asm/cacheflush.h>
  #include <asm/hvcall.h>
+#include <asm/mce.h>
  
  #define KVM_MAX_VCPUS          NR_CPUS
  #define KVM_MAX_VCORES         NR_CPUS
@@ -727,6 +728,7 @@ struct kvm_vcpu_arch {
         int prev_cpu;
         bool timer_running;
         wait_queue_head_t cpu_run;
+       struct machine_check_event mce_evt; /* Valid if trap == 0x200 */
  
         struct kvm_vcpu_arch_shared *shared;
  #if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_KVM_BOOK3S_PR_POSSIBLE)
diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h

index 07fbeb927834f3a96278414aedaa59ea580ae8de..8cf8f0c96906dcb3e213524d733bc6a5aa584885 100644 (file)
--- a/arch/powerpc/include/uapi/asm/kvm.h
+++ b/arch/powerpc/include/uapi/asm/kvm.h
@@ -60,6 +60,12 @@ struct kvm_regs {
  
  #define KVM_SREGS_E_FSL_PIDn   (1 << 0) /* PID1/PID2 */
  
+/* flags for kvm_run.flags */
+#define KVM_RUN_PPC_NMI_DISP_MASK              (3 << 0)
+#define   KVM_RUN_PPC_NMI_DISP_FULLY_RECOV     (1 << 0)
+#define   KVM_RUN_PPC_NMI_DISP_LIMITED_RECOV   (2 << 0)
+#define   KVM_RUN_PPC_NMI_DISP_NOT_RECOV       (3 << 0)
+
  /*
   * Feature bits indicate which sections of the sregs struct are valid,
   * both in KVM_GET_SREGS and KVM_SET_SREGS.  On KVM_SET_SREGS, registers
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c

index f6a846c4f9843e808c58f5267cdd7aea39e0f933..c4ada89be6580beef30f8975d62bd1c3b6e37516 100644 (file)
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -1088,15 +1088,20 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
                 r = RESUME_GUEST;
                 break;
         case BOOK3S_INTERRUPT_MACHINE_CHECK:
-               /*
-                * Deliver a machine check interrupt to the guest.
-                * We have to do this, even if the host has handled the
-                * machine check, because machine checks use SRR0/1 and
-                * the interrupt might have trashed guest state in them.
-                */
-               kvmppc_book3s_queue_irqprio(vcpu,
-                                           BOOK3S_INTERRUPT_MACHINE_CHECK);
-               r = RESUME_GUEST;
+               /* Exit the guest to the host with KVM_EXIT_NMI as exit reason */
+               run->exit_reason = KVM_EXIT_NMI;
+               run->hw.hardware_exit_reason = vcpu->arch.trap;
+               /* Clear out the old NMI status from run->flags */
+               run->flags &= ~KVM_RUN_PPC_NMI_DISP_MASK;
+               /* Now set the NMI status */
+               if (vcpu->arch.mce_evt.disposition == MCE_DISPOSITION_RECOVERED)
+                       run->flags |= KVM_RUN_PPC_NMI_DISP_FULLY_RECOV;
+               else
+                       run->flags |= KVM_RUN_PPC_NMI_DISP_NOT_RECOV;
+
+               r = RESUME_HOST;
+               /* Print the MCE event to host console. */
+               machine_check_print_event_info(&vcpu->arch.mce_evt, false);
                 break;
         case BOOK3S_INTERRUPT_PROGRAM:
         {
diff --git a/arch/powerpc/kvm/book3s_hv_ras.c b/arch/powerpc/kvm/book3s_hv_ras.c

index 7ef0993214f36e1af970198978c88ba48ea88251..c356f9a40b244e8715eaabd4d7c5818aba547399 100644 (file)
--- a/arch/powerpc/kvm/book3s_hv_ras.c
+++ b/arch/powerpc/kvm/book3s_hv_ras.c
@@ -130,12 +130,28 @@ static long kvmppc_realmode_mc_power7(struct kvm_vcpu *vcpu)
  
  out:
         /*
+        * For guest that supports FWNMI capability, hook the MCE event into
+        * vcpu structure. We are going to exit the guest with KVM_EXIT_NMI
+        * exit reason. On our way to exit we will pull this event from vcpu
+        * structure and print it from thread 0 of the core/subcore.
+        *
+        * For guest that does not support FWNMI capability (old QEMU):
          * We are now going enter guest either through machine check
          * interrupt (for unhandled errors) or will continue from
          * current HSRR0 (for handled errors) in guest. Hence
          * queue up the event so that we can log it from host console later.
          */
-       machine_check_queue_event();
+       if (vcpu->kvm->arch.fwnmi_enabled) {
+               /*
+                * Hook up the mce event on to vcpu structure.
+                * First clear the old event.
+                */
+               memset(&vcpu->arch.mce_evt, 0, sizeof(vcpu->arch.mce_evt));
+               if (get_mce_event(&mce_evt, MCE_EVENT_RELEASE)) {
+                       vcpu->arch.mce_evt = mce_evt;
+               }
+       } else
+               machine_check_queue_event();
  
         return handled;
  }
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S

index ae6d93ee99d40fbbacb29e9916ead5e6e7d91574..e3793bd510fe9c17783d40674b97677e18788643 100644 (file)
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -153,15 +153,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
         stb     r0, HSTATE_HWTHREAD_REQ(r13)
  
         /*
-        * For external and machine check interrupts, we need
-        * to call the Linux handler to process the interrupt.
-        * We do that by jumping to absolute address 0x500 for
-        * external interrupts, or the machine_check_fwnmi label
-        * for machine checks (since firmware might have patched
-        * the vector area at 0x200).  The [h]rfid at the end of the
-        * handler will return to the book3s_hv_interrupts.S code.
-        * For other interrupts we do the rfid to get back
-        * to the book3s_hv_interrupts.S code here.
+        * For external interrupts we need to call the Linux
+        * handler to process the interrupt. We do that by jumping
+        * to absolute address 0x500 for external interrupts.
+        * The [h]rfid at the end of the handler will return to
+        * the book3s_hv_interrupts.S code. For other interrupts
+        * we do the rfid to get back to the book3s_hv_interrupts.S
+        * code here.
          */
         ld      r8, 112+PPC_LR_STKOFF(r1)
         addi    r1, r1, 112
@@ -176,7 +174,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
         andi.   r0, r0, MSR_IR          /* in real mode? */
         bne     .Lvirt_return
  
-       cmpwi   cr1, r12, BOOK3S_INTERRUPT_MACHINE_CHECK
         cmpwi   r12, BOOK3S_INTERRUPT_EXTERNAL
         beq     11f
         cmpwi   r12, BOOK3S_INTERRUPT_H_DOORBELL
@@ -191,7 +188,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
         mtmsrd  r6, 1                   /* Clear RI in MSR */
         mtsrr0  r8
         mtsrr1  r7
-       beq     cr1, 13f                /* machine check */
+       /*
+        * BOOK3S_INTERRUPT_MACHINE_CHECK is handled at the
+        * time of guest exit
+        */
         RFI
  
         /* On POWER7, we have external interrupts set to use HSRR0/1 */
@@ -199,8 +199,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
         mtspr   SPRN_HSRR1, r7
         ba      0x500
  
-13:    b       machine_check_fwnmi
-
  14:    mtspr   SPRN_HSRR0, r8
         mtspr   SPRN_HSRR1, r7
         b       hmi_exception_after_realmode
@@ -2640,22 +2638,32 @@ machine_check_realmode:
         ld      r9, HSTATE_KVM_VCPU(r13)
         li      r12, BOOK3S_INTERRUPT_MACHINE_CHECK
         /*
-        * Deliver unhandled/fatal (e.g. UE) MCE errors to guest through
-        * machine check interrupt (set HSRR0 to 0x200). And for handled
-        * errors (no-fatal), just go back to guest execution with current
-        * HSRR0 instead of exiting guest. This new approach will inject
-        * machine check to guest for fatal error causing guest to crash.
-        *
-        * The old code used to return to host for unhandled errors which
-        * was causing guest to hang with soft lockups inside guest and
-        * makes it difficult to recover guest instance.
+        * For a guest that is FWNMI capable, deliver all MCE errors
+        * (handled/unhandled) by exiting the guest with the KVM_EXIT_NMI
+        * exit reason. This new approach injects machine check errors
+        * into the guest address space, with additional information in
+        * the form of an RTAS event, thus enabling the guest kernel to
+        * suitably handle such errors.
          *
+        * For the guest that is not FWNMI capable (old QEMU), fall back
+        * to old behaviour for backward compatibility:
+        * Deliver unhandled/fatal (e.g. UE) MCE errors to guest
+        * through machine check interrupt (set HSRR0 to 0x200).
+        * For handled errors (non-fatal), just go back to guest execution
+        * with current HSRR0.
          * if we receive machine check with MSR(RI=0) then deliver it to
          * guest as machine check causing guest to crash.
          */
         ld      r11, VCPU_MSR(r9)
         rldicl. r0, r11, 64-MSR_HV_LG, 63 /* check if it happened in HV mode */
         bne     mc_cont                 /* if so, exit to host */
+       /* Check if guest is capable of handling NMI exit */
+       ld      r10, VCPU_KVM(r9)
+       lbz     r10, KVM_FWNMI(r10)
+       cmpdi   r10, 1                  /* FWNMI capable? */
+       beq     mc_cont                 /* if so, exit with KVM_EXIT_NMI. */
+
+       /* if not, fall through for backward compatibility. */
         andi.   r10, r11, MSR_RI        /* check for unrecoverable exception */
         beq     1f                      /* Deliver a machine check to guest */
         ld      r10, VCPU_PC(r9)
author	Aravinda Prasad <aravinda@linux.vnet.ibm.com>
	Thu, 11 May 2017 11:03:37 +0000 (16:33 +0530)
committer	Paul Mackerras <paulus@ozlabs.org>
	Thu, 22 Jun 2017 01:24:57 +0000 (11:24 +1000)
arch/powerpc/include/asm/kvm_host.h		patch \| blob \| history
arch/powerpc/include/uapi/asm/kvm.h		patch \| blob \| history
arch/powerpc/kvm/book3s_hv.c		patch \| blob \| history
arch/powerpc/kvm/book3s_hv_ras.c		patch \| blob \| history
arch/powerpc/kvm/book3s_hv_rmhandlers.S		patch \| blob \| history