        unsigned long stolen;
        unsigned long core_stolen;
        u64 now;
+       unsigned long flags;
 
        dt = vcpu->arch.dtl_ptr;
        vpa = vcpu->arch.vpa.pinned_addr;
        core_stolen = vcore_stolen_time(vc, now);
        stolen = core_stolen - vcpu->arch.stolen_logged;
        vcpu->arch.stolen_logged = core_stolen;
-       spin_lock_irq(&vcpu->arch.tbacct_lock);
+       spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags);
        stolen += vcpu->arch.busy_stolen;
        vcpu->arch.busy_stolen = 0;
-       spin_unlock_irq(&vcpu->arch.tbacct_lock);
+       spin_unlock_irqrestore(&vcpu->arch.tbacct_lock, flags);
        if (!dt || !vpa)
                return;
        memset(dt, 0, sizeof(struct dtl_entry));
        }
 }
 
-extern void __kvmppc_vcore_entry(void);
+extern int __kvmppc_vcore_entry(void);
 
 static void kvmppc_remove_runnable(struct kvmppc_vcore *vc,
                                   struct kvm_vcpu *vcpu)
                        smp_call_function_single(cpu + i, do_nothing, NULL, 1);
 }
 
+static void kvmppc_prepare_radix_vcpu(struct kvm_vcpu *vcpu, int pcpu)
+{
+       struct kvm *kvm = vcpu->kvm;
+
+       /*
+        * With radix, the guest can do TLB invalidations itself,
+        * and it could choose to use the local form (tlbiel) if
+        * it is invalidating a translation that has only ever been
+        * used on one vcpu.  However, that doesn't mean it has
+        * only ever been used on one physical cpu, since vcpus
+        * can move around between pcpus.  To cope with this, when
+        * a vcpu moves from one pcpu to another, we need to tell
+        * any vcpus running on the same core as this vcpu previously
+        * ran to flush the TLB.  The TLB is shared between threads,
+        * so we use a single bit in .need_tlb_flush for all 4 threads.
+        */
+       if (vcpu->arch.prev_cpu != pcpu) {
+               if (vcpu->arch.prev_cpu >= 0 &&
+                   cpu_first_thread_sibling(vcpu->arch.prev_cpu) !=
+                   cpu_first_thread_sibling(pcpu))
+                       radix_flush_cpu(kvm, vcpu->arch.prev_cpu, vcpu);
+               vcpu->arch.prev_cpu = pcpu;
+       }
+}
+
 static void kvmppc_start_thread(struct kvm_vcpu *vcpu, struct kvmppc_vcore *vc)
 {
        int cpu;
                cpu += vcpu->arch.ptid;
                vcpu->cpu = vc->pcpu;
                vcpu->arch.thread_cpu = cpu;
-
-               /*
-                * With radix, the guest can do TLB invalidations itself,
-                * and it could choose to use the local form (tlbiel) if
-                * it is invalidating a translation that has only ever been
-                * used on one vcpu.  However, that doesn't mean it has
-                * only ever been used on one physical cpu, since vcpus
-                * can move around between pcpus.  To cope with this, when
-                * a vcpu moves from one pcpu to another, we need to tell
-                * any vcpus running on the same core as this vcpu previously
-                * ran to flush the TLB.  The TLB is shared between threads,
-                * so we use a single bit in .need_tlb_flush for all 4 threads.
-                */
-               if (kvm_is_radix(kvm) && vcpu->arch.prev_cpu != cpu) {
-                       if (vcpu->arch.prev_cpu >= 0 &&
-                           cpu_first_thread_sibling(vcpu->arch.prev_cpu) !=
-                           cpu_first_thread_sibling(cpu))
-                               radix_flush_cpu(kvm, vcpu->arch.prev_cpu, vcpu);
-                       vcpu->arch.prev_cpu = cpu;
-               }
                cpumask_set_cpu(cpu, &kvm->arch.cpu_in_guest);
        }
        tpaca = &paca[cpu];
        spin_unlock(&lp->lock);
 }
 
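+/*
+ * Check whether any of the tasks that will run the vcpus in this core,
+ * across all of its subcores, has a signal pending.
+ */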
+static bool recheck_signals(struct core_info *cip)
+{
+       int sub, i;
+       struct kvm_vcpu *vcpu;
+
+       for (sub = 0; sub < cip->n_subcores; ++sub)
+               for_each_runnable_thread(i, vcpu, cip->vc[sub])
+                       if (signal_pending(vcpu->arch.run_task))
+                               return true;
+       return false;
+}
+
 static void post_guest_process(struct kvmppc_vcore *vc, bool is_master)
 {
        int still_running = 0, i;
        return 0;
 }
 
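+/*
+ * We enter the guest with interrupts hard-disabled, so if an external
+ * interrupt, hypervisor doorbell or HMI made the guest exit, record it
+ * in the PACA so that it is replayed when interrupts are re-enabled.
+ */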
+static void set_irq_happened(int trap)
+{
+       switch (trap) {
+       case BOOK3S_INTERRUPT_EXTERNAL:
+               local_paca->irq_happened |= PACA_IRQ_EE;
+               break;
+       case BOOK3S_INTERRUPT_H_DOORBELL:
+               local_paca->irq_happened |= PACA_IRQ_DBELL;
+               break;
+       case BOOK3S_INTERRUPT_HMI:
+               local_paca->irq_happened |= PACA_IRQ_HMI;
+               break;
+       }
+}
+
 /*
  * Run a set of guest threads on a physical core.
  * Called with vc->lock held.
        int pcpu, thr;
        int target_threads;
        int controlled_threads;
+       int trap;
 
        /*
         * Remove from the list any threads that have a signal pending
        if (vc->num_threads < target_threads)
                collect_piggybacks(&core_info, target_threads);
 
+       /*
+        * On radix, arrange for TLB flushing if necessary.
+        * This has to be done before disabling interrupts since
+        * it uses smp_call_function().
+        */
+       pcpu = smp_processor_id();
+       if (kvm_is_radix(vc->kvm)) {
+               for (sub = 0; sub < core_info.n_subcores; ++sub)
+                       for_each_runnable_thread(i, vcpu, core_info.vc[sub])
+                               kvmppc_prepare_radix_vcpu(vcpu, pcpu);
+       }
+
+       /*
+        * Hard-disable interrupts, and check resched flag and signals.
+        * If we need to reschedule or deliver a signal, clean up
+        * and return without going into the guest(s).
+        */
+       local_irq_disable();
+       hard_irq_disable();
+       if (lazy_irq_pending() || need_resched() ||
+           recheck_signals(&core_info)) {
+               local_irq_enable();
+               vc->vcore_state = VCORE_INACTIVE;
+               /* Unlock all except the primary vcore */
+               for (sub = 1; sub < core_info.n_subcores; ++sub) {
+                       pvc = core_info.vc[sub];
+                       /* Put back on to the preempted vcores list */
+                       kvmppc_vcore_preempt(pvc);
+                       spin_unlock(&pvc->lock);
+               }
+               for (i = 0; i < controlled_threads; ++i)
+                       kvmppc_release_hwthread(pcpu + i);
+               return;
+       }
+
+       kvmppc_clear_host_core(pcpu);
+
        /* Decide on micro-threading (split-core) mode */
        subcore_size = threads_per_subcore;
        cmd_bit = stat_bit = 0;
                /* order writes to split_info before kvm_split_mode pointer */
                smp_wmb();
        }
-       pcpu = smp_processor_id();
        for (thr = 0; thr < controlled_threads; ++thr)
                paca[pcpu + thr].kvm_hstate.kvm_split_mode = sip;
 
                }
        }
 
-       kvmppc_clear_host_core(pcpu);
-
        /* Start all the threads */
        active = 0;
        for (sub = 0; sub < core_info.n_subcores; ++sub) {
        for (sub = 0; sub < core_info.n_subcores; ++sub)
                spin_unlock(&core_info.vc[sub]->lock);
 
+       /*
+        * Interrupts will be enabled once we get into the guest,
+        * so tell lockdep that we're about to enable interrupts.
+        */
+       trace_hardirqs_on();
+
        guest_enter();
 
        srcu_idx = srcu_read_lock(&vc->kvm->srcu);
 
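+       /*
+        * The return value is the trap that caused the exit from the
+        * guest on this thread; set_irq_happened() below uses it to
+        * record any host interrupt that caused the exit.
+        */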
-       __kvmppc_vcore_entry();
+       trap = __kvmppc_vcore_entry();
 
        srcu_read_unlock(&vc->kvm->srcu, srcu_idx);
 
+       guest_exit();
+
+       trace_hardirqs_off();
+       set_irq_happened(trap);
+
        spin_lock(&vc->lock);
        /* prevent other vcpu threads from doing kvmppc_start_thread() now */
        vc->vcore_state = VCORE_EXITING;
                split_info.do_nap = 0;
        }
 
+       kvmppc_set_host_core(pcpu);
+
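+       /*
+        * Re-enabling interrupts here replays anything that
+        * set_irq_happened() recorded in irq_happened above.
+        */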
+       local_irq_enable();
+
        /* Let secondaries go back to the offline loop */
        for (i = 0; i < controlled_threads; ++i) {
                kvmppc_release_hwthread(pcpu + i);
                cpumask_clear_cpu(pcpu + i, &vc->kvm->arch.cpu_in_guest);
        }
 
-       kvmppc_set_host_core(pcpu);
-
        spin_unlock(&vc->lock);
 
        /* make sure updates to secondary vcpu structs are visible now */
        smp_mb();
-       guest_exit();
 
        for (sub = 0; sub < core_info.n_subcores; ++sub) {
                pvc = core_info.vc[sub];
 
        std     r0, PPC_LR_STKOFF(r1)
        stdu    r1, -112(r1)
        mfmsr   r10
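+       /*
+        * Save the host MSR; the exit path reloads it into SRR1 for
+        * the RFI back to the caller.
+        */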
+       std     r10, HSTATE_HOST_MSR(r13)
        LOAD_REG_ADDR(r5, kvmppc_call_hv_entry)
        li      r0,MSR_RI
        andc    r0,r10,r0
        addi    r1, r1, 112
        ld      r7, HSTATE_HOST_MSR(r13)
 
+       /* Return the trap number on this thread as the return value */
+       mr      r3, r12
+
        /*
         * If we came back from the guest via a relocation-on interrupt,
         * we will be in virtual mode at this point, which makes it a
        andi.   r0, r0, MSR_IR          /* in real mode? */
        bne     .Lvirt_return
 
-       cmpwi   r12, BOOK3S_INTERRUPT_EXTERNAL
-       beq     11f
-       cmpwi   r12, BOOK3S_INTERRUPT_H_DOORBELL
-       beq     15f     /* Invoke the H_DOORBELL handler */
-       cmpwi   cr2, r12, BOOK3S_INTERRUPT_HMI
-       beq     cr2, 14f                        /* HMI check */
-
-       /* RFI into the highmem handler, or branch to interrupt handler */
+       /* RFI into the highmem handler */
        mfmsr   r6
        li      r0, MSR_RI
        andc    r6, r6, r0
        mtmsrd  r6, 1                   /* Clear RI in MSR */
        mtsrr0  r8
        mtsrr1  r7
-       /*
-        * BOOK3S_INTERRUPT_MACHINE_CHECK is handled at the
-        * time of guest exit
-        */
        RFI
 
-       /* On POWER7, we have external interrupts set to use HSRR0/1 */
-11:    mtspr   SPRN_HSRR0, r8
-       mtspr   SPRN_HSRR1, r7
-       ba      0x500
-
-14:    mtspr   SPRN_HSRR0, r8
-       mtspr   SPRN_HSRR1, r7
-       b       hmi_exception_after_realmode
-
-15:    mtspr SPRN_HSRR0, r8
-       mtspr SPRN_HSRR1, r7
-       ba    0xe80
-
-       /* Virtual-mode return - can't get here for HMI or machine check */
+       /* Virtual-mode return */
 .Lvirt_return:
-       cmpwi   r12, BOOK3S_INTERRUPT_EXTERNAL
-       beq     16f
-       cmpwi   r12, BOOK3S_INTERRUPT_H_DOORBELL
-       beq     17f
-       andi.   r0, r7, MSR_EE          /* were interrupts hard-enabled? */
-       beq     18f
-       mtmsrd  r7, 1                   /* if so then re-enable them */
-18:    mtlr    r8
+       mtlr    r8
        blr
 
-16:    mtspr   SPRN_HSRR0, r8          /* jump to reloc-on external vector */
-       mtspr   SPRN_HSRR1, r7
-       b       exc_virt_0x4500_hardware_interrupt
-
-17:    mtspr   SPRN_HSRR0, r8
-       mtspr   SPRN_HSRR1, r7
-       b       exc_virt_0x4e80_h_doorbell
-
 kvmppc_primary_no_guest:
        /* We handle this much like a ceded vcpu */
        /* put the HDEC into the DEC, since HDEC interrupts don't wake us */
 
        stw     r12,VCPU_TRAP(r9)
 
+       /*
+        * Now that we have saved away SRR0/1 and HSRR0/1,
+        * interrupts are recoverable in principle, so set MSR_RI.
+        * This becomes important for relocation-on interrupts from
+        * the guest, which we can get in radix mode on POWER9.
+        */
+       li      r0, MSR_RI
+       mtmsrd  r0, 1
+
 #ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
        addi    r3, r9, VCPU_TB_RMINTR
        mr      r4, r9