u8              do_nap;
        u8              napped[MAX_SMT_THREADS];
        struct kvmppc_vcore *vc[MAX_SUBCORES];
+       /* Bits for changing LPCR on P9 */
+       unsigned long   lpcr_req;
+       unsigned long   lpidr_req;
+       unsigned long   host_lpcr;
+       u32             do_set;
+       u32             do_restore;
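+       /* One phase byte per thread; allphases checks all four at once */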
+       union {
+               u32     allphases;
+               u8      phase[4];
+       } lpcr_sync;
 };
 
 /*
        u8 hwthread_req;
        u8 hwthread_state;
        u8 host_ipi;
-       u8 ptid;
+       u8 ptid;                /* thread number within subcore when split */
+       u8 tid;                 /* thread number within whole core */
        struct kvm_vcpu *kvm_vcpu;
        struct kvmppc_vcore *kvm_vcore;
        void __iomem *xics_phys;
 
        HSTATE_FIELD(HSTATE_SAVED_XIRR, saved_xirr);
        HSTATE_FIELD(HSTATE_HOST_IPI, host_ipi);
        HSTATE_FIELD(HSTATE_PTID, ptid);
+       HSTATE_FIELD(HSTATE_TID, tid);
        HSTATE_FIELD(HSTATE_MMCR0, host_mmcr[0]);
        HSTATE_FIELD(HSTATE_MMCR1, host_mmcr[1]);
        HSTATE_FIELD(HSTATE_MMCRA, host_mmcr[2]);
        OFFSET(KVM_SPLIT_LDBAR, kvm_split_mode, ldbar);
        OFFSET(KVM_SPLIT_DO_NAP, kvm_split_mode, do_nap);
        OFFSET(KVM_SPLIT_NAPPED, kvm_split_mode, napped);
+       OFFSET(KVM_SPLIT_DO_SET, kvm_split_mode, do_set);
+       OFFSET(KVM_SPLIT_DO_RESTORE, kvm_split_mode, do_restore);
 #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
 
 #ifdef CONFIG_PPC_BOOK3S_64
 
        if (!cpu_has_feature(CPU_FTR_ARCH_207S))
                return false;
 
+       /* POWER9 currently requires all threads to be in the same MMU mode */
+       if (cpu_has_feature(CPU_FTR_ARCH_300) &&
+           kvm_is_radix(vc->kvm) != kvm_is_radix(cip->vc[0]->kvm))
+               return false;
+
        if (n_threads < cip->max_subcore_threads)
                n_threads = cip->max_subcore_threads;
        if (!subcore_config_ok(cip->n_subcores + 1, n_threads))
        for_each_runnable_thread(i, vcpu, vc) {
                if (signal_pending(vcpu->arch.run_task))
                        vcpu->arch.ret = -EINTR;
-               else if (kvm_is_radix(vc->kvm) != radix_enabled())
-                       /* can't actually run HPT guest on radix host yet... */
-                       vcpu->arch.ret = -EINVAL;
                else if (vcpu->arch.vpa.update_pending ||
                         vcpu->arch.slb_shadow.update_pending ||
                         vcpu->arch.dtl.update_pending)
        int controlled_threads;
        int trap;
        bool is_power8;
+       bool hpt_on_radix;
 
        /*
         * Remove from the list any threads that have a signal pending
         * Make sure we are running on primary threads, and that secondary
         * threads are offline.  Also check if the number of threads in this
         * guest are greater than the current system threads per guest.
+        * On POWER9, independent-threads mode must be off if this is
+        * a HPT guest on a radix host.
         */
-       if ((controlled_threads > 1) &&
-           ((vc->num_threads > threads_per_subcore) || !on_primary_thread())) {
+       hpt_on_radix = radix_enabled() && !kvm_is_radix(vc->kvm);
+       if (((controlled_threads > 1) &&
+            ((vc->num_threads > threads_per_subcore) || !on_primary_thread())) ||
+           (hpt_on_radix && vc->kvm->arch.threads_indep)) {
                for_each_runnable_thread(i, vcpu, vc) {
                        vcpu->arch.ret = -EBUSY;
                        kvmppc_remove_runnable(vc, vcpu);
        is_power8 = cpu_has_feature(CPU_FTR_ARCH_207S)
                && !cpu_has_feature(CPU_FTR_ARCH_300);
 
-       if (split > 1) {
+       if (split > 1 || hpt_on_radix) {
                sip = &split_info;
                memset(&split_info, 0, sizeof(split_info));
                for (sub = 0; sub < core_info.n_subcores; ++sub)
                        split_info.subcore_size = subcore_size;
                } else {
                        split_info.subcore_size = 1;
+                       if (hpt_on_radix) {
+                               /* Use the split_info for LPCR/LPIDR changes */
+                               split_info.lpcr_req = vc->lpcr;
+                               split_info.lpidr_req = vc->kvm->arch.lpid;
+                               split_info.host_lpcr = vc->kvm->arch.host_lpcr;
+                               split_info.do_set = 1;
+                       }
                }
 
                /* order writes to split_info before kvm_split_mode pointer */
                smp_wmb();
        }
-       for (thr = 0; thr < controlled_threads; ++thr)
+
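+       /* Tell each thread its core-thread number and the split-mode info */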
+       for (thr = 0; thr < controlled_threads; ++thr) {
+               paca[pcpu + thr].kvm_hstate.tid = thr;
+               paca[pcpu + thr].kvm_hstate.napping = 0;
                paca[pcpu + thr].kvm_hstate.kvm_split_mode = sip;
+       }
 
        /* Initiate micro-threading (split-core) on POWER8 if required */
        if (cmd_bit) {
         * When doing micro-threading, poke the inactive threads as well.
         * This gets them to the nap instruction after kvm_do_nap,
         * which reduces the time taken to unsplit later.
+        * For a HPT guest on a POWER9 radix host, we need all the
+        * secondary threads woken up so they can do the LPCR/LPIDR change.
         */
-       if (cmd_bit) {
+       if (cmd_bit || hpt_on_radix) {
                split_info.do_nap = 1;  /* ask secondaries to nap when done */
                for (thr = 1; thr < threads_per_subcore; ++thr)
                        if (!(active & (1 << thr)))
                        cpu_relax();
                        ++loops;
                }
-               split_info.do_nap = 0;
+       } else if (hpt_on_radix) {
+               /* Wait for all threads to have seen final sync */
+               for (thr = 1; thr < controlled_threads; ++thr) {
+                       while (paca[pcpu + thr].kvm_hstate.kvm_split_mode) {
+                               HMT_low();
+                               barrier();
+                       }
+                       HMT_medium();
+               }
        }
+       split_info.do_nap = 0;
 
        kvmppc_set_host_core(pcpu);
 
 
        struct kvmppc_vcore *vc = local_paca->kvm_hstate.kvm_vcore;
        int ptid = local_paca->kvm_hstate.ptid;
        struct kvm_split_mode *sip = local_paca->kvm_hstate.kvm_split_mode;
-       int me, ee, i;
+       int me, ee, i, t;
+       int cpu0;
 
        /* Set our bit in the threads-exiting-guest map in the 0xff00
           bits of vcore->entry_exit_map */
                if ((ee >> 8) == 0)
                        kvmhv_interrupt_vcore(vc, ee);
        }
+
+       /*
+        * On POWER9 when running a HPT guest on a radix host (sip != NULL),
+        * we have to interrupt inactive CPU threads to get them to
+        * restore the host LPCR value.
+        */
+       if (sip && sip->lpcr_req) {
+               if (cmpxchg(&sip->do_restore, 0, 1) == 0) {
+                       vc = local_paca->kvm_hstate.kvm_vcore;
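+                       /* cpu0 is the first hardware thread of this core */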
+                       cpu0 = vc->pcpu + ptid - local_paca->kvm_hstate.tid;
+                       for (t = 1; t < threads_per_core; ++t) {
+                               if (sip->napped[t])
+                                       kvmhv_rm_send_ipi(cpu0 + t);
+                       }
+               }
+       }
 }
 
 struct kvmppc_host_rm_ops *kvmppc_host_rm_ops_hv;
        die("Bad interrupt in KVM entry/exit code", regs, SIGABRT);
        panic("Bad KVM trap");
 }
+
+/*
+ * Functions used to switch LPCR HR and UPRT bits on all threads
+ * when entering and exiting HPT guests on a radix host.
+ */
+
+#define PHASE_REALMODE         1       /* in real mode */
+#define PHASE_SET_LPCR         2       /* have set LPCR */
+#define PHASE_OUT_OF_GUEST     4       /* have finished executing in guest */
+#define PHASE_RESET_LPCR       8       /* have reset LPCR to host value */
+
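+/* Replicate a phase value into all four bytes of lpcr_sync.allphases */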
+#define ALL(p)         (((p) << 24) | ((p) << 16) | ((p) << 8) | (p))
+
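+/*
+ * Record that this thread has reached @phase, then spin until
+ * all four threads have reached it.
+ */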
+static void wait_for_sync(struct kvm_split_mode *sip, int phase)
+{
+       int thr = local_paca->kvm_hstate.tid;
+
+       sip->lpcr_sync.phase[thr] |= phase;
+       phase = ALL(phase);
+       while ((sip->lpcr_sync.allphases & phase) != phase) {
+               HMT_low();
+               barrier();
+       }
+       HMT_medium();
+}
+
+void kvmhv_p9_set_lpcr(struct kvm_split_mode *sip)
+{
+       unsigned long rb, set;
+
+       /* wait for every other thread to get to real mode */
+       wait_for_sync(sip, PHASE_REALMODE);
+
+       /* Set LPCR and LPIDR */
+       mtspr(SPRN_LPCR, sip->lpcr_req);
+       mtspr(SPRN_LPID, sip->lpidr_req);
+       isync();
+
+       /* Invalidate the TLB on thread 0 */
+       if (local_paca->kvm_hstate.tid == 0) {
+               sip->do_set = 0;
+               asm volatile("ptesync" : : : "memory");
+               for (set = 0; set < POWER9_TLB_SETS_RADIX; ++set) {
+                       rb = TLBIEL_INVAL_SET_LPID +
+                               (set << TLBIEL_INVAL_SET_SHIFT);
+                       asm volatile(PPC_TLBIEL(%0, %1, 0, 0, 0) : :
+                                    "r" (rb), "r" (0));
+               }
+               asm volatile("ptesync" : : : "memory");
+       }
+
+       /* indicate that we have done so and wait for others */
+       wait_for_sync(sip, PHASE_SET_LPCR);
+       /* order read of sip->lpcr_sync.allphases vs. sip->do_set */
+       smp_rmb();
+}
+
+/*
+ * Called when a thread that has been in the guest needs
+ * to restore the host LPCR value; this only happens on POWER9
+ * when running a HPT guest on a radix host.
+ */
+void kvmhv_p9_restore_lpcr(struct kvm_split_mode *sip)
+{
+       /* we're out of the guest... */
+       wait_for_sync(sip, PHASE_OUT_OF_GUEST);
+
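+       /* Switch the MMU back to the host LPID and LPCR */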
+       mtspr(SPRN_LPID, 0);
+       mtspr(SPRN_LPCR, sip->host_lpcr);
+       isync();
+
+       if (local_paca->kvm_hstate.tid == 0) {
+               sip->do_restore = 0;
+               smp_wmb();      /* order store of do_restore vs. phase */
+       }
+
+       wait_for_sync(sip, PHASE_RESET_LPCR);
+       smp_mb();
+       local_paca->kvm_hstate.kvm_split_mode = NULL;
+}
 
        RFI
 
 kvmppc_call_hv_entry:
+BEGIN_FTR_SECTION
+       /* On P9, set the LPCR if necessary */
+       ld      r3, HSTATE_SPLIT_MODE(r13)
+       cmpdi   r3, 0
+       beq     46f
+       lwz     r4, KVM_SPLIT_DO_SET(r3)
+       cmpwi   r4, 0
+       beq     46f
+       bl      kvmhv_p9_set_lpcr
+       nop
+46:
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
+
        ld      r4, HSTATE_KVM_VCPU(r13)
        bl      kvmppc_hv_entry
 
        ld      r6, 0(r6)
        mtspr   SPRN_HDEC, r6
        /* and set per-LPAR registers, if doing dynamic micro-threading */
-BEGIN_FTR_SECTION
        ld      r6, HSTATE_SPLIT_MODE(r13)
        cmpdi   r6, 0
        beq     63f
+BEGIN_FTR_SECTION
        ld      r0, KVM_SPLIT_RPR(r6)
        mtspr   SPRN_RPR, r0
        ld      r0, KVM_SPLIT_PMMAR(r6)
        ld      r0, KVM_SPLIT_LDBAR(r6)
        mtspr   SPRN_LDBAR, r0
        isync
-END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
+FTR_SECTION_ELSE
+       /* On P9 we use the split_info for coordinating LPCR changes */
+       lwz     r4, KVM_SPLIT_DO_SET(r6)
+       cmpwi   r4, 0
+       beq     63f
+       mr      r3, r6
+       bl      kvmhv_p9_set_lpcr
+       nop
+ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
 63:
        /* Order load of vcpu after load of vcore */
        lwsync
        ld      r3, HSTATE_SPLIT_MODE(r13)
        cmpdi   r3, 0
        beq     kvm_no_guest
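+       /* On P9, check whether we need to set or restore the LPCR */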
+       lwz     r0, KVM_SPLIT_DO_SET(r3)
+       cmpwi   r0, 0
+       bne     kvmhv_do_set
+       lwz     r0, KVM_SPLIT_DO_RESTORE(r3)
+       cmpwi   r0, 0
+       bne     kvmhv_do_restore
        lbz     r0, KVM_SPLIT_DO_NAP(r3)
        cmpwi   r0, 0
        beq     kvm_no_guest
        stb     r0, HSTATE_HWTHREAD_STATE(r13)
        b       kvm_no_guest
 
+kvmhv_do_set:
+       /* Set LPCR, LPIDR etc. on P9 */
+       HMT_MEDIUM
+       bl      kvmhv_p9_set_lpcr
+       nop
+       b       kvm_no_guest
+
+kvmhv_do_restore:
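+       /* Restore host LPCR and LPIDR on P9 */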
+       HMT_MEDIUM
+       bl      kvmhv_p9_restore_lpcr
+       nop
+       b       kvm_no_guest
+
 /*
  * Here the primary thread is trying to return the core to
  * whole-core mode, so we need to nap.
        /* Set kvm_split_mode.napped[tid] = 1 */
        ld      r3, HSTATE_SPLIT_MODE(r13)
        li      r0, 1
-       lhz     r4, PACAPACAINDEX(r13)
-       clrldi  r4, r4, 61      /* micro-threading => P8 => 8 threads/core */
+       lbz     r4, HSTATE_TID(r13)
        addi    r4, r4, KVM_SPLIT_NAPPED
        stbx    r0, r3, r4
        /* Check the do_nap flag again after setting napped[] */
 19:    lis     r8,0x7fff               /* MAX_INT@h */
        mtspr   SPRN_HDEC,r8
 
-16:    ld      r8,KVM_HOST_LPCR(r4)
+16:
+BEGIN_FTR_SECTION
+       /* On POWER9 with HPT-on-radix we need to wait for all other threads */
+       ld      r3, HSTATE_SPLIT_MODE(r13)
+       cmpdi   r3, 0
+       beq     47f
+       lwz     r8, KVM_SPLIT_DO_RESTORE(r3)
+       cmpwi   r8, 0
+       beq     47f
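+       /* r12 holds the trap number; preserve it across the C call */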
+       stw     r12, STACK_SLOT_TRAP(r1)
+       bl      kvmhv_p9_restore_lpcr
+       nop
+       lwz     r12, STACK_SLOT_TRAP(r1)
+       b       48f
+47:
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
+       ld      r8,KVM_HOST_LPCR(r4)
        mtspr   SPRN_LPCR,r8
        isync
-
+48:
        /* load host SLB entries */
 BEGIN_MMU_FTR_SECTION
        b       0f