}
 
 struct reg_and_bit {
-       i915_reg_t reg;
+       union {
+               i915_reg_t reg;
+               i915_mcr_reg_t mcr_reg;
+       };
        u32 bit;
 };
 
 static int wait_for_invalidate(struct intel_gt *gt, struct reg_and_bit rb)
 {
        if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 50))
-               return intel_gt_mcr_wait_for_reg_fw(gt, rb.reg, rb.bit, 0,
+               return intel_gt_mcr_wait_for_reg_fw(gt, rb.mcr_reg, rb.bit, 0,
                                                    TLB_INVAL_TIMEOUT_US,
                                                    TLB_INVAL_TIMEOUT_MS);
        else
                [COPY_ENGINE_CLASS]             = GEN12_BLT_TLB_INV_CR,
                [COMPUTE_CLASS]                 = GEN12_COMPCTX_TLB_INV_CR,
        };
-       static const i915_reg_t xehp_regs[] = {
+       static const i915_mcr_reg_t xehp_regs[] = {
                [RENDER_CLASS]                  = XEHP_GFX_TLB_INV_CR,
                [VIDEO_DECODE_CLASS]            = XEHP_VD_TLB_INV_CR,
                [VIDEO_ENHANCEMENT_CLASS]       = XEHP_VE_TLB_INV_CR,
        for_each_engine_masked(engine, gt, awake, tmp) {
                struct reg_and_bit rb;
 
-               rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num);
+               if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) {
+                       rb.mcr_reg = xehp_regs[engine->class];
+                       rb.bit = BIT(engine->instance);
+               } else {
+                       rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num);
+               }
 
                if (wait_for_invalidate(gt, rb))
                        drm_err_ratelimited(&gt->i915->drm,
 
        }
 }
 
+/*
+ * Although the rest of the driver should use MCR-specific functions to
+ * read/write MCR registers, we still use the regular intel_uncore_* functions
+ * internally to implement those, so we need a way for the functions in this
+ * file to "cast" an i915_mcr_reg_t into an i915_reg_t.
+ */
+static i915_reg_t mcr_reg_cast(const i915_mcr_reg_t mcr)
+{
+       i915_reg_t r = { .reg = mcr.reg };
+
+       return r;
+}
+
 /*
  * rw_with_mcr_steering_fw - Access a register with specific MCR steering
  * @uncore: pointer to struct intel_uncore
  * Caller needs to make sure the relevant forcewake wells are up.
  */
 static u32 rw_with_mcr_steering_fw(struct intel_uncore *uncore,
-                                  i915_reg_t reg, u8 rw_flag,
+                                  i915_mcr_reg_t reg, u8 rw_flag,
                                   int group, int instance, u32 value)
 {
        u32 mcr_mask, mcr_ss, mcr, old_mcr, val = 0;
        intel_uncore_write_fw(uncore, GEN8_MCR_SELECTOR, mcr);
 
        if (rw_flag == FW_REG_READ)
-               val = intel_uncore_read_fw(uncore, reg);
+               val = intel_uncore_read_fw(uncore, mcr_reg_cast(reg));
        else
-               intel_uncore_write_fw(uncore, reg, value);
+               intel_uncore_write_fw(uncore, mcr_reg_cast(reg), value);
 
        mcr &= ~mcr_mask;
        mcr |= old_mcr & mcr_mask;
 }
 
 static u32 rw_with_mcr_steering(struct intel_uncore *uncore,
-                               i915_reg_t reg, u8 rw_flag,
+                               i915_mcr_reg_t reg, u8 rw_flag,
                                int group, int instance,
                                u32 value)
 {
        enum forcewake_domains fw_domains;
        u32 val;
 
-       fw_domains = intel_uncore_forcewake_for_reg(uncore, reg,
+       fw_domains = intel_uncore_forcewake_for_reg(uncore, mcr_reg_cast(reg),
                                                    rw_flag);
        fw_domains |= intel_uncore_forcewake_for_reg(uncore,
                                                     GEN8_MCR_SELECTOR,
  * group/instance.
  */
 u32 intel_gt_mcr_read(struct intel_gt *gt,
-                     i915_reg_t reg,
+                     i915_mcr_reg_t reg,
                      int group, int instance)
 {
        return rw_with_mcr_steering(gt->uncore, reg, FW_REG_READ, group, instance, 0);
  * Write an MCR register in unicast mode after steering toward a specific
  * group/instance.
  */
-void intel_gt_mcr_unicast_write(struct intel_gt *gt, i915_reg_t reg, u32 value,
+void intel_gt_mcr_unicast_write(struct intel_gt *gt, i915_mcr_reg_t reg, u32 value,
                                int group, int instance)
 {
        rw_with_mcr_steering(gt->uncore, reg, FW_REG_WRITE, group, instance, value);
  * Write an MCR register in multicast mode to update all instances.
  */
 void intel_gt_mcr_multicast_write(struct intel_gt *gt,
-                               i915_reg_t reg, u32 value)
+                                 i915_mcr_reg_t reg, u32 value)
 {
-       intel_uncore_write(gt->uncore, reg, value);
+       intel_uncore_write(gt->uncore, mcr_reg_cast(reg), value);
 }
 
 /**
  * domains; use intel_gt_mcr_multicast_write() in cases where forcewake should
  * be obtained automatically.
  */
-void intel_gt_mcr_multicast_write_fw(struct intel_gt *gt, i915_reg_t reg, u32 value)
+void intel_gt_mcr_multicast_write_fw(struct intel_gt *gt, i915_mcr_reg_t reg, u32 value)
 {
-       intel_uncore_write_fw(gt->uncore, reg, value);
+       intel_uncore_write_fw(gt->uncore, mcr_reg_cast(reg), value);
 }
 
 /**
  *
  * Returns the old (unmodified) value read.
  */
-u32 intel_gt_mcr_multicast_rmw(struct intel_gt *gt, i915_reg_t reg,
+u32 intel_gt_mcr_multicast_rmw(struct intel_gt *gt, i915_mcr_reg_t reg,
                               u32 clear, u32 set)
 {
        u32 val = intel_gt_mcr_read_any(gt, reg);
  * for @type steering too.
  */
 static bool reg_needs_read_steering(struct intel_gt *gt,
-                                   i915_reg_t reg,
+                                   i915_mcr_reg_t reg,
                                    enum intel_steering_type type)
 {
        const u32 offset = i915_mmio_reg_offset(reg);
  * steering.
  */
 void intel_gt_mcr_get_nonterminated_steering(struct intel_gt *gt,
-                                            i915_reg_t reg,
+                                            i915_mcr_reg_t reg,
                                             u8 *group, u8 *instance)
 {
        int type;
  *
  * Returns the value from a non-terminated instance of @reg.
  */
-u32 intel_gt_mcr_read_any_fw(struct intel_gt *gt, i915_reg_t reg)
+u32 intel_gt_mcr_read_any_fw(struct intel_gt *gt, i915_mcr_reg_t reg)
 {
        int type;
        u8 group, instance;
                }
        }
 
-       return intel_uncore_read_fw(gt->uncore, reg);
+       return intel_uncore_read_fw(gt->uncore, mcr_reg_cast(reg));
 }
 
 /**
  *
  * Returns the value from a non-terminated instance of @reg.
  */
-u32 intel_gt_mcr_read_any(struct intel_gt *gt, i915_reg_t reg)
+u32 intel_gt_mcr_read_any(struct intel_gt *gt, i915_mcr_reg_t reg)
 {
        int type;
        u8 group, instance;
                }
        }
 
-       return intel_uncore_read(gt->uncore, reg);
+       return intel_uncore_read(gt->uncore, mcr_reg_cast(reg));
 }
 
 static void report_steering_type(struct drm_printer *p,
  * Return: 0 if the register matches the desired condition, or -ETIMEDOUT.
  */
 int intel_gt_mcr_wait_for_reg_fw(struct intel_gt *gt,
-                                i915_reg_t reg,
+                                i915_mcr_reg_t reg,
                                 u32 mask,
                                 u32 value,
                                 unsigned int fast_timeout_us,
 
 void intel_gt_mcr_init(struct intel_gt *gt);
 
 u32 intel_gt_mcr_read(struct intel_gt *gt,
-                     i915_reg_t reg,
+                     i915_mcr_reg_t reg,
                      int group, int instance);
-u32 intel_gt_mcr_read_any_fw(struct intel_gt *gt, i915_reg_t reg);
-u32 intel_gt_mcr_read_any(struct intel_gt *gt, i915_reg_t reg);
+u32 intel_gt_mcr_read_any_fw(struct intel_gt *gt, i915_mcr_reg_t reg);
+u32 intel_gt_mcr_read_any(struct intel_gt *gt, i915_mcr_reg_t reg);
 
 void intel_gt_mcr_unicast_write(struct intel_gt *gt,
-                               i915_reg_t reg, u32 value,
+                               i915_mcr_reg_t reg, u32 value,
                                int group, int instance);
 void intel_gt_mcr_multicast_write(struct intel_gt *gt,
-                                 i915_reg_t reg, u32 value);
+                                 i915_mcr_reg_t reg, u32 value);
 void intel_gt_mcr_multicast_write_fw(struct intel_gt *gt,
-                                    i915_reg_t reg, u32 value);
+                                    i915_mcr_reg_t reg, u32 value);
 
-u32 intel_gt_mcr_multicast_rmw(struct intel_gt *gt, i915_reg_t reg,
+u32 intel_gt_mcr_multicast_rmw(struct intel_gt *gt, i915_mcr_reg_t reg,
                               u32 clear, u32 set);
 
 void intel_gt_mcr_get_nonterminated_steering(struct intel_gt *gt,
-                                            i915_reg_t reg,
+                                            i915_mcr_reg_t reg,
                                             u8 *group, u8 *instance);
 
 void intel_gt_mcr_report_steering(struct drm_printer *p, struct intel_gt *gt,
                                  unsigned int *group, unsigned int *instance);
 
 int intel_gt_mcr_wait_for_reg_fw(struct intel_gt *gt,
-                                i915_reg_t reg,
+                                i915_mcr_reg_t reg,
                                 u32 mask,
                                 u32 value,
                                 unsigned int fast_timeout_us,
 
 
 #include "i915_reg_defs.h"
 
-#define MCR_REG(offset)        _MMIO(offset)
+#define MCR_REG(offset)        ((const i915_mcr_reg_t){ .reg = (offset) })
+
+/*
+ * The perf control registers are technically multicast registers, but the
+ * driver never needs to read/write them directly; we only use them to build
+ * lists of registers (where they're mixed in with other non-MCR registers)
+ * and then operate on the offset directly.  For now we'll just define them
+ * as non-multicast so we can place them on the same list, but we may want
+ * to try to come up with a better way to handle heterogeneous lists of
+ * registers in the future.
+ */
+#define PERF_REG(offset)                       _MMIO(offset)
 
 /* RPM unit config (Gen8+) */
 #define RPM_CONFIG0                            _MMIO(0xd00)
 #define   FLOAT_BLEND_OPTIMIZATION_ENABLE      REG_BIT(4)
 #define   ENABLE_PREFETCH_INTO_IC              REG_BIT(3)
 
-#define EU_PERF_CNTL0                          MCR_REG(0xe458)
-#define EU_PERF_CNTL4                          MCR_REG(0xe45c)
+#define EU_PERF_CNTL0                          PERF_REG(0xe458)
+#define EU_PERF_CNTL4                          PERF_REG(0xe45c)
 
 #define GEN9_ROW_CHICKEN4                      MCR_REG(0xe48c)
 #define   GEN12_DISABLE_GRF_CLEAR              REG_BIT(13)
 #define   STACKID_CTRL                         REG_GENMASK(6, 5)
 #define   STACKID_CTRL_512                     REG_FIELD_PREP(STACKID_CTRL, 0x2)
 
-#define EU_PERF_CNTL1                          MCR_REG(0xe558)
-#define EU_PERF_CNTL5                          MCR_REG(0xe55c)
+#define EU_PERF_CNTL1                          PERF_REG(0xe558)
+#define EU_PERF_CNTL5                          PERF_REG(0xe55c)
 
 #define XEHP_HDC_CHICKEN0                      MCR_REG(0xe5f0)
 #define   LSC_L1_FLUSH_CTL_3D_DATAPORT_FLUSH_EVENTS_MASK       REG_GENMASK(13, 11)
 #define ICL_HDC_MODE                           MCR_REG(0xe5f4)
 
-#define EU_PERF_CNTL2                          MCR_REG(0xe658)
-#define EU_PERF_CNTL6                          MCR_REG(0xe65c)
-#define EU_PERF_CNTL3                          MCR_REG(0xe758)
+#define EU_PERF_CNTL2                          PERF_REG(0xe658)
+#define EU_PERF_CNTL6                          PERF_REG(0xe65c)
+#define EU_PERF_CNTL3                          PERF_REG(0xe758)
 
 #define LSC_CHICKEN_BIT_0                      MCR_REG(0xe7c8)
 #define   DISABLE_D8_D16_COASLESCE             REG_BIT(30)
 
        _wa_add(wal, &wa);
 }
 
-static void wa_mcr_add(struct i915_wa_list *wal, i915_reg_t reg,
+static void wa_mcr_add(struct i915_wa_list *wal, i915_mcr_reg_t reg,
                       u32 clear, u32 set, u32 read_mask, bool masked_reg)
 {
        struct i915_wa wa = {
-               .reg  = reg,
+               .mcr_reg = reg,
                .clr  = clear,
                .set  = set,
                .read = read_mask,
 }
 
 static void
-wa_mcr_write_clr_set(struct i915_wa_list *wal, i915_reg_t reg, u32 clear, u32 set)
+wa_mcr_write_clr_set(struct i915_wa_list *wal, i915_mcr_reg_t reg, u32 clear, u32 set)
 {
        wa_mcr_add(wal, reg, clear, set, clear, false);
 }
 }
 
 static void
-wa_mcr_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 set)
+wa_mcr_write_or(struct i915_wa_list *wal, i915_mcr_reg_t reg, u32 set)
 {
        wa_mcr_write_clr_set(wal, reg, set, set);
 }
 }
 
 static void
-wa_mcr_write_clr(struct i915_wa_list *wal, i915_reg_t reg, u32 clr)
+wa_mcr_write_clr(struct i915_wa_list *wal, i915_mcr_reg_t reg, u32 clr)
 {
        wa_mcr_write_clr_set(wal, reg, clr, 0);
 }
 }
 
 static void
-wa_mcr_masked_en(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
+wa_mcr_masked_en(struct i915_wa_list *wal, i915_mcr_reg_t reg, u32 val)
 {
        wa_mcr_add(wal, reg, 0, _MASKED_BIT_ENABLE(val), val, true);
 }
 }
 
 static void
-wa_mcr_masked_dis(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
+wa_mcr_masked_dis(struct i915_wa_list *wal, i915_mcr_reg_t reg, u32 val)
 {
        wa_mcr_add(wal, reg, 0, _MASKED_BIT_DISABLE(val), val, true);
 }
 }
 
 static void
-wa_mcr_masked_field_set(struct i915_wa_list *wal, i915_reg_t reg,
+wa_mcr_masked_field_set(struct i915_wa_list *wal, i915_mcr_reg_t reg,
                        u32 mask, u32 val)
 {
        wa_mcr_add(wal, reg, 0, _MASKED_FIELD(mask, val), mask, true);
                /* open-coded rmw due to steering */
                if (wa->clr)
                        old = wa->is_mcr ?
-                               intel_gt_mcr_read_any_fw(gt, wa->reg) :
+                               intel_gt_mcr_read_any_fw(gt, wa->mcr_reg) :
                                intel_uncore_read_fw(uncore, wa->reg);
                val = (old & ~wa->clr) | wa->set;
                if (val != old || !wa->clr) {
                        if (wa->is_mcr)
-                               intel_gt_mcr_multicast_write_fw(gt, wa->reg, val);
+                               intel_gt_mcr_multicast_write_fw(gt, wa->mcr_reg, val);
                        else
                                intel_uncore_write_fw(uncore, wa->reg, val);
                }
 
                if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) {
                        u32 val = wa->is_mcr ?
-                               intel_gt_mcr_read_any_fw(gt, wa->reg) :
+                               intel_gt_mcr_read_any_fw(gt, wa->mcr_reg) :
                                intel_uncore_read_fw(uncore, wa->reg);
 
                        wa_verify(wa, val, wal->name, "application");
 
        for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
                ok &= wa_verify(wa, wa->is_mcr ?
-                               intel_gt_mcr_read_any_fw(gt, wa->reg) :
+                               intel_gt_mcr_read_any_fw(gt, wa->mcr_reg) :
                                intel_uncore_read_fw(uncore, wa->reg),
                                wal->name, from);
 
 }
 
 static void
-whitelist_mcr_reg_ext(struct i915_wa_list *wal, i915_reg_t reg, u32 flags)
+whitelist_mcr_reg_ext(struct i915_wa_list *wal, i915_mcr_reg_t reg, u32 flags)
 {
        struct i915_wa wa = {
-               .reg = reg,
+               .mcr_reg = reg,
                .is_mcr = 1,
        };
 
        if (GEM_DEBUG_WARN_ON(!is_nonpriv_flags_valid(flags)))
                return;
 
-       wa.reg.reg |= flags;
+       wa.mcr_reg.reg |= flags;
        _wa_add(wal, &wa);
 }
 
 }
 
 static void
-whitelist_mcr_reg(struct i915_wa_list *wal, i915_reg_t reg)
+whitelist_mcr_reg(struct i915_wa_list *wal, i915_mcr_reg_t reg)
 {
        whitelist_mcr_reg_ext(wal, reg, RING_FORCE_TO_NONPRIV_ACCESS_RW);
 }
 
 #include "i915_reg_defs.h"
 
 struct i915_wa {
-       i915_reg_t      reg;
+       union {
+               i915_reg_t      reg;
+               i915_mcr_reg_t  mcr_reg;
+       };
        u32             clr;
        u32             set;
        u32             read;
 
        /* Alas, we must pardon some whitelists. Mistakes already made */
        static const struct regmask pardon[] = {
                { GEN9_CTX_PREEMPT_REG, 9 },
-               { GEN8_L3SQCREG4, 9 },
+               { _MMIO(0xb118), 9 }, /* GEN8_L3SQCREG4 */
        };
 
        return find_reg(i915, reg, pardon, ARRAY_SIZE(pardon));
 
 
 static long __must_check guc_mcr_reg_add(struct intel_gt *gt,
                                         struct temp_regset *regset,
-                                        i915_reg_t reg, u32 flags)
+                                        i915_mcr_reg_t reg, u32 flags)
 {
        u8 group, inst;
 
 
 
 struct __ext_steer_reg {
        const char *name;
-       i915_reg_t reg;
+       i915_mcr_reg_t reg;
 };
 
 static const struct __ext_steer_reg xe_extregs[] = {
                           const struct __ext_steer_reg *extlist,
                           int slice_id, int subslice_id)
 {
-       ext->reg = extlist->reg;
+       ext->reg = _MMIO(i915_mmio_reg_offset(extlist->reg));
        ext->flags = FIELD_PREP(GUC_REGSET_STEERING_GROUP, slice_id);
        ext->flags |= FIELD_PREP(GUC_REGSET_STEERING_INSTANCE, subslice_id);
        ext->regname = extlist->name;
 
        _MMIO(0x770c),
        _MMIO(0x83a8),
        _MMIO(0xb110),
-       GEN8_L3SQCREG4,//_MMIO(0xb118)
+       _MMIO(0xb118),
        _MMIO(0xe100),
        _MMIO(0xe18c),
        _MMIO(0xe48c),
 
        {RCS0, GEN8_CS_CHICKEN1, 0xffff, true}, /* 0x2580 */
        {RCS0, COMMON_SLICE_CHICKEN2, 0xffff, true}, /* 0x7014 */
        {RCS0, GEN9_CS_DEBUG_MODE1, 0xffff, false}, /* 0x20ec */
-       {RCS0, GEN8_L3SQCREG4, 0, false}, /* 0xb118 */
-       {RCS0, GEN9_SCRATCH1, 0, false}, /* 0xb11c */
+       {RCS0, _MMIO(0xb118), 0, false}, /* GEN8_L3SQCREG4 */
+       {RCS0, _MMIO(0xb11c), 0, false}, /* GEN9_SCRATCH1 */
        {RCS0, GEN9_SCRATCH_LNCF1, 0, false}, /* 0xb008 */
        {RCS0, GEN7_HALF_SLICE_CHICKEN1, 0xffff, true}, /* 0xe100 */
-       {RCS0, HALF_SLICE_CHICKEN2, 0xffff, true}, /* 0xe180 */
-       {RCS0, GEN8_HALF_SLICE_CHICKEN3, 0xffff, true}, /* 0xe184 */
-       {RCS0, GEN9_HALF_SLICE_CHICKEN5, 0xffff, true}, /* 0xe188 */
-       {RCS0, GEN9_HALF_SLICE_CHICKEN7, 0xffff, true}, /* 0xe194 */
-       {RCS0, GEN8_ROW_CHICKEN, 0xffff, true}, /* 0xe4f0 */
+       {RCS0, _MMIO(0xe180), 0xffff, true}, /* HALF_SLICE_CHICKEN2 */
+       {RCS0, _MMIO(0xe184), 0xffff, true}, /* GEN8_HALF_SLICE_CHICKEN3 */
+       {RCS0, _MMIO(0xe188), 0xffff, true}, /* GEN9_HALF_SLICE_CHICKEN5 */
+       {RCS0, _MMIO(0xe194), 0xffff, true}, /* GEN9_HALF_SLICE_CHICKEN7 */
+       {RCS0, _MMIO(0xe4f0), 0xffff, true}, /* GEN8_ROW_CHICKEN */
        {RCS0, TRVATTL3PTRDW(0), 0, true}, /* 0x4de0 */
        {RCS0, TRVATTL3PTRDW(1), 0, true}, /* 0x4de4 */
        {RCS0, TRNULLDETCT, 0, true}, /* 0x4de8 */
 
 
 #define _MMIO(r) ((const i915_reg_t){ .reg = (r) })
 
-#define INVALID_MMIO_REG _MMIO(0)
-
-static __always_inline u32 i915_mmio_reg_offset(i915_reg_t reg)
-{
-       return reg.reg;
-}
+typedef struct {
+       u32 reg;
+} i915_mcr_reg_t;
 
-static inline bool i915_mmio_reg_equal(i915_reg_t a, i915_reg_t b)
-{
-       return i915_mmio_reg_offset(a) == i915_mmio_reg_offset(b);
-}
+#define INVALID_MMIO_REG _MMIO(0)
 
-static inline bool i915_mmio_reg_valid(i915_reg_t reg)
-{
-       return !i915_mmio_reg_equal(reg, INVALID_MMIO_REG);
-}
+/*
+ * These macros can be used on either i915_reg_t or i915_mcr_reg_t since they're
+ * simply operations on the register's offset and don't care about the MCR vs
+ * non-MCR nature of the register.
+ */
+#define i915_mmio_reg_offset(r) \
+       _Generic((r), i915_reg_t: (r).reg, i915_mcr_reg_t: (r).reg)
+#define i915_mmio_reg_equal(a, b) (i915_mmio_reg_offset(a) == i915_mmio_reg_offset(b))
+#define i915_mmio_reg_valid(r) (!i915_mmio_reg_equal(r, INVALID_MMIO_REG))
 
 #define VLV_DISPLAY_BASE               0x180000