drm/i915/xehp: Check for faults on primary GAM
author Matt Roper <matthew.d.roper@intel.com>
Fri, 14 Oct 2022 23:02:31 +0000 (16:02 -0700)
committer Matt Roper <matthew.d.roper@intel.com>
Mon, 17 Oct 2022 17:13:46 +0000 (10:13 -0700)
On Xe_HP the fault registers are now in a multicast register range.
However, as part of the GAM, these registers follow special rules and we
need only read from the "primary" GAM's instance to get the information
we need.  So a single intel_gt_mcr_read_any() (which will automatically
steer to the primary GAM) is sufficient; we don't need to loop over each
instance of the MCR register.

v2:
 - Update more instances of fault registers.  (Bala)

Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Reviewed-by: Balasubramani Vivekanandan <balasubramani.vivekanandan@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20221014230239.1023689-7-matthew.d.roper@intel.com
drivers/gpu/drm/i915/gt/intel_gt.c
drivers/gpu/drm/i915/i915_gpu_error.c

index 445e171940fa28efae19aff081c545e00b95b7c4..e14f159ad9fc8e5178a29a30c4a90c602f11a711 100644 (file)
@@ -270,7 +270,11 @@ intel_gt_clear_error_registers(struct intel_gt *gt,
                                   I915_MASTER_ERROR_INTERRUPT);
        }
 
-       if (GRAPHICS_VER(i915) >= 12) {
+       if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) {
+               intel_gt_mcr_multicast_rmw(gt, XEHP_RING_FAULT_REG,
+                                          RING_FAULT_VALID, 0);
+               intel_gt_mcr_read_any(gt, XEHP_RING_FAULT_REG);
+       } else if (GRAPHICS_VER(i915) >= 12) {
                rmw_clear(uncore, GEN12_RING_FAULT_REG, RING_FAULT_VALID);
                intel_uncore_posting_read(uncore, GEN12_RING_FAULT_REG);
        } else if (GRAPHICS_VER(i915) >= 8) {
@@ -308,17 +312,49 @@ static void gen6_check_faults(struct intel_gt *gt)
        }
 }
 
+static void xehp_check_faults(struct intel_gt *gt)
+{
+       u32 fault;
+
+       /*
+        * Report a pending fault, if any, via drm_dbg().  Although the fault
+        * register now lives in an MCR register range, the GAM registers are
+        * special: only the "primary" GAM instance needs to be read, rather
+        * than each instance individually.  intel_gt_mcr_read_any() will
+        * automatically steer toward the primary instance.
+        */
+       fault = intel_gt_mcr_read_any(gt, XEHP_RING_FAULT_REG);
+       if (fault & RING_FAULT_VALID) {
+               u32 fault_data0, fault_data1;
+               u64 fault_addr;
+
+               fault_data0 = intel_gt_mcr_read_any(gt, XEHP_FAULT_TLB_DATA0);
+               fault_data1 = intel_gt_mcr_read_any(gt, XEHP_FAULT_TLB_DATA1);
+
+               fault_addr = ((u64)(fault_data1 & FAULT_VA_HIGH_BITS) << 44) | /* high VA bits, from bit 44 up */
+                            ((u64)fault_data0 << 12); /* VA bits 43:12 */
+
+               drm_dbg(&gt->i915->drm, "Unexpected fault\n"
+                       "\tAddr: 0x%08x_%08x\n"
+                       "\tAddress space: %s\n"
+                       "\tEngine ID: %d\n"
+                       "\tSource ID: %d\n"
+                       "\tType: %d\n",
+                       upper_32_bits(fault_addr), lower_32_bits(fault_addr),
+                       fault_data1 & FAULT_GTT_SEL ? "GGTT" : "PPGTT",
+                       GEN8_RING_FAULT_ENGINE_ID(fault),
+                       RING_FAULT_SRCID(fault),
+                       RING_FAULT_FAULT_TYPE(fault));
+       }
+}
+
 static void gen8_check_faults(struct intel_gt *gt)
 {
        struct intel_uncore *uncore = gt->uncore;
        i915_reg_t fault_reg, fault_data0_reg, fault_data1_reg;
        u32 fault;
 
-       if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 50)) {
-               fault_reg = XEHP_RING_FAULT_REG;
-               fault_data0_reg = XEHP_FAULT_TLB_DATA0;
-               fault_data1_reg = XEHP_FAULT_TLB_DATA1;
-       } else if (GRAPHICS_VER(gt->i915) >= 12) {
+       if (GRAPHICS_VER(gt->i915) >= 12) {
                fault_reg = GEN12_RING_FAULT_REG;
                fault_data0_reg = GEN12_FAULT_TLB_DATA0;
                fault_data1_reg = GEN12_FAULT_TLB_DATA1;
@@ -358,7 +394,9 @@ void intel_gt_check_and_clear_faults(struct intel_gt *gt)
        struct drm_i915_private *i915 = gt->i915;
 
        /* From GEN8 onwards we only have one 'All Engine Fault Register' */
-       if (GRAPHICS_VER(i915) >= 8)
+       if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50))
+               xehp_check_faults(gt);
+       else if (GRAPHICS_VER(i915) >= 8)
                gen8_check_faults(gt);
        else if (GRAPHICS_VER(i915) >= 6)
                gen6_check_faults(gt);
index 9ea2fe34e7d307f6a40c979ea5a9860a02f9dbdc..f2d53edcd2ee072561548046d8224209718e74f0 100644 (file)
@@ -1221,7 +1221,10 @@ static void engine_record_registers(struct intel_engine_coredump *ee)
        if (GRAPHICS_VER(i915) >= 6) {
                ee->rc_psmi = ENGINE_READ(engine, RING_PSMI_CTL);
 
-               if (GRAPHICS_VER(i915) >= 12)
+               if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50))
+                       ee->fault_reg = intel_gt_mcr_read_any(engine->gt,
+                                                             XEHP_RING_FAULT_REG);
+               else if (GRAPHICS_VER(i915) >= 12)
                        ee->fault_reg = intel_uncore_read(engine->uncore,
                                                          GEN12_RING_FAULT_REG);
                else if (GRAPHICS_VER(i915) >= 8)
@@ -1820,7 +1823,12 @@ static void gt_record_global_regs(struct intel_gt_coredump *gt)
        if (GRAPHICS_VER(i915) == 7)
                gt->err_int = intel_uncore_read(uncore, GEN7_ERR_INT);
 
-       if (GRAPHICS_VER(i915) >= 12) {
+       if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) {
+               gt->fault_data0 = intel_gt_mcr_read_any((struct intel_gt *)gt->_gt,
+                                                       XEHP_FAULT_TLB_DATA0);
+               gt->fault_data1 = intel_gt_mcr_read_any((struct intel_gt *)gt->_gt,
+                                                       XEHP_FAULT_TLB_DATA1);
+       } else if (GRAPHICS_VER(i915) >= 12) {
                gt->fault_data0 = intel_uncore_read(uncore,
                                                    GEN12_FAULT_TLB_DATA0);
                gt->fault_data1 = intel_uncore_read(uncore,