drm/xe/pvc: Use fast copy engines as migrate engine on PVC
author Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com>
Thu, 17 Aug 2023 05:14:10 +0000 (22:14 -0700)
committer Rodrigo Vivi <rodrigo.vivi@intel.com>
Thu, 21 Dec 2023 16:40:28 +0000 (11:40 -0500)
Some hardware copy engine instances are faster than others on PVC.
Use a virtual engine made up of these plus the reserved instance as the
migrate engine on PVC. The idea is that if a fast instance is available,
it will be used, so the throughput of kernel copies, clears, and pagefault
servicing will be higher.

v2: Use OOB WA, use all copy engines if no WA is required

Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
drivers/gpu/drm/xe/Makefile
drivers/gpu/drm/xe/xe_migrate.c
drivers/gpu/drm/xe/xe_wa_oob.rules

index b470c2394476fb97d53afc4e163c3d8f8c2015a5..be93745e8a30cfe7ccd9f8ce55d43088f8122839 100644 (file)
@@ -40,7 +40,7 @@ quiet_cmd_wa_oob = GEN     $(notdir $(generated_oob))
 $(generated_oob) &: $(obj)/xe_gen_wa_oob $(srctree)/$(src)/xe_wa_oob.rules
        $(call cmd,wa_oob)
 
-$(obj)/xe_guc.o $(obj)/xe_wa.o $(obj)/xe_ring_ops.o $(obj)/xe_vm.o: $(generated_oob)
+$(obj)/xe_guc.o $(obj)/xe_migrate.o $(obj)/xe_ring_ops.o $(obj)/xe_vm.o $(obj)/xe_wa.o: $(generated_oob)
 
 # Please keep these build lists sorted!
 
index 6e0d4e2c497ab1c97d40c42778010063790974b6..799ad020927955d11144e8982f7ed730a0e5658d 100644 (file)
@@ -12,6 +12,7 @@
 #include <drm/ttm/ttm_tt.h>
 #include <drm/xe_drm.h>
 
+#include "generated/xe_wa_oob.h"
 #include "regs/xe_gpu_commands.h"
 #include "tests/xe_test.h"
 #include "xe_bb.h"
@@ -29,6 +30,7 @@
 #include "xe_sync.h"
 #include "xe_trace.h"
 #include "xe_vm.h"
+#include "xe_wa.h"
 
 /**
  * struct xe_migrate - migrate context.
@@ -298,6 +300,32 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
        return 0;
 }
 
+/*
+ * Return the logical-instance mask of the copy engines the migrate engine may
+ * run on. If workaround 16017236439 applies, odd-instance hardware copy
+ * engines are faster than even-instance ones, so the mask holds only the fast
+ * engines plus the reserved copy engine; otherwise it holds all copy engines.
+ * The reserved copy engine must be included to avoid deadlocks where migrate
+ * jobs servicing faults get stuck behind the job that faulted.
+ */
+static u32 xe_migrate_usm_logical_mask(struct xe_gt *gt)
+{
+       u32 logical_mask = 0;
+       struct xe_hw_engine *hwe;
+       enum xe_hw_engine_id id;
+
+       for_each_hw_engine(hwe, gt, id) {
+               if (hwe->class != XE_ENGINE_CLASS_COPY)
+                       continue;
+
+               if (!XE_WA(gt, 16017236439) ||
+                   xe_gt_is_usm_hwe(gt, hwe) || hwe->instance & 1)
+                       logical_mask |= BIT(hwe->logical_instance);
+       }
+
+       return logical_mask;
+}
+
 /**
  * xe_migrate_init() - Initialize a migrate context
  * @tile: Back-pointer to the tile we're initializing for.
@@ -338,12 +366,12 @@ struct xe_migrate *xe_migrate_init(struct xe_tile *tile)
                                                           XE_ENGINE_CLASS_COPY,
                                                           primary_gt->usm.reserved_bcs_instance,
                                                           false);
-               if (!hwe)
+               u32 logical_mask = xe_migrate_usm_logical_mask(primary_gt);
+
+               if (!hwe || !logical_mask)
                        return ERR_PTR(-EINVAL);
 
-               m->q = xe_exec_queue_create(xe, vm,
-                                           BIT(hwe->logical_instance), 1,
-                                           hwe,
+               m->q = xe_exec_queue_create(xe, vm, logical_mask, 1, hwe,
                                            EXEC_QUEUE_FLAG_KERNEL |
                                            EXEC_QUEUE_FLAG_PERMANENT);
        } else {
index ea90dcc933b59f3a065d415015b7a8bcafd02f66..599e67169dae65d810be77797d6033d887004d6f 100644 (file)
@@ -17,3 +17,4 @@
 1409600907     GRAPHICS_VERSION_RANGE(1200, 1250)
 14016763929    SUBPLATFORM(DG2, G10)
                SUBPLATFORM(DG2, G12)
+16017236439    PLATFORM(PVC)