drm/xe/xe2: Update MOCS fields in blitter instructions
author Haridhar Kalvala <haridhar.kalvala@intel.com>
Fri, 29 Sep 2023 21:36:40 +0000 (14:36 -0700)
committer Rodrigo Vivi <rodrigo.vivi@intel.com>
Thu, 21 Dec 2023 16:42:08 +0000 (11:42 -0500)
Xe2 changes or adds MOCS bits in a few BLT instructions:
XY_CTRL_SURF_COPY_BLT, XY_FAST_COLOR_BLT, XY_FAST_COPY_BLT, and MEM_SET.
Modify the code to deal with the new locations. Unlike Xe1, the MOCS
field in those instructions is only the MOCS index and no longer the
Structure_MEMORY_OBJECT_CONTROL_STATE. The PXP bit is now
explicitly documented separately.

Bspec: 57567,57566,57565,57562
Cc: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Haridhar Kalvala <haridhar.kalvala@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://lore.kernel.org/r/20230929213640.3189912-5-lucas.demarchi@intel.com
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
drivers/gpu/drm/xe/regs/xe_gpu_commands.h
drivers/gpu/drm/xe/xe_migrate.c

index cc7b56763f1005159dda70262d37482a1c737c28..21738281bdd04913abaebda2c977c9afd4db026c 100644 (file)
@@ -45,6 +45,7 @@
 #define   CCS_SIZE_MASK                        0x3FF
 #define   CCS_SIZE_SHIFT               8
 #define   XY_CTRL_SURF_MOCS_MASK       GENMASK(31, 26)
+#define   XE2_XY_CTRL_SURF_MOCS_INDEX_MASK     GENMASK(31, 28)
 #define   NUM_CCS_BYTES_PER_BLOCK      256
 #define   NUM_BYTES_PER_CCS_BYTE       256
 #define   NUM_CCS_BLKS_PER_XFER                1024
 #define   XY_FAST_COLOR_BLT_DEPTH_32   (2 << 19)
 #define   XY_FAST_COLOR_BLT_DW         16
 #define   XY_FAST_COLOR_BLT_MOCS_MASK  GENMASK(27, 22)
+#define   XE2_XY_FAST_COLOR_BLT_MOCS_INDEX_MASK        GENMASK(27, 24)
 #define   XY_FAST_COLOR_BLT_MEM_TYPE_SHIFT 31
 
 #define XY_FAST_COPY_BLT_CMD           (2 << 29 | 0x42 << 22)
 #define   XY_FAST_COPY_BLT_DEPTH_32    (3<<24)
 #define   XY_FAST_COPY_BLT_D1_SRC_TILE4        REG_BIT(31)
 #define   XY_FAST_COPY_BLT_D1_DST_TILE4        REG_BIT(30)
+#define   XE2_XY_FAST_COPY_BLT_MOCS_INDEX_MASK GENMASK(23, 20)
 
 #define        PVC_MEM_SET_CMD         (2 << 29 | 0x5b << 22)
 #define   PVC_MEM_SET_CMD_LEN_DW       7
@@ -66,6 +69,7 @@
 #define   PVC_MEM_SET_DATA_FIELD       GENMASK(31, 24)
 /* Bspec lists field as [6:0], but index alone is from [6:1] */
 #define   PVC_MEM_SET_MOCS_INDEX_MASK  GENMASK(6, 1)
+#define   XE2_MEM_SET_MOCS_INDEX_MASK  GENMASK(6, 3)
 
 #define GFX_OP_PIPE_CONTROL(len)       ((0x3<<29)|(0x3<<27)|(0x2<<24)|((len)-2))
 
index 69488a0fada477e69db612c9973f18c33a63ae45..4b7210c793f5fde63e12eec759da15d28bb86b5b 100644 (file)
@@ -517,23 +517,28 @@ static void emit_copy_ccs(struct xe_gt *gt, struct xe_bb *bb,
                          u64 src_ofs, bool src_is_indirect,
                          u32 size)
 {
+       struct xe_device *xe = gt_to_xe(gt);
        u32 *cs = bb->cs + bb->len;
        u32 num_ccs_blks;
-       u32 mocs = gt->mocs.uc_index;
+       u32 mocs;
 
        num_ccs_blks = DIV_ROUND_UP(xe_device_ccs_bytes(gt_to_xe(gt), size),
                                    NUM_CCS_BYTES_PER_BLOCK);
        xe_gt_assert(gt, num_ccs_blks <= NUM_CCS_BLKS_PER_XFER);
+
+       if (GRAPHICS_VERx100(xe) >= 2000)
+               mocs = FIELD_PREP(XE2_XY_CTRL_SURF_MOCS_INDEX_MASK, gt->mocs.uc_index);
+       else
+               mocs = FIELD_PREP(XY_CTRL_SURF_MOCS_MASK, gt->mocs.uc_index);
+
        *cs++ = XY_CTRL_SURF_COPY_BLT |
                (src_is_indirect ? 0x0 : 0x1) << SRC_ACCESS_TYPE_SHIFT |
                (dst_is_indirect ? 0x0 : 0x1) << DST_ACCESS_TYPE_SHIFT |
                ((num_ccs_blks - 1) & CCS_SIZE_MASK) << CCS_SIZE_SHIFT;
        *cs++ = lower_32_bits(src_ofs);
-       *cs++ = upper_32_bits(src_ofs) |
-               FIELD_PREP(XY_CTRL_SURF_MOCS_MASK, mocs);
+       *cs++ = upper_32_bits(src_ofs) | mocs;
        *cs++ = lower_32_bits(dst_ofs);
-       *cs++ = upper_32_bits(dst_ofs) |
-               FIELD_PREP(XY_CTRL_SURF_MOCS_MASK, mocs);
+       *cs++ = upper_32_bits(dst_ofs) | mocs;
 
        bb->len = cs - bb->cs;
 }
@@ -544,24 +549,27 @@ static void emit_copy(struct xe_gt *gt, struct xe_bb *bb,
                      unsigned int pitch)
 {
        struct xe_device *xe = gt_to_xe(gt);
+       u32 mocs = 0;
+       u32 tile_y = 0;
 
        xe_gt_assert(gt, size / pitch <= S16_MAX);
        xe_gt_assert(gt, pitch / 4 <= S16_MAX);
        xe_gt_assert(gt, pitch <= U16_MAX);
 
-       bb->cs[bb->len++] = XY_FAST_COPY_BLT_CMD | (10 - 2);
+       if (GRAPHICS_VER(xe) >= 20)
+               mocs = FIELD_PREP(XE2_XY_FAST_COPY_BLT_MOCS_INDEX_MASK, gt->mocs.uc_index);
+
        if (GRAPHICS_VERx100(xe) >= 1250)
-               bb->cs[bb->len++] = XY_FAST_COPY_BLT_DEPTH_32 | pitch |
-                                   XY_FAST_COPY_BLT_D1_SRC_TILE4 |
-                                   XY_FAST_COPY_BLT_D1_DST_TILE4;
-       else
-               bb->cs[bb->len++] = XY_FAST_COPY_BLT_DEPTH_32 | pitch;
+               tile_y = XY_FAST_COPY_BLT_D1_SRC_TILE4 | XY_FAST_COPY_BLT_D1_DST_TILE4;
+
+       bb->cs[bb->len++] = XY_FAST_COPY_BLT_CMD | (10 - 2);
+       bb->cs[bb->len++] = XY_FAST_COPY_BLT_DEPTH_32 | pitch | tile_y | mocs;
        bb->cs[bb->len++] = 0;
        bb->cs[bb->len++] = (size / pitch) << 16 | pitch / 4;
        bb->cs[bb->len++] = lower_32_bits(dst_ofs);
        bb->cs[bb->len++] = upper_32_bits(dst_ofs);
        bb->cs[bb->len++] = 0;
-       bb->cs[bb->len++] = pitch;
+       bb->cs[bb->len++] = pitch | mocs;
        bb->cs[bb->len++] = lower_32_bits(src_ofs);
        bb->cs[bb->len++] = upper_32_bits(src_ofs);
 }
@@ -812,8 +820,8 @@ err_sync:
 static void emit_clear_link_copy(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs,
                                 u32 size, u32 pitch)
 {
+       struct xe_device *xe = gt_to_xe(gt);
        u32 *cs = bb->cs + bb->len;
-       u32 mocs = gt->mocs.uc_index;
        u32 len = PVC_MEM_SET_CMD_LEN_DW;
 
        *cs++ = PVC_MEM_SET_CMD | PVC_MEM_SET_MATRIX | (len - 2);
@@ -822,7 +830,10 @@ static void emit_clear_link_copy(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs
        *cs++ = pitch - 1;
        *cs++ = lower_32_bits(src_ofs);
        *cs++ = upper_32_bits(src_ofs);
-       *cs++ = FIELD_PREP(PVC_MEM_SET_MOCS_INDEX_MASK, mocs);
+       if (GRAPHICS_VERx100(xe) >= 2000)
+               *cs++ = FIELD_PREP(XE2_MEM_SET_MOCS_INDEX_MASK, gt->mocs.uc_index);
+       else
+               *cs++ = FIELD_PREP(PVC_MEM_SET_MOCS_INDEX_MASK, gt->mocs.uc_index);
 
        xe_gt_assert(gt, cs - bb->cs == len + bb->len);
 
@@ -835,15 +846,18 @@ static void emit_clear_main_copy(struct xe_gt *gt, struct xe_bb *bb,
        struct xe_device *xe = gt_to_xe(gt);
        u32 *cs = bb->cs + bb->len;
        u32 len = XY_FAST_COLOR_BLT_DW;
-       u32 mocs = gt->mocs.uc_index;
 
        if (GRAPHICS_VERx100(xe) < 1250)
                len = 11;
 
        *cs++ = XY_FAST_COLOR_BLT_CMD | XY_FAST_COLOR_BLT_DEPTH_32 |
                (len - 2);
-       *cs++ = FIELD_PREP(XY_FAST_COLOR_BLT_MOCS_MASK, mocs) |
-               (pitch - 1);
+       if (GRAPHICS_VERx100(xe) >= 2000)
+               *cs++ = FIELD_PREP(XE2_XY_FAST_COLOR_BLT_MOCS_INDEX_MASK, gt->mocs.uc_index) |
+                       (pitch - 1);
+       else
+               *cs++ = FIELD_PREP(XY_FAST_COLOR_BLT_MOCS_MASK, gt->mocs.uc_index) |
+                       (pitch - 1);
        *cs++ = 0;
        *cs++ = (size / pitch) << 16 | pitch / 4;
        *cs++ = lower_32_bits(src_ofs);