drm/xe/xe2: Handle flat ccs move for igfx.
authorHimal Prasad Ghimiray <himal.prasad.ghimiray@intel.com>
Tue, 12 Dec 2023 18:25:31 +0000 (23:55 +0530)
committerRodrigo Vivi <rodrigo.vivi@intel.com>
Thu, 21 Dec 2023 16:46:15 +0000 (11:46 -0500)
- Clear flat ccs during user bo creation.
- copy ccs meta data between flat ccs and bo during eviction and
restore.
- Add a bool field ccs_cleared in bo, true means ccs region of bo is
already cleared.

v2:
 - Rebase.

v3:
 - Maintain order of xe_bo_move_notify for ttm_bo_type_sg.

v4:
 - xe_migrate_copy can be used to copy src to dst bo on igfx too.
Add a bool which handles only ccs metadata copy.

v5:
- on dgfx ccs should be cleared even if the bo is not compression enabled.

Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Signed-off-by: Himal Prasad Ghimiray <himal.prasad.ghimiray@intel.com>
Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
drivers/gpu/drm/xe/tests/xe_migrate.c
drivers/gpu/drm/xe/xe_bo.c
drivers/gpu/drm/xe/xe_bo_types.h
drivers/gpu/drm/xe/xe_migrate.c
drivers/gpu/drm/xe/xe_migrate.h

index d6c23441632a44597c7aaf048313cd33c4414983..7a32faa2f68880dcd4cb5b217ff9986153b990cb 100644 (file)
@@ -152,7 +152,7 @@ static void test_copy(struct xe_migrate *m, struct xe_bo *bo,
 
        expected = 0xc0c0c0c0c0c0c0c0;
        fence = xe_migrate_copy(m, remote, bo, remote->ttm.resource,
-                               bo->ttm.resource);
+                               bo->ttm.resource, false);
        if (!sanity_fence_failed(xe, fence, big ? "Copying big bo remote -> vram" :
                                 "Copying small bo remote -> vram", test)) {
                retval = xe_map_rd(xe, &bo->vmap, 0, u64);
@@ -169,7 +169,7 @@ static void test_copy(struct xe_migrate *m, struct xe_bo *bo,
        xe_map_memset(xe, &bo->vmap, 0, 0xc0, bo->size);
 
        fence = xe_migrate_copy(m, bo, remote, bo->ttm.resource,
-                               remote->ttm.resource);
+                               remote->ttm.resource, false);
        if (!sanity_fence_failed(xe, fence, big ? "Copying big bo vram -> remote" :
                                 "Copying small bo vram -> remote", test)) {
                retval = xe_map_rd(xe, &remote->vmap, 0, u64);
index c10aa5a63a701116f75590c0304df79db62ac359..7c0037aecff3a1c46f49f8d9df229db8c1f4a574 100644 (file)
@@ -627,10 +627,11 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
        bool move_lacks_source;
        bool tt_has_data;
        bool needs_clear;
+       bool handle_system_ccs = (!IS_DGFX(xe) && xe_bo_needs_ccs_pages(bo) &&
+                                 ttm && ttm_tt_is_populated(ttm)) ? true : false;
        int ret = 0;
-
-       /* Bo creation path, moving to system or TT. No clearing required. */
-       if (!old_mem && ttm) {
+       /* Bo creation path, moving to system or TT. */
+       if ((!old_mem && ttm) && !handle_system_ccs) {
                ttm_bo_move_null(ttm_bo, new_mem);
                return 0;
        }
@@ -645,14 +646,18 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
        tt_has_data = ttm && (ttm_tt_is_populated(ttm) ||
                              (ttm->page_flags & TTM_TT_FLAG_SWAPPED));
 
-       move_lacks_source = !mem_type_is_vram(old_mem_type) && !tt_has_data;
+       move_lacks_source = handle_system_ccs ? (!bo->ccs_cleared)  :
+                                               (!mem_type_is_vram(old_mem_type) && !tt_has_data);
 
        needs_clear = (ttm && ttm->page_flags & TTM_TT_FLAG_ZERO_ALLOC) ||
                (!ttm && ttm_bo->type == ttm_bo_type_device);
 
-       if ((move_lacks_source && !needs_clear) ||
-           (old_mem_type == XE_PL_SYSTEM &&
-            new_mem->mem_type == XE_PL_TT)) {
+       if ((move_lacks_source && !needs_clear)) {
+               ttm_bo_move_null(ttm_bo, new_mem);
+               goto out;
+       }
+
+       if (old_mem_type == XE_PL_SYSTEM && new_mem->mem_type == XE_PL_TT && !handle_system_ccs) {
                ttm_bo_move_null(ttm_bo, new_mem);
                goto out;
        }
@@ -683,8 +688,11 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
                        ret = timeout;
                        goto out;
                }
-               ttm_bo_move_null(ttm_bo, new_mem);
-               goto out;
+
+               if (!handle_system_ccs) {
+                       ttm_bo_move_null(ttm_bo, new_mem);
+                       goto out;
+               }
        }
 
        if (!move_lacks_source &&
@@ -705,6 +713,8 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
                migrate = mem_type_to_migrate(xe, new_mem->mem_type);
        else if (mem_type_is_vram(old_mem_type))
                migrate = mem_type_to_migrate(xe, old_mem_type);
+       else
+               migrate = xe->tiles[0].migrate;
 
        xe_assert(xe, migrate);
 
@@ -747,8 +757,8 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
                if (move_lacks_source)
                        fence = xe_migrate_clear(migrate, bo, new_mem);
                else
-                       fence = xe_migrate_copy(migrate,
-                                               bo, bo, old_mem, new_mem);
+                       fence = xe_migrate_copy(migrate, bo, bo, old_mem,
+                                               new_mem, handle_system_ccs);
                if (IS_ERR(fence)) {
                        ret = PTR_ERR(fence);
                        xe_device_mem_access_put(xe);
@@ -1234,6 +1244,7 @@ struct xe_bo *___xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo,
                        return bo;
        }
 
+       bo->ccs_cleared = false;
        bo->tile = tile;
        bo->size = size;
        bo->flags = flags;
index f71dbc5189586359589ce76015666131d69ae33b..64c2249a4e407995c2bd5057bb51a952c288971c 100644 (file)
@@ -79,6 +79,10 @@ struct xe_bo {
        struct llist_node freed;
        /** @created: Whether the bo has passed initial creation */
        bool created;
+
+       /** @ccs_cleared */
+       bool ccs_cleared;
+
        /**
         * @cpu_caching: CPU caching mode. Currently only used for userspace
         * objects.
index 48ada083d0b3fb150721a23e3966e80ed18e16ca..adf1dab5eba253297fb8b4ae4c2c5b5f15b2ec7a 100644 (file)
@@ -577,14 +577,14 @@ static u64 xe_migrate_batch_base(struct xe_migrate *m, bool usm)
 
 static u32 xe_migrate_ccs_copy(struct xe_migrate *m,
                               struct xe_bb *bb,
-                              u64 src_ofs, bool src_is_vram,
-                              u64 dst_ofs, bool dst_is_vram, u32 dst_size,
+                              u64 src_ofs, bool src_is_indirect,
+                              u64 dst_ofs, bool dst_is_indirect, u32 dst_size,
                               u64 ccs_ofs, bool copy_ccs)
 {
        struct xe_gt *gt = m->tile->primary_gt;
        u32 flush_flags = 0;
 
-       if (xe_device_has_flat_ccs(gt_to_xe(gt)) && !copy_ccs && dst_is_vram) {
+       if (xe_device_has_flat_ccs(gt_to_xe(gt)) && !copy_ccs && dst_is_indirect) {
                /*
                 * If the src is already in vram, then it should already
                 * have been cleared by us, or has been populated by the
@@ -593,28 +593,24 @@ static u32 xe_migrate_ccs_copy(struct xe_migrate *m,
                 * Otherwise if the bo doesn't have any CCS metadata attached,
                 * we still need to clear it for security reasons.
                 */
-               u64 ccs_src_ofs =  src_is_vram ? src_ofs : m->cleared_mem_ofs;
+               u64 ccs_src_ofs =  src_is_indirect ? src_ofs : m->cleared_mem_ofs;
 
                emit_copy_ccs(gt, bb,
                              dst_ofs, true,
-                             ccs_src_ofs, src_is_vram, dst_size);
+                             ccs_src_ofs, src_is_indirect, dst_size);
 
                flush_flags = MI_FLUSH_DW_CCS;
        } else if (copy_ccs) {
-               if (!src_is_vram)
+               if (!src_is_indirect)
                        src_ofs = ccs_ofs;
-               else if (!dst_is_vram)
+               else if (!dst_is_indirect)
                        dst_ofs = ccs_ofs;
 
-               /*
-                * At the moment, we don't support copying CCS metadata from
-                * system to system.
-                */
-               xe_gt_assert(gt, src_is_vram || dst_is_vram);
+               xe_gt_assert(gt, src_is_indirect || dst_is_indirect);
 
-               emit_copy_ccs(gt, bb, dst_ofs, dst_is_vram, src_ofs,
-                             src_is_vram, dst_size);
-               if (dst_is_vram)
+               emit_copy_ccs(gt, bb, dst_ofs, dst_is_indirect, src_ofs,
+                             src_is_indirect, dst_size);
+               if (dst_is_indirect)
                        flush_flags = MI_FLUSH_DW_CCS;
        }
 
@@ -630,6 +626,7 @@ static u32 xe_migrate_ccs_copy(struct xe_migrate *m,
  * the buffer object @dst is currently bound to.
  * @src: The source TTM resource.
  * @dst: The dst TTM resource.
+ * @copy_only_ccs: If true copy only CCS metadata
  *
  * Copies the contents of @src to @dst: On flat CCS devices,
  * the CCS metadata is copied as well if needed, or if not present,
@@ -643,7 +640,8 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
                                  struct xe_bo *src_bo,
                                  struct xe_bo *dst_bo,
                                  struct ttm_resource *src,
-                                 struct ttm_resource *dst)
+                                 struct ttm_resource *dst,
+                                 bool copy_only_ccs)
 {
        struct xe_gt *gt = m->tile->primary_gt;
        struct xe_device *xe = gt_to_xe(gt);
@@ -655,6 +653,8 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
        u64 src_L0, dst_L0;
        int pass = 0;
        int err;
+       bool src_is_pltt = src->mem_type == XE_PL_TT;
+       bool dst_is_pltt = dst->mem_type == XE_PL_TT;
        bool src_is_vram = mem_type_is_vram(src->mem_type);
        bool dst_is_vram = mem_type_is_vram(dst->mem_type);
        bool copy_ccs = xe_device_has_flat_ccs(xe) &&
@@ -719,8 +719,8 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
                }
 
                /* Add copy commands size here */
-               batch_size += EMIT_COPY_DW +
-                       (xe_device_has_flat_ccs(xe) ? EMIT_COPY_CCS_DW : 0);
+               batch_size += ((copy_only_ccs) ? 0 : EMIT_COPY_DW) +
+                       ((xe_device_has_flat_ccs(xe) ? EMIT_COPY_CCS_DW : 0));
 
                bb = xe_bb_new(gt, batch_size, usm);
                if (IS_ERR(bb)) {
@@ -746,10 +746,13 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
                bb->cs[bb->len++] = MI_BATCH_BUFFER_END;
                update_idx = bb->len;
 
-               emit_copy(gt, bb, src_L0_ofs, dst_L0_ofs, src_L0,
-                         XE_PAGE_SIZE);
-               flush_flags = xe_migrate_ccs_copy(m, bb, src_L0_ofs, src_is_vram,
-                                                 dst_L0_ofs, dst_is_vram,
+               if (!copy_only_ccs)
+                       emit_copy(gt, bb, src_L0_ofs, dst_L0_ofs, src_L0, XE_PAGE_SIZE);
+
+               flush_flags = xe_migrate_ccs_copy(m, bb, src_L0_ofs,
+                                                 IS_DGFX(xe) ? src_is_vram : src_is_pltt,
+                                                 dst_L0_ofs,
+                                                 IS_DGFX(xe) ? dst_is_vram : dst_is_pltt,
                                                  src_L0, ccs_ofs, copy_ccs);
 
                mutex_lock(&m->job_mutex);
@@ -922,6 +925,7 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m,
        bool clear_vram = mem_type_is_vram(dst->mem_type);
        struct xe_gt *gt = m->tile->primary_gt;
        struct xe_device *xe = gt_to_xe(gt);
+       bool clear_system_ccs = (xe_bo_needs_ccs_pages(bo) && !IS_DGFX(xe)) ? true : false;
        struct dma_fence *fence = NULL;
        u64 size = bo->size;
        struct xe_res_cursor src_it;
@@ -954,9 +958,10 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m,
                batch_size = 2 +
                        pte_update_size(m, clear_vram, src, &src_it,
                                        &clear_L0, &clear_L0_ofs, &clear_L0_pt,
-                                       emit_clear_cmd_len(gt), 0,
+                                       clear_system_ccs ? 0 : emit_clear_cmd_len(gt), 0,
                                        avail_pts);
-               if (xe_device_has_flat_ccs(xe) && clear_vram)
+
+               if (xe_device_has_flat_ccs(xe))
                        batch_size += EMIT_COPY_CCS_DW;
 
                /* Clear commands */
@@ -971,7 +976,6 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m,
                }
 
                size -= clear_L0;
-
                /* Preemption is enabled again by the ring ops. */
                if (!clear_vram) {
                        emit_pte(m, bb, clear_L0_pt, clear_vram, true, &src_it, clear_L0,
@@ -982,9 +986,10 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m,
                bb->cs[bb->len++] = MI_BATCH_BUFFER_END;
                update_idx = bb->len;
 
-               emit_clear(gt, bb, clear_L0_ofs, clear_L0, XE_PAGE_SIZE,
-                          clear_vram);
-               if (xe_device_has_flat_ccs(xe) && clear_vram) {
+               if (!clear_system_ccs)
+                       emit_clear(gt, bb, clear_L0_ofs, clear_L0, XE_PAGE_SIZE, clear_vram);
+
+               if (xe_device_has_flat_ccs(xe)) {
                        emit_copy_ccs(gt, bb, clear_L0_ofs, true,
                                      m->cleared_mem_ofs, false, clear_L0);
                        flush_flags = MI_FLUSH_DW_CCS;
@@ -1041,6 +1046,9 @@ err_sync:
                return ERR_PTR(err);
        }
 
+       if (clear_system_ccs)
+               bo->ccs_cleared = true;
+
        return fence;
 }
 
index c729241776adfbe522bb7355a44c1c81dd388708..951f19318ea48fe00e24839e8cf193553444196d 100644 (file)
@@ -85,7 +85,8 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
                                  struct xe_bo *src_bo,
                                  struct xe_bo *dst_bo,
                                  struct ttm_resource *src,
-                                 struct ttm_resource *dst);
+                                 struct ttm_resource *dst,
+                                 bool copy_only_ccs);
 
 struct dma_fence *xe_migrate_clear(struct xe_migrate *m,
                                   struct xe_bo *bo,