drm/i915: Reduce the number of objects subject to memcpy recover
author Thomas Hellström <thomas.hellstrom@linux.intel.com>
Wed, 22 Sep 2021 06:25:25 +0000 (08:25 +0200)
committer Thomas Hellström <thomas.hellstrom@linux.intel.com>
Fri, 24 Sep 2021 06:19:16 +0000 (08:19 +0200)
We really only need memcpy restore for objects that affect the
operability of the migrate context. That is, primarily the page-table
objects of the migrate VM.

Add an object flag, I915_BO_ALLOC_PM_EARLY, for objects that need early
restores using memcpy, and a way to assign LMEM page-table object flags
to be used by the vms.

Restore objects without this flag with the gpu blitter and only objects
carrying the flag using TTM memcpy.

Initially mark the migrate, gt, ggtt and vgpu vms to use this flag, and
defer to a later audit the question of which vms actually need it. Most
importantly, user-allocated vms with pinned page-table objects can be
restored using the blitter.

Performance-wise, memcpy restore is probably as fast as gpu restore, if not
faster, but using gpu restore will help tackle future restrictions in
mappable LMEM size.

v4:
- Don't mark the aliasing ppgtt page table flags for early resume, but
  rather the ggtt page table flags as intended. (Matthew Auld)
- The check for user buffer objects during early resume is pointless, since
  they are never marked I915_BO_ALLOC_PM_EARLY. (Matthew Auld)
v5:
- Mark GuC LMEM objects with I915_BO_ALLOC_PM_EARLY to have them restored
  before we fire up the migrate context.

Cc: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20210922062527.865433-8-thomas.hellstrom@linux.intel.com
19 files changed:
drivers/gpu/drm/i915/gem/i915_gem_context.c
drivers/gpu/drm/i915/gem/i915_gem_object_types.h
drivers/gpu/drm/i915/gem/i915_gem_pm.c
drivers/gpu/drm/i915/gem/i915_gem_ttm_pm.c
drivers/gpu/drm/i915/gem/selftests/huge_pages.c
drivers/gpu/drm/i915/gt/gen6_ppgtt.c
drivers/gpu/drm/i915/gt/gen8_ppgtt.c
drivers/gpu/drm/i915/gt/gen8_ppgtt.h
drivers/gpu/drm/i915/gt/intel_ggtt.c
drivers/gpu/drm/i915/gt/intel_gt.c
drivers/gpu/drm/i915/gt/intel_gtt.c
drivers/gpu/drm/i915/gt/intel_gtt.h
drivers/gpu/drm/i915/gt/intel_migrate.c
drivers/gpu/drm/i915/gt/intel_ppgtt.c
drivers/gpu/drm/i915/gt/selftest_hangcheck.c
drivers/gpu/drm/i915/gt/uc/intel_guc.c
drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
drivers/gpu/drm/i915/gvt/scheduler.c
drivers/gpu/drm/i915/selftests/i915_gem_gtt.c

index c2ab0e22db0a2d6e83916ce0958d5feff6297558..8208fd5b72c3d6cb6328f34a60121ed396b51f6b 100644 (file)
@@ -1287,7 +1287,7 @@ i915_gem_create_context(struct drm_i915_private *i915,
        } else if (HAS_FULL_PPGTT(i915)) {
                struct i915_ppgtt *ppgtt;
 
-               ppgtt = i915_ppgtt_create(&i915->gt);
+               ppgtt = i915_ppgtt_create(&i915->gt, 0);
                if (IS_ERR(ppgtt)) {
                        drm_dbg(&i915->drm, "PPGTT setup failed (%ld)\n",
                                PTR_ERR(ppgtt));
@@ -1465,7 +1465,7 @@ int i915_gem_vm_create_ioctl(struct drm_device *dev, void *data,
        if (args->flags)
                return -EINVAL;
 
-       ppgtt = i915_ppgtt_create(&i915->gt);
+       ppgtt = i915_ppgtt_create(&i915->gt, 0);
        if (IS_ERR(ppgtt))
                return PTR_ERR(ppgtt);
 
index 118691ce81d7df283bc06ae566d3867ebdabc76b..fa2ba9e2a4d0fc150654b9a50ac4b748d4cac538 100644 (file)
@@ -294,13 +294,16 @@ struct drm_i915_gem_object {
 #define I915_BO_ALLOC_USER        BIT(3)
 /* Object is allowed to lose its contents on suspend / resume, even if pinned */
 #define I915_BO_ALLOC_PM_VOLATILE BIT(4)
+/* Object needs to be restored early using memcpy during resume */
+#define I915_BO_ALLOC_PM_EARLY    BIT(5)
 #define I915_BO_ALLOC_FLAGS (I915_BO_ALLOC_CONTIGUOUS | \
                             I915_BO_ALLOC_VOLATILE | \
                             I915_BO_ALLOC_CPU_CLEAR | \
                             I915_BO_ALLOC_USER | \
-                            I915_BO_ALLOC_PM_VOLATILE)
-#define I915_BO_READONLY          BIT(5)
-#define I915_TILING_QUIRK_BIT     6 /* unknown swizzling; do not release! */
+                            I915_BO_ALLOC_PM_VOLATILE | \
+                            I915_BO_ALLOC_PM_EARLY)
+#define I915_BO_READONLY          BIT(6)
+#define I915_TILING_QUIRK_BIT     7 /* unknown swizzling; do not release! */
 
        /**
         * @mem_flags - Mutable placement-related flags
index 12b37b4c1192f9b0f7e448a793ff749e2a2de692..726b40e1fbb052f30ab9d7ab2b360bc0f086d9c7 100644 (file)
@@ -97,8 +97,12 @@ int i915_gem_backup_suspend(struct drm_i915_private *i915)
         * More objects may have become unpinned as requests were
         * retired. Now try to evict again. The gt may be wedged here
         * in which case we automatically fall back to memcpy.
+        * We allow also backing up pinned objects that have not been
+        * marked for early recover, and that may contain, for example,
+        * page-tables for the migrate context.
         */
-       ret = lmem_suspend(i915, I915_TTM_BACKUP_ALLOW_GPU);
+       ret = lmem_suspend(i915, I915_TTM_BACKUP_ALLOW_GPU |
+                          I915_TTM_BACKUP_PINNED);
        if (ret)
                goto out_recover;
 
index 03a00d193f40b8df013ea76189e0a660f7af3db4..3b6d14b5c604fd1d38873307f826490e4fbedbeb 100644 (file)
@@ -57,7 +57,8 @@ static int i915_ttm_backup(struct i915_gem_apply_to_region *apply,
        if (pm_apply->allow_gpu && i915_gem_object_evictable(obj))
                return ttm_bo_validate(bo, i915_ttm_sys_placement(), &ctx);
 
-       if (!pm_apply->backup_pinned)
+       if (!pm_apply->backup_pinned ||
+           (pm_apply->allow_gpu && (obj->flags & I915_BO_ALLOC_PM_EARLY)))
                return 0;
 
        if (obj->flags & I915_BO_ALLOC_PM_VOLATILE)
@@ -155,7 +156,7 @@ static int i915_ttm_restore(struct i915_gem_apply_to_region *apply,
        if (!backup)
                return 0;
 
-       if (!pm_apply->allow_gpu && (obj->flags & I915_BO_ALLOC_USER))
+       if (!pm_apply->allow_gpu && !(obj->flags & I915_BO_ALLOC_PM_EARLY))
                return 0;
 
        err = i915_gem_object_lock(backup, apply->ww);
index 0827634c842c4c3c0b8b68be42151e164a9c44a1..77d84a9e878916e8c05818c615d4f520186f739d 100644 (file)
@@ -1645,7 +1645,7 @@ int i915_gem_huge_page_mock_selftests(void)
        mkwrite_device_info(dev_priv)->ppgtt_type = INTEL_PPGTT_FULL;
        mkwrite_device_info(dev_priv)->ppgtt_size = 48;
 
-       ppgtt = i915_ppgtt_create(&dev_priv->gt);
+       ppgtt = i915_ppgtt_create(&dev_priv->gt, 0);
        if (IS_ERR(ppgtt)) {
                err = PTR_ERR(ppgtt);
                goto out_unlock;
index 1aee5e6b1b23f1663f5b78e7c384f6c23ecbc2ea..890191f286e382b46a4bad33ea7c982007b3efe8 100644 (file)
@@ -429,7 +429,7 @@ struct i915_ppgtt *gen6_ppgtt_create(struct intel_gt *gt)
        mutex_init(&ppgtt->flush);
        mutex_init(&ppgtt->pin_mutex);
 
-       ppgtt_init(&ppgtt->base, gt);
+       ppgtt_init(&ppgtt->base, gt, 0);
        ppgtt->base.vm.pd_shift = ilog2(SZ_4K * SZ_4K / sizeof(gen6_pte_t));
        ppgtt->base.vm.top = 1;
 
index 6a5af995f5b1631c31dc05029bee2f4260110299..037a9a6e4889abc1e3e0b9b6b1853e51a63bef3c 100644 (file)
@@ -753,7 +753,8 @@ err_pd:
  * space.
  *
  */
-struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt)
+struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt,
+                                    unsigned long lmem_pt_obj_flags)
 {
        struct i915_ppgtt *ppgtt;
        int err;
@@ -762,7 +763,7 @@ struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt)
        if (!ppgtt)
                return ERR_PTR(-ENOMEM);
 
-       ppgtt_init(ppgtt, gt);
+       ppgtt_init(ppgtt, gt, lmem_pt_obj_flags);
        ppgtt->vm.top = i915_vm_is_4lvl(&ppgtt->vm) ? 3 : 2;
        ppgtt->vm.pd_shift = ilog2(SZ_4K * SZ_4K / sizeof(gen8_pte_t));
 
index b9028c2ad3c7d1311ba30454696be6206bab9646..f541d19264b4e81908ee58bdcc416d8d8d178d1b 100644 (file)
@@ -12,7 +12,9 @@ struct i915_address_space;
 struct intel_gt;
 enum i915_cache_level;
 
-struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt);
+struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt,
+                                    unsigned long lmem_pt_obj_flags);
+
 u64 gen8_ggtt_pte_encode(dma_addr_t addr,
                         enum i915_cache_level level,
                         u32 flags);
index 8d71f67926f1ae1d634be79a717a88035e91f093..06576fc1310e9b469b57accc0a0ae4c2eb3ef124 100644 (file)
@@ -644,7 +644,7 @@ static int init_aliasing_ppgtt(struct i915_ggtt *ggtt)
        struct i915_ppgtt *ppgtt;
        int err;
 
-       ppgtt = i915_ppgtt_create(ggtt->vm.gt);
+       ppgtt = i915_ppgtt_create(ggtt->vm.gt, 0);
        if (IS_ERR(ppgtt))
                return PTR_ERR(ppgtt);
 
@@ -909,6 +909,7 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)
                size = gen8_get_total_gtt_size(snb_gmch_ctl);
 
        ggtt->vm.alloc_pt_dma = alloc_pt_dma;
+       ggtt->vm.lmem_pt_obj_flags = I915_BO_ALLOC_PM_EARLY;
 
        ggtt->vm.total = (size / sizeof(gen8_pte_t)) * I915_GTT_PAGE_SIZE;
        ggtt->vm.cleanup = gen6_gmch_remove;
index 5753c5943ed92c66594279bbfbc31f882cc61737..4037c37782253c952e2a411f7a312f1ca6b60f64 100644 (file)
@@ -481,7 +481,7 @@ static void intel_gt_fini_scratch(struct intel_gt *gt)
 static struct i915_address_space *kernel_vm(struct intel_gt *gt)
 {
        if (INTEL_PPGTT(gt->i915) > INTEL_PPGTT_ALIASING)
-               return &i915_ppgtt_create(gt)->vm;
+               return &i915_ppgtt_create(gt, I915_BO_ALLOC_PM_EARLY)->vm;
        else
                return i915_vm_get(&gt->ggtt->vm);
 }
index a0c2b952aa5754cc0669e94a0cbc0afa94385387..67d14afa66237a26f553c8d3265074fe63387812 100644 (file)
@@ -28,7 +28,8 @@ struct drm_i915_gem_object *alloc_pt_lmem(struct i915_address_space *vm, int sz)
         * used the passed in size for the page size, which should ensure it
         * also has the same alignment.
         */
-       obj = __i915_gem_object_create_lmem_with_ps(vm->i915, sz, sz, 0);
+       obj = __i915_gem_object_create_lmem_with_ps(vm->i915, sz, sz,
+                                                   vm->lmem_pt_obj_flags);
        /*
         * Ensure all paging structures for this vm share the same dma-resv
         * object underneath, with the idea that one object_lock() will lock
index 5b539bd7645d322c235d9565fe757b16eba0df62..bc675026335991e0ac86bb17c44fa1df5f409fdf 100644 (file)
@@ -260,6 +260,9 @@ struct i915_address_space {
        u8 pd_shift;
        u8 scratch_order;
 
+       /* Flags used when creating page-table objects for this vm */
+       unsigned long lmem_pt_obj_flags;
+
        struct drm_i915_gem_object *
                (*alloc_pt_dma)(struct i915_address_space *vm, int sz);
 
@@ -519,7 +522,8 @@ i915_page_dir_dma_addr(const struct i915_ppgtt *ppgtt, const unsigned int n)
        return __px_dma(pt ? px_base(pt) : ppgtt->vm.scratch[ppgtt->vm.top]);
 }
 
-void ppgtt_init(struct i915_ppgtt *ppgtt, struct intel_gt *gt);
+void ppgtt_init(struct i915_ppgtt *ppgtt, struct intel_gt *gt,
+               unsigned long lmem_pt_obj_flags);
 
 int i915_ggtt_probe_hw(struct drm_i915_private *i915);
 int i915_ggtt_init_hw(struct drm_i915_private *i915);
@@ -537,7 +541,8 @@ static inline bool i915_ggtt_has_aperture(const struct i915_ggtt *ggtt)
 
 int i915_ppgtt_init_hw(struct intel_gt *gt);
 
-struct i915_ppgtt *i915_ppgtt_create(struct intel_gt *gt);
+struct i915_ppgtt *i915_ppgtt_create(struct intel_gt *gt,
+                                    unsigned long lmem_pt_obj_flags);
 
 void i915_ggtt_suspend(struct i915_ggtt *gtt);
 void i915_ggtt_resume(struct i915_ggtt *ggtt);
index 1dac21aa7e5c34f49b8c1ea096efc09980e247f7..afb1cce9a3522d6121d6a547df0df1054d438f6f 100644 (file)
@@ -78,7 +78,7 @@ static struct i915_address_space *migrate_vm(struct intel_gt *gt)
         * TODO: Add support for huge LMEM PTEs
         */
 
-       vm = i915_ppgtt_create(gt);
+       vm = i915_ppgtt_create(gt, I915_BO_ALLOC_PM_EARLY);
        if (IS_ERR(vm))
                return ERR_CAST(vm);
 
index 886060f7e6fcbab786b0bf2a77103bb552cc3b45..4396bfd630d8945c1ccd6172cb808a82616b4602 100644 (file)
@@ -155,19 +155,20 @@ int i915_ppgtt_init_hw(struct intel_gt *gt)
 }
 
 static struct i915_ppgtt *
-__ppgtt_create(struct intel_gt *gt)
+__ppgtt_create(struct intel_gt *gt, unsigned long lmem_pt_obj_flags)
 {
        if (GRAPHICS_VER(gt->i915) < 8)
                return gen6_ppgtt_create(gt);
        else
-               return gen8_ppgtt_create(gt);
+               return gen8_ppgtt_create(gt, lmem_pt_obj_flags);
 }
 
-struct i915_ppgtt *i915_ppgtt_create(struct intel_gt *gt)
+struct i915_ppgtt *i915_ppgtt_create(struct intel_gt *gt,
+                                    unsigned long lmem_pt_obj_flags)
 {
        struct i915_ppgtt *ppgtt;
 
-       ppgtt = __ppgtt_create(gt);
+       ppgtt = __ppgtt_create(gt, lmem_pt_obj_flags);
        if (IS_ERR(ppgtt))
                return ppgtt;
 
@@ -298,7 +299,8 @@ int ppgtt_set_pages(struct i915_vma *vma)
        return 0;
 }
 
-void ppgtt_init(struct i915_ppgtt *ppgtt, struct intel_gt *gt)
+void ppgtt_init(struct i915_ppgtt *ppgtt, struct intel_gt *gt,
+               unsigned long lmem_pt_obj_flags)
 {
        struct drm_i915_private *i915 = gt->i915;
 
@@ -306,6 +308,7 @@ void ppgtt_init(struct i915_ppgtt *ppgtt, struct intel_gt *gt)
        ppgtt->vm.i915 = i915;
        ppgtt->vm.dma = i915->drm.dev;
        ppgtt->vm.total = BIT_ULL(INTEL_INFO(i915)->ppgtt_size);
+       ppgtt->vm.lmem_pt_obj_flags = lmem_pt_obj_flags;
 
        dma_resv_init(&ppgtt->vm._resv);
        i915_address_space_init(&ppgtt->vm, VM_CLASS_PPGTT);
index 7e6fdabac5999eb05c610ad063aff3a99c9f95a6..7e2d99dd012d816b7bd5801d6faecd7634169a79 100644 (file)
@@ -1596,7 +1596,7 @@ static int igt_reset_evict_ppgtt(void *arg)
        if (INTEL_PPGTT(gt->i915) < INTEL_PPGTT_FULL)
                return 0;
 
-       ppgtt = i915_ppgtt_create(gt);
+       ppgtt = i915_ppgtt_create(gt, 0);
        if (IS_ERR(ppgtt))
                return PTR_ERR(ppgtt);
 
index 8ffb689066f6bbe6e1428cc9347aea5d921a46ec..8f8182bf7c11154758b7dd82b0f996ca9d259a0f 100644 (file)
@@ -651,7 +651,8 @@ struct i915_vma *intel_guc_allocate_vma(struct intel_guc *guc, u32 size)
        if (HAS_LMEM(gt->i915))
                obj = i915_gem_object_create_lmem(gt->i915, size,
                                                  I915_BO_ALLOC_CPU_CLEAR |
-                                                 I915_BO_ALLOC_CONTIGUOUS);
+                                                 I915_BO_ALLOC_CONTIGUOUS |
+                                                 I915_BO_ALLOC_PM_EARLY);
        else
                obj = i915_gem_object_create_shmem(gt->i915, size);
 
index a685d563df724ab8a03ae3c9beec5dbe96eefbc2..3aa87be4f2e40207a8ed7d2be9c572058e3d0188 100644 (file)
@@ -372,10 +372,13 @@ int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw)
        if (uc_fw->type == INTEL_UC_FW_TYPE_GUC)
                uc_fw->private_data_size = css->private_data_size;
 
-       if (HAS_LMEM(i915))
+       if (HAS_LMEM(i915)) {
                obj = i915_gem_object_create_lmem_from_data(i915, fw->data, fw->size);
-       else
+               if (!IS_ERR(obj))
+                       obj->flags |= I915_BO_ALLOC_PM_EARLY;
+       } else {
                obj = i915_gem_object_create_shmem_from_data(i915, fw->data, fw->size);
+       }
 
        if (IS_ERR(obj)) {
                err = PTR_ERR(obj);
index b56a8e37a3cd6acc794f8e36bfb89337153b101a..0d18e13e34686ddc6d9c19b8284fb69827a9b58e 100644 (file)
@@ -1386,7 +1386,7 @@ int intel_vgpu_setup_submission(struct intel_vgpu *vgpu)
        enum intel_engine_id i;
        int ret;
 
-       ppgtt = i915_ppgtt_create(&i915->gt);
+       ppgtt = i915_ppgtt_create(&i915->gt, I915_BO_ALLOC_PM_EARLY);
        if (IS_ERR(ppgtt))
                return PTR_ERR(ppgtt);
 
index 2d60a5a5b06559634ed27be469a2c6e9e9e3b271..46f4236039a9e53e46f8f5be3af5a8ffc0c58da2 100644 (file)
@@ -155,7 +155,7 @@ static int igt_ppgtt_alloc(void *arg)
        if (!HAS_PPGTT(dev_priv))
                return 0;
 
-       ppgtt = i915_ppgtt_create(&dev_priv->gt);
+       ppgtt = i915_ppgtt_create(&dev_priv->gt, 0);
        if (IS_ERR(ppgtt))
                return PTR_ERR(ppgtt);
 
@@ -1053,7 +1053,7 @@ static int exercise_ppgtt(struct drm_i915_private *dev_priv,
        if (IS_ERR(file))
                return PTR_ERR(file);
 
-       ppgtt = i915_ppgtt_create(&dev_priv->gt);
+       ppgtt = i915_ppgtt_create(&dev_priv->gt, 0);
        if (IS_ERR(ppgtt)) {
                err = PTR_ERR(ppgtt);
                goto out_free;