drm/xe: Add range based TLB invalidations
author    Matthew Brost <matthew.brost@intel.com>
          Wed, 25 Jan 2023 00:21:58 +0000 (16:21 -0800)
committer Rodrigo Vivi <rodrigo.vivi@intel.com>
          Tue, 19 Dec 2023 23:27:46 +0000 (18:27 -0500)
If the platform supports range based TLB invalidations, use them. Hide
these details in the xe_gt_tlb_invalidation layer.

Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com>
drivers/gpu/drm/xe/xe_gt_pagefault.c
drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h
drivers/gpu/drm/xe/xe_pt.c
drivers/gpu/drm/xe/xe_vm.c

drivers/gpu/drm/xe/xe_gt_pagefault.c
index 705093cb63d76786bcf2262a3a7cd28ef2a6ad1a..e1a5a3a70c9227b96b847d9e235c6a3dfd5dc3a5 100644
@@ -240,12 +240,7 @@ unlock_vm:
                goto retry_userptr;
 
        if (!ret) {
-               /*
-                * FIXME: Doing a full TLB invalidation for now, likely could
-                * defer TLB invalidate + fault response to a callback of fence
-                * too
-                */
-               ret = xe_gt_tlb_invalidation(gt, NULL);
+               ret = xe_gt_tlb_invalidation(gt, NULL, vma);
                if (ret >= 0)
                        ret = 0;
        }
drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
index 9e026fd0a45d9804a7eee43e5f52eb8ea170a436..0b37cd09a59a2df28ff17acd12acb2d57b2bb698 100644
@@ -92,16 +92,10 @@ int xe_gt_tlb_invalidation_init(struct xe_gt *gt)
 }
 
 static int send_tlb_invalidation(struct xe_guc *guc,
-                                struct xe_gt_tlb_invalidation_fence *fence)
+                                struct xe_gt_tlb_invalidation_fence *fence,
+                                u32 *action, int len)
 {
        struct xe_gt *gt = guc_to_gt(guc);
-       u32 action[] = {
-               XE_GUC_ACTION_TLB_INVALIDATION,
-               0,
-               XE_GUC_TLB_INVAL_FULL << XE_GUC_TLB_INVAL_TYPE_SHIFT |
-               XE_GUC_TLB_INVAL_MODE_HEAVY << XE_GUC_TLB_INVAL_MODE_SHIFT |
-               XE_GUC_TLB_INVAL_FLUSH_CACHE,
-       };
        int seqno;
        int ret;
        bool queue_work;
@@ -125,7 +119,7 @@ static int send_tlb_invalidation(struct xe_guc *guc,
                TLB_INVALIDATION_SEQNO_MAX;
        if (!gt->tlb_invalidation.seqno)
                gt->tlb_invalidation.seqno = 1;
-       ret = xe_guc_ct_send_locked(&guc->ct, action, ARRAY_SIZE(action),
+       ret = xe_guc_ct_send_locked(&guc->ct, action, len,
                                    G2H_LEN_DW_TLB_INVALIDATE, 1);
        if (!ret && fence) {
                fence->invalidation_time = ktime_get();
@@ -146,18 +140,83 @@ static int send_tlb_invalidation(struct xe_guc *guc,
  * @gt: graphics tile
 * @fence: invalidation fence which will be signaled on TLB invalidation
  * completion, can be NULL
+ * @vma: VMA to invalidate
  *
- * Issue a full TLB invalidation on the GT. Completion of TLB is asynchronous
- * and caller can either use the invalidation fence or seqno +
- * xe_gt_tlb_invalidation_wait to wait for completion.
+ * Issue a range based TLB invalidation if supported; if not, fall back to a
+ * full TLB invalidation. Completion of the TLB invalidation is asynchronous;
+ * the caller can either use the invalidation fence or seqno +
+ * xe_gt_tlb_invalidation_wait to wait for completion.
  *
  * Return: Seqno which can be passed to xe_gt_tlb_invalidation_wait on success,
  * negative error code on error.
  */
 int xe_gt_tlb_invalidation(struct xe_gt *gt,
-                          struct xe_gt_tlb_invalidation_fence *fence)
+                          struct xe_gt_tlb_invalidation_fence *fence,
+                          struct xe_vma *vma)
 {
-       return send_tlb_invalidation(&gt->uc.guc, fence);
+       struct xe_device *xe = gt_to_xe(gt);
+#define MAX_TLB_INVALIDATION_LEN       7
+       u32 action[MAX_TLB_INVALIDATION_LEN];
+       int len = 0;
+
+       XE_BUG_ON(!vma);
+
+       if (!xe->info.has_range_tlb_invalidation) {
+               action[len++] = XE_GUC_ACTION_TLB_INVALIDATION;
+               action[len++] = 0; /* seqno, replaced in send_tlb_invalidation */
+#define MAKE_INVAL_OP(type)    ((type << XE_GUC_TLB_INVAL_TYPE_SHIFT) | \
+               XE_GUC_TLB_INVAL_MODE_HEAVY << XE_GUC_TLB_INVAL_MODE_SHIFT | \
+               XE_GUC_TLB_INVAL_FLUSH_CACHE)
+               action[len++] = MAKE_INVAL_OP(XE_GUC_TLB_INVAL_FULL);
+       } else {
+               u64 start = vma->start;
+               u64 length = vma->end - vma->start + 1;
+               u64 align, end;
+
+               if (length < SZ_4K)
+                       length = SZ_4K;
+
+               /*
+                * We need to invalidate at a coarser granularity if the start
+                * address is not aligned to the length: find a length large
+                * enough that the resulting address mask covers the required
+                * range.
+                */
+               align = roundup_pow_of_two(length);
+               start = ALIGN_DOWN(vma->start, align);
+               end = ALIGN(vma->start + length, align);
+               length = align;
+               while (start + length < end) {
+                       length <<= 1;
+                       start = ALIGN_DOWN(vma->start, length);
+               }
+
+               /*
+                * The minimum invalidation size the hardware expects for a
+                * 2MB page is 16MB.
+                */
+               if (length >= SZ_2M) {
+                       length = max_t(u64, SZ_16M, length);
+                       start = ALIGN_DOWN(vma->start, length);
+               }
+
+               XE_BUG_ON(length < SZ_4K);
+               XE_BUG_ON(!is_power_of_2(length));
+               XE_BUG_ON(length & GENMASK(ilog2(SZ_16M) - 1, ilog2(SZ_2M) + 1));
+               XE_BUG_ON(!IS_ALIGNED(start, length));
+
+               action[len++] = XE_GUC_ACTION_TLB_INVALIDATION;
+               action[len++] = 0; /* seqno, replaced in send_tlb_invalidation */
+               action[len++] = MAKE_INVAL_OP(XE_GUC_TLB_INVAL_PAGE_SELECTIVE);
+               action[len++] = vma->vm->usm.asid;
+               action[len++] = lower_32_bits(start);
+               action[len++] = upper_32_bits(start);
+               action[len++] = ilog2(length) - ilog2(SZ_4K);
+       }
+
+       XE_BUG_ON(len > MAX_TLB_INVALIDATION_LEN);
+
+       return send_tlb_invalidation(&gt->uc.guc, fence, action, len);
 }
 
 static bool tlb_invalidation_seqno_past(struct xe_gt *gt, int seqno)
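The interesting part of the hunk above is how an arbitrary, possibly
misaligned VMA range is widened into a power-of-two sized, naturally aligned
region, since the PAGE_SELECTIVE descriptor encodes the target as a single
address plus an ilog2(length) - ilog2(SZ_4K) mask. Below is a minimal
userspace sketch of that computation; covering_region() and the main()
harness are illustrative only, with the kernel helpers replaced by local
equivalents:

#include <stdint.h>
#include <stdio.h>

#define SZ_4K  0x1000ULL
#define SZ_2M  0x200000ULL
#define SZ_16M 0x1000000ULL

/* Local stand-ins for the kernel's alignment helpers. */
static uint64_t align_down(uint64_t x, uint64_t a) { return x & ~(a - 1); }
static uint64_t align_up(uint64_t x, uint64_t a) { return (x + a - 1) & ~(a - 1); }

static uint64_t roundup_pow_of_two(uint64_t x)
{
	uint64_t r = 1;

	while (r < x)
		r <<= 1;
	return r;
}

/* Mirrors the patch: widen [vma_start, vma_start + length) until it fits
 * in one naturally aligned power-of-two region. */
static void covering_region(uint64_t vma_start, uint64_t length,
			    uint64_t *out_start, uint64_t *out_length)
{
	uint64_t align, start, end;

	if (length < SZ_4K)
		length = SZ_4K;

	align = roundup_pow_of_two(length);
	start = align_down(vma_start, align);
	end = align_up(vma_start + length, align);
	length = align;
	while (start + length < end) {
		length <<= 1;
		start = align_down(vma_start, length);
	}

	/* 2MB mappings bump the minimum invalidation size to 16MB. */
	if (length >= SZ_2M) {
		length = length > SZ_16M ? length : SZ_16M;
		start = align_down(vma_start, length);
	}

	*out_start = start;
	*out_length = length;
}

int main(void)
{
	uint64_t start, length;

	/* 12K at 0x7000 straddles a 16K boundary, so no single aligned
	 * 16K or 32K region covers it; the loop settles on the 64K
	 * region [0x0, 0x10000). */
	covering_region(0x7000, 0x3000, &start, &length);
	printf("start=0x%llx length=0x%llx\n",
	       (unsigned long long)start, (unsigned long long)length);
	return 0;
}

With start and length in hand, the hunk's final dwords are then just
lower_32_bits(start), upper_32_bits(start), and ilog2(length) - ilog2(SZ_4K).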
drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h
index 7e6fbf46f0e38e5da3aa01063f6eb5d96c06be05..b4c4f717bc8ab48184564ba27ec55a8351333804 100644
 
 struct xe_gt;
 struct xe_guc;
+struct xe_vma;
 
 int xe_gt_tlb_invalidation_init(struct xe_gt *gt);
 void xe_gt_tlb_invalidation_reset(struct xe_gt *gt);
 int xe_gt_tlb_invalidation(struct xe_gt *gt,
-                          struct xe_gt_tlb_invalidation_fence *fence);
+                          struct xe_gt_tlb_invalidation_fence *fence,
+                          struct xe_vma *vma);
 int xe_gt_tlb_invalidation_wait(struct xe_gt *gt, int seqno);
 int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len);
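As the kernel-doc in the previous file notes, a caller that passes a NULL
fence can instead wait on the returned seqno. A short sketch of that usage,
assuming the xe headers above are in scope; invalidate_vma_sync() is an
illustrative name, not part of the patch:

static int invalidate_vma_sync(struct xe_gt *gt, struct xe_vma *vma)
{
	int seqno;

	seqno = xe_gt_tlb_invalidation(gt, NULL, vma);
	if (seqno < 0)
		return seqno;

	/* Blocks until the G2H completion message for seqno arrives. */
	return xe_gt_tlb_invalidation_wait(gt, seqno);
}

The xe_vm.c hunk at the end of this patch follows the same seqno path,
collecting one seqno per GT.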
 
drivers/gpu/drm/xe/xe_pt.c
index b220d1d5cfe369632fb8cf820f406a5ee439cc51..cde75708d8439f5d92304e3f2936b34b529f47db 100644
@@ -1466,6 +1466,7 @@ static const struct xe_migrate_pt_update_ops userptr_unbind_ops = {
 struct invalidation_fence {
        struct xe_gt_tlb_invalidation_fence base;
        struct xe_gt *gt;
+       struct xe_vma *vma;
        struct dma_fence *fence;
        struct dma_fence_cb cb;
        struct work_struct work;
@@ -1505,12 +1506,13 @@ static void invalidation_fence_work_func(struct work_struct *w)
                container_of(w, struct invalidation_fence, work);
 
        trace_xe_gt_tlb_invalidation_fence_work_func(&ifence->base);
-       xe_gt_tlb_invalidation(ifence->gt, &ifence->base);
+       xe_gt_tlb_invalidation(ifence->gt, &ifence->base, ifence->vma);
 }
 
 static int invalidation_fence_init(struct xe_gt *gt,
                                   struct invalidation_fence *ifence,
-                                  struct dma_fence *fence)
+                                  struct dma_fence *fence,
+                                  struct xe_vma *vma)
 {
        int ret;
 
@@ -1528,6 +1530,7 @@ static int invalidation_fence_init(struct xe_gt *gt,
        dma_fence_get(&ifence->base.base);      /* Ref for caller */
        ifence->fence = fence;
        ifence->gt = gt;
+       ifence->vma = vma;
 
        INIT_WORK(&ifence->work, invalidation_fence_work_func);
        ret = dma_fence_add_callback(fence, &ifence->cb, invalidation_fence_cb);
@@ -1614,7 +1617,7 @@ __xe_pt_unbind_vma(struct xe_gt *gt, struct xe_vma *vma, struct xe_engine *e,
                int err;
 
                /* TLB invalidation must be done before signaling unbind */
-               err = invalidation_fence_init(gt, ifence, fence);
+               err = invalidation_fence_init(gt, ifence, fence, vma);
                if (err) {
                        dma_fence_put(fence);
                        kfree(ifence);
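Worth noting about this file's changes: the VMA cannot simply be passed down
at the call site, because the invalidation is not issued there. It is
deferred until the bind/unbind fence signals, which is why
invalidation_fence_init() stashes the VMA on the fence for
invalidation_fence_work_func() to consume later. A sketch of the resulting
chain, assuming (it is not shown in this hunk) that invalidation_fence_cb()
simply queues ifence->work:

/*
 *   bind/unbind fence signals
 *     -> invalidation_fence_cb()             (fence callback, atomic context)
 *       -> queue ifence->work
 *         -> invalidation_fence_work_func()  (process context)
 *           -> xe_gt_tlb_invalidation(ifence->gt, &ifence->base, ifence->vma)
 */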
drivers/gpu/drm/xe/xe_vm.c
index 83f8c8a186d80a98ce998483c47cb3b5e6b8338b..4fc8e24f93ce2afa052c0330243b25255584f79e 100644
@@ -3349,7 +3349,7 @@ int xe_vm_invalidate_vma(struct xe_vma *vma)
                if (xe_pt_zap_ptes(gt, vma)) {
                        gt_needs_invalidate |= BIT(id);
                        xe_device_wmb(xe);
-                       seqno[id] = xe_gt_tlb_invalidation(gt, NULL);
+                       seqno[id] = xe_gt_tlb_invalidation(gt, NULL, vma);
                        if (seqno[id] < 0)
                                return seqno[id];
                }