drm/xe: Fix VM bind out-sync signaling ordering
author		Matthew Brost <matthew.brost@intel.com>
Thu, 14 Sep 2023 20:40:51 +0000 (13:40 -0700)
committer	Rodrigo Vivi <rodrigo.vivi@intel.com>
Thu, 21 Dec 2023 16:43:18 +0000 (11:43 -0500)
A case existed where the out-sync of a later VM bind operation could
signal before that of an earlier one if the later operation resulted in
a NOP (e.g. an unbind or prefetch on a VA range without any mappings).
This breaks the ordering rules; fix it by tracking the last fence on
each exec queue and signaling out-syncs of NOP operations with that
fence rather than with an always-signaled stub fence. This patch also
lays the groundwork for allowing users to pass in out-syncs with
num_binds == 0.
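The ordering invariant, in outline: each exec queue remembers the fence of
its most recent operation, and a NOP operation signals its out-syncs with
that fence instead of a pre-signaled stub. Below is a minimal userspace
sketch of that idea; toy_fence and toy_queue are hypothetical stand-ins for
dma_fence and xe_exec_queue, and last_fence_get()/last_fence_set() only
mirror the xe_exec_queue_last_fence_get()/_set() helpers added by this
patch, they are not the kernel API.

	/*
	 * Toy model of the "last fence" ordering fix. A NOP bind reuses the
	 * queue's last fence instead of an always-signaled stub, so its
	 * out-sync cannot signal before the previous bind completes.
	 */
	#include <stdbool.h>
	#include <stdio.h>

	struct toy_fence {
		bool signaled;
	};

	static struct toy_fence stub = { .signaled = true };

	struct toy_queue {
		struct toy_fence *last_fence;
	};

	/* Mirrors xe_exec_queue_last_fence_get(): stub only if nothing pending. */
	static struct toy_fence *last_fence_get(struct toy_queue *q)
	{
		if (q->last_fence && q->last_fence->signaled)
			q->last_fence = NULL;
		return q->last_fence ? q->last_fence : &stub;
	}

	/* Mirrors xe_exec_queue_last_fence_set(): remember the newest fence. */
	static void last_fence_set(struct toy_queue *q, struct toy_fence *fence)
	{
		q->last_fence = fence;
	}

	int main(void)
	{
		struct toy_queue q = { 0 };
		struct toy_fence bind_fence = { .signaled = false };

		/* Bind 1: real work, record its fence. */
		last_fence_set(&q, &bind_fence);

		/* Bind 2: NOP (e.g. unbind of an empty range). Its out-sync now
		 * tracks bind 1's fence instead of the pre-signaled stub. */
		struct toy_fence *out_sync = last_fence_get(&q);

		printf("out-sync signaled early: %s\n",
		       out_sync->signaled ? "yes (bug)" : "no (ordered)");
		return 0;
	}

With the stub, the NOP's out-sync would already be signaled while bind 1 is
still in flight; with the last fence, it can only signal once bind 1 does.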

Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
drivers/gpu/drm/xe/xe_exec_queue.c
drivers/gpu/drm/xe/xe_exec_queue.h
drivers/gpu/drm/xe/xe_exec_queue_types.h
drivers/gpu/drm/xe/xe_vm.c

index 9b373b9ea47245c02307baaa246ed6eb02c1e3fe..8e0620cb89e5d3549f608caa57ebd6d5411974bc 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue.c
+++ b/drivers/gpu/drm/xe/xe_exec_queue.c
@@ -156,6 +156,7 @@ void xe_exec_queue_destroy(struct kref *ref)
        struct xe_exec_queue *q = container_of(ref, struct xe_exec_queue, refcount);
        struct xe_exec_queue *eq, *next;
 
+       xe_exec_queue_last_fence_put_unlocked(q);
        if (!(q->flags & EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD)) {
                list_for_each_entry_safe(eq, next, &q->multi_gt_list,
                                         multi_gt_link)
@@ -916,3 +917,77 @@ out:
 
        return ret;
 }
+
+static void xe_exec_queue_last_fence_lockdep_assert(struct xe_exec_queue *q,
+                                                   struct xe_vm *vm)
+{
+       lockdep_assert_held_write(&vm->lock);
+}
+
+/**
+ * xe_exec_queue_last_fence_put() - Drop ref to last fence
+ * @q: The exec queue
+ * @vm: The VM the engine does a bind or exec for
+ */
+void xe_exec_queue_last_fence_put(struct xe_exec_queue *q, struct xe_vm *vm)
+{
+       xe_exec_queue_last_fence_lockdep_assert(q, vm);
+
+       if (q->last_fence) {
+               dma_fence_put(q->last_fence);
+               q->last_fence = NULL;
+       }
+}
+
+/**
+ * xe_exec_queue_last_fence_put_unlocked() - Drop ref to last fence unlocked
+ * @q: The exec queue
+ *
+ * Only safe to be called from xe_exec_queue_destroy().
+ */
+void xe_exec_queue_last_fence_put_unlocked(struct xe_exec_queue *q)
+{
+       if (q->last_fence) {
+               dma_fence_put(q->last_fence);
+               q->last_fence = NULL;
+       }
+}
+
+/**
+ * xe_exec_queue_last_fence_get() - Get last fence
+ * @q: The exec queue
+ * @vm: The VM the engine does a bind or exec for
+ *
+ * Get last fence, does not take a ref
+ *
+ * Returns: last fence if not signaled, dma fence stub if signaled
+ */
+struct dma_fence *xe_exec_queue_last_fence_get(struct xe_exec_queue *q,
+                                              struct xe_vm *vm)
+{
+       xe_exec_queue_last_fence_lockdep_assert(q, vm);
+
+       if (q->last_fence &&
+           test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &q->last_fence->flags))
+               xe_exec_queue_last_fence_put(q, vm);
+
+       return q->last_fence ? q->last_fence : dma_fence_get_stub();
+}
+
+/**
+ * xe_exec_queue_last_fence_set() - Set last fence
+ * @q: The exec queue
+ * @vm: The VM the engine does a bind or exec for
+ * @fence: The fence
+ *
+ * Set the last fence for the engine. Increases reference count for fence, when
+ * closing engine xe_exec_queue_last_fence_put should be called.
+ */
+void xe_exec_queue_last_fence_set(struct xe_exec_queue *q, struct xe_vm *vm,
+                                 struct dma_fence *fence)
+{
+       xe_exec_queue_last_fence_lockdep_assert(q, vm);
+
+       xe_exec_queue_last_fence_put(q, vm);
+       q->last_fence = dma_fence_get(fence);
+}
index 22499a2f522b4445373fed10572e4610e78ec03c..533da1b0c457a5c03432f267d5580c06eeeba91d 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue.h
+++ b/drivers/gpu/drm/xe/xe_exec_queue.h
@@ -61,4 +61,11 @@ int xe_exec_queue_get_property_ioctl(struct drm_device *dev, void *data,
                                     struct drm_file *file);
 enum xe_exec_queue_priority xe_exec_queue_device_get_max_priority(struct xe_device *xe);
 
+void xe_exec_queue_last_fence_put(struct xe_exec_queue *e, struct xe_vm *vm);
+void xe_exec_queue_last_fence_put_unlocked(struct xe_exec_queue *e);
+struct dma_fence *xe_exec_queue_last_fence_get(struct xe_exec_queue *e,
+                                              struct xe_vm *vm);
+void xe_exec_queue_last_fence_set(struct xe_exec_queue *e, struct xe_vm *vm,
+                                 struct dma_fence *fence);
+
 #endif
index 4e382304010eb4aa31ca7ac660b8c256f25f74ae..6826feb650f34f12acb552a6219321f04a4ae33c 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue_types.h
+++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h
@@ -65,6 +65,12 @@ struct xe_exec_queue {
        /** @fence_irq: fence IRQ used to signal job completion */
        struct xe_hw_fence_irq *fence_irq;
 
+       /**
+        * @last_fence: last fence on engine, protected by vm->lock in write
+        * mode if bind engine
+        */
+       struct dma_fence *last_fence;
+
 /* queue no longer allowed to submit */
 #define EXEC_QUEUE_FLAG_BANNED                 BIT(0)
 /* queue used for kernel submission only */
index 89df50f49e11fc1102a6a30591c37718fcb0b88b..4c8d77c4c7c0d06d506cc27f4207c68643a15b4e 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -1524,6 +1524,13 @@ void xe_vm_close_and_put(struct xe_vm *vm)
        if (xe_vm_in_compute_mode(vm))
                flush_work(&vm->preempt.rebind_work);
 
+       down_write(&vm->lock);
+       for_each_tile(tile, xe, id) {
+               if (vm->q[id])
+                       xe_exec_queue_last_fence_put(vm->q[id], vm);
+       }
+       up_write(&vm->lock);
+
        for_each_tile(tile, xe, id) {
                if (vm->q[id]) {
                        xe_exec_queue_kill(vm->q[id]);
@@ -1665,16 +1672,23 @@ u64 xe_vm_pdp4_descriptor(struct xe_vm *vm, struct xe_tile *tile)
                                         tile_to_xe(tile)->pat.idx[XE_CACHE_WB]);
 }
 
+static struct xe_exec_queue *
+to_wait_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
+{
+       return q ? q : vm->q[0];
+}
+
 static struct dma_fence *
 xe_vm_unbind_vma(struct xe_vma *vma, struct xe_exec_queue *q,
                 struct xe_sync_entry *syncs, u32 num_syncs,
                 bool first_op, bool last_op)
 {
+       struct xe_vm *vm = xe_vma_vm(vma);
+       struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, q);
        struct xe_tile *tile;
        struct dma_fence *fence = NULL;
        struct dma_fence **fences = NULL;
        struct dma_fence_array *cf = NULL;
-       struct xe_vm *vm = xe_vma_vm(vma);
        int cur_fence = 0, i;
        int number_tiles = hweight8(vma->tile_present);
        int err;
@@ -1727,7 +1741,8 @@ next:
                                             cf ? &cf->base : fence);
        }
 
-       return cf ? &cf->base : !fence ? dma_fence_get_stub() : fence;
+       return cf ? &cf->base : !fence ?
+               xe_exec_queue_last_fence_get(wait_exec_queue, vm) : fence;
 
 err_fences:
        if (fences) {
@@ -1826,6 +1841,7 @@ static int __xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma,
                        bool last_op)
 {
        struct dma_fence *fence;
+       struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, q);
 
        xe_vm_assert_held(vm);
 
@@ -1839,13 +1855,15 @@ static int __xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma,
 
                xe_assert(vm->xe, xe_vm_in_fault_mode(vm));
 
-               fence = dma_fence_get_stub();
+               fence = xe_exec_queue_last_fence_get(wait_exec_queue, vm);
                if (last_op) {
                        for (i = 0; i < num_syncs; i++)
                                xe_sync_entry_signal(&syncs[i], NULL, fence);
                }
        }
 
+       if (last_op)
+               xe_exec_queue_last_fence_set(wait_exec_queue, vm, fence);
        if (last_op && xe_vm_sync_mode(vm, q))
                dma_fence_wait(fence, true);
        dma_fence_put(fence);
@@ -1878,6 +1896,7 @@ static int xe_vm_unbind(struct xe_vm *vm, struct xe_vma *vma,
                        u32 num_syncs, bool first_op, bool last_op)
 {
        struct dma_fence *fence;
+       struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, q);
 
        xe_vm_assert_held(vm);
        xe_bo_assert_held(xe_vma_bo(vma));
@@ -1887,6 +1906,8 @@ static int xe_vm_unbind(struct xe_vm *vm, struct xe_vma *vma,
                return PTR_ERR(fence);
 
        xe_vma_destroy(vma, fence);
+       if (last_op)
+               xe_exec_queue_last_fence_set(wait_exec_queue, vm, fence);
        if (last_op && xe_vm_sync_mode(vm, q))
                dma_fence_wait(fence, true);
        dma_fence_put(fence);
@@ -2036,6 +2057,7 @@ static int xe_vm_prefetch(struct xe_vm *vm, struct xe_vma *vma,
                          struct xe_sync_entry *syncs, u32 num_syncs,
                          bool first_op, bool last_op)
 {
+       struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, q);
        int err;
 
        xe_assert(vm->xe, region <= ARRAY_SIZE(region_to_mem_type));
@@ -2054,9 +2076,12 @@ static int xe_vm_prefetch(struct xe_vm *vm, struct xe_vma *vma,
 
                /* Nothing to do, signal fences now */
                if (last_op) {
-                       for (i = 0; i < num_syncs; i++)
-                               xe_sync_entry_signal(&syncs[i], NULL,
-                                                    dma_fence_get_stub());
+                       for (i = 0; i < num_syncs; i++) {
+                               struct dma_fence *fence =
+                                       xe_exec_queue_last_fence_get(wait_exec_queue, vm);
+
+                               xe_sync_entry_signal(&syncs[i], NULL, fence);
+                       }
                }
 
                return 0;
@@ -3108,8 +3133,12 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 unwind_ops:
        vm_bind_ioctl_ops_unwind(vm, ops, args->num_binds);
 free_syncs:
-       for (i = 0; err == -ENODATA && i < num_syncs; i++)
-               xe_sync_entry_signal(&syncs[i], NULL, dma_fence_get_stub());
+       for (i = 0; err == -ENODATA && i < num_syncs; i++) {
+               struct dma_fence *fence =
+                       xe_exec_queue_last_fence_get(to_wait_exec_queue(vm, q), vm);
+
+               xe_sync_entry_signal(&syncs[i], NULL, fence);
+       }
        while (num_syncs--)
                xe_sync_entry_cleanup(&syncs[num_syncs]);