drm/amdgpu: cleanup conditional execution
author: Christian König <christian.koenig@amd.com>
Tue, 15 Aug 2023 06:34:27 +0000 (08:34 +0200)
committer: Alex Deucher <alexander.deucher@amd.com>
Mon, 4 Mar 2024 20:59:08 +0000 (15:59 -0500)
First of all, calculating the number of dw to patch into a
conditional execution is not something HW generation specific.
This is just a standard ring buffer calculation. While at it,
also reduce the BUG_ON() to a WARN_ON().

Then, instead of a random bit pattern, use 0 as the default value
for the number of dw skipped; this way it is no longer mandatory
to patch the conditional execution.

And lastly, make the address to check a parameter of the
conditional execution instead of getting it from the ring.

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c
drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c

index 6aa3b1d845abe1e3a6efe32990ea0bd8763d5a41..8b512dc28df8384861be0385e8cbdb292406421e 100644 (file)
@@ -131,7 +131,6 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
        struct amdgpu_ib *ib = &ibs[0];
        struct dma_fence *tmp = NULL;
        bool need_ctx_switch;
-       unsigned int patch_offset = ~0;
        struct amdgpu_vm *vm;
        uint64_t fence_ctx;
        uint32_t status = 0, alloc_size;
@@ -139,10 +138,11 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
        bool secure, init_shadow;
        u64 shadow_va, csa_va, gds_va;
        int vmid = AMDGPU_JOB_GET_VMID(job);
+       bool need_pipe_sync = false;
+       unsigned int cond_exec;
 
        unsigned int i;
        int r = 0;
-       bool need_pipe_sync = false;
 
        if (num_ibs == 0)
                return -EINVAL;
@@ -228,7 +228,8 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
                                            init_shadow, vmid);
 
        if (ring->funcs->init_cond_exec)
-               patch_offset = amdgpu_ring_init_cond_exec(ring);
+               cond_exec = amdgpu_ring_init_cond_exec(ring,
+                                                      ring->cond_exe_gpu_addr);
 
        amdgpu_device_flush_hdp(adev, ring);
 
@@ -278,16 +279,9 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
                                       fence_flags | AMDGPU_FENCE_FLAG_64BIT);
        }
 
-       if (ring->funcs->emit_gfx_shadow) {
+       if (ring->funcs->emit_gfx_shadow && ring->funcs->init_cond_exec) {
                amdgpu_ring_emit_gfx_shadow(ring, 0, 0, 0, false, 0);
-
-               if (ring->funcs->init_cond_exec) {
-                       unsigned int ce_offset = ~0;
-
-                       ce_offset = amdgpu_ring_init_cond_exec(ring);
-                       if (ce_offset != ~0 && ring->funcs->patch_cond_exec)
-                               amdgpu_ring_patch_cond_exec(ring, ce_offset);
-               }
+               amdgpu_ring_init_cond_exec(ring, ring->cond_exe_gpu_addr);
        }
 
        r = amdgpu_fence_emit(ring, f, job, fence_flags);
@@ -302,8 +296,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
        if (ring->funcs->insert_end)
                ring->funcs->insert_end(ring);
 
-       if (patch_offset != ~0 && ring->funcs->patch_cond_exec)
-               amdgpu_ring_patch_cond_exec(ring, patch_offset);
+       amdgpu_ring_patch_cond_exec(ring, cond_exec);
 
        ring->current_ctx = fence_ctx;
        if (vm && ring->funcs->emit_switch_buffer)
index fe1a61eb6e4c0809c1bccd41bc89f32bcd8304f2..75633076790968cd6d2fd10b4efbe60a95ece1fa 100644 (file)
@@ -209,8 +209,7 @@ struct amdgpu_ring_funcs {
        void (*insert_end)(struct amdgpu_ring *ring);
        /* pad the indirect buffer to the necessary number of dw */
        void (*pad_ib)(struct amdgpu_ring *ring, struct amdgpu_ib *ib);
-       unsigned (*init_cond_exec)(struct amdgpu_ring *ring);
-       void (*patch_cond_exec)(struct amdgpu_ring *ring, unsigned offset);
+       unsigned (*init_cond_exec)(struct amdgpu_ring *ring, uint64_t addr);
        /* note usage for clock and power gating */
        void (*begin_use)(struct amdgpu_ring *ring);
        void (*end_use)(struct amdgpu_ring *ring);
@@ -327,8 +326,7 @@ struct amdgpu_ring {
 #define amdgpu_ring_emit_reg_write_reg_wait(r, d0, d1, v, m) (r)->funcs->emit_reg_write_reg_wait((r), (d0), (d1), (v), (m))
 #define amdgpu_ring_emit_frame_cntl(r, b, s) (r)->funcs->emit_frame_cntl((r), (b), (s))
 #define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib)))
-#define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r))
-#define amdgpu_ring_patch_cond_exec(r,o) (r)->funcs->patch_cond_exec((r),(o))
+#define amdgpu_ring_init_cond_exec(r, a) (r)->funcs->init_cond_exec((r), (a))
 #define amdgpu_ring_preempt_ib(r) (r)->funcs->preempt_ib(r)
 #define amdgpu_ring_patch_cntl(r, o) ((r)->funcs->patch_cntl((r), (o)))
 #define amdgpu_ring_patch_ce(r, o) ((r)->funcs->patch_ce((r), (o)))
@@ -411,6 +409,30 @@ static inline void amdgpu_ring_write_multiple(struct amdgpu_ring *ring,
        ring->count_dw -= count_dw;
 }
 
+/**
+ * amdgpu_ring_patch_cond_exec - patch dw count of conditional execute
+ * @ring: amdgpu_ring structure
+ * @offset: offset returned by amdgpu_ring_init_cond_exec
+ *
+ * Calculate the dw count and patch it into a cond_exec command.
+ */
+static inline void amdgpu_ring_patch_cond_exec(struct amdgpu_ring *ring,
+                                              unsigned int offset)
+{
+       unsigned cur;
+
+       if (!ring->funcs->init_cond_exec)
+               return;
+
+       WARN_ON(offset > ring->buf_mask);
+       WARN_ON(ring->ring[offset] != 0);
+
+       cur = (ring->wptr - 1) & ring->buf_mask;
+       if (cur < offset)
+               cur += ring->ring_size >> 2;
+       ring->ring[offset] = cur - offset;
+}
+
 #define amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset)                 \
        (ring->is_mes_queue && ring->mes_ctx ?                          \
         (ring->mes_ctx->meta_data_gpu_addr + offset) : 0)
index 18db0ddef362e9db55a92d804450f77960af1924..4299ce386322e7cea27232ae05a1222f62f5a850 100644 (file)
@@ -658,7 +658,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
        bool vm_flush_needed = job->vm_needs_flush;
        struct dma_fence *fence = NULL;
        bool pasid_mapping_needed = false;
-       unsigned patch_offset = 0;
+       unsigned int patch;
        int r;
 
        if (amdgpu_vmid_had_gpu_reset(adev, id)) {
@@ -685,7 +685,8 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
 
        amdgpu_ring_ib_begin(ring);
        if (ring->funcs->init_cond_exec)
-               patch_offset = amdgpu_ring_init_cond_exec(ring);
+               patch = amdgpu_ring_init_cond_exec(ring,
+                                                  ring->cond_exe_gpu_addr);
 
        if (need_pipe_sync)
                amdgpu_ring_emit_pipeline_sync(ring);
@@ -733,8 +734,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
        }
        dma_fence_put(fence);
 
-       if (ring->funcs->patch_cond_exec)
-               amdgpu_ring_patch_cond_exec(ring, patch_offset);
+       amdgpu_ring_patch_cond_exec(ring, patch);
 
        /* the double SWITCH_BUFFER here *cannot* be skipped by COND_EXEC */
        if (ring->funcs->emit_switch_buffer) {
index b9a15d51eb5c30e554d4e4f7c1397e3ce51996d9..8cedee059c8a2a286c566e0bd8e8676a23175eb4 100644 (file)
@@ -546,34 +546,21 @@ static void vpe_ring_emit_vm_flush(struct amdgpu_ring *ring, unsigned int vmid,
        amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
 }
 
-static unsigned int vpe_ring_init_cond_exec(struct amdgpu_ring *ring)
+static unsigned int vpe_ring_init_cond_exec(struct amdgpu_ring *ring,
+                                           uint64_t addr)
 {
        unsigned int ret;
 
        amdgpu_ring_write(ring, VPE_CMD_HEADER(VPE_CMD_OPCODE_COND_EXE, 0));
-       amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
-       amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
+       amdgpu_ring_write(ring, lower_32_bits(addr));
+       amdgpu_ring_write(ring, upper_32_bits(addr));
        amdgpu_ring_write(ring, 1);
-       ret = ring->wptr & ring->buf_mask;/* this is the offset we need patch later */
-       amdgpu_ring_write(ring, 0x55aa55aa);/* insert dummy here and patch it later */
+       ret = ring->wptr & ring->buf_mask;
+       amdgpu_ring_write(ring, 0);
 
        return ret;
 }
 
-static void vpe_ring_patch_cond_exec(struct amdgpu_ring *ring, unsigned int offset)
-{
-       unsigned int cur;
-
-       WARN_ON_ONCE(offset > ring->buf_mask);
-       WARN_ON_ONCE(ring->ring[offset] != 0x55aa55aa);
-
-       cur = (ring->wptr - 1) & ring->buf_mask;
-       if (cur > offset)
-               ring->ring[offset] = cur - offset;
-       else
-               ring->ring[offset] = (ring->buf_mask + 1) - offset + cur;
-}
-
 static int vpe_ring_preempt_ib(struct amdgpu_ring *ring)
 {
        struct amdgpu_device *adev = ring->adev;
@@ -864,7 +851,6 @@ static const struct amdgpu_ring_funcs vpe_ring_funcs = {
        .test_ring = vpe_ring_test_ring,
        .test_ib = vpe_ring_test_ib,
        .init_cond_exec = vpe_ring_init_cond_exec,
-       .patch_cond_exec = vpe_ring_patch_cond_exec,
        .preempt_ib = vpe_ring_preempt_ib,
        .begin_use = vpe_ring_begin_use,
        .end_use = vpe_ring_end_use,
index 691fa40e4e0157bcf682c265ca7ec8376a4e6c45..904b9ff5ead2f5d1823689e826b3dd01688851e9 100644 (file)
@@ -8542,34 +8542,23 @@ static void gfx_v10_0_ring_emit_cntxcntl(struct amdgpu_ring *ring,
        amdgpu_ring_write(ring, 0);
 }
 
-static unsigned int gfx_v10_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
+static unsigned int gfx_v10_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring,
+                                                      uint64_t addr)
 {
        unsigned int ret;
 
        amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
-       amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
-       amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
-       amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
+       amdgpu_ring_write(ring, lower_32_bits(addr));
+       amdgpu_ring_write(ring, upper_32_bits(addr));
+       /* discard following DWs if *cond_exec_gpu_addr==0 */
+       amdgpu_ring_write(ring, 0);
        ret = ring->wptr & ring->buf_mask;
-       amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
+       /* patch dummy value later */
+       amdgpu_ring_write(ring, 0);
 
        return ret;
 }
 
-static void gfx_v10_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned int offset)
-{
-       unsigned int cur;
-
-       BUG_ON(offset > ring->buf_mask);
-       BUG_ON(ring->ring[offset] != 0x55aa55aa);
-
-       cur = (ring->wptr - 1) & ring->buf_mask;
-       if (likely(cur > offset))
-               ring->ring[offset] = cur - offset;
-       else
-               ring->ring[offset] = (ring->buf_mask + 1) - offset + cur;
-}
-
 static int gfx_v10_0_ring_preempt_ib(struct amdgpu_ring *ring)
 {
        int i, r = 0;
@@ -9224,7 +9213,6 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = {
        .emit_switch_buffer = gfx_v10_0_ring_emit_sb,
        .emit_cntxcntl = gfx_v10_0_ring_emit_cntxcntl,
        .init_cond_exec = gfx_v10_0_ring_emit_init_cond_exec,
-       .patch_cond_exec = gfx_v10_0_ring_emit_patch_cond_exec,
        .preempt_ib = gfx_v10_0_ring_preempt_ib,
        .emit_frame_cntl = gfx_v10_0_ring_emit_frame_cntl,
        .emit_wreg = gfx_v10_0_ring_emit_wreg,
index 0d90d60a21d68169b725e2e5d6982f728de4e230..2ccbdee570cfd7236386ff02dc8561775a4db9f2 100644 (file)
@@ -5533,33 +5533,23 @@ static void gfx_v11_0_ring_emit_gfx_shadow(struct amdgpu_ring *ring,
                          PACKET3_SET_Q_PREEMPTION_MODE_INIT_SHADOW_MEM : 0);
 }
 
-static unsigned gfx_v11_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
+static unsigned gfx_v11_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring,
+                                                  uint64_t addr)
 {
        unsigned ret;
 
        amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
-       amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
-       amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
-       amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
+       amdgpu_ring_write(ring, lower_32_bits(addr));
+       amdgpu_ring_write(ring, upper_32_bits(addr));
+       /* discard following DWs if *cond_exec_gpu_addr==0 */
+       amdgpu_ring_write(ring, 0);
        ret = ring->wptr & ring->buf_mask;
-       amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
+       /* patch dummy value later */
+       amdgpu_ring_write(ring, 0);
 
        return ret;
 }
 
-static void gfx_v11_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
-{
-       unsigned cur;
-       BUG_ON(offset > ring->buf_mask);
-       BUG_ON(ring->ring[offset] != 0x55aa55aa);
-
-       cur = (ring->wptr - 1) & ring->buf_mask;
-       if (likely(cur > offset))
-               ring->ring[offset] = cur - offset;
-       else
-               ring->ring[offset] = (ring->buf_mask + 1) - offset + cur;
-}
-
 static int gfx_v11_0_ring_preempt_ib(struct amdgpu_ring *ring)
 {
        int i, r = 0;
@@ -6153,7 +6143,6 @@ static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_gfx = {
        .emit_cntxcntl = gfx_v11_0_ring_emit_cntxcntl,
        .emit_gfx_shadow = gfx_v11_0_ring_emit_gfx_shadow,
        .init_cond_exec = gfx_v11_0_ring_emit_init_cond_exec,
-       .patch_cond_exec = gfx_v11_0_ring_emit_patch_cond_exec,
        .preempt_ib = gfx_v11_0_ring_preempt_ib,
        .emit_frame_cntl = gfx_v11_0_ring_emit_frame_cntl,
        .emit_wreg = gfx_v11_0_ring_emit_wreg,
index b97ea62212b6c14f5e74aded473ac1c477759a06..202ddda57f987efdf2770dad687d0ab1b864fe97 100644 (file)
@@ -6326,33 +6326,22 @@ static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
        amdgpu_ring_write(ring, 0);
 }
 
-static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
+static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring,
+                                                 uint64_t addr)
 {
        unsigned ret;
 
        amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
-       amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
-       amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
-       amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
+       amdgpu_ring_write(ring, lower_32_bits(addr));
+       amdgpu_ring_write(ring, upper_32_bits(addr));
+       /* discard following DWs if *cond_exec_gpu_addr==0 */
+       amdgpu_ring_write(ring, 0);
        ret = ring->wptr & ring->buf_mask;
-       amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
+       /* patch dummy value later */
+       amdgpu_ring_write(ring, 0);
        return ret;
 }
 
-static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
-{
-       unsigned cur;
-
-       BUG_ON(offset > ring->buf_mask);
-       BUG_ON(ring->ring[offset] != 0x55aa55aa);
-
-       cur = (ring->wptr & ring->buf_mask) - 1;
-       if (likely(cur > offset))
-               ring->ring[offset] = cur - offset;
-       else
-               ring->ring[offset] = (ring->ring_size >> 2) - offset + cur;
-}
-
 static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
                                    uint32_t reg_val_offs)
 {
@@ -6932,7 +6921,6 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
        .emit_switch_buffer = gfx_v8_ring_emit_sb,
        .emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
        .init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec,
-       .patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec,
        .emit_wreg = gfx_v8_0_ring_emit_wreg,
        .soft_recovery = gfx_v8_0_ring_soft_recovery,
        .emit_mem_sync = gfx_v8_0_emit_mem_sync,
index 7669f82aa1dac15700eff04c9b393010c7952ea3..1753b903ad8a83b29348a787ef9f93321d550e33 100644 (file)
@@ -5610,31 +5610,21 @@ static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
        amdgpu_ring_write(ring, 0);
 }
 
-static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
+static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring,
+                                                 uint64_t addr)
 {
        unsigned ret;
        amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
-       amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
-       amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
-       amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
+       amdgpu_ring_write(ring, lower_32_bits(addr));
+       amdgpu_ring_write(ring, upper_32_bits(addr));
+       /* discard following DWs if *cond_exec_gpu_addr==0 */
+       amdgpu_ring_write(ring, 0);
        ret = ring->wptr & ring->buf_mask;
-       amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
+       /* patch dummy value later */
+       amdgpu_ring_write(ring, 0);
        return ret;
 }
 
-static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
-{
-       unsigned cur;
-       BUG_ON(offset > ring->buf_mask);
-       BUG_ON(ring->ring[offset] != 0x55aa55aa);
-
-       cur = (ring->wptr - 1) & ring->buf_mask;
-       if (likely(cur > offset))
-               ring->ring[offset] = cur - offset;
-       else
-               ring->ring[offset] = (ring->ring_size>>2) - offset + cur;
-}
-
 static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
                                    uint32_t reg_val_offs)
 {
@@ -6908,7 +6898,6 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
        .emit_switch_buffer = gfx_v9_ring_emit_sb,
        .emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
        .init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
-       .patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
        .preempt_ib = gfx_v9_0_ring_preempt_ib,
        .emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl,
        .emit_wreg = gfx_v9_0_ring_emit_wreg,
@@ -6963,7 +6952,6 @@ static const struct amdgpu_ring_funcs gfx_v9_0_sw_ring_funcs_gfx = {
        .emit_switch_buffer = gfx_v9_ring_emit_sb,
        .emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
        .init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
-       .patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
        .emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl,
        .emit_wreg = gfx_v9_0_ring_emit_wreg,
        .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
index 3c485e5a531a0e52d683ad7752111d81ccad1221..883e8a1b8a407d2b8cd1e38855948de80c0d2214 100644 (file)
@@ -249,35 +249,23 @@ static int sdma_v5_0_init_microcode(struct amdgpu_device *adev)
        return ret;
 }
 
-static unsigned sdma_v5_0_ring_init_cond_exec(struct amdgpu_ring *ring)
+static unsigned sdma_v5_0_ring_init_cond_exec(struct amdgpu_ring *ring,
+                                             uint64_t addr)
 {
        unsigned ret;
 
        amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_COND_EXE));
-       amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
-       amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
+       amdgpu_ring_write(ring, lower_32_bits(addr));
+       amdgpu_ring_write(ring, upper_32_bits(addr));
        amdgpu_ring_write(ring, 1);
-       ret = ring->wptr & ring->buf_mask;/* this is the offset we need patch later */
-       amdgpu_ring_write(ring, 0x55aa55aa);/* insert dummy here and patch it later */
+       /* this is the offset we need patch later */
+       ret = ring->wptr & ring->buf_mask;
+       /* insert dummy here and patch it later */
+       amdgpu_ring_write(ring, 0);
 
        return ret;
 }
 
-static void sdma_v5_0_ring_patch_cond_exec(struct amdgpu_ring *ring,
-                                          unsigned offset)
-{
-       unsigned cur;
-
-       BUG_ON(offset > ring->buf_mask);
-       BUG_ON(ring->ring[offset] != 0x55aa55aa);
-
-       cur = (ring->wptr - 1) & ring->buf_mask;
-       if (cur > offset)
-               ring->ring[offset] = cur - offset;
-       else
-               ring->ring[offset] = (ring->buf_mask + 1) - offset + cur;
-}
-
 /**
  * sdma_v5_0_ring_get_rptr - get the current read pointer
  *
@@ -1780,7 +1768,6 @@ static const struct amdgpu_ring_funcs sdma_v5_0_ring_funcs = {
        .emit_reg_wait = sdma_v5_0_ring_emit_reg_wait,
        .emit_reg_write_reg_wait = sdma_v5_0_ring_emit_reg_write_reg_wait,
        .init_cond_exec = sdma_v5_0_ring_init_cond_exec,
-       .patch_cond_exec = sdma_v5_0_ring_patch_cond_exec,
        .preempt_ib = sdma_v5_0_ring_preempt_ib,
 };
 
index 0058f3f7cf6e438b28062722687b52dc8fd67242..42f4bd250def622d490d355bac4883772def9c7a 100644 (file)
@@ -89,35 +89,23 @@ static u32 sdma_v5_2_get_reg_offset(struct amdgpu_device *adev, u32 instance, u3
        return base + internal_offset;
 }
 
-static unsigned sdma_v5_2_ring_init_cond_exec(struct amdgpu_ring *ring)
+static unsigned sdma_v5_2_ring_init_cond_exec(struct amdgpu_ring *ring,
+                                             uint64_t addr)
 {
        unsigned ret;
 
        amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_COND_EXE));
-       amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
-       amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
+       amdgpu_ring_write(ring, lower_32_bits(addr));
+       amdgpu_ring_write(ring, upper_32_bits(addr));
        amdgpu_ring_write(ring, 1);
-       ret = ring->wptr & ring->buf_mask;/* this is the offset we need patch later */
-       amdgpu_ring_write(ring, 0x55aa55aa);/* insert dummy here and patch it later */
+       /* this is the offset we need patch later */
+       ret = ring->wptr & ring->buf_mask;
+       /* insert dummy here and patch it later */
+       amdgpu_ring_write(ring, 0);
 
        return ret;
 }
 
-static void sdma_v5_2_ring_patch_cond_exec(struct amdgpu_ring *ring,
-                                          unsigned offset)
-{
-       unsigned cur;
-
-       BUG_ON(offset > ring->buf_mask);
-       BUG_ON(ring->ring[offset] != 0x55aa55aa);
-
-       cur = (ring->wptr - 1) & ring->buf_mask;
-       if (cur > offset)
-               ring->ring[offset] = cur - offset;
-       else
-               ring->ring[offset] = (ring->buf_mask + 1) - offset + cur;
-}
-
 /**
  * sdma_v5_2_ring_get_rptr - get the current read pointer
  *
@@ -1722,7 +1710,6 @@ static const struct amdgpu_ring_funcs sdma_v5_2_ring_funcs = {
        .emit_reg_wait = sdma_v5_2_ring_emit_reg_wait,
        .emit_reg_write_reg_wait = sdma_v5_2_ring_emit_reg_write_reg_wait,
        .init_cond_exec = sdma_v5_2_ring_init_cond_exec,
-       .patch_cond_exec = sdma_v5_2_ring_patch_cond_exec,
        .preempt_ib = sdma_v5_2_ring_preempt_ib,
 };
 
index 4874ded45653a80c24232be14962bbbb89d15408..361835a61f2e66b37045430dfab0e038f5e62283 100644 (file)
@@ -80,35 +80,23 @@ static u32 sdma_v6_0_get_reg_offset(struct amdgpu_device *adev, u32 instance, u3
        return base + internal_offset;
 }
 
-static unsigned sdma_v6_0_ring_init_cond_exec(struct amdgpu_ring *ring)
+static unsigned sdma_v6_0_ring_init_cond_exec(struct amdgpu_ring *ring,
+                                             uint64_t addr)
 {
        unsigned ret;
 
        amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_COND_EXE));
-       amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
-       amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
+       amdgpu_ring_write(ring, lower_32_bits(addr));
+       amdgpu_ring_write(ring, upper_32_bits(addr));
        amdgpu_ring_write(ring, 1);
-       ret = ring->wptr & ring->buf_mask;/* this is the offset we need patch later */
-       amdgpu_ring_write(ring, 0x55aa55aa);/* insert dummy here and patch it later */
+       /* this is the offset we need patch later */
+       ret = ring->wptr & ring->buf_mask;
+       /* insert dummy here and patch it later */
+       amdgpu_ring_write(ring, 0);
 
        return ret;
 }
 
-static void sdma_v6_0_ring_patch_cond_exec(struct amdgpu_ring *ring,
-                                          unsigned offset)
-{
-       unsigned cur;
-
-       BUG_ON(offset > ring->buf_mask);
-       BUG_ON(ring->ring[offset] != 0x55aa55aa);
-
-       cur = (ring->wptr - 1) & ring->buf_mask;
-       if (cur > offset)
-               ring->ring[offset] = cur - offset;
-       else
-               ring->ring[offset] = (ring->buf_mask + 1) - offset + cur;
-}
-
 /**
  * sdma_v6_0_ring_get_rptr - get the current read pointer
  *
@@ -1542,7 +1530,6 @@ static const struct amdgpu_ring_funcs sdma_v6_0_ring_funcs = {
        .emit_reg_wait = sdma_v6_0_ring_emit_reg_wait,
        .emit_reg_write_reg_wait = sdma_v6_0_ring_emit_reg_write_reg_wait,
        .init_cond_exec = sdma_v6_0_ring_init_cond_exec,
-       .patch_cond_exec = sdma_v6_0_ring_patch_cond_exec,
        .preempt_ib = sdma_v6_0_ring_preempt_ib,
 };