struct amdgpu_ib *ib = &ibs[0];
struct dma_fence *tmp = NULL;
bool need_ctx_switch;
- unsigned int patch_offset = ~0;
struct amdgpu_vm *vm;
uint64_t fence_ctx;
uint32_t status = 0, alloc_size;
bool secure, init_shadow;
u64 shadow_va, csa_va, gds_va;
int vmid = AMDGPU_JOB_GET_VMID(job);
+ bool need_pipe_sync = false;
+ unsigned int cond_exec;
unsigned int i;
int r = 0;
- bool need_pipe_sync = false;
if (num_ibs == 0)
return -EINVAL;
init_shadow, vmid);
if (ring->funcs->init_cond_exec)
- patch_offset = amdgpu_ring_init_cond_exec(ring);
+ cond_exec = amdgpu_ring_init_cond_exec(ring,
+ ring->cond_exe_gpu_addr);
amdgpu_device_flush_hdp(adev, ring);
fence_flags | AMDGPU_FENCE_FLAG_64BIT);
}
- if (ring->funcs->emit_gfx_shadow) {
+ if (ring->funcs->emit_gfx_shadow && ring->funcs->init_cond_exec) {
amdgpu_ring_emit_gfx_shadow(ring, 0, 0, 0, false, 0);
-
- if (ring->funcs->init_cond_exec) {
- unsigned int ce_offset = ~0;
-
- ce_offset = amdgpu_ring_init_cond_exec(ring);
- if (ce_offset != ~0 && ring->funcs->patch_cond_exec)
- amdgpu_ring_patch_cond_exec(ring, ce_offset);
- }
+ amdgpu_ring_init_cond_exec(ring, ring->cond_exe_gpu_addr);
}
r = amdgpu_fence_emit(ring, f, job, fence_flags);
if (ring->funcs->insert_end)
ring->funcs->insert_end(ring);
- if (patch_offset != ~0 && ring->funcs->patch_cond_exec)
- amdgpu_ring_patch_cond_exec(ring, patch_offset);
+ amdgpu_ring_patch_cond_exec(ring, cond_exec);
ring->current_ctx = fence_ctx;
if (vm && ring->funcs->emit_switch_buffer)
void (*insert_end)(struct amdgpu_ring *ring);
/* pad the indirect buffer to the necessary number of dw */
void (*pad_ib)(struct amdgpu_ring *ring, struct amdgpu_ib *ib);
- unsigned (*init_cond_exec)(struct amdgpu_ring *ring);
- void (*patch_cond_exec)(struct amdgpu_ring *ring, unsigned offset);
+ unsigned (*init_cond_exec)(struct amdgpu_ring *ring, uint64_t addr);
/* note usage for clock and power gating */
void (*begin_use)(struct amdgpu_ring *ring);
void (*end_use)(struct amdgpu_ring *ring);
#define amdgpu_ring_emit_reg_write_reg_wait(r, d0, d1, v, m) (r)->funcs->emit_reg_write_reg_wait((r), (d0), (d1), (v), (m))
#define amdgpu_ring_emit_frame_cntl(r, b, s) (r)->funcs->emit_frame_cntl((r), (b), (s))
#define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib)))
-#define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r))
-#define amdgpu_ring_patch_cond_exec(r,o) (r)->funcs->patch_cond_exec((r),(o))
+#define amdgpu_ring_init_cond_exec(r, a) (r)->funcs->init_cond_exec((r), (a))
#define amdgpu_ring_preempt_ib(r) (r)->funcs->preempt_ib(r)
#define amdgpu_ring_patch_cntl(r, o) ((r)->funcs->patch_cntl((r), (o)))
#define amdgpu_ring_patch_ce(r, o) ((r)->funcs->patch_ce((r), (o)))
ring->count_dw -= count_dw;
}
+/**
+ * amdgpu_ring_patch_cond_exec - patch dw count of conditional execute
+ * @ring: amdgpu_ring structure
+ * @offset: offset returned by amdgpu_ring_init_cond_exec
+ *
+ * Calculate the dw count and patch it into a cond_exec command.
+ */
+static inline void amdgpu_ring_patch_cond_exec(struct amdgpu_ring *ring,
+ unsigned int offset)
+{
+ unsigned cur;
+
+ if (!ring->funcs->init_cond_exec)
+ return;
+
+ WARN_ON(offset > ring->buf_mask);
+ WARN_ON(ring->ring[offset] != 0);
+
+ cur = (ring->wptr - 1) & ring->buf_mask;
+ if (cur < offset)
+ cur += ring->ring_size >> 2;
+ ring->ring[offset] = cur - offset;
+}
+
#define amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset) \
(ring->is_mes_queue && ring->mes_ctx ? \
(ring->mes_ctx->meta_data_gpu_addr + offset) : 0)
bool vm_flush_needed = job->vm_needs_flush;
struct dma_fence *fence = NULL;
bool pasid_mapping_needed = false;
- unsigned patch_offset = 0;
+ unsigned int patch;
int r;
if (amdgpu_vmid_had_gpu_reset(adev, id)) {
amdgpu_ring_ib_begin(ring);
if (ring->funcs->init_cond_exec)
- patch_offset = amdgpu_ring_init_cond_exec(ring);
+ patch = amdgpu_ring_init_cond_exec(ring,
+ ring->cond_exe_gpu_addr);
if (need_pipe_sync)
amdgpu_ring_emit_pipeline_sync(ring);
}
dma_fence_put(fence);
- if (ring->funcs->patch_cond_exec)
- amdgpu_ring_patch_cond_exec(ring, patch_offset);
+ amdgpu_ring_patch_cond_exec(ring, patch);
/* the double SWITCH_BUFFER here *cannot* be skipped by COND_EXEC */
if (ring->funcs->emit_switch_buffer) {
amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
}
-static unsigned int vpe_ring_init_cond_exec(struct amdgpu_ring *ring)
+static unsigned int vpe_ring_init_cond_exec(struct amdgpu_ring *ring,
+ uint64_t addr)
{
unsigned int ret;
amdgpu_ring_write(ring, VPE_CMD_HEADER(VPE_CMD_OPCODE_COND_EXE, 0));
- amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
- amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
+ amdgpu_ring_write(ring, lower_32_bits(addr));
+ amdgpu_ring_write(ring, upper_32_bits(addr));
amdgpu_ring_write(ring, 1);
- ret = ring->wptr & ring->buf_mask;/* this is the offset we need patch later */
- amdgpu_ring_write(ring, 0x55aa55aa);/* insert dummy here and patch it later */
+ ret = ring->wptr & ring->buf_mask;
+ amdgpu_ring_write(ring, 0);
return ret;
}
-static void vpe_ring_patch_cond_exec(struct amdgpu_ring *ring, unsigned int offset)
-{
- unsigned int cur;
-
- WARN_ON_ONCE(offset > ring->buf_mask);
- WARN_ON_ONCE(ring->ring[offset] != 0x55aa55aa);
-
- cur = (ring->wptr - 1) & ring->buf_mask;
- if (cur > offset)
- ring->ring[offset] = cur - offset;
- else
- ring->ring[offset] = (ring->buf_mask + 1) - offset + cur;
-}
-
static int vpe_ring_preempt_ib(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
.test_ring = vpe_ring_test_ring,
.test_ib = vpe_ring_test_ib,
.init_cond_exec = vpe_ring_init_cond_exec,
- .patch_cond_exec = vpe_ring_patch_cond_exec,
.preempt_ib = vpe_ring_preempt_ib,
.begin_use = vpe_ring_begin_use,
.end_use = vpe_ring_end_use,
amdgpu_ring_write(ring, 0);
}
-static unsigned int gfx_v10_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
+static unsigned int gfx_v10_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring,
+ uint64_t addr)
{
unsigned int ret;
amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
- amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
- amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
- amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
+ amdgpu_ring_write(ring, lower_32_bits(addr));
+ amdgpu_ring_write(ring, upper_32_bits(addr));
+ /* discard following DWs if *cond_exec_gpu_addr==0 */
+ amdgpu_ring_write(ring, 0);
ret = ring->wptr & ring->buf_mask;
- amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
+ /* patch dummy value later */
+ amdgpu_ring_write(ring, 0);
return ret;
}
-static void gfx_v10_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned int offset)
-{
- unsigned int cur;
-
- BUG_ON(offset > ring->buf_mask);
- BUG_ON(ring->ring[offset] != 0x55aa55aa);
-
- cur = (ring->wptr - 1) & ring->buf_mask;
- if (likely(cur > offset))
- ring->ring[offset] = cur - offset;
- else
- ring->ring[offset] = (ring->buf_mask + 1) - offset + cur;
-}
-
static int gfx_v10_0_ring_preempt_ib(struct amdgpu_ring *ring)
{
int i, r = 0;
.emit_switch_buffer = gfx_v10_0_ring_emit_sb,
.emit_cntxcntl = gfx_v10_0_ring_emit_cntxcntl,
.init_cond_exec = gfx_v10_0_ring_emit_init_cond_exec,
- .patch_cond_exec = gfx_v10_0_ring_emit_patch_cond_exec,
.preempt_ib = gfx_v10_0_ring_preempt_ib,
.emit_frame_cntl = gfx_v10_0_ring_emit_frame_cntl,
.emit_wreg = gfx_v10_0_ring_emit_wreg,
PACKET3_SET_Q_PREEMPTION_MODE_INIT_SHADOW_MEM : 0);
}
-static unsigned gfx_v11_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
+static unsigned gfx_v11_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring,
+ uint64_t addr)
{
unsigned ret;
amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
- amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
- amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
- amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
+ amdgpu_ring_write(ring, lower_32_bits(addr));
+ amdgpu_ring_write(ring, upper_32_bits(addr));
+ /* discard following DWs if *cond_exec_gpu_addr==0 */
+ amdgpu_ring_write(ring, 0);
ret = ring->wptr & ring->buf_mask;
- amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
+ /* patch dummy value later */
+ amdgpu_ring_write(ring, 0);
return ret;
}
-static void gfx_v11_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
-{
- unsigned cur;
- BUG_ON(offset > ring->buf_mask);
- BUG_ON(ring->ring[offset] != 0x55aa55aa);
-
- cur = (ring->wptr - 1) & ring->buf_mask;
- if (likely(cur > offset))
- ring->ring[offset] = cur - offset;
- else
- ring->ring[offset] = (ring->buf_mask + 1) - offset + cur;
-}
-
static int gfx_v11_0_ring_preempt_ib(struct amdgpu_ring *ring)
{
int i, r = 0;
.emit_cntxcntl = gfx_v11_0_ring_emit_cntxcntl,
.emit_gfx_shadow = gfx_v11_0_ring_emit_gfx_shadow,
.init_cond_exec = gfx_v11_0_ring_emit_init_cond_exec,
- .patch_cond_exec = gfx_v11_0_ring_emit_patch_cond_exec,
.preempt_ib = gfx_v11_0_ring_preempt_ib,
.emit_frame_cntl = gfx_v11_0_ring_emit_frame_cntl,
.emit_wreg = gfx_v11_0_ring_emit_wreg,
amdgpu_ring_write(ring, 0);
}
-static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
+static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring,
+ uint64_t addr)
{
unsigned ret;
amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
- amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
- amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
- amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
+ amdgpu_ring_write(ring, lower_32_bits(addr));
+ amdgpu_ring_write(ring, upper_32_bits(addr));
+ /* discard following DWs if *cond_exec_gpu_addr==0 */
+ amdgpu_ring_write(ring, 0);
ret = ring->wptr & ring->buf_mask;
- amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
+ /* patch dummy value later */
+ amdgpu_ring_write(ring, 0);
return ret;
}
-static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
-{
- unsigned cur;
-
- BUG_ON(offset > ring->buf_mask);
- BUG_ON(ring->ring[offset] != 0x55aa55aa);
-
- cur = (ring->wptr & ring->buf_mask) - 1;
- if (likely(cur > offset))
- ring->ring[offset] = cur - offset;
- else
- ring->ring[offset] = (ring->ring_size >> 2) - offset + cur;
-}
-
static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
uint32_t reg_val_offs)
{
.emit_switch_buffer = gfx_v8_ring_emit_sb,
.emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
.init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec,
- .patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec,
.emit_wreg = gfx_v8_0_ring_emit_wreg,
.soft_recovery = gfx_v8_0_ring_soft_recovery,
.emit_mem_sync = gfx_v8_0_emit_mem_sync,
amdgpu_ring_write(ring, 0);
}
-static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
+static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring,
+ uint64_t addr)
{
unsigned ret;
amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
- amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
- amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
- amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
+ amdgpu_ring_write(ring, lower_32_bits(addr));
+ amdgpu_ring_write(ring, upper_32_bits(addr));
+ /* discard following DWs if *cond_exec_gpu_addr==0 */
+ amdgpu_ring_write(ring, 0);
ret = ring->wptr & ring->buf_mask;
- amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
+ /* patch dummy value later */
+ amdgpu_ring_write(ring, 0);
return ret;
}
-static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
-{
- unsigned cur;
- BUG_ON(offset > ring->buf_mask);
- BUG_ON(ring->ring[offset] != 0x55aa55aa);
-
- cur = (ring->wptr - 1) & ring->buf_mask;
- if (likely(cur > offset))
- ring->ring[offset] = cur - offset;
- else
- ring->ring[offset] = (ring->ring_size>>2) - offset + cur;
-}
-
static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
uint32_t reg_val_offs)
{
.emit_switch_buffer = gfx_v9_ring_emit_sb,
.emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
.init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
- .patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
.preempt_ib = gfx_v9_0_ring_preempt_ib,
.emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl,
.emit_wreg = gfx_v9_0_ring_emit_wreg,
.emit_switch_buffer = gfx_v9_ring_emit_sb,
.emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
.init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
- .patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
.emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl,
.emit_wreg = gfx_v9_0_ring_emit_wreg,
.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
return ret;
}
-static unsigned sdma_v5_0_ring_init_cond_exec(struct amdgpu_ring *ring)
+static unsigned sdma_v5_0_ring_init_cond_exec(struct amdgpu_ring *ring,
+ uint64_t addr)
{
unsigned ret;
amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_COND_EXE));
- amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
- amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
+ amdgpu_ring_write(ring, lower_32_bits(addr));
+ amdgpu_ring_write(ring, upper_32_bits(addr));
amdgpu_ring_write(ring, 1);
- ret = ring->wptr & ring->buf_mask;/* this is the offset we need patch later */
- amdgpu_ring_write(ring, 0x55aa55aa);/* insert dummy here and patch it later */
+ /* this is the offset we need patch later */
+ ret = ring->wptr & ring->buf_mask;
+ /* insert dummy here and patch it later */
+ amdgpu_ring_write(ring, 0);
return ret;
}
-static void sdma_v5_0_ring_patch_cond_exec(struct amdgpu_ring *ring,
- unsigned offset)
-{
- unsigned cur;
-
- BUG_ON(offset > ring->buf_mask);
- BUG_ON(ring->ring[offset] != 0x55aa55aa);
-
- cur = (ring->wptr - 1) & ring->buf_mask;
- if (cur > offset)
- ring->ring[offset] = cur - offset;
- else
- ring->ring[offset] = (ring->buf_mask + 1) - offset + cur;
-}
-
/**
* sdma_v5_0_ring_get_rptr - get the current read pointer
*
.emit_reg_wait = sdma_v5_0_ring_emit_reg_wait,
.emit_reg_write_reg_wait = sdma_v5_0_ring_emit_reg_write_reg_wait,
.init_cond_exec = sdma_v5_0_ring_init_cond_exec,
- .patch_cond_exec = sdma_v5_0_ring_patch_cond_exec,
.preempt_ib = sdma_v5_0_ring_preempt_ib,
};
return base + internal_offset;
}
-static unsigned sdma_v5_2_ring_init_cond_exec(struct amdgpu_ring *ring)
+static unsigned sdma_v5_2_ring_init_cond_exec(struct amdgpu_ring *ring,
+ uint64_t addr)
{
unsigned ret;
amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_COND_EXE));
- amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
- amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
+ amdgpu_ring_write(ring, lower_32_bits(addr));
+ amdgpu_ring_write(ring, upper_32_bits(addr));
amdgpu_ring_write(ring, 1);
- ret = ring->wptr & ring->buf_mask;/* this is the offset we need patch later */
- amdgpu_ring_write(ring, 0x55aa55aa);/* insert dummy here and patch it later */
+ /* this is the offset we need patch later */
+ ret = ring->wptr & ring->buf_mask;
+ /* insert dummy here and patch it later */
+ amdgpu_ring_write(ring, 0);
return ret;
}
-static void sdma_v5_2_ring_patch_cond_exec(struct amdgpu_ring *ring,
- unsigned offset)
-{
- unsigned cur;
-
- BUG_ON(offset > ring->buf_mask);
- BUG_ON(ring->ring[offset] != 0x55aa55aa);
-
- cur = (ring->wptr - 1) & ring->buf_mask;
- if (cur > offset)
- ring->ring[offset] = cur - offset;
- else
- ring->ring[offset] = (ring->buf_mask + 1) - offset + cur;
-}
-
/**
* sdma_v5_2_ring_get_rptr - get the current read pointer
*
.emit_reg_wait = sdma_v5_2_ring_emit_reg_wait,
.emit_reg_write_reg_wait = sdma_v5_2_ring_emit_reg_write_reg_wait,
.init_cond_exec = sdma_v5_2_ring_init_cond_exec,
- .patch_cond_exec = sdma_v5_2_ring_patch_cond_exec,
.preempt_ib = sdma_v5_2_ring_preempt_ib,
};
return base + internal_offset;
}
-static unsigned sdma_v6_0_ring_init_cond_exec(struct amdgpu_ring *ring)
+static unsigned sdma_v6_0_ring_init_cond_exec(struct amdgpu_ring *ring,
+ uint64_t addr)
{
unsigned ret;
amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_COND_EXE));
- amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
- amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
+ amdgpu_ring_write(ring, lower_32_bits(addr));
+ amdgpu_ring_write(ring, upper_32_bits(addr));
amdgpu_ring_write(ring, 1);
- ret = ring->wptr & ring->buf_mask;/* this is the offset we need patch later */
- amdgpu_ring_write(ring, 0x55aa55aa);/* insert dummy here and patch it later */
+ /* this is the offset we need patch later */
+ ret = ring->wptr & ring->buf_mask;
+ /* insert dummy here and patch it later */
+ amdgpu_ring_write(ring, 0);
return ret;
}
-static void sdma_v6_0_ring_patch_cond_exec(struct amdgpu_ring *ring,
- unsigned offset)
-{
- unsigned cur;
-
- BUG_ON(offset > ring->buf_mask);
- BUG_ON(ring->ring[offset] != 0x55aa55aa);
-
- cur = (ring->wptr - 1) & ring->buf_mask;
- if (cur > offset)
- ring->ring[offset] = cur - offset;
- else
- ring->ring[offset] = (ring->buf_mask + 1) - offset + cur;
-}
-
/**
* sdma_v6_0_ring_get_rptr - get the current read pointer
*
.emit_reg_wait = sdma_v6_0_ring_emit_reg_wait,
.emit_reg_write_reg_wait = sdma_v6_0_ring_emit_reg_write_reg_wait,
.init_cond_exec = sdma_v6_0_ring_init_cond_exec,
- .patch_cond_exec = sdma_v6_0_ring_patch_cond_exec,
.preempt_ib = sdma_v6_0_ring_preempt_ib,
};