From: Christian König
Date: Thu, 9 Mar 2023 20:40:48 +0000 (-0500)
Subject: drm/amdgpu: add gfx shadow CS IOCTL support
X-Git-Url: http://git.maquefel.me/?a=commitdiff_plain;h=ac9287055ff16a092416c76a19006764e4c6a978;p=linux.git

drm/amdgpu: add gfx shadow CS IOCTL support

Add support for submitting the shadow update packet when submitting an
IB.  Needed for MCBP on GFX11.

v2: update API for CSA (Alex)
v3: fix ordering; SET_Q_PREEMPTION_MODE must come before COND_EXEC
    Add missing check for AMDGPU_CHUNK_ID_CP_GFX_SHADOW in
    amdgpu_cs_pass1()
    Only initialize shadow on first use (Alex)
v4: Pass parameters rather than job to new ring callback (Alex)
v5: squash in change to call SET_Q_PREEMPTION_MODE/COND_EXEC before
    RELEASE_MEM to complete the UMD's use of the shadow (Alex)

Reviewed-by: Christian König
Signed-off-by: Christian König
Signed-off-by: Alex Deucher
---

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index a8ec5ff41fc05..c3b3287dc29ea 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -288,6 +288,7 @@ static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p,
 	case AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES:
 	case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT:
 	case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL:
+	case AMDGPU_CHUNK_ID_CP_GFX_SHADOW:
 		break;
 
 	default:
@@ -578,6 +579,26 @@ static int amdgpu_cs_p2_syncobj_timeline_signal(struct amdgpu_cs_parser *p,
 	return 0;
 }
 
+static int amdgpu_cs_p2_shadow(struct amdgpu_cs_parser *p,
+			       struct amdgpu_cs_chunk *chunk)
+{
+	struct drm_amdgpu_cs_chunk_cp_gfx_shadow *shadow = chunk->kdata;
+	int i;
+
+	if (shadow->flags & ~AMDGPU_CS_CHUNK_CP_GFX_SHADOW_FLAGS_INIT_SHADOW)
+		return -EINVAL;
+
+	for (i = 0; i < p->gang_size; ++i) {
+		p->jobs[i]->shadow_va = shadow->shadow_va;
+		p->jobs[i]->csa_va = shadow->csa_va;
+		p->jobs[i]->gds_va = shadow->gds_va;
+		p->jobs[i]->init_shadow =
+			shadow->flags & AMDGPU_CS_CHUNK_CP_GFX_SHADOW_FLAGS_INIT_SHADOW;
+	}
+
+	return 0;
+}
+
 static int amdgpu_cs_pass2(struct amdgpu_cs_parser *p)
 {
 	unsigned int ce_preempt = 0, de_preempt = 0;
@@ -620,6 +641,11 @@ static int amdgpu_cs_pass2(struct amdgpu_cs_parser *p)
 			if (r)
 				return r;
 			break;
+		case AMDGPU_CHUNK_ID_CP_GFX_SHADOW:
+			r = amdgpu_cs_p2_shadow(p, chunk);
+			if (r)
+				return r;
+			break;
 		}
 	}
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index ef96ff2f42729..aebc0e5bddc6c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -136,7 +136,9 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
 	uint64_t fence_ctx;
 	uint32_t status = 0, alloc_size;
 	unsigned fence_flags = 0;
-	bool secure;
+	bool secure, init_shadow;
+	u64 shadow_va, csa_va, gds_va;
+	int vmid = AMDGPU_JOB_GET_VMID(job);
 	unsigned i;
 	int r = 0;
 
@@ -150,9 +152,17 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
 		vm = job->vm;
 		fence_ctx = job->base.s_fence ?
 			job->base.s_fence->scheduled.context : 0;
+		shadow_va = job->shadow_va;
+		csa_va = job->csa_va;
+		gds_va = job->gds_va;
+		init_shadow = job->init_shadow;
 	} else {
 		vm = NULL;
 		fence_ctx = 0;
+		shadow_va = 0;
+		csa_va = 0;
+		gds_va = 0;
+		init_shadow = false;
 	}
 
 	if (!ring->sched.ready && !ring->is_mes_queue) {
@@ -212,6 +222,11 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
 	}
 
 	amdgpu_ring_ib_begin(ring);
+
+	if (job && ring->funcs->emit_gfx_shadow)
+		amdgpu_ring_emit_gfx_shadow(ring, shadow_va, csa_va, gds_va,
+					    init_shadow, vmid);
+
 	if (job && ring->funcs->init_cond_exec)
 		patch_offset = amdgpu_ring_init_cond_exec(ring);
 
@@ -263,6 +278,18 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
 				       fence_flags | AMDGPU_FENCE_FLAG_64BIT);
 	}
 
+	if (ring->funcs->emit_gfx_shadow) {
+		amdgpu_ring_emit_gfx_shadow(ring, 0, 0, 0, false, 0);
+
+		if (ring->funcs->init_cond_exec) {
+			unsigned ce_offset = ~0;
+
+			ce_offset = amdgpu_ring_init_cond_exec(ring);
+			if (ce_offset != ~0 && ring->funcs->patch_cond_exec)
+				amdgpu_ring_patch_cond_exec(ring, ce_offset);
+		}
+	}
+
 	r = amdgpu_fence_emit(ring, f, job, fence_flags);
 	if (r) {
 		dev_err(adev->dev, "failed to emit fence (%d)\n", r);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
index 52f2e313ea17f..3f9804f956c9c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
@@ -67,6 +67,12 @@ struct amdgpu_job {
 	uint64_t		uf_addr;
 	uint64_t		uf_sequence;
 
+	/* virtual addresses for shadow/GDS/CSA */
+	uint64_t		shadow_va;
+	uint64_t		csa_va;
+	uint64_t		gds_va;
+	bool			init_shadow;
+
 	/* job_run_counter >= 1 means a resubmit job */
 	uint32_t		job_run_counter;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index 8eca6532ed196..2aa6cc1c4212c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -212,6 +212,8 @@ struct amdgpu_ring_funcs {
 	void (*end_use)(struct amdgpu_ring *ring);
 	void (*emit_switch_buffer) (struct amdgpu_ring *ring);
 	void (*emit_cntxcntl) (struct amdgpu_ring *ring, uint32_t flags);
+	void (*emit_gfx_shadow)(struct amdgpu_ring *ring, u64 shadow_va, u64 csa_va,
+				u64 gds_va, bool init_shadow, int vmid);
 	void (*emit_rreg)(struct amdgpu_ring *ring, uint32_t reg,
 			  uint32_t reg_val_offs);
 	void (*emit_wreg)(struct amdgpu_ring *ring, uint32_t reg, uint32_t val);
@@ -309,6 +311,7 @@ struct amdgpu_ring {
 #define amdgpu_ring_emit_hdp_flush(r) (r)->funcs->emit_hdp_flush((r))
 #define amdgpu_ring_emit_switch_buffer(r) (r)->funcs->emit_switch_buffer((r))
 #define amdgpu_ring_emit_cntxcntl(r, d) (r)->funcs->emit_cntxcntl((r), (d))
+#define amdgpu_ring_emit_gfx_shadow(r, s, c, g, i, v) ((r)->funcs->emit_gfx_shadow((r), (s), (c), (g), (i), (v)))
#define amdgpu_ring_emit_rreg(r, d, o) (r)->funcs->emit_rreg((r), (d), (o))
 #define amdgpu_ring_emit_wreg(r, d, v) (r)->funcs->emit_wreg((r), (d), (v))
 #define amdgpu_ring_emit_reg_wait(r, d, v, m) (r)->funcs->emit_reg_wait((r), (d), (v), (m))
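---

For context, a minimal sketch of how a userspace driver might build the
new chunk for the CS ioctl.  It assumes the UAPI definitions from the
companion interface patch (struct drm_amdgpu_cs_chunk_cp_gfx_shadow,
AMDGPU_CHUNK_ID_CP_GFX_SHADOW and
AMDGPU_CS_CHUNK_CP_GFX_SHADOW_FLAGS_INIT_SHADOW); the helper name and
the buffer addresses are hypothetical, not part of this patch:

#include <stdbool.h>
#include <stdint.h>
#include <string.h>
#include <drm/amdgpu_drm.h>

/*
 * Hypothetical UMD helper: point a submission at the shadow, CSA and
 * GDS backup buffers the UMD has already allocated and GPU-mapped
 * for MCBP.
 */
static void fill_shadow_chunk(struct drm_amdgpu_cs_chunk *chunk,
			      struct drm_amdgpu_cs_chunk_cp_gfx_shadow *shadow,
			      uint64_t shadow_va, uint64_t csa_va,
			      uint64_t gds_va, bool first_use)
{
	memset(shadow, 0, sizeof(*shadow));
	shadow->shadow_va = shadow_va;
	shadow->csa_va = csa_va;
	shadow->gds_va = gds_va;
	/* Have the CP initialize the shadow only on first use (v3 above). */
	shadow->flags = first_use ?
		AMDGPU_CS_CHUNK_CP_GFX_SHADOW_FLAGS_INIT_SHADOW : 0;

	chunk->chunk_id = AMDGPU_CHUNK_ID_CP_GFX_SHADOW;
	chunk->length_dw = sizeof(*shadow) / 4;
	chunk->chunk_data = (uint64_t)(uintptr_t)shadow;
}

On the kernel side, amdgpu_cs_p2_shadow() above copies these values
into every job of the gang, and amdgpu_ib_schedule() passes them to the
ring's emit_gfx_shadow hook (SET_Q_PREEMPTION_MODE on GFX11) before
COND_EXEC, matching the ordering fixed in v3.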