drm/amdgpu/vpe: add collaborate mode support for VPE
author Lang Yu <Lang.Yu@amd.com>
Mon, 16 Jan 2023 05:11:59 +0000 (13:11 +0800)
committer Alex Deucher <alexander.deucher@amd.com>
Thu, 7 Mar 2024 20:33:01 +0000 (15:33 -0500)
Under collaborate mode, multiple VPE instances share a ring buffer
and work together to finish a job.

Signed-off-by: Lang Yu <Lang.Yu@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c
drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.h

index 8cedee059c8a2a286c566e0bd8e8676a23175eb4..9d2415f26b7c2d0e43e9a59a515fc310b9289884 100644 (file)
@@ -457,6 +457,18 @@ static uint64_t vpe_get_csa_mc_addr(struct amdgpu_ring *ring, uint32_t vmid)
        return csa_mc_addr;
 }
 
+static void vpe_ring_emit_pred_exec(struct amdgpu_ring *ring, /* emits a two-dword PRED_EXE packet on ring; writes nothing outside collaborate mode */
+                                   uint32_t device_select, /* OR-ed into the header dword after a left shift by 16 */
+                                   uint32_t exec_count) /* only the low 13 bits (mask 0x1fff) reach the ring */
+{
+       if (!ring->adev->vpe.collaborate_mode) /* skip the packet entirely unless collaborate_mode is set */
+               return;
+
+       amdgpu_ring_write(ring, VPE_CMD_HEADER(VPE_CMD_OPCODE_PRED_EXE, 0) |
+                               (device_select << 16)); /* dword 0: PRED_EXE header combined with device_select */
+       amdgpu_ring_write(ring, exec_count & 0x1fff); /* dword 1: masked exec_count; presumably the number of following dwords being predicated - TODO confirm */
+}
+
 static void vpe_ring_emit_ib(struct amdgpu_ring *ring,
                             struct amdgpu_job *job,
                             struct amdgpu_ib *ib,
@@ -481,6 +493,8 @@ static void vpe_ring_emit_fence(struct amdgpu_ring *ring, uint64_t addr,
 {
        int i = 0;
 
+       vpe_ring_emit_pred_exec(ring, 0, 10);
+
        do {
                /* write the fence */
                amdgpu_ring_write(ring, VPE_CMD_HEADER(VPE_CMD_OPCODE_FENCE, 0));
@@ -505,6 +519,8 @@ static void vpe_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
        uint32_t seq = ring->fence_drv.sync_seq;
        uint64_t addr = ring->fence_drv.gpu_addr;
 
+       vpe_ring_emit_pred_exec(ring, 0, 6);
+
        /* wait for idle */
        amdgpu_ring_write(ring, VPE_CMD_HEADER(VPE_CMD_OPCODE_POLL_REGMEM,
                                VPE_POLL_REGMEM_SUBOP_REGMEM) |
@@ -520,6 +536,8 @@ static void vpe_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
 
 static void vpe_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val)
 {
+       vpe_ring_emit_pred_exec(ring, 0, 3);
+
        amdgpu_ring_write(ring, VPE_CMD_HEADER(VPE_CMD_OPCODE_REG_WRITE, 0));
        amdgpu_ring_write(ring, reg << 2);
        amdgpu_ring_write(ring, val);
@@ -528,6 +546,8 @@ static void vpe_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t
 static void vpe_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
                                   uint32_t val, uint32_t mask)
 {
+       vpe_ring_emit_pred_exec(ring, 0, 6);
+
        amdgpu_ring_write(ring, VPE_CMD_HEADER(VPE_CMD_OPCODE_POLL_REGMEM,
                                VPE_POLL_REGMEM_SUBOP_REGMEM) |
                                VPE_CMD_POLL_REGMEM_HEADER_FUNC(3) | /* equal */
index ee6db04cf27becfe96347eb5662a437f5d39f76f..231d86d0953e9105196b1cf56596072fba0b2b6d 100644 (file)
@@ -78,6 +78,7 @@ struct amdgpu_vpe {
        bool                            context_started;
 
        uint32_t                        num_instances;
+       bool                            collaborate_mode;
 };
 
 int amdgpu_vpe_psp_update_sram(struct amdgpu_device *adev);