drm/amdkfd: use kiq to load the mqd of hiq queue for gfx v9 (v6)
author Aaron Liu <aaron.liu@amd.com>
Wed, 25 Dec 2019 07:50:51 +0000 (15:50 +0800)
committer Alex Deucher <alexander.deucher@amd.com>
Thu, 16 Jan 2020 18:34:50 +0000 (13:34 -0500)
The CP checks that the HIQ queue is configured and mapped with the KIQ ring;
otherwise, it is unable to read back the secure buffer while gfxoff is enabled,
even with trusted IP blocks.

v1 -> v2:
- Fix to remove surplus set_resources packets.
- Fill the whole configuration in MQD.
- Change the author as Aaron because he addressed the key point of this issue.
- Add kiq ring lock.

v2 -> v3:
- Release the lock in the error return path.
- Remove the programming that is only needed when the queue is unmapped.

v3 -> v4:
- Remove doorbell programming because it's used for restarting queue.
- Remove CP scheduler programming because map_queue packet will handle this.

v4 -> v5:
- Remove cp_hqd_active because the MEC ucode enables it when map_queues is used.
- Revise goto out_unlock.
- Use the correct doorbell offset for HIQ, as assigned by the kfd driver, in the
  packet.

v5 -> v6:
- Merge the Arcturus fix into this patch because it would otherwise oops on the
  Arcturus platform.

Reported-by: Lisa Saturday <Lisa.Saturday@amd.com>
Signed-off-by: Aaron Liu <aaron.liu@amd.com>
Signed-off-by: Huang Rui <ray.huang@amd.com>
Reviewed-and-Tested-by: Aaron Liu <aaron.liu@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
drivers/gpu/drm/amd/include/kgd_kfd_interface.h

index e3cd73cac353ab5f9f90b1a995a881b268ba7022..4bcc175a149d070cde9de9d116edb2f2459fb329 100644 (file)
@@ -305,6 +305,7 @@ const struct kfd2kgd_calls arcturus_kfd2kgd = {
        .set_pasid_vmid_mapping = kgd_gfx_v9_set_pasid_vmid_mapping,
        .init_interrupts = kgd_gfx_v9_init_interrupts,
        .hqd_load = kgd_gfx_v9_hqd_load,
+       .hiq_mqd_load = kgd_gfx_v9_hiq_mqd_load,
        .hqd_sdma_load = kgd_hqd_sdma_load,
        .hqd_dump = kgd_gfx_v9_hqd_dump,
        .hqd_sdma_dump = kgd_hqd_sdma_dump,
index 932ae85d97e2048a874c44abad6d2958c95a9cc6..8562afe5b761382c5492b230493c56998068bc1c 100644 (file)
@@ -103,13 +103,13 @@ static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id,
        lock_srbm(kgd, mec, pipe, queue_id, 0);
 }
 
-static uint32_t get_queue_mask(struct amdgpu_device *adev,
+static uint64_t get_queue_mask(struct amdgpu_device *adev,
                               uint32_t pipe_id, uint32_t queue_id)
 {
-       unsigned int bit = (pipe_id * adev->gfx.mec.num_queue_per_pipe +
-                           queue_id) & 31;
+       unsigned int bit = pipe_id * adev->gfx.mec.num_queue_per_pipe +
+                       queue_id;
 
-       return ((uint32_t)1) << bit;
+       return 1ull << bit;
 }
 
 static void release_queue(struct kgd_dev *kgd)
@@ -258,21 +258,6 @@ int kgd_gfx_v9_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
 
        acquire_queue(kgd, pipe_id, queue_id);
 
-       /* HIQ is set during driver init period with vmid set to 0*/
-       if (m->cp_hqd_vmid == 0) {
-               uint32_t value, mec, pipe;
-
-               mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
-               pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
-
-               pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n",
-                       mec, pipe, queue_id);
-               value = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS));
-               value = REG_SET_FIELD(value, RLC_CP_SCHEDULERS, scheduler1,
-                       ((mec << 5) | (pipe << 3) | queue_id | 0x80));
-               WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS), value);
-       }
-
        /* HQD registers extend from CP_MQD_BASE_ADDR to CP_HQD_EOP_WPTR_MEM. */
        mqd_hqd = &m->cp_mqd_base_addr_lo;
        hqd_base = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR);
@@ -323,7 +308,7 @@ int kgd_gfx_v9_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
                WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI),
                       upper_32_bits((uintptr_t)wptr));
                WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PQ_WPTR_POLL_CNTL1),
-                      get_queue_mask(adev, pipe_id, queue_id));
+                      (uint32_t)get_queue_mask(adev, pipe_id, queue_id));
        }
 
        /* Start the EOP fetcher */
@@ -339,6 +324,59 @@ int kgd_gfx_v9_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
        return 0;
 }
 
+int kgd_gfx_v9_hiq_mqd_load(struct kgd_dev *kgd, void *mqd,
+                           uint32_t pipe_id, uint32_t queue_id,
+                           uint32_t doorbell_off)
+{
+       struct amdgpu_device *adev = get_amdgpu_device(kgd);
+       struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
+       struct v9_mqd *m;
+       uint32_t mec, pipe;
+       int r;
+
+       m = get_mqd(mqd);
+
+       acquire_queue(kgd, pipe_id, queue_id);
+
+       mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
+       pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
+
+       pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n",
+                mec, pipe, queue_id);
+
+       spin_lock(&adev->gfx.kiq.ring_lock);
+       r = amdgpu_ring_alloc(kiq_ring, 7);
+       if (r) {
+               pr_err("Failed to alloc KIQ (%d).\n", r);
+               goto out_unlock;
+       }
+
+       amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
+       amdgpu_ring_write(kiq_ring,
+                         PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
+                         PACKET3_MAP_QUEUES_VMID(m->cp_hqd_vmid) | /* VMID */
+                         PACKET3_MAP_QUEUES_QUEUE(queue_id) |
+                         PACKET3_MAP_QUEUES_PIPE(pipe) |
+                         PACKET3_MAP_QUEUES_ME((mec - 1)) |
+                         PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */
+                         PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
+                         PACKET3_MAP_QUEUES_ENGINE_SEL(1) | /* engine_sel: hiq */
+                         PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
+       amdgpu_ring_write(kiq_ring,
+                         PACKET3_MAP_QUEUES_DOORBELL_OFFSET(doorbell_off));
+       amdgpu_ring_write(kiq_ring, m->cp_mqd_base_addr_lo);
+       amdgpu_ring_write(kiq_ring, m->cp_mqd_base_addr_hi);
+       amdgpu_ring_write(kiq_ring, m->cp_hqd_pq_wptr_poll_addr_lo);
+       amdgpu_ring_write(kiq_ring, m->cp_hqd_pq_wptr_poll_addr_hi);
+       amdgpu_ring_commit(kiq_ring);
+
+out_unlock:
+       spin_unlock(&adev->gfx.kiq.ring_lock);
+       release_queue(kgd);
+
+       return r;
+}
+
 int kgd_gfx_v9_hqd_dump(struct kgd_dev *kgd,
                        uint32_t pipe_id, uint32_t queue_id,
                        uint32_t (**dump)[2], uint32_t *n_regs)
@@ -684,6 +722,7 @@ const struct kfd2kgd_calls gfx_v9_kfd2kgd = {
        .set_pasid_vmid_mapping = kgd_gfx_v9_set_pasid_vmid_mapping,
        .init_interrupts = kgd_gfx_v9_init_interrupts,
        .hqd_load = kgd_gfx_v9_hqd_load,
+       .hiq_mqd_load = kgd_gfx_v9_hiq_mqd_load,
        .hqd_sdma_load = kgd_hqd_sdma_load,
        .hqd_dump = kgd_gfx_v9_hqd_dump,
        .hqd_sdma_dump = kgd_hqd_sdma_dump,
index dfafa28b75590b6b58eb89d6b0904234b4b0bdc4..63d3e6683dfe1d800424990cb50da8ac26c175e8 100644 (file)
@@ -33,6 +33,9 @@ int kgd_gfx_v9_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
                        uint32_t queue_id, uint32_t __user *wptr,
                        uint32_t wptr_shift, uint32_t wptr_mask,
                        struct mm_struct *mm);
+int kgd_gfx_v9_hiq_mqd_load(struct kgd_dev *kgd, void *mqd,
+                           uint32_t pipe_id, uint32_t queue_id,
+                           uint32_t doorbell_off);
 int kgd_gfx_v9_hqd_dump(struct kgd_dev *kgd,
                        uint32_t pipe_id, uint32_t queue_id,
                        uint32_t (**dump)[2], uint32_t *n_regs);
index aa9010995eaf228469e43fafd4d4683c72bf3e4b..436b7f518979185392b264c374b9f0c950f973e1 100644 (file)
@@ -191,6 +191,14 @@ static int load_mqd(struct mqd_manager *mm, void *mqd,
                                          wptr_shift, 0, mms);
 }
 
+static int hiq_load_mqd_kiq(struct mqd_manager *mm, void *mqd,
+                           uint32_t pipe_id, uint32_t queue_id,
+                           struct queue_properties *p, struct mm_struct *mms)
+{
+       return mm->dev->kfd2kgd->hiq_mqd_load(mm->dev->kgd, mqd, pipe_id,
+                                             queue_id, p->doorbell_off);
+}
+
 static void update_mqd(struct mqd_manager *mm, void *mqd,
                      struct queue_properties *q)
 {
@@ -449,7 +457,7 @@ struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type,
                mqd->allocate_mqd = allocate_hiq_mqd;
                mqd->init_mqd = init_mqd_hiq;
                mqd->free_mqd = free_mqd_hiq_sdma;
-               mqd->load_mqd = load_mqd;
+               mqd->load_mqd = hiq_load_mqd_kiq;
                mqd->update_mqd = update_mqd;
                mqd->destroy_mqd = destroy_mqd;
                mqd->is_occupied = is_occupied;
index a01ef836ad5869d22bdf8415a4690314a3c357ff..a607b1034962902534a7ae6affbdab372eab3547 100644 (file)
@@ -256,6 +256,10 @@ struct kfd2kgd_calls {
                        uint32_t wptr_shift, uint32_t wptr_mask,
                        struct mm_struct *mm);
 
+       int (*hiq_mqd_load)(struct kgd_dev *kgd, void *mqd,
+                           uint32_t pipe_id, uint32_t queue_id,
+                           uint32_t doorbell_off);
+
        int (*hqd_sdma_load)(struct kgd_dev *kgd, void *mqd,
                             uint32_t __user *wptr, struct mm_struct *mm);