lock_srbm(kgd, mec, pipe, queue_id, 0);
 }
 
-static uint32_t get_queue_mask(struct amdgpu_device *adev,
+static uint64_t get_queue_mask(struct amdgpu_device *adev,
                               uint32_t pipe_id, uint32_t queue_id)
 {
-       unsigned int bit = (pipe_id * adev->gfx.mec.num_queue_per_pipe +
-                           queue_id) & 31;
+       unsigned int bit = pipe_id * adev->gfx.mec.num_queue_per_pipe +
+                       queue_id; /* linear index of the queue across pipes */
 
-       return ((uint32_t)1) << bit;
+       return 1ull << bit; /* single-bit mask; NOTE(review): assumes bit < 64, a larger shift is undefined - confirm */
 }
 
 static void release_queue(struct kgd_dev *kgd)
 
        acquire_queue(kgd, pipe_id, queue_id);
 
-       /* HIQ is set during driver init period with vmid set to 0*/
-       if (m->cp_hqd_vmid == 0) {
-               uint32_t value, mec, pipe;
-
-               mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
-               pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
-
-               pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n",
-                       mec, pipe, queue_id);
-               value = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS));
-               value = REG_SET_FIELD(value, RLC_CP_SCHEDULERS, scheduler1,
-                       ((mec << 5) | (pipe << 3) | queue_id | 0x80));
-               WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS), value);
-       }
-
        /* HQD registers extend from CP_MQD_BASE_ADDR to CP_HQD_EOP_WPTR_MEM. */
        mqd_hqd = &m->cp_mqd_base_addr_lo;
        hqd_base = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR);
                WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI),
                       upper_32_bits((uintptr_t)wptr));
                WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PQ_WPTR_POLL_CNTL1),
-                      get_queue_mask(adev, pipe_id, queue_id));
+                      (uint32_t)get_queue_mask(adev, pipe_id, queue_id));
        }
 
        /* Start the EOP fetcher */
        return 0;
 }
 
+int kgd_gfx_v9_hiq_mqd_load(struct kgd_dev *kgd, void *mqd,
+                           uint32_t pipe_id, uint32_t queue_id,
+                           uint32_t doorbell_off)
+{
+       struct amdgpu_device *adev = get_amdgpu_device(kgd);
+       struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
+       struct v9_mqd *m;
+       uint32_t mec, pipe;
+       int r;
+       /* Queue a MAP_QUEUES packet for the HIQ on the KIQ ring, filled from mqd. */
+       m = get_mqd(mqd);
+
+       acquire_queue(kgd, pipe_id, queue_id); /* paired with release_queue() at out_unlock */
+       /* Split pipe_id into a 1-based MEC number and a pipe index within that MEC. */
+       mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
+       pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
+
+       pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n",
+                mec, pipe, queue_id);
+       /* Ring allocation, writes and commit all happen under the KIQ ring lock. */
+       spin_lock(&adev->gfx.kiq.ring_lock);
+       r = amdgpu_ring_alloc(kiq_ring, 7); /* 7 dwords: one per amdgpu_ring_write() below */
+       if (r) {
+               pr_err("Failed to alloc KIQ (%d).\n", r);
+               goto out_unlock; /* nothing was written; still unlock and release the queue */
+       }
+
+       amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
+       amdgpu_ring_write(kiq_ring,
+                         PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
+                         PACKET3_MAP_QUEUES_VMID(m->cp_hqd_vmid) | /* VMID */
+                         PACKET3_MAP_QUEUES_QUEUE(queue_id) |
+                         PACKET3_MAP_QUEUES_PIPE(pipe) |
+                         PACKET3_MAP_QUEUES_ME((mec - 1)) | /* undoes the +1 applied to mec above */
+                         PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /* queue_type: normal compute queue */
+                         PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
+                         PACKET3_MAP_QUEUES_ENGINE_SEL(1) | /* engine_sel: hiq */
+                         PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
+       amdgpu_ring_write(kiq_ring,
+                         PACKET3_MAP_QUEUES_DOORBELL_OFFSET(doorbell_off));
+       amdgpu_ring_write(kiq_ring, m->cp_mqd_base_addr_lo);
+       amdgpu_ring_write(kiq_ring, m->cp_mqd_base_addr_hi);
+       amdgpu_ring_write(kiq_ring, m->cp_hqd_pq_wptr_poll_addr_lo);
+       amdgpu_ring_write(kiq_ring, m->cp_hqd_pq_wptr_poll_addr_hi);
+       amdgpu_ring_commit(kiq_ring);
+
+out_unlock:
+       spin_unlock(&adev->gfx.kiq.ring_lock);
+       release_queue(kgd);
+
+       return r; /* result of amdgpu_ring_alloc(); 0 on the path that committed the packet */
+}
+
 int kgd_gfx_v9_hqd_dump(struct kgd_dev *kgd,
                        uint32_t pipe_id, uint32_t queue_id,
                        uint32_t (**dump)[2], uint32_t *n_regs)
        .set_pasid_vmid_mapping = kgd_gfx_v9_set_pasid_vmid_mapping,
        .init_interrupts = kgd_gfx_v9_init_interrupts,
        .hqd_load = kgd_gfx_v9_hqd_load,
+       .hiq_mqd_load = kgd_gfx_v9_hiq_mqd_load,
        .hqd_sdma_load = kgd_hqd_sdma_load,
        .hqd_dump = kgd_gfx_v9_hqd_dump,
        .hqd_sdma_dump = kgd_hqd_sdma_dump,