drm/amdgpu: add PSP loading support for UMSCH
author Lang Yu <Lang.Yu@amd.com>
Mon, 19 Jun 2023 00:58:32 +0000 (08:58 +0800)
committer Alex Deucher <alexander.deucher@amd.com>
Thu, 31 Aug 2023 21:14:21 +0000 (17:14 -0400)
Add front door loading support.

Signed-off-by: Lang Yu <Lang.Yu@amd.com>
Reviewed-by: Leo Liu <leo.liu@amd.com>
Reviewed-by: Veerabadhran Gopalakrishnan <Veerabadhran.Gopalakrishnan@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c
drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.h
drivers/gpu/drm/amd/amdgpu/umsch_mm_v4_0.c

index d8bdcaf8c4db9246397b75a47e50e2beb777be33..60bc9d4f0761268b0a9e63d94006c537b7f993af 100644 (file)
@@ -2399,6 +2399,15 @@ static int psp_get_fw_type(struct amdgpu_firmware_info *ucode,
        case AMDGPU_UCODE_ID_VPE:
                *type = GFX_FW_TYPE_VPE;
                break;
+       case AMDGPU_UCODE_ID_UMSCH_MM_UCODE:
+               *type = GFX_FW_TYPE_UMSCH_UCODE;
+               break;
+       case AMDGPU_UCODE_ID_UMSCH_MM_DATA:
+               *type = GFX_FW_TYPE_UMSCH_DATA;
+               break;
+       case AMDGPU_UCODE_ID_UMSCH_MM_CMD_BUFFER:
+               *type = GFX_FW_TYPE_UMSCH_CMD_BUFFER;
+               break;
        case AMDGPU_UCODE_ID_MAXIMUM:
        default:
                return -EINVAL;
index e3b52f4436a777991445a80ab66f30fe72959f4d..eecb0efeb15fe73793de5b18b0beedecf1614174 100644 (file)
@@ -664,6 +664,16 @@ const char *amdgpu_ucode_name(enum AMDGPU_UCODE_ID ucode_id)
                return "DMCUB";
        case AMDGPU_UCODE_ID_CAP:
                return "CAP";
+       case AMDGPU_UCODE_ID_VPE_CTX:
+               return "VPE_CTX";
+       case AMDGPU_UCODE_ID_VPE_CTL:
+               return "VPE_CTL";
+       case AMDGPU_UCODE_ID_VPE:
+               return "VPE";
+       case AMDGPU_UCODE_ID_UMSCH_MM_UCODE:
+               return "UMSCH_MM_UCODE";
+       case AMDGPU_UCODE_ID_UMSCH_MM_DATA:
+               return "UMSCH_MM_DATA";
        default:
                return "UNKNOWN UCODE";
        }
@@ -750,6 +760,7 @@ static int amdgpu_ucode_init_single_fw(struct amdgpu_device *adev,
        const struct sdma_firmware_header_v2_0 *sdma_hdr = NULL;
        const struct imu_firmware_header_v1_0 *imu_hdr = NULL;
        const struct vpe_firmware_header_v1_0 *vpe_hdr = NULL;
+       const struct umsch_mm_firmware_header_v1_0 *umsch_mm_hdr = NULL;
        u8 *ucode_addr;
 
        if (!ucode->fw)
@@ -962,6 +973,19 @@ static int amdgpu_ucode_init_single_fw(struct amdgpu_device *adev,
                        ucode_addr = (u8 *)ucode->fw->data +
                                le32_to_cpu(vpe_hdr->ctl_ucode_offset);
                        break;
+               case AMDGPU_UCODE_ID_UMSCH_MM_UCODE:
+                       /* umsch_mm_hdr is declared as NULL; bind it to the image before use */
+                       umsch_mm_hdr = (const struct umsch_mm_firmware_header_v1_0 *)ucode->fw->data;
+                       ucode->ucode_size = le32_to_cpu(umsch_mm_hdr->umsch_mm_ucode_size_bytes);
+                       ucode_addr = (u8 *)ucode->fw->data +
+                               le32_to_cpu(umsch_mm_hdr->header.ucode_array_offset_bytes);
+                       break;
+               case AMDGPU_UCODE_ID_UMSCH_MM_DATA:
+                       umsch_mm_hdr = (const struct umsch_mm_firmware_header_v1_0 *)ucode->fw->data;
+                       ucode->ucode_size = le32_to_cpu(umsch_mm_hdr->umsch_mm_ucode_data_size_bytes);
+                       ucode_addr = (u8 *)ucode->fw->data +
+                               le32_to_cpu(umsch_mm_hdr->umsch_mm_ucode_data_offset_bytes);
+                       break;
                default:
                        ucode->ucode_size = le32_to_cpu(header->ucode_size_bytes);
                        ucode_addr = (u8 *)ucode->fw->data +
index e153dd3d6b888a6af33c5609e34ae189160e31ad..ae5fa61d289098026877b0b30553858e68f650fc 100644 (file)
@@ -507,6 +507,9 @@ enum AMDGPU_UCODE_ID {
        AMDGPU_UCODE_ID_VPE_CTX,
        AMDGPU_UCODE_ID_VPE_CTL,
        AMDGPU_UCODE_ID_VPE,
+       AMDGPU_UCODE_ID_UMSCH_MM_UCODE,
+       AMDGPU_UCODE_ID_UMSCH_MM_DATA,
+       AMDGPU_UCODE_ID_UMSCH_MM_CMD_BUFFER,
        AMDGPU_UCODE_ID_MAXIMUM,
 };
 
index 6f20b42c98d2cff4eef8ce56fec75437824d1e13..7341808a3558d4fdd3fb3d7ea1cd5d8a11dc05d3 100644 (file)
@@ -76,6 +76,17 @@ struct umsch_mm_test {
        uint32_t                num_queues;
 };
 
+int umsch_mm_psp_update_sram(struct amdgpu_device *adev, u32 ucode_size)
+{
+       struct amdgpu_firmware_info cmd_buf_info = {    /* descriptor handed to PSP */
+               .ucode_size = ucode_size,
+               .mc_addr = adev->umsch_mm.cmd_buf_gpu_addr,
+               .ucode_id = AMDGPU_UCODE_ID_UMSCH_MM_CMD_BUFFER,
+       };
+
+       return psp_execute_ip_fw_load(&adev->psp, &cmd_buf_info);
+}
+
 static int map_ring_data(struct amdgpu_device *adev, struct amdgpu_vm *vm,
                          struct amdgpu_bo *bo, struct amdgpu_bo_va **bo_va,
                          uint64_t addr, uint32_t size)
@@ -600,6 +611,22 @@ int amdgpu_umsch_mm_init_microcode(struct amdgpu_umsch_mm *umsch)
                le32_to_cpu(umsch_mm_hdr->umsch_mm_data_start_addr_lo) |
                ((uint64_t)(le32_to_cpu(umsch_mm_hdr->umsch_mm_data_start_addr_hi)) << 32);
 
+       if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+               struct amdgpu_firmware_info *info;
+
+               info = &adev->firmware.ucode[AMDGPU_UCODE_ID_UMSCH_MM_UCODE];
+               info->ucode_id = AMDGPU_UCODE_ID_UMSCH_MM_UCODE;
+               info->fw = adev->umsch_mm.fw;
+               adev->firmware.fw_size +=
+                       ALIGN(le32_to_cpu(umsch_mm_hdr->umsch_mm_ucode_size_bytes), PAGE_SIZE);
+
+               info = &adev->firmware.ucode[AMDGPU_UCODE_ID_UMSCH_MM_DATA];
+               info->ucode_id = AMDGPU_UCODE_ID_UMSCH_MM_DATA;
+               info->fw = adev->umsch_mm.fw;
+               adev->firmware.fw_size +=
+                       ALIGN(le32_to_cpu(umsch_mm_hdr->umsch_mm_ucode_data_size_bytes), PAGE_SIZE);
+       }
+
        return 0;
 }
 
@@ -667,6 +694,17 @@ int amdgpu_umsch_mm_allocate_ucode_data_buffer(struct amdgpu_umsch_mm *umsch)
        return 0;
 }
 
+void *amdgpu_umsch_mm_add_cmd(struct amdgpu_umsch_mm *umsch,
+                             void *cmd_ptr, uint32_t reg_offset, uint32_t reg_data)
+{
+       uint32_t *cmd = cmd_ptr;        /* 'umsch' is not used by this helper */
+
+       cmd[0] = reg_offset << 2;       /* first word: register offset shifted left by 2 */
+       cmd[1] = reg_data;              /* second word: register data */
+
+       return cmd + 2;                 /* first slot after the pair just written */
+}
+
 static void umsch_mm_agdb_index_init(struct amdgpu_device *adev)
 {
        uint32_t umsch_mm_agdb_start;
@@ -697,6 +735,17 @@ static int umsch_mm_init(struct amdgpu_device *adev)
        adev->umsch_mm.sch_ctx_gpu_addr = adev->wb.gpu_addr +
                                          (adev->umsch_mm.wb_index * 4);
 
+       r = amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE,
+                                   AMDGPU_GEM_DOMAIN_GTT,
+                                   &adev->umsch_mm.cmd_buf_obj,
+                                   &adev->umsch_mm.cmd_buf_gpu_addr,
+                                   (void **)&adev->umsch_mm.cmd_buf_ptr);
+       if (r) {
+               dev_err(adev->dev, "failed to allocate cmdbuf bo %d\n", r);
+               amdgpu_device_wb_free(adev, adev->umsch_mm.wb_index);
+               return r;
+       }
+
        mutex_init(&adev->umsch_mm.mutex_hidden);
 
        umsch_mm_agdb_index_init(adev);
@@ -760,6 +809,11 @@ static int umsch_mm_sw_fini(void *handle)
        amdgpu_ring_fini(&adev->umsch_mm.ring);
 
        mutex_destroy(&adev->umsch_mm.mutex_hidden);
+
+       amdgpu_bo_free_kernel(&adev->umsch_mm.cmd_buf_obj,
+                             &adev->umsch_mm.cmd_buf_gpu_addr,
+                             (void **)&adev->umsch_mm.cmd_buf_ptr);
+
        amdgpu_device_wb_free(adev, adev->umsch_mm.wb_index);
 
        return 0;
index 660150c630e7b9fe2f86cb8ac3e99b4ec48813f3..d83fdf2da4644b26ec5d02ef7c0f44831ee7c76a 100644 (file)
@@ -147,6 +147,10 @@ struct amdgpu_umsch_mm {
        uint64_t                        data_start_addr;
        uint32_t                        data_size;
 
+       struct amdgpu_bo                *cmd_buf_obj;
+       uint64_t                        cmd_buf_gpu_addr;
+       uint32_t                        *cmd_buf_ptr;
+
        uint32_t                        wb_index;
        uint64_t                        sch_ctx_gpu_addr;
        uint32_t                        *sch_ctx_cpu_addr;
@@ -163,12 +167,16 @@ struct amdgpu_umsch_mm {
        struct mutex                    mutex_hidden;
 };
 
+int umsch_mm_psp_update_sram(struct amdgpu_device *adev, u32 ucode_size);
+
 int amdgpu_umsch_mm_submit_pkt(struct amdgpu_umsch_mm *umsch, void *pkt, int ndws);
 int amdgpu_umsch_mm_query_fence(struct amdgpu_umsch_mm *umsch);
 
 int amdgpu_umsch_mm_init_microcode(struct amdgpu_umsch_mm *umsch);
 int amdgpu_umsch_mm_allocate_ucode_buffer(struct amdgpu_umsch_mm *umsch);
 int amdgpu_umsch_mm_allocate_ucode_data_buffer(struct amdgpu_umsch_mm *umsch);
+void* amdgpu_umsch_mm_add_cmd(struct amdgpu_umsch_mm *umsch,
+                             void* cmd_ptr, uint32_t reg_offset, uint32_t reg_data);
 
 int amdgpu_umsch_mm_ring_init(struct amdgpu_umsch_mm *umsch);
 
index 0683a8cb044d5c0c36f9a4b3b2ced9ad40976b55..d3dec5f21bec53b3be83f8e239b7d14d173f1120 100644 (file)
 #include "umsch_mm_4_0_api_def.h"
 #include "umsch_mm_v4_0.h"
 
+#define WREG32_SOC15_UMSCH(ptr, reg, value) \
+({     /* Needs 'adev' in scope; evaluates to the (possibly advanced) command pointer. */      \
+       void *_ret = (ptr);                                                                     \
+       uint32_t _reg_offset = adev->reg_offset[VCN_HWIP][0][reg##_BASE_IDX] + (reg);           \
+       uint32_t _val = (value);        /* each macro argument is evaluated exactly once */     \
+       if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)                                     \
+               _ret = amdgpu_umsch_mm_add_cmd(&adev->umsch_mm, _ret, _reg_offset, _val);       \
+       else                                                                                    \
+               WREG32(_reg_offset, _val);                                                      \
+       _ret;                                                                                   \
+})
+
 static int umsch_mm_v4_0_load_microcode(struct amdgpu_umsch_mm *umsch)
 {
        struct amdgpu_device *adev = umsch->ring.adev;
+       void* ptr = umsch->cmd_buf_ptr;
        uint32_t data;
        int r;
 
@@ -50,88 +63,95 @@ static int umsch_mm_v4_0_load_microcode(struct amdgpu_umsch_mm *umsch)
 
        data = RREG32_SOC15(VCN, 0, regUMSCH_MES_RESET_CTRL);
        data = REG_SET_FIELD(data, UMSCH_MES_RESET_CTRL, MES_CORE_SOFT_RESET, 0);
-       WREG32_SOC15(VCN, 0, regUMSCH_MES_RESET_CTRL, data);
+       ptr = WREG32_SOC15_UMSCH(ptr, regUMSCH_MES_RESET_CTRL, data);
 
        data = RREG32_SOC15(VCN, 0, regVCN_MES_CNTL);
        data = REG_SET_FIELD(data, VCN_MES_CNTL, MES_INVALIDATE_ICACHE, 1);
        data = REG_SET_FIELD(data, VCN_MES_CNTL, MES_PIPE0_RESET, 1);
        data = REG_SET_FIELD(data, VCN_MES_CNTL, MES_PIPE0_ACTIVE, 0);
        data = REG_SET_FIELD(data, VCN_MES_CNTL, MES_HALT, 1);
-       WREG32_SOC15(VCN, 0, regVCN_MES_CNTL, data);
+       ptr = WREG32_SOC15_UMSCH(ptr, regVCN_MES_CNTL, data);
 
        data = RREG32_SOC15(VCN, 0, regVCN_MES_IC_BASE_CNTL);
        data = REG_SET_FIELD(data, VCN_MES_IC_BASE_CNTL, VMID, 0);
        data = REG_SET_FIELD(data, VCN_MES_IC_BASE_CNTL, EXE_DISABLE, 0);
        data = REG_SET_FIELD(data, VCN_MES_IC_BASE_CNTL, CACHE_POLICY, 0);
-       WREG32_SOC15(VCN, 0, regVCN_MES_IC_BASE_CNTL, data);
+       ptr = WREG32_SOC15_UMSCH(ptr, regVCN_MES_IC_BASE_CNTL, data);
+
+
+       ptr = WREG32_SOC15_UMSCH(ptr, regVCN_MES_INTR_ROUTINE_START,
+                                lower_32_bits(adev->umsch_mm.irq_start_addr >> 2));
 
-       WREG32_SOC15(VCN, 0, regVCN_MES_INTR_ROUTINE_START,
-                    lower_32_bits(adev->umsch_mm.irq_start_addr >> 2));
-       WREG32_SOC15(VCN, 0, regVCN_MES_INTR_ROUTINE_START_HI,
-                    upper_32_bits(adev->umsch_mm.irq_start_addr >> 2));
+       ptr = WREG32_SOC15_UMSCH(ptr, regVCN_MES_INTR_ROUTINE_START_HI,
+                                upper_32_bits(adev->umsch_mm.irq_start_addr >> 2));
 
-       WREG32_SOC15(VCN, 0, regVCN_MES_PRGRM_CNTR_START,
-                    lower_32_bits(adev->umsch_mm.uc_start_addr >> 2));
-       WREG32_SOC15(VCN, 0, regVCN_MES_PRGRM_CNTR_START_HI,
-                    upper_32_bits(adev->umsch_mm.uc_start_addr >> 2));
+       ptr = WREG32_SOC15_UMSCH(ptr, regVCN_MES_PRGRM_CNTR_START,
+                                lower_32_bits(adev->umsch_mm.uc_start_addr >> 2));
+       ptr = WREG32_SOC15_UMSCH(ptr, regVCN_MES_PRGRM_CNTR_START_HI,
+                                upper_32_bits(adev->umsch_mm.uc_start_addr >> 2));
 
-       WREG32_SOC15(VCN, 0, regVCN_MES_LOCAL_INSTR_BASE_LO, 0);
-       WREG32_SOC15(VCN, 0, regVCN_MES_LOCAL_INSTR_BASE_HI, 0);
+       ptr = WREG32_SOC15_UMSCH(ptr, regVCN_MES_LOCAL_INSTR_BASE_LO, 0);
+       ptr = WREG32_SOC15_UMSCH(ptr, regVCN_MES_LOCAL_INSTR_BASE_HI, 0);
 
        data = adev->umsch_mm.uc_start_addr + adev->umsch_mm.ucode_size - 1;
-       WREG32_SOC15(VCN, 0, regVCN_MES_LOCAL_INSTR_MASK_LO, lower_32_bits(data));
-       WREG32_SOC15(VCN, 0, regVCN_MES_LOCAL_INSTR_MASK_HI, upper_32_bits(data));
+       ptr = WREG32_SOC15_UMSCH(ptr, regVCN_MES_LOCAL_INSTR_MASK_LO, lower_32_bits(data));
+       ptr = WREG32_SOC15_UMSCH(ptr, regVCN_MES_LOCAL_INSTR_MASK_HI, upper_32_bits(data));
 
-       WREG32_SOC15(VCN, 0, regVCN_MES_IC_BASE_LO,
-                    lower_32_bits(adev->umsch_mm.ucode_fw_gpu_addr));
-       WREG32_SOC15(VCN, 0, regVCN_MES_IC_BASE_HI,
-                    upper_32_bits(adev->umsch_mm.ucode_fw_gpu_addr));
+       ptr = WREG32_SOC15_UMSCH(ptr, regVCN_MES_IC_BASE_LO,
+                                lower_32_bits(adev->umsch_mm.ucode_fw_gpu_addr));
+       ptr = WREG32_SOC15_UMSCH(ptr, regVCN_MES_IC_BASE_HI,
+                                upper_32_bits(adev->umsch_mm.ucode_fw_gpu_addr));
 
-       WREG32_SOC15(VCN, 0, regVCN_MES_MIBOUND_LO, 0x1FFFFF);
+       ptr = WREG32_SOC15_UMSCH(ptr, regVCN_MES_MIBOUND_LO, 0x1FFFFF);
 
-       WREG32_SOC15(VCN, 0, regVCN_MES_LOCAL_BASE0_LO,
-                    lower_32_bits(adev->umsch_mm.data_start_addr));
-       WREG32_SOC15(VCN, 0, regVCN_MES_LOCAL_BASE0_HI,
-                    upper_32_bits(adev->umsch_mm.data_start_addr));
+       ptr = WREG32_SOC15_UMSCH(ptr, regVCN_MES_LOCAL_BASE0_LO,
+                                lower_32_bits(adev->umsch_mm.data_start_addr));
+       ptr = WREG32_SOC15_UMSCH(ptr, regVCN_MES_LOCAL_BASE0_HI,
+                                upper_32_bits(adev->umsch_mm.data_start_addr));
 
-       WREG32_SOC15(VCN, 0, regVCN_MES_LOCAL_MASK0_LO,
-                    lower_32_bits(adev->umsch_mm.data_size - 1));
-       WREG32_SOC15(VCN, 0, regVCN_MES_LOCAL_MASK0_HI,
-                    upper_32_bits(adev->umsch_mm.data_size - 1));
+       ptr = WREG32_SOC15_UMSCH(ptr, regVCN_MES_LOCAL_MASK0_LO,
+                                lower_32_bits(adev->umsch_mm.data_size - 1));
+       ptr = WREG32_SOC15_UMSCH(ptr, regVCN_MES_LOCAL_MASK0_HI,
+                                upper_32_bits(adev->umsch_mm.data_size - 1));
 
-       WREG32_SOC15(VCN, 0, regVCN_MES_DC_BASE_LO,
-                    lower_32_bits(adev->umsch_mm.data_fw_gpu_addr));
-       WREG32_SOC15(VCN, 0, regVCN_MES_DC_BASE_HI,
-                    upper_32_bits(adev->umsch_mm.data_fw_gpu_addr));
+       ptr = WREG32_SOC15_UMSCH(ptr, regVCN_MES_DC_BASE_LO,
+                                lower_32_bits(adev->umsch_mm.data_fw_gpu_addr));
+       ptr = WREG32_SOC15_UMSCH(ptr, regVCN_MES_DC_BASE_HI,
+                                upper_32_bits(adev->umsch_mm.data_fw_gpu_addr));
 
-       WREG32_SOC15(VCN, 0, regVCN_MES_MDBOUND_LO, 0x3FFFF);
+       ptr = WREG32_SOC15_UMSCH(ptr, regVCN_MES_MDBOUND_LO, 0x3FFFF);
 
        data = RREG32_SOC15(VCN, 0, regUVD_UMSCH_FORCE);
        data = REG_SET_FIELD(data, UVD_UMSCH_FORCE, IC_FORCE_GPUVM, 1);
        data = REG_SET_FIELD(data, UVD_UMSCH_FORCE, DC_FORCE_GPUVM, 1);
-       WREG32_SOC15(VCN, 0, regUVD_UMSCH_FORCE, data);
+       ptr = WREG32_SOC15_UMSCH(ptr, regUVD_UMSCH_FORCE, data);
 
        data = RREG32_SOC15(VCN, 0, regVCN_MES_IC_OP_CNTL);
        data = REG_SET_FIELD(data, VCN_MES_IC_OP_CNTL, PRIME_ICACHE, 0);
        data = REG_SET_FIELD(data, VCN_MES_IC_OP_CNTL, INVALIDATE_CACHE, 1);
-       WREG32_SOC15(VCN, 0, regVCN_MES_IC_OP_CNTL, data);
+       ptr = WREG32_SOC15_UMSCH(ptr, regVCN_MES_IC_OP_CNTL, data);
 
        data = RREG32_SOC15(VCN, 0, regVCN_MES_IC_OP_CNTL);
        data = REG_SET_FIELD(data, VCN_MES_IC_OP_CNTL, PRIME_ICACHE, 1);
-       WREG32_SOC15(VCN, 0, regVCN_MES_IC_OP_CNTL, data);
+       ptr = WREG32_SOC15_UMSCH(ptr, regVCN_MES_IC_OP_CNTL, data);
 
-       WREG32_SOC15(VCN, 0, regVCN_MES_GP0_LO, 0);
-       WREG32_SOC15(VCN, 0, regVCN_MES_GP0_HI, 0);
+       ptr = WREG32_SOC15_UMSCH(ptr, regVCN_MES_GP0_LO, 0);
+       ptr = WREG32_SOC15_UMSCH(ptr, regVCN_MES_GP0_HI, 0);
 
-       WREG32_SOC15(VCN, 0, regVCN_MES_GP1_LO, 0);
-       WREG32_SOC15(VCN, 0, regVCN_MES_GP1_HI, 0);
+       ptr = WREG32_SOC15_UMSCH(ptr, regVCN_MES_GP1_LO, 0);
+       ptr = WREG32_SOC15_UMSCH(ptr, regVCN_MES_GP1_HI, 0);
 
        data = RREG32_SOC15(VCN, 0, regVCN_MES_CNTL);
        data = REG_SET_FIELD(data, VCN_MES_CNTL, MES_INVALIDATE_ICACHE, 0);
        data = REG_SET_FIELD(data, VCN_MES_CNTL, MES_PIPE0_RESET, 0);
        data = REG_SET_FIELD(data, VCN_MES_CNTL, MES_HALT, 0);
        data = REG_SET_FIELD(data, VCN_MES_CNTL, MES_PIPE0_ACTIVE, 1);
-       WREG32_SOC15(VCN, 0, regVCN_MES_CNTL, data);
+       ptr = WREG32_SOC15_UMSCH(ptr, regVCN_MES_CNTL, data);
+
+       if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+               umsch_mm_psp_update_sram(adev,
+                                        (u32)((uintptr_t)ptr - (uintptr_t)umsch->cmd_buf_ptr));
+       }
 
        r = SOC15_WAIT_ON_RREG(VCN, 0, regVCN_MES_MSTATUS_LO, 0xAAAAAAAA, 0xFFFFFFFF);
        if (r) {