drm/amd/pm: Support RAS fatal error mode1 reset on smu v13_0_0 and v13_0_10
author: Candice Li <candice.li@amd.com>
Mon, 9 Jan 2023 13:55:22 +0000 (21:55 +0800)
committer: Alex Deucher <alexander.deucher@amd.com>
Fri, 13 Jan 2023 19:59:26 +0000 (14:59 -0500)
Support RAS fatal error mode1 reset on smu v13_0_0 and v13_0_10.

Signed-off-by: Candice Li <candice.li@amd.com>
Reviewed-by: Evan Quan <evan.quan@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c
drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h

index 969e5f96554015b06076c5c7c3bf57fda0ff2103..d0cdc578344d8d2af25c0bc1b10de275a8d2d9ed 100644 (file)
@@ -1904,15 +1904,51 @@ static int smu_v13_0_0_set_df_cstate(struct smu_context *smu,
                                               NULL);
 }
 
+/**
+ * smu_v13_0_0_set_mode1_reset_param - build the argument for a mode1 reset
+ * @smu: SMU context; its adev is used to look up the RAS context
+ * @supported_version: lowest PMFW version that accepts the RAS fatal error flag
+ * @param: output; set to 1 << 16 when the flag applies, otherwise 0
+ *
+ * The flag is set only when the PMFW version reported by
+ * smu_cmn_get_smc_version() is at least @supported_version and the RAS
+ * context exists with in_recovery non-zero.
+ */
+static void smu_v13_0_0_set_mode1_reset_param(struct smu_context *smu,
+                                               uint32_t supported_version,
+                                               uint32_t *param)
+{
+       /*
+        * Start from 0: the result of the version query below is not
+        * checked, so if it fails without writing its output the version
+        * compares as 0 instead of being an indeterminate value.
+        */
+       uint32_t smu_version = 0;
+       struct amdgpu_device *adev = smu->adev;
+       struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+
+       smu_cmn_get_smc_version(smu, NULL, &smu_version);
+
+       if ((smu_version >= supported_version) &&
+                       ras && atomic_read(&ras->in_recovery))
+               /* Set RAS fatal error reset flag */
+               *param = 1 << 16;
+       else
+               *param = 0;
+}
+
 static int smu_v13_0_0_mode1_reset(struct smu_context *smu)
 {
        int ret;
+       uint32_t param;
        struct amdgpu_device *adev = smu->adev;
 
-       if (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 10))
-               ret = smu_cmn_send_debug_smc_msg(smu, DEBUGSMC_MSG_Mode1Reset);
-       else
+       switch (adev->ip_versions[MP1_HWIP][0]) {
+       case IP_VERSION(13, 0, 0):
+               /* SMU 13_0_0 PMFW supports RAS fatal error reset from 78.77 */
+               smu_v13_0_0_set_mode1_reset_param(smu, 0x004e4d00, &param);
+
+               ret = smu_cmn_send_smc_msg_with_param(smu,
+                                               SMU_MSG_Mode1Reset, param, NULL);
+               break;
+
+       case IP_VERSION(13, 0, 10):
+               /* SMU 13_0_10 PMFW supports RAS fatal error reset from 80.28 */
+               smu_v13_0_0_set_mode1_reset_param(smu, 0x00501c00, &param);
+
+               ret = smu_cmn_send_debug_smc_msg_with_param(smu,
+                                               DEBUGSMC_MSG_Mode1Reset, param);
+               break;
+
+       default:
                ret = smu_cmn_send_smc_msg(smu, SMU_MSG_Mode1Reset, NULL);
+               break;
+       }
 
        if (!ret)
                msleep(SMU13_MODE1_RESET_WAIT_TIME_IN_MS);
index 768b6e7dbd7719680485b33104164d73be8f7520..d5abafc5a68201fc9e329b2b7c1e45da4c6c869e 100644 (file)
@@ -404,6 +404,12 @@ int smu_cmn_send_debug_smc_msg(struct smu_context *smu,
        return __smu_cmn_send_debug_msg(smu, msg, 0);
 }
 
+/**
+ * smu_cmn_send_debug_smc_msg_with_param - send a debug message with an argument
+ * @smu: SMU context, passed through unchanged
+ * @msg: debug message id
+ * @param: argument forwarded with the message
+ *
+ * Counterpart of smu_cmn_send_debug_smc_msg(), which passes 0 as the argument.
+ *
+ * Return: whatever __smu_cmn_send_debug_msg() returns.
+ */
+int smu_cmn_send_debug_smc_msg_with_param(struct smu_context *smu,
+                        uint32_t msg, uint32_t param)
+{
+       return __smu_cmn_send_debug_msg(smu, msg, param);
+}
+
 int smu_cmn_to_asic_specific_index(struct smu_context *smu,
                                   enum smu_cmn2asic_mapping_type type,
                                   uint32_t index)
index f82cf76dd3a474d53338efdbe86b5d23e4b37729..d7cd358a53bdcd899768d8c8c0681a882936ea71 100644 (file)
@@ -45,6 +45,9 @@ int smu_cmn_send_smc_msg(struct smu_context *smu,
 int smu_cmn_send_debug_smc_msg(struct smu_context *smu,
                         uint32_t msg);
 
+/* Like smu_cmn_send_debug_smc_msg(), but with a caller-supplied message argument. */
+int smu_cmn_send_debug_smc_msg_with_param(struct smu_context *smu,
+                        uint32_t msg, uint32_t param);
+
 int smu_cmn_wait_for_response(struct smu_context *smu);
 
 int smu_cmn_to_asic_specific_index(struct smu_context *smu,