drm/amdgpu: Adjust error inject function code style in amdgpu_ras.c
author yipechai <YiPeng.Chai@amd.com>
Wed, 5 Jan 2022 07:40:26 +0000 (15:40 +0800)
committer Alex Deucher <alexander.deucher@amd.com>
Fri, 14 Jan 2022 22:52:00 +0000 (17:52 -0500)
1. Move the xgmi-specific error inject function from amdgpu_ras.c to the xgmi block.
2. Use psp_ras_trigger_error as the default error inject function in amdgpu_ras.c: if a ras block does not define .ras_error_inject, the default error inject function is used instead.

v2: squash in warning fix (Alex)

Signed-off-by: yipechai <YiPeng.Chai@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Reviewed-by: John Clements <john.clements@amd.com>
Reviewed-by: Tao Zhou <tao.zhou1@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c

index 7c21eab95fc88f584dfdf8753f583bc83ff8cb58..bcd33e5350f82eacc1eba6075e1160c71f302306 100644 (file)
@@ -903,31 +903,6 @@ static struct amdgpu_ras_block_object* amdgpu_ras_get_ras_block(struct amdgpu_de
        return NULL;
 }
 
-static void amdgpu_ras_mca_query_error_status(struct amdgpu_device *adev,
-                                             struct ras_common_if *ras_block,
-                                             struct ras_err_data  *err_data)
-{
-       switch (ras_block->sub_block_index) {
-       case AMDGPU_RAS_MCA_BLOCK__MP0:
-               if (adev->mca.mp0.ras_funcs &&
-                   adev->mca.mp0.ras_funcs->query_ras_error_count)
-                       adev->mca.mp0.ras_funcs->query_ras_error_count(adev, &err_data);
-               break;
-       case AMDGPU_RAS_MCA_BLOCK__MP1:
-               if (adev->mca.mp1.ras_funcs &&
-                   adev->mca.mp1.ras_funcs->query_ras_error_count)
-                       adev->mca.mp1.ras_funcs->query_ras_error_count(adev, &err_data);
-               break;
-       case AMDGPU_RAS_MCA_BLOCK__MPIO:
-               if (adev->mca.mpio.ras_funcs &&
-                   adev->mca.mpio.ras_funcs->query_ras_error_count)
-                       adev->mca.mpio.ras_funcs->query_ras_error_count(adev, &err_data);
-               break;
-       default:
-               break;
-       }
-}
-
 static void amdgpu_ras_get_ecc_info(struct amdgpu_device *adev, struct ras_err_data *err_data)
 {
        struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
@@ -994,6 +969,7 @@ int amdgpu_ras_query_error_status(struct amdgpu_device *adev,
        case AMDGPU_RAS_BLOCK__PCIE_BIF:
        case AMDGPU_RAS_BLOCK__XGMI_WAFL:
        case AMDGPU_RAS_BLOCK__HDP:
+       case AMDGPU_RAS_BLOCK__MCA:
                if (!block_obj || !block_obj->hw_ops)   {
                        dev_info(adev->dev, "%s doesn't config ras function \n",
                                get_ras_block_str(&info->head));
@@ -1002,9 +978,6 @@ int amdgpu_ras_query_error_status(struct amdgpu_device *adev,
                if (block_obj->hw_ops->query_ras_error_count)
                        block_obj->hw_ops->query_ras_error_count(adev, &err_data);
                break;
-       case AMDGPU_RAS_BLOCK__MCA:
-               amdgpu_ras_mca_query_error_status(adev, &info->head, &err_data);
-               break;
        default:
                break;
        }
@@ -1099,32 +1072,6 @@ int amdgpu_ras_reset_error_status(struct amdgpu_device *adev,
        return 0;
 }
 
-/* Trigger XGMI/WAFL error */
-static int amdgpu_ras_error_inject_xgmi(struct amdgpu_device *adev,
-                                struct ta_ras_trigger_error_input *block_info)
-{
-       int ret;
-
-       if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW))
-               dev_warn(adev->dev, "Failed to disallow df cstate");
-
-       if (amdgpu_dpm_allow_xgmi_power_down(adev, false))
-               dev_warn(adev->dev, "Failed to disallow XGMI power down");
-
-       ret = psp_ras_trigger_error(&adev->psp, block_info);
-
-       if (amdgpu_ras_intr_triggered())
-               return ret;
-
-       if (amdgpu_dpm_allow_xgmi_power_down(adev, true))
-               dev_warn(adev->dev, "Failed to allow XGMI power down");
-
-       if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_ALLOW))
-               dev_warn(adev->dev, "Failed to allow df cstate");
-
-       return ret;
-}
-
 /* wrapper of psp_ras_trigger_error */
 int amdgpu_ras_error_inject(struct amdgpu_device *adev,
                struct ras_inject_if *info)
@@ -1143,6 +1090,11 @@ int amdgpu_ras_error_inject(struct amdgpu_device *adev,
        if (!obj)
                return -EINVAL;
 
+       if (!block_obj || !block_obj->hw_ops)   {
+               dev_info(adev->dev, "%s doesn't config ras function \n", get_ras_block_str(&info->head));
+               return -EINVAL;
+       }
+
        /* Calculate XGMI relative offset */
        if (adev->gmc.xgmi.num_physical_nodes > 1) {
                block_info.address =
@@ -1150,30 +1102,15 @@ int amdgpu_ras_error_inject(struct amdgpu_device *adev,
                                                          block_info.address);
        }
 
-       switch (info->head.block) {
-       case AMDGPU_RAS_BLOCK__GFX:
-               if (!block_obj || !block_obj->hw_ops)   {
-                       dev_info(adev->dev, "%s doesn't config ras function \n", get_ras_block_str(&info->head));
-                       return -EINVAL;
-               }
-
+       if (info->head.block == AMDGPU_RAS_BLOCK__GFX) {
                if (block_obj->hw_ops->ras_error_inject)
                        ret = block_obj->hw_ops->ras_error_inject(adev, info);
-               break;
-       case AMDGPU_RAS_BLOCK__UMC:
-       case AMDGPU_RAS_BLOCK__SDMA:
-       case AMDGPU_RAS_BLOCK__MMHUB:
-       case AMDGPU_RAS_BLOCK__PCIE_BIF:
-       case AMDGPU_RAS_BLOCK__MCA:
-               ret = psp_ras_trigger_error(&adev->psp, &block_info);
-               break;
-       case AMDGPU_RAS_BLOCK__XGMI_WAFL:
-               ret = amdgpu_ras_error_inject_xgmi(adev, &block_info);
-               break;
-       default:
-               dev_info(adev->dev, "%s error injection is not supported yet\n",
-                        get_ras_block_str(&info->head));
-               ret = -EINVAL;
+       } else {
+               /* If defined special ras_error_inject(e.g: xgmi), implement special ras_error_inject */
+               if (block_obj->hw_ops->ras_error_inject)
+                       ret = block_obj->hw_ops->ras_error_inject(adev, &block_info);
+               else  /*If not defined .ras_error_inject, use default ras_error_inject*/
+                       ret = psp_ras_trigger_error(&adev->psp, &block_info);
        }
 
        if (ret)
index d29acd33eb11048a0fb5629f6e54bf13f8ac0c62..478457637d29d3f605d5198e1bb2c09a98ee45ac 100644 (file)
@@ -946,9 +946,36 @@ static void amdgpu_xgmi_query_ras_error_count(struct amdgpu_device *adev,
        err_data->ce_count += ce_cnt;
 }
 
+/* Trigger XGMI/WAFL error */
+static int amdgpu_ras_error_inject_xgmi(struct amdgpu_device *adev,  void *inject_if)
+{
+       int ret = 0;
+       struct ta_ras_trigger_error_input *block_info =  (struct ta_ras_trigger_error_input *)inject_if;
+
+       if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW))
+               dev_warn(adev->dev, "Failed to disallow df cstate");
+
+       if (amdgpu_dpm_allow_xgmi_power_down(adev, false))
+               dev_warn(adev->dev, "Failed to disallow XGMI power down");
+
+       ret = psp_ras_trigger_error(&adev->psp, block_info);
+
+       if (amdgpu_ras_intr_triggered())
+               return ret;
+
+       if (amdgpu_dpm_allow_xgmi_power_down(adev, true))
+               dev_warn(adev->dev, "Failed to allow XGMI power down");
+
+       if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_ALLOW))
+               dev_warn(adev->dev, "Failed to allow df cstate");
+
+       return ret;
+}
+
 struct amdgpu_ras_block_hw_ops  xgmi_ras_hw_ops = {
        .query_ras_error_count = amdgpu_xgmi_query_ras_error_count,
        .reset_ras_error_count = amdgpu_xgmi_reset_ras_error_count,
+       .ras_error_inject = amdgpu_ras_error_inject_xgmi,
 };
 
 struct amdgpu_xgmi_ras xgmi_ras = {