drm/amdgpu: add interface to update umc v12_0 ecc status
author YiPeng Chai <YiPeng.Chai@amd.com>
Tue, 19 Mar 2024 02:14:16 +0000 (10:14 +0800)
committer Alex Deucher <alexander.deucher@amd.com>
Fri, 26 Apr 2024 21:22:41 +0000 (17:22 -0400)
Add interface to update umc v12_0 ecc status.

Signed-off-by: YiPeng Chai <YiPeng.Chai@amd.com>
Reviewed-by: Tao Zhou <tao.zhou1@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
drivers/gpu/drm/amd/amdgpu/umc_v12_0.c
drivers/gpu/drm/amd/amdgpu/umc_v12_0.h
drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c

index 66186e28ab9214a89d1dfaea9373bdd3da973e09..63b5723e26ea5ad3ad55f3e43d419b904d48a936 100644 (file)
@@ -4215,6 +4215,8 @@ void amdgpu_ras_add_mca_err_addr(struct ras_err_info *err_info, struct ras_err_a
 {
        struct ras_err_addr *mca_err_addr;
 
+       /* This function will be retired. */
+       return;
        mca_err_addr = kzalloc(sizeof(*mca_err_addr), GFP_KERNEL);
        if (!mca_err_addr)
                return;
index f486510fc94c7d5c746aaad9920bc4edc941b6cf..7006a57277ef6a56393fda4e3a227269b0ea1f76 100644 (file)
@@ -437,3 +437,12 @@ int amdgpu_umc_loop_channels(struct amdgpu_device *adev,
 
        return 0;
 }
+
+/**
+ * amdgpu_umc_update_ecc_status - hand one MCA record to the UMC RAS backend
+ * @adev: amdgpu device whose umc.ras callbacks are consulted
+ * @status: MCA STATUS register value of the record
+ * @ipid: MCA IPID register value of the record
+ * @addr: MCA ADDR register value of the record
+ *
+ * Dispatches to the update_ecc_status callback of adev->umc.ras when one is
+ * installed; the arguments are passed through unchanged.
+ *
+ * Return: the callback's return value, or 0 when adev->umc.ras is NULL or
+ * the callback is not set.
+ */
+int amdgpu_umc_update_ecc_status(struct amdgpu_device *adev,
+                               uint64_t status, uint64_t ipid, uint64_t addr)
+{
+       /* Treat a missing UMC RAS block the same as a missing callback. */
+       if (adev->umc.ras && adev->umc.ras->update_ecc_status)
+               return adev->umc.ras->update_ecc_status(adev,
+                                       status, ipid, addr);
+       return 0;
+}
index 563b0249247ea951a1f44c2c8685aed163460c20..4f3834fa10a8f9c8a6cec244d4e7cf337afb9ef1 100644 (file)
@@ -66,6 +66,8 @@ struct amdgpu_umc_ras {
                                        void *ras_error_status);
        bool (*check_ecc_err_status)(struct amdgpu_device *adev,
                        enum amdgpu_mca_error_type type, void *ras_error_status);
+       int (*update_ecc_status)(struct amdgpu_device *adev,
+                       uint64_t status, uint64_t ipid, uint64_t addr);
 };
 
 struct amdgpu_umc_funcs {
@@ -122,4 +124,8 @@ int amdgpu_umc_loop_channels(struct amdgpu_device *adev,
 
 int amdgpu_umc_bad_page_polling_timeout(struct amdgpu_device *adev,
                        uint32_t reset, uint32_t timeout_ms);
+
+int amdgpu_umc_update_ecc_status(struct amdgpu_device *adev,
+                               uint64_t status, uint64_t ipid, uint64_t addr);
+
 #endif
index a0122b22eda467244043335ded59322b3bcf0401..81435533c4a7d7670dad90046bf9555080f22c18 100644 (file)
@@ -479,6 +479,29 @@ static int umc_v12_0_ras_late_init(struct amdgpu_device *adev, struct ras_common
        return 0;
 }
 
+/*
+ * Examine one MCA record and, when it is a deferred error reported by a
+ * UMC v12_0 bank, set de_updated in the RAS context's umc_ecc_log.
+ *
+ * @status and @ipid are the record's MCA STATUS and IPID values. @addr is
+ * part of the update_ecc_status callback signature but is not used here.
+ *
+ * Always returns 0, whether or not the record matched.
+ */
+static int umc_v12_0_update_ecc_status(struct amdgpu_device *adev,
+                       uint64_t status, uint64_t ipid, uint64_t addr)
+{
+       struct amdgpu_ras *ras_ctx = amdgpu_ras_get_context(adev);
+       uint16_t hw_id = REG_GET_FIELD(ipid, MCMP1_IPIDT0, HardwareID);
+       uint16_t mca_type = REG_GET_FIELD(ipid, MCMP1_IPIDT0, McaType);
+       bool from_umc = (hw_id == MCA_UMC_HWID_V12_0) &&
+                       (mca_type == MCA_UMC_MCATYPE_V12_0);
+
+       /*
+        * Short-circuit order matters: the deferred-error check is only
+        * evaluated for a non-zero status coming from a UMC v12_0 bank.
+        */
+       if (from_umc && status && umc_v12_0_is_deferred_error(adev, status))
+               ras_ctx->umc_ecc_log.de_updated = true;
+
+       return 0;
+}
+
 struct amdgpu_umc_ras umc_v12_0_ras = {
        .ras_block = {
                .hw_ops = &umc_v12_0_ras_hw_ops,
@@ -489,5 +512,6 @@ struct amdgpu_umc_ras umc_v12_0_ras = {
        .ecc_info_query_ras_error_count = umc_v12_0_ecc_info_query_ras_error_count,
        .ecc_info_query_ras_error_address = umc_v12_0_ecc_info_query_ras_error_address,
        .check_ecc_err_status = umc_v12_0_check_ecc_err_status,
+       .update_ecc_status = umc_v12_0_update_ecc_status,
 };
 
index 1d5f44dcffdd213b3bf7cc649c672da54700fd69..5c2d7e127608850e34404741cd27f347c812749c 100644 (file)
@@ -62,6 +62,9 @@
 /* row bits in SOC physical address */
 #define UMC_V12_0_PA_R13_BIT 35
 
+/* IPID HardwareID and McaType values matched to recognise a UMC v12_0 record */
+#define MCA_UMC_HWID_V12_0     0x96
+#define MCA_UMC_MCATYPE_V12_0  0x0
+
 #define MCA_IPID_LO_2_UMC_CH(_ipid_lo) (((((_ipid_lo) >> 20) & 0x1) * 4) + \
                        (((_ipid_lo) >> 12) & 0xF))
 #define MCA_IPID_LO_2_UMC_INST(_ipid_lo) (((_ipid_lo) >> 21) & 0x7)
index 7a9f85dc06b562738500199504fef2c8b57060ed..4d3eca2fc3f11ed36d2bc452c8dfab3fa6305593 100644 (file)
@@ -2716,6 +2716,11 @@ static int mca_umc_mca_get_err_count(const struct mca_ras_info *mca_ras, struct
            umc_v12_0_is_correctable_error(adev, status0))
                *count = (ext_error_code == 0) ? odecc_err_cnt : 1;
 
+       amdgpu_umc_update_ecc_status(adev,
+                       entry->regs[MCA_REG_IDX_STATUS],
+                       entry->regs[MCA_REG_IDX_IPID],
+                       entry->regs[MCA_REG_IDX_ADDR]);
+
        return 0;
 }