drm/amdgpu: add aca deferred error type support
author Yang Wang <kevinyang.wang@amd.com>
Wed, 21 Feb 2024 07:07:30 +0000 (15:07 +0800)
committer Alex Deucher <alexander.deucher@amd.com>
Wed, 20 Mar 2024 17:38:15 +0000 (13:38 -0400)
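Add ACA deferred error (DE) type support: account deferred errors when
logging ACA error data, fetch them through the CE SMU bank type, query
them alongside UE/CE in the RAS error status helper, and report a "de"
count in the ACA sysfs node.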

Signed-off-by: Yang Wang <kevinyang.wang@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c

index a0b7f0d0c51304818ea08e725440d2f46367a39a..611c1751577a5e8ecb5385b3034ee7f784c1ab14 100644 (file)
@@ -434,6 +434,8 @@ static int aca_log_aca_error_data(struct aca_bank_error *bank_error, enum aca_er
                amdgpu_ras_error_statistic_ce_count(err_data, &mcm_info, NULL, count);
                break;
        case ACA_ERROR_TYPE_DEFERRED:
+               amdgpu_ras_error_statistic_de_count(err_data, &mcm_info, NULL, count);
+               break;
        default:
                break;
        }
@@ -474,6 +476,7 @@ static int __aca_get_error_data(struct amdgpu_device *adev, struct aca_handle *h
                smu_type = ACA_SMU_TYPE_UE;
                break;
        case ACA_ERROR_TYPE_CE:
+       case ACA_ERROR_TYPE_DEFERRED:
                smu_type = ACA_SMU_TYPE_CE;
                break;
        default:
index 3c6d532824f6143ea07d31f43fa6416f5af7a1a9..2bc02b80924673d67bc080a121042342306e0d3e 100644 (file)
@@ -1291,8 +1291,8 @@ ssize_t amdgpu_ras_aca_sysfs_read(struct device *dev, struct device_attribute *a
        if (amdgpu_ras_query_error_status(obj->adev, &info))
                return -EINVAL;
 
-       return sysfs_emit(buf, "%s: %lu\n%s: %lu\n", "ue", info.ue_count,
-                         "ce", info.ce_count);
+       return sysfs_emit(buf, "%s: %lu\n%s: %lu\n%s: %lu\n", "ue", info.ue_count,
+                         "ce", info.ce_count, "de", info.de_count);
 }
 
 static int amdgpu_ras_query_error_status_helper(struct amdgpu_device *adev,
@@ -1341,6 +1341,10 @@ static int amdgpu_ras_query_error_status_helper(struct amdgpu_device *adev,
                        ret = amdgpu_aca_log_ras_error_data(adev, blk, ACA_ERROR_TYPE_CE, err_data);
                        if (ret)
                                return ret;
+
+                       ret = amdgpu_aca_log_ras_error_data(adev, blk, ACA_ERROR_TYPE_DEFERRED, err_data);
+                       if (ret)
+                               return ret;
                } else {
                        /* FIXME: add code to check return value later */
                        amdgpu_mca_smu_log_ras_error(adev, blk, AMDGPU_MCA_ERROR_TYPE_UE, err_data, qctx);