From: Stanley.Yang Date: Tue, 7 Dec 2021 06:28:58 +0000 (+0800) Subject: drm/amdgpu: skip umc ras error count harvest X-Git-Url: http://git.maquefel.me/?a=commitdiff_plain;h=cf63b702720d734cb4144440d72d4b2ac6c494f8;p=linux.git drm/amdgpu: skip umc ras error count harvest Remove the in-recovery state check from amdgpu_ras_get_ecc_info, and instead skip the UMC RAS error count harvest in amdgpu_ras_log_on_err_counter on Aldebaran (MP1 13.0.2). Signed-off-by: Stanley.Yang Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 1043d41b68077..a95d200adff96 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -897,11 +897,6 @@ static void amdgpu_ras_get_ecc_info(struct amdgpu_device *adev, struct ras_err_d struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); int ret = 0; - /* skip get ecc info during gpu recovery */ - if (atomic_read(&ras->in_recovery) == 1 && - adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 2)) - return; - /* * choosing right query method according to * whether smu support query error information @@ -1752,6 +1747,16 @@ static void amdgpu_ras_log_on_err_counter(struct amdgpu_device *adev) if (info.head.block == AMDGPU_RAS_BLOCK__PCIE_BIF) continue; + /* + * this is a workaround for aldebaran, skip send msg to + * smu to get ecc_info table due to smu handle get ecc + * info table failed temporarily. + * should be removed until smu fix handle ecc_info table. + */ + if ((info.head.block == AMDGPU_RAS_BLOCK__UMC) && + (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 2))) + continue; + amdgpu_ras_query_error_status(adev, &info); } }