From: Tao Zhou Date: Thu, 18 Jan 2024 06:25:07 +0000 (+0800) Subject: drm/amdgpu: update check condition of query for ras page retire X-Git-Url: http://git.maquefel.me/?a=commitdiff_plain;h=1757bb7dab6de79c92b1d54b999a2ee1066acc1f;p=linux.git drm/amdgpu: update check condition of query for ras page retire Support page retirement handling in debug mode. v2: revert smu_v13_0_6_get_ecc_info directly. Signed-off-by: Tao Zhou Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c index 83983f0e8eb5f..a6cdb69897f2c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c @@ -91,11 +91,16 @@ static void amdgpu_umc_handle_bad_pages(struct amdgpu_device *adev, { struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + unsigned int error_query_mode; int ret = 0; unsigned long err_count; + + amdgpu_ras_get_error_query_mode(adev, &error_query_mode); + mutex_lock(&con->page_retirement_lock); ret = amdgpu_dpm_get_ecc_info(adev, (void *)&(con->umc_ecc)); - if (ret == -EOPNOTSUPP) { + if (ret == -EOPNOTSUPP && + error_query_mode == AMDGPU_RAS_DIRECT_ERROR_QUERY) { if (adev->umc.ras && adev->umc.ras->ras_block.hw_ops && adev->umc.ras->ras_block.hw_ops->query_ras_error_count) adev->umc.ras->ras_block.hw_ops->query_ras_error_count(adev, ras_error_status); @@ -119,7 +124,8 @@ static void amdgpu_umc_handle_bad_pages(struct amdgpu_device *adev, */ adev->umc.ras->ras_block.hw_ops->query_ras_error_address(adev, ras_error_status); } - } else if (!ret) { + } else if (error_query_mode == AMDGPU_RAS_FIRMWARE_ERROR_QUERY || + (!ret && error_query_mode == AMDGPU_RAS_DIRECT_ERROR_QUERY)) { if (adev->umc.ras && adev->umc.ras->ecc_info_query_ras_error_count) adev->umc.ras->ecc_info_query_ras_error_count(adev, ras_error_status);