Add interface to check mca umc status.
Signed-off-by: YiPeng Chai <YiPeng.Chai@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
#include "umc/umc_6_7_0_offset.h"
#include "umc/umc_6_7_0_sh_mask.h"
+static bool amdgpu_mca_is_deferred_error(struct amdgpu_device *adev,
+ uint64_t mc_status)
+{
+ if (adev->umc.ras->check_ecc_err_status)
+ return adev->umc.ras->check_ecc_err_status(adev,
+ AMDGPU_MCA_ERROR_TYPE_DE, &mc_status);
+
+ return false;
+}
+
void amdgpu_mca_query_correctable_error_count(struct amdgpu_device *adev,
uint64_t mc_status_addr,
unsigned long *error_count)
amdgpu_ras_error_statistic_ue_count(err_data,
&mcm_info, &err_addr, (uint64_t)count);
else {
- if (!!(MCA_REG__STATUS__DEFERRED(entry->regs[MCA_REG_IDX_STATUS])))
+ if (amdgpu_mca_is_deferred_error(adev, entry->regs[MCA_REG_IDX_STATUS]))
amdgpu_ras_error_statistic_de_count(err_data,
&mcm_info, &err_addr, (uint64_t)count);
else
enum amdgpu_mca_error_type {
AMDGPU_MCA_ERROR_TYPE_UE = 0,
AMDGPU_MCA_ERROR_TYPE_CE,
+ AMDGPU_MCA_ERROR_TYPE_DE,
};
struct amdgpu_mca_ras_block {
#ifndef __AMDGPU_UMC_H__
#define __AMDGPU_UMC_H__
#include "amdgpu_ras.h"
-
+#include "amdgpu_mca.h"
/*
* (addr / 256) * 4096, the higher 26 bits in ErrorAddr
* is the index of 4KB block
void *ras_error_status);
void (*ecc_info_query_ras_error_address)(struct amdgpu_device *adev,
void *ras_error_status);
+ bool (*check_ecc_err_status)(struct amdgpu_device *adev,
+ enum amdgpu_mca_error_type type, void *ras_error_status);
/* support different eeprom table version for different asic */
void (*set_eeprom_table_version)(struct amdgpu_ras_eeprom_table_header *hdr);
};
}
}
+static bool umc_v12_0_check_ecc_err_status(struct amdgpu_device *adev,
+ enum amdgpu_mca_error_type type, void *ras_error_status)
+{
+ uint64_t mc_umc_status = *(uint64_t *)ras_error_status;
+
+ switch (type) {
+ case AMDGPU_MCA_ERROR_TYPE_UE:
+ return umc_v12_0_is_uncorrectable_error(adev, mc_umc_status);
+ case AMDGPU_MCA_ERROR_TYPE_CE:
+ return umc_v12_0_is_correctable_error(adev, mc_umc_status);
+ case AMDGPU_MCA_ERROR_TYPE_DE:
+ return umc_v12_0_is_deferred_error(adev, mc_umc_status);
+ default:
+ return false;
+ }
+
+ return false;
+}
+
static void umc_v12_0_err_cnt_init(struct amdgpu_device *adev)
{
amdgpu_umc_loop_channels(adev,
.query_ras_poison_mode = umc_v12_0_query_ras_poison_mode,
.ecc_info_query_ras_error_count = umc_v12_0_ecc_info_query_ras_error_count,
.ecc_info_query_ras_error_address = umc_v12_0_ecc_info_query_ras_error_address,
+ .check_ecc_err_status = umc_v12_0_check_ecc_err_status,
};
return 0;
}
- if ((type == AMDGPU_MCA_ERROR_TYPE_UE && umc_v12_0_is_uncorrectable_error(adev, status0)) ||
- (type == AMDGPU_MCA_ERROR_TYPE_CE && (umc_v12_0_is_correctable_error(adev, status0) ||
- umc_v12_0_is_deferred_error(adev, status0))))
+ if (umc_v12_0_is_deferred_error(adev, status0) ||
+ umc_v12_0_is_uncorrectable_error(adev, status0) ||
+ umc_v12_0_is_correctable_error(adev, status0))
*count = 1;
return 0;