From: Yazen Ghannam Date: Mon, 15 May 2023 11:35:34 +0000 (+0000) Subject: x86/MCE/AMD, EDAC/mce_amd: Decode UMC_V2 ECC errors X-Git-Url: http://git.maquefel.me/?a=commitdiff_plain;h=c35977b00fa76ce5f3fe9afdb9cffda970c943d5;p=linux.git x86/MCE/AMD, EDAC/mce_amd: Decode UMC_V2 ECC errors The MI200 (Aldebaran) series of devices introduced a new SMCA bank type for Unified Memory Controllers. The MCE subsystem already has support for this new type. The MCE decoder module will decode the common MCA error information for the new bank type, but it will not pass the information to the AMD64 EDAC module for detailed memory error decoding. Have the MCE decoder module recognize the new bank type as an SMCA UMC memory error and pass the MCA information to AMD64 EDAC. Signed-off-by: Yazen Ghannam Co-developed-by: Muralidhara M K Signed-off-by: Muralidhara M K Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20230515113537.1052146-3-muralimk@amd.com --- diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c index 0b971f9740964..5e74610b39e74 100644 --- a/arch/x86/kernel/cpu/mce/amd.c +++ b/arch/x86/kernel/cpu/mce/amd.c @@ -715,11 +715,13 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c) bool amd_mce_is_memory_error(struct mce *m) { + enum smca_bank_types bank_type; /* ErrCodeExt[20:16] */ u8 xec = (m->status >> 16) & 0x1f; + bank_type = smca_get_bank_type(m->extcpu, m->bank); if (mce_flags.smca) - return smca_get_bank_type(m->extcpu, m->bank) == SMCA_UMC && xec == 0x0; + return (bank_type == SMCA_UMC || bank_type == SMCA_UMC_V2) && xec == 0x0; return m->bank == 4 && xec == 0x8; } @@ -1050,7 +1052,7 @@ static const char *get_name(unsigned int cpu, unsigned int bank, struct threshol if (bank_type >= N_SMCA_BANK_TYPES) return NULL; - if (b && bank_type == SMCA_UMC) { + if (b && (bank_type == SMCA_UMC || bank_type == SMCA_UMC_V2)) { if (b->block < ARRAY_SIZE(smca_umc_block_names)) return smca_umc_block_names[b->block]; return NULL; diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c index cc5c63feb26a2..9215c06783df5 100644 --- a/drivers/edac/mce_amd.c +++ b/drivers/edac/mce_amd.c @@ -1186,7 +1186,8 @@ static void decode_smca_error(struct mce *m) if (xec < smca_mce_descs[bank_type].num_descs) pr_cont(", %s.\n", smca_mce_descs[bank_type].descs[xec]); - if (bank_type == SMCA_UMC && xec == 0 && decode_dram_ecc) + if ((bank_type == SMCA_UMC || bank_type == SMCA_UMC_V2) && + xec == 0 && decode_dram_ecc) decode_dram_ecc(topology_die_id(m->extcpu), m); }