From b3ece3a6a231e51c262d67fd47c017970a7dee44 Mon Sep 17 00:00:00 2001 From: Muralidhara M K Date: Fri, 27 Jan 2023 17:04:19 +0000 Subject: [PATCH] EDAC/amd64: Add get_err_info() to pvt->ops GPU Nodes will use a different method to determine the chip select and channel of an error. A function pointer should be used rather than introduce another branching condition. Prepare for this by adding get_err_info() to pvt->ops. This function is only called from the modern code path, so a legacy function is not defined. Make sure to call this after MCA_STATUS[SyndV] is checked, since the csrow value is found in MCA_SYND. [ Yazen: rebased/reworked patch and reworded commit message. ] Signed-off-by: Muralidhara M K Co-developed-by: Naveen Krishna Chatradhi Signed-off-by: Naveen Krishna Chatradhi Co-developed-by: Yazen Ghannam Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20230127170419.1824692-23-yazen.ghannam@amd.com --- drivers/edac/amd64_edac.c | 13 ++++++++----- drivers/edac/amd64_edac.h | 1 + 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 7446b4802a620..85b460b678384 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -2974,10 +2974,14 @@ static inline void decode_bus_error(int node_id, struct mce *m) * Currently, we can derive the channel number by looking at the 6th nibble in * the instance_id. For example, instance_id=0xYXXXXX where Y is the channel * number. + * + * For DRAM ECC errors, the Chip Select number is given in bits [2:0] of + * the MCA_SYND[ErrorInformation] field. */ -static int find_umc_channel(struct mce *m) +static void umc_get_err_info(struct mce *m, struct err_info *err) { - return (m->ipid & GENMASK(31, 0)) >> 20; + err->channel = (m->ipid & GENMASK(31, 0)) >> 20; + err->csrow = m->synd & 0x7; } static void decode_umc_error(int node_id, struct mce *m) @@ -2999,8 +3003,6 @@ static void decode_umc_error(int node_id, struct mce *m) if (m->status & MCI_STATUS_DEFERRED) ecc_type = 3; - err.channel = find_umc_channel(m); - if (!(m->status & MCI_STATUS_SYNDV)) { err.err_code = ERR_SYND; goto log_error; @@ -3015,7 +3017,7 @@ static void decode_umc_error(int node_id, struct mce *m) err.err_code = ERR_CHANNEL; } - err.csrow = m->synd & 0x7; + pvt->ops->get_err_info(m, &err); if (umc_normaddr_to_sysaddr(m->addr, pvt->mc_node_id, err.channel, &sys_addr)) { err.err_code = ERR_NORM_ADDR; @@ -3685,6 +3687,7 @@ static struct low_ops umc_ops = { .ecc_enabled = umc_ecc_enabled, .setup_mci_misc_attrs = umc_setup_mci_misc_attrs, .dump_misc_regs = umc_dump_misc_regs, + .get_err_info = umc_get_err_info, }; /* Use Family 16h versions for defaults and adjust as needed below. */ diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h index 1c64fd4a14b16..e84fe0d4120a2 100644 --- a/drivers/edac/amd64_edac.h +++ b/drivers/edac/amd64_edac.h @@ -470,6 +470,7 @@ struct low_ops { bool (*ecc_enabled)(struct amd64_pvt *pvt); void (*setup_mci_misc_attrs)(struct mem_ctl_info *mci); void (*dump_misc_regs)(struct amd64_pvt *pvt); + void (*get_err_info)(struct mce *m, struct err_info *err); }; int __amd64_read_pci_cfg_dword(struct pci_dev *pdev, int offset, -- 2.30.2