edac,ghes,cper: Add Row Extension to Memory Error Record
author Alex Kluver <alex.kluver@hpe.com>
Wed, 19 Aug 2020 14:35:43 +0000 (09:35 -0500)
committer Ard Biesheuvel <ardb@kernel.org>
Thu, 17 Sep 2020 07:19:52 +0000 (10:19 +0300)
Memory errors could be printed with incorrect row values since the DIMM
size has outgrown the 16 bit row field in the CPER structure. UEFI
Specification Version 2.8 has increased the size of row by allowing it to
use the first 2 bits from a previously reserved space within the structure.

When needed, add the extension bits to the row value printed.

Based on UEFI 2.8 Table 299. Memory Error Record

Signed-off-by: Alex Kluver <alex.kluver@hpe.com>
Tested-by: Russ Anderson <russ.anderson@hpe.com>
Reviewed-by: Steve Wahl <steve.wahl@hpe.com>
Reviewed-by: Kyle Meyer <kyle.meyer@hpe.com>
Acked-by: Borislav Petkov <bp@suse.de>
Link: https://lore.kernel.org/r/20200819143544.155096-2-alex.kluver@hpe.com
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
drivers/edac/ghes_edac.c
drivers/firmware/efi/cper.c
include/linux/cper.h

index da60c29468a7cf48575ed1d8000963732ceb5728..741e7609e5113f5188d33b2f72e55a00c5ed1c2a 100644 (file)
@@ -372,8 +372,12 @@ void ghes_edac_report_mem_error(int sev, struct cper_sec_mem_err *mem_err)
                p += sprintf(p, "rank:%d ", mem_err->rank);
        if (mem_err->validation_bits & CPER_MEM_VALID_BANK)
                p += sprintf(p, "bank:%d ", mem_err->bank);
-       if (mem_err->validation_bits & CPER_MEM_VALID_ROW)
-               p += sprintf(p, "row:%d ", mem_err->row);
+       if (mem_err->validation_bits & (CPER_MEM_VALID_ROW | CPER_MEM_VALID_ROW_EXT)) {
+               u32 row = mem_err->row;
+
+               row |= cper_get_mem_extension(mem_err->validation_bits, mem_err->extended);
+               p += sprintf(p, "row:%d ", row);
+       }
        if (mem_err->validation_bits & CPER_MEM_VALID_COLUMN)
                p += sprintf(p, "col:%d ", mem_err->column);
        if (mem_err->validation_bits & CPER_MEM_VALID_BIT_POSITION)
index f564e15fbc7e6a193af171ad816bff26b0da2173..a60acd17bcaa0a5f0409c22348497802434e55da 100644 (file)
@@ -234,8 +234,12 @@ static int cper_mem_err_location(struct cper_mem_err_compact *mem, char *msg)
                n += scnprintf(msg + n, len - n, "bank: %d ", mem->bank);
        if (mem->validation_bits & CPER_MEM_VALID_DEVICE)
                n += scnprintf(msg + n, len - n, "device: %d ", mem->device);
-       if (mem->validation_bits & CPER_MEM_VALID_ROW)
-               n += scnprintf(msg + n, len - n, "row: %d ", mem->row);
+       if (mem->validation_bits & (CPER_MEM_VALID_ROW | CPER_MEM_VALID_ROW_EXT)) {
+               u32 row = mem->row;
+
+               row |= cper_get_mem_extension(mem->validation_bits, mem->extended);
+               n += scnprintf(msg + n, len - n, "row: %d ", row);
+       }
        if (mem->validation_bits & CPER_MEM_VALID_COLUMN)
                n += scnprintf(msg + n, len - n, "column: %d ", mem->column);
        if (mem->validation_bits & CPER_MEM_VALID_BIT_POSITION)
@@ -292,6 +296,7 @@ void cper_mem_err_pack(const struct cper_sec_mem_err *mem,
        cmem->requestor_id = mem->requestor_id;
        cmem->responder_id = mem->responder_id;
        cmem->target_id = mem->target_id;
+       cmem->extended = mem->extended;
        cmem->rank = mem->rank;
        cmem->mem_array_handle = mem->mem_array_handle;
        cmem->mem_dev_handle = mem->mem_dev_handle;
index 8537e9282a658a5edcb06c928aff0b42e60f7284..bd2d8a77a78422ac37e1e98eee79dd774f5b7ed8 100644 (file)
@@ -230,6 +230,10 @@ enum {
 #define CPER_MEM_VALID_RANK_NUMBER             0x8000
 #define CPER_MEM_VALID_CARD_HANDLE             0x10000
 #define CPER_MEM_VALID_MODULE_HANDLE           0x20000
+#define CPER_MEM_VALID_ROW_EXT                 0x40000
+
+#define CPER_MEM_EXT_ROW_MASK                  0x3
+#define CPER_MEM_EXT_ROW_SHIFT                 16
 
 #define CPER_PCIE_VALID_PORT_TYPE              0x0001
 #define CPER_PCIE_VALID_VERSION                        0x0002
@@ -443,7 +447,7 @@ struct cper_sec_mem_err_old {
        u8      error_type;
 };
 
-/* Memory Error Section (UEFI >= v2.3), UEFI v2.7 sec N.2.5 */
+/* Memory Error Section (UEFI >= v2.3), UEFI v2.8 sec N.2.5 */
 struct cper_sec_mem_err {
        u64     validation_bits;
        u64     error_status;
@@ -461,7 +465,7 @@ struct cper_sec_mem_err {
        u64     responder_id;
        u64     target_id;
        u8      error_type;
-       u8      reserved;
+       u8      extended;
        u16     rank;
        u16     mem_array_handle;       /* "card handle" in UEFI 2.4 */
        u16     mem_dev_handle;         /* "module handle" in UEFI 2.4 */
@@ -483,8 +487,16 @@ struct cper_mem_err_compact {
        u16     rank;
        u16     mem_array_handle;
        u16     mem_dev_handle;
+       u8      extended;
 };
 
+static inline u32 cper_get_mem_extension(u64 mem_valid, u8 mem_extended)
+{
+       /* Row bits 17:16 come from 'extended'; they count only when flagged valid. */
+       return (mem_valid & CPER_MEM_VALID_ROW_EXT) ?
+               ((mem_extended & CPER_MEM_EXT_ROW_MASK) << CPER_MEM_EXT_ROW_SHIFT) : 0;
+}
+
 /* PCI Express Error Section, UEFI v2.7 sec N.2.7 */
 struct cper_sec_pcie {
        u64             validation_bits;