cxl/pci: Add (hopeful) error handling support
author Dan Williams <dan.j.williams@intel.com>
Tue, 29 Nov 2022 17:48:59 +0000 (10:48 -0700)
committer Dan Williams <dan.j.williams@intel.com>
Sat, 3 Dec 2022 21:40:17 +0000 (13:40 -0800)
Add nominal error handling that tears down CXL.mem in response to error
notifications that imply a device reset. Given some CXL.mem may be
operating as System RAM, there is a high likelihood that these error
events are fatal. However, if the system survives the notification the
expectation is that the driver behavior is equivalent to a hot-unplug
and re-plug of an endpoint.

Note that this does not change the mask values from the default. That
awaits CXL _OSC support to determine whether platform firmware is in
control of the mask registers.

Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
Link: https://lore.kernel.org/r/166974413966.1608150.15522782911404473932.stgit@djiang5-desk3.ch.intel.com
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
drivers/cxl/core/memdev.c
drivers/cxl/cxl.h
drivers/cxl/cxlmem.h
drivers/cxl/pci.c

index 20ce488a77540c363c8af99ec36e9dfef586e607..a74a93310d26d1fa2cd369248955c50801135531 100644 (file)
@@ -344,6 +344,7 @@ struct cxl_memdev *devm_cxl_add_memdev(struct cxl_dev_state *cxlds)
         * needed as this is ordered with cdev_add() publishing the device.
         */
        cxlmd->cxlds = cxlds;
+       cxlds->cxlmd = cxlmd;
 
        cdev = &cxlmd->cdev;
        rc = cdev_device_add(cdev, dev);
index e533c5b5e4b01e4efcfb04b103a986b6b8dbd3bd..2f3951b76e2d998bec5f398df469a9baa3c54886 100644 (file)
@@ -132,6 +132,7 @@ static inline int ways_to_cxl(unsigned int ways, u8 *iw)
 #define CXL_RAS_CORRECTABLE_MASK_OFFSET 0x10
 #define   CXL_RAS_CORRECTABLE_MASK_MASK GENMASK(6, 0)
 #define CXL_RAS_CAP_CONTROL_OFFSET 0x14
+#define CXL_RAS_CAP_CONTROL_FE_MASK GENMASK(5, 0)
 #define CXL_RAS_HEADER_LOG_OFFSET 0x18
 #define CXL_RAS_CAPABILITY_LENGTH 0x58
 
index 88e3a8e54b6a4e4a59d173dd9a2d29575e33b5c5..b3117fd67f42099a6b5835b1c210305b1ac81487 100644 (file)
@@ -186,6 +186,7 @@ struct cxl_endpoint_dvsec_info {
  * Currently only memory devices are represented.
  *
  * @dev: The device associated with this CXL state
+ * @cxlmd: The device representing the CXL.mem capabilities of @dev
  * @regs: Parsed register blocks
  * @cxl_dvsec: Offset to the PCIe device DVSEC
  * @payload_size: Size of space for payload
@@ -218,6 +219,7 @@ struct cxl_endpoint_dvsec_info {
  */
 struct cxl_dev_state {
        struct device *dev;
+       struct cxl_memdev *cxlmd;
 
        struct cxl_regs regs;
        int cxl_dvsec;
index 8b817138140a1742f82d6763779dd17b26af7ab3..a7c2d733e5ae8fe633e02066da876e1c6ed43032 100644 (file)
@@ -9,6 +9,7 @@
 #include <linux/list.h>
 #include <linux/pci.h>
 #include <linux/pci-doe.h>
+#include <linux/aer.h>
 #include <linux/io.h>
 #include "cxlmem.h"
 #include "cxlpci.h"
@@ -399,6 +400,11 @@ static void devm_cxl_pci_create_doe(struct cxl_dev_state *cxlds)
        }
 }
 
+/*
+ * Undo pci_enable_pcie_error_reporting(). Takes an untyped pointer so it
+ * can be registered as a devm action; @pdev is the struct pci_dev.
+ */
+static void disable_aer(void *pdev)
+{
+       struct pci_dev *pci_dev = pdev;
+
+       pci_disable_pcie_error_reporting(pci_dev);
+}
+
 static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 {
        struct cxl_register_map map;
@@ -420,6 +426,7 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
        cxlds = cxl_dev_state_create(&pdev->dev);
        if (IS_ERR(cxlds))
                return PTR_ERR(cxlds);
+       pci_set_drvdata(pdev, cxlds);
 
        cxlds->serial = pci_get_dsn(pdev);
        cxlds->cxl_dvsec = pci_find_dvsec_capability(
@@ -474,6 +481,14 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
        if (IS_ERR(cxlmd))
                return PTR_ERR(cxlmd);
 
+       if (cxlds->regs.ras) {
+               pci_enable_pcie_error_reporting(pdev);
+               rc = devm_add_action_or_reset(&pdev->dev, disable_aer, pdev);
+               if (rc)
+                       return rc;
+       }
+       pci_save_state(pdev);
+
        if (resource_size(&cxlds->pmem_res) && IS_ENABLED(CONFIG_CXL_PMEM))
                rc = devm_cxl_add_nvdimm(&pdev->dev, cxlmd);
 
@@ -487,10 +502,132 @@ static const struct pci_device_id cxl_mem_pci_tbl[] = {
 };
 MODULE_DEVICE_TABLE(pci, cxl_mem_pci_tbl);
 
+/*
+ * Copy the RAS capability header log (CXL spec rev3.0 8.2.4.16.1) into
+ * @log, one 32-bit register read at a time. @log must have room for
+ * CXL_HEADERLOG_SIZE bytes.
+ */
+static void header_log_copy(struct cxl_dev_state *cxlds, u32 *log)
+{
+       void __iomem *base = cxlds->regs.ras + CXL_RAS_HEADER_LOG_OFFSET;
+       int nr_words = CXL_HEADERLOG_SIZE / sizeof(u32);
+       int idx;
+
+       for (idx = 0; idx < nr_words; idx++)
+               log[idx] = readl(base + idx * sizeof(u32));
+}
+
+/*
+ * Log the state of the RAS uncorrectable status registers and prepare them
+ * to log the next error status.
+ *
+ * Return: true if an uncorrectable error was reported (callers treat this
+ * as "reset needed"); false if no RAS capability is mapped or no
+ * uncorrectable status bit is set.
+ */
+static bool cxl_report_and_clear(struct cxl_dev_state *cxlds)
+{
+       struct cxl_memdev *cxlmd = cxlds->cxlmd;
+       struct device *dev = &cxlmd->dev;
+       u32 hl[CXL_HEADERLOG_SIZE_U32];
+       void __iomem *addr;
+       u32 status;
+       u32 fe;
+
+       if (!cxlds->regs.ras)
+               return false;
+
+       addr = cxlds->regs.ras + CXL_RAS_UNCORRECTABLE_STATUS_OFFSET;
+       /* readl() already yields a CPU-endian value; no further swap needed */
+       status = readl(addr);
+       if (!(status & CXL_RAS_UNCORRECTABLE_STATUS_MASK))
+               return false;
+
+       /* If multiple errors, log header points to first error from ctrl reg */
+       if (hweight32(status) > 1) {
+               /*
+                * Use a dedicated pointer for the control register so that
+                * addr keeps pointing at the uncorrectable status register
+                * for the clearing write below.
+                */
+               void __iomem *rcc_addr =
+                       cxlds->regs.ras + CXL_RAS_CAP_CONTROL_OFFSET;
+
+               fe = BIT(readl(rcc_addr) & CXL_RAS_CAP_CONTROL_FE_MASK);
+       } else {
+               fe = status;
+       }
+
+       header_log_copy(cxlds, hl);
+       trace_cxl_aer_uncorrectable_error(dev_name(dev), status, fe, hl);
+       /*
+        * Write the reported bits back to the uncorrectable status register
+        * (presumably write-1-to-clear -- confirm against the CXL RAS
+        * capability definition) so the next error can be logged.
+        */
+       writel(status & CXL_RAS_UNCORRECTABLE_STATUS_MASK, addr);
+
+       return true;
+}
+
+/*
+ * PCI error recovery ->error_detected() callback.
+ *
+ * Reports and clears any pending RAS uncorrectable status, then picks the
+ * recovery result from the channel @state:
+ *   - normal:       CAN_RECOVER when nothing uncorrectable was reported,
+ *                   otherwise unbind the memdev and ask for a reset
+ *   - frozen:       warn, unbind the memdev and ask for a reset
+ *   - perm_failure: warn and request disconnect
+ *   - anything else: ask for a reset
+ */
+static pci_ers_result_t cxl_error_detected(struct pci_dev *pdev,
+                                          pci_channel_state_t state)
+{
+       struct cxl_dev_state *cxlds = pci_get_drvdata(pdev);
+       struct device *memdev = &cxlds->cxlmd->dev;
+       bool uncorrectable;
+
+       /*
+        * A frozen channel indicates an impending reset which is fatal to
+        * CXL.mem operation, and will likely crash the system. On the off
+        * chance the situation is recoverable dump the status of the RAS
+        * capability registers and bounce the active state of the memdev.
+        */
+       uncorrectable = cxl_report_and_clear(cxlds);
+
+       if (state == pci_channel_io_perm_failure) {
+               dev_warn(&pdev->dev,
+                        "failure state error detected, request disconnect\n");
+               return PCI_ERS_RESULT_DISCONNECT;
+       }
+
+       if (state == pci_channel_io_frozen) {
+               dev_warn(&pdev->dev,
+                        "%s: frozen state error detected, disable CXL.mem\n",
+                        dev_name(memdev));
+               device_release_driver(memdev);
+               return PCI_ERS_RESULT_NEED_RESET;
+       }
+
+       if (state == pci_channel_io_normal) {
+               if (!uncorrectable)
+                       return PCI_ERS_RESULT_CAN_RECOVER;
+               device_release_driver(memdev);
+       }
+
+       /* Normal channel with an uncorrectable error, or an unknown state */
+       return PCI_ERS_RESULT_NEED_RESET;
+}
+
+/*
+ * PCI error recovery ->slot_reset() callback: restore the PCI state of
+ * @pdev and try to re-attach a driver to the memdev.
+ *
+ * Return: PCI_ERS_RESULT_RECOVERED when device_attach() reports success
+ * (> 0), PCI_ERS_RESULT_DISCONNECT otherwise.
+ */
+static pci_ers_result_t cxl_slot_reset(struct pci_dev *pdev)
+{
+       struct cxl_dev_state *cxlds = pci_get_drvdata(pdev);
+       struct device *memdev = &cxlds->cxlmd->dev;
+       int attached;
+
+       dev_info(&pdev->dev, "%s: restart CXL.mem after slot reset\n",
+                dev_name(memdev));
+       pci_restore_state(pdev);
+
+       attached = device_attach(memdev);
+       if (attached > 0)
+               return PCI_ERS_RESULT_RECOVERED;
+
+       return PCI_ERS_RESULT_DISCONNECT;
+}
+
+/*
+ * PCI error recovery ->resume() callback: log whether the memdev has a
+ * driver bound ("successful") or not ("failed").
+ */
+static void cxl_error_resume(struct pci_dev *pdev)
+{
+       struct cxl_dev_state *cxlds = pci_get_drvdata(pdev);
+       struct device *memdev = &cxlds->cxlmd->dev;
+       const char *outcome;
+
+       if (memdev->driver)
+               outcome = "successful";
+       else
+               outcome = "failed";
+
+       dev_info(&pdev->dev, "%s: error resume %s\n", dev_name(memdev),
+                outcome);
+}
+
+static const struct pci_error_handlers cxl_error_handlers = {
+       .error_detected = cxl_error_detected,
+       .slot_reset     = cxl_slot_reset,
+       .resume         = cxl_error_resume,
+};
+
 static struct pci_driver cxl_pci_driver = {
        .name                   = KBUILD_MODNAME,
        .id_table               = cxl_mem_pci_tbl,
        .probe                  = cxl_pci_probe,
+       .err_handler            = &cxl_error_handlers,
        .driver = {
                .probe_type     = PROBE_PREFER_ASYNCHRONOUS,
        },