pds_core: add attempts to fix broken PCI
author Shannon Nelson <shannon.nelson@amd.com>
Thu, 14 Sep 2023 22:32:00 +0000 (15:32 -0700)
committer David S. Miller <davem@davemloft.net>
Mon, 18 Sep 2023 08:28:22 +0000 (09:28 +0100)
If we see a 0xff value from a PCI register read, we know that
the PCI connection is broken, possibly by a low level reset that
didn't go through the nice pci_error_handlers path.

Make use of the PCI cleanup code that we already have from the
reset handlers and add some detection and attempted recovery
from a broken PCI connection.

Signed-off-by: Shannon Nelson <shannon.nelson@amd.com>
Reviewed-by: Brett Creeley <brett.creeley@amd.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
drivers/net/ethernet/amd/pds_core/core.c
drivers/net/ethernet/amd/pds_core/core.h
drivers/net/ethernet/amd/pds_core/main.c

index c1b6b5f7c0b522e79201f9f11dbb05155152c7ee..2a8643e167e16fd1fc18a0955a25f6d69c260518 100644 (file)
@@ -578,6 +578,18 @@ err_out:
        pdsc_teardown(pdsc, PDSC_TEARDOWN_RECOVERY);
 }
 
+static void pdsc_check_pci_health(struct pdsc *pdsc)
+{
+       u8 fw_status = ioread8(&pdsc->info_regs->fw_status);
+
+       /* PCI is considered broken only if fw_status reads PDS_RC_BAD_PCI */
+       if (fw_status != PDS_RC_BAD_PCI)
+               return;
+
+       pdsc_reset_prepare(pdsc->pdev);
+       pdsc_reset_done(pdsc->pdev);
+}
+
 void pdsc_health_thread(struct work_struct *work)
 {
        struct pdsc *pdsc = container_of(work, struct pdsc, health_work);
@@ -604,6 +616,8 @@ void pdsc_health_thread(struct work_struct *work)
                        pdsc_fw_down(pdsc);
        }
 
+       pdsc_check_pci_health(pdsc);
+
        pdsc->fw_generation = pdsc->fw_status & PDS_CORE_FW_STS_F_GENERATION;
 
 out_unlock:
index 19c1957167da2abf61c15a4761a77d95bc812cc0..f3a7deda997245bd3c80070889981f35c01dcd28 100644 (file)
@@ -283,6 +283,9 @@ int pdsc_devcmd_reset(struct pdsc *pdsc);
 int pdsc_dev_reinit(struct pdsc *pdsc);
 int pdsc_dev_init(struct pdsc *pdsc);
 
+void pdsc_reset_prepare(struct pci_dev *pdev);
+void pdsc_reset_done(struct pci_dev *pdev);
+
 int pdsc_intr_alloc(struct pdsc *pdsc, char *name,
                    irq_handler_t handler, void *data);
 void pdsc_intr_free(struct pdsc *pdsc, int index);
index 4c7f982c12a1d2d87dd52adc5bda4480ca1dde93..3080898d7b95b0122701cacb8a15796ed2cc2dcb 100644 (file)
@@ -445,7 +445,7 @@ static void pdsc_remove(struct pci_dev *pdev)
        devlink_free(dl);
 }
 
-static void pdsc_reset_prepare(struct pci_dev *pdev)
+void pdsc_reset_prepare(struct pci_dev *pdev)
 {
        struct pdsc *pdsc = pci_get_drvdata(pdev);
 
@@ -457,7 +457,7 @@ static void pdsc_reset_prepare(struct pci_dev *pdev)
        pci_disable_device(pdev);
 }
 
-static void pdsc_reset_done(struct pci_dev *pdev)
+void pdsc_reset_done(struct pci_dev *pdev)
 {
        struct pdsc *pdsc = pci_get_drvdata(pdev);
        struct device *dev = pdsc->dev;