net: ena: Add a counter for driver's reset failures
author David Arinzon <darinzon@amazon.com>
Sun, 12 May 2024 13:46:33 +0000 (13:46 +0000)
committer Jakub Kicinski <kuba@kernel.org>
Mon, 13 May 2024 21:42:04 +0000 (14:42 -0700)
Add a reset_fail counter to the device statistics kept in the
ena_adapter struct in order to keep track of reset failures.
The counter is incremented every time either ena_restore_device()
or ena_destroy_device() fails, and is exposed with the other global
ethtool statistics.

Signed-off-by: Osama Abboud <osamaabb@amazon.com>
Signed-off-by: David Arinzon <darinzon@amazon.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Link: https://lore.kernel.org/r/20240512134637.25299-2-darinzon@amazon.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
drivers/net/ethernet/amazon/ena/ena_ethtool.c
drivers/net/ethernet/amazon/ena/ena_netdev.c
drivers/net/ethernet/amazon/ena/ena_netdev.h

index 0cb6cc1cef566221b50e29519682756a22c39496..28583db848e2239d041e21c386a97829c7427f2e 100644 (file)
@@ -49,6 +49,7 @@ static const struct ena_stats ena_stats_global_strings[] = {
        ENA_STAT_GLOBAL_ENTRY(interface_up),
        ENA_STAT_GLOBAL_ENTRY(interface_down),
        ENA_STAT_GLOBAL_ENTRY(admin_q_pause),
+       ENA_STAT_GLOBAL_ENTRY(reset_fail),
 };
 
 static const struct ena_stats ena_stats_eni_strings[] = {
index 28eaedaf713d5a997486b909c8af137e1e0c0f36..6a9d1b6d91c9a2f7717ea1cf5f12533a58d57812 100644 (file)
@@ -42,7 +42,7 @@ MODULE_DEVICE_TABLE(pci, ena_pci_tbl);
 
 static int ena_rss_init_default(struct ena_adapter *adapter);
 static void check_for_admin_com_state(struct ena_adapter *adapter);
-static void ena_destroy_device(struct ena_adapter *adapter, bool graceful);
+static int ena_destroy_device(struct ena_adapter *adapter, bool graceful);
 static int ena_restore_device(struct ena_adapter *adapter);
 
 static void ena_tx_timeout(struct net_device *dev, unsigned int txqueue)
@@ -3235,14 +3235,15 @@ err_disable_msix:
        return rc;
 }
 
-static void ena_destroy_device(struct ena_adapter *adapter, bool graceful)
+static int ena_destroy_device(struct ena_adapter *adapter, bool graceful)
 {
        struct net_device *netdev = adapter->netdev;
        struct ena_com_dev *ena_dev = adapter->ena_dev;
        bool dev_up;
+       int rc = 0;
 
        if (!test_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags))
-               return;
+               return 0;
 
        netif_carrier_off(netdev);
 
@@ -3260,7 +3261,7 @@ static void ena_destroy_device(struct ena_adapter *adapter, bool graceful)
         *  and device is up, ena_down() already reset the device.
         */
        if (!(test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags) && dev_up))
-               ena_com_dev_reset(adapter->ena_dev, adapter->reset_reason);
+               rc = ena_com_dev_reset(adapter->ena_dev, adapter->reset_reason);
 
        ena_free_mgmnt_irq(adapter);
 
@@ -3279,6 +3280,8 @@ static void ena_destroy_device(struct ena_adapter *adapter, bool graceful)
 
        clear_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
        clear_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags);
+
+       return rc;
 }
 
 static int ena_restore_device(struct ena_adapter *adapter)
@@ -3355,14 +3358,17 @@ err:
 
 static void ena_fw_reset_device(struct work_struct *work)
 {
+       int rc = 0;
+
        struct ena_adapter *adapter =
                container_of(work, struct ena_adapter, reset_task);
 
        rtnl_lock();
 
        if (likely(test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))) {
-               ena_destroy_device(adapter, false);
-               ena_restore_device(adapter);
+               rc |= ena_destroy_device(adapter, false);
+               rc |= ena_restore_device(adapter);
+               adapter->dev_stats.reset_fail += !!rc;
 
                dev_err(&adapter->pdev->dev, "Device reset completed successfully\n");
        }
index 6d2cc20210cc0b34a14c3ae9bf6dfa5ce13a8f41..d59509747d1aaceb53b695cdf45d08ae2509628f 100644 (file)
@@ -290,6 +290,7 @@ struct ena_stats_dev {
        u64 admin_q_pause;
        u64 rx_drops;
        u64 tx_drops;
+       u64 reset_fail;
 };
 
 enum ena_flags_t {