bnxt_en: Do not call sleeping hwmon_notify_event() from NAPI
author: Kalesh AP <kalesh-anakkur.purayil@broadcom.com>
Fri, 20 Oct 2023 21:27:50 +0000 (14:27 -0700)
committer: David S. Miller <davem@davemloft.net>
Sun, 22 Oct 2023 10:41:45 +0000 (11:41 +0100)
Defer hwmon_notify_event() to the bnxt_sp_task() workqueue because
hwmon_notify_event() can try to acquire a mutex, as shown in the stack
trace below, and may therefore sleep, which is not allowed in NAPI
(softirq) context.  Modify bnxt_event_error_report() to return true if
we need to schedule bnxt_sp_task() to notify hwmon.

  __schedule+0x68/0x520
  hwmon_notify_event+0xe8/0x114
  schedule+0x60/0xe0
  schedule_preempt_disabled+0x28/0x40
  __mutex_lock.constprop.0+0x534/0x550
  __mutex_lock_slowpath+0x18/0x20
  mutex_lock+0x5c/0x70
  kobject_uevent_env+0x2f4/0x3d0
  kobject_uevent+0x10/0x20
  hwmon_notify_event+0x94/0x114
  bnxt_hwmon_notify_event+0x40/0x70 [bnxt_en]
  bnxt_event_error_report+0x260/0x290 [bnxt_en]
  bnxt_async_event_process.isra.0+0x250/0x850 [bnxt_en]
  bnxt_hwrm_handler.isra.0+0xc8/0x120 [bnxt_en]
  bnxt_poll_p5+0x150/0x350 [bnxt_en]
  __napi_poll+0x3c/0x210
  net_rx_action+0x308/0x3b0
  __do_softirq+0x120/0x3e0

Cc: Guenter Roeck <linux@roeck-us.net>
Fixes: a19b4801457b ("bnxt_en: Event handler for Thermal event")
Signed-off-by: Kalesh AP <kalesh-anakkur.purayil@broadcom.com>
Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
drivers/net/ethernet/broadcom/bnxt/bnxt.c
drivers/net/ethernet/broadcom/bnxt/bnxt.h
drivers/net/ethernet/broadcom/bnxt/bnxt_hwmon.c
drivers/net/ethernet/broadcom/bnxt/bnxt_hwmon.h

index 16eb7a7af9708a0b8409e0522e14b611c80f99e3..7837e22f237b8a0b883fe20d2a64bd898fd0d350 100644 (file)
@@ -2147,7 +2147,8 @@ static u16 bnxt_agg_ring_id_to_grp_idx(struct bnxt *bp, u16 ring_id)
          ASYNC_EVENT_CMPL_ERROR_REPORT_THERMAL_EVENT_DATA1_TRANSITION_DIR) ==\
         ASYNC_EVENT_CMPL_ERROR_REPORT_THERMAL_EVENT_DATA1_TRANSITION_DIR_INCREASING)
 
-static void bnxt_event_error_report(struct bnxt *bp, u32 data1, u32 data2)
+/* Return true if the workqueue has to be scheduled */
+static bool bnxt_event_error_report(struct bnxt *bp, u32 data1, u32 data2)
 {
        u32 err_type = BNXT_EVENT_ERROR_REPORT_TYPE(data1);
 
@@ -2182,7 +2183,7 @@ static void bnxt_event_error_report(struct bnxt *bp, u32 data1, u32 data2)
                        break;
                default:
                        netdev_err(bp->dev, "Unknown Thermal threshold type event\n");
-                       return;
+                       return false;
                }
                if (EVENT_DATA1_THERMAL_THRESHOLD_DIR_INCREASING(data1))
                        dir_str = "above";
@@ -2193,14 +2194,16 @@ static void bnxt_event_error_report(struct bnxt *bp, u32 data1, u32 data2)
                netdev_warn(bp->dev, "Temperature (In Celsius), Current: %lu, threshold: %lu\n",
                            BNXT_EVENT_THERMAL_CURRENT_TEMP(data2),
                            BNXT_EVENT_THERMAL_THRESHOLD_TEMP(data2));
-               bnxt_hwmon_notify_event(bp, type);
-               break;
+               bp->thermal_threshold_type = type;
+               set_bit(BNXT_THERMAL_THRESHOLD_SP_EVENT, &bp->sp_event);
+               return true;
        }
        default:
                netdev_err(bp->dev, "FW reported unknown error type %u\n",
                           err_type);
                break;
        }
+       return false;
 }
 
 #define BNXT_GET_EVENT_PORT(data)      \
@@ -2401,7 +2404,8 @@ static int bnxt_async_event_process(struct bnxt *bp,
                goto async_event_process_exit;
        }
        case ASYNC_EVENT_CMPL_EVENT_ID_ERROR_REPORT: {
-               bnxt_event_error_report(bp, data1, data2);
+               if (bnxt_event_error_report(bp, data1, data2))
+                       break;
                goto async_event_process_exit;
        }
        case ASYNC_EVENT_CMPL_EVENT_ID_PHC_UPDATE: {
@@ -12085,6 +12089,9 @@ static void bnxt_sp_task(struct work_struct *work)
        if (test_and_clear_bit(BNXT_FW_ECHO_REQUEST_SP_EVENT, &bp->sp_event))
                bnxt_fw_echo_reply(bp);
 
+       if (test_and_clear_bit(BNXT_THERMAL_THRESHOLD_SP_EVENT, &bp->sp_event))
+               bnxt_hwmon_notify_event(bp);
+
        /* These functions below will clear BNXT_STATE_IN_SP_TASK.  They
         * must be the last functions to be called before exiting.
         */
index 9ce0193798d41719226326a3c7c80af6869dec9c..80846c3ca9fc4ed1141efac3c03c5c31540e75f1 100644 (file)
@@ -2094,6 +2094,7 @@ struct bnxt {
 #define BNXT_FW_RESET_NOTIFY_SP_EVENT  18
 #define BNXT_FW_EXCEPTION_SP_EVENT     19
 #define BNXT_LINK_CFG_CHANGE_SP_EVENT  21
+#define BNXT_THERMAL_THRESHOLD_SP_EVENT        22
 #define BNXT_FW_ECHO_REQUEST_SP_EVENT  23
 
        struct delayed_work     fw_reset_task;
@@ -2196,6 +2197,7 @@ struct bnxt {
        u8                      fatal_thresh_temp;
        u8                      shutdown_thresh_temp;
 #endif
+       u32                     thermal_threshold_type;
        enum board_idx          board_idx;
 };
 
index e48094043c3b9bbad8acc4249d36604604bc6ce0..669d24ba0e87f697960872dcbf829b4b6e1e78b8 100644 (file)
 #include "bnxt_hwrm.h"
 #include "bnxt_hwmon.h"
 
-void bnxt_hwmon_notify_event(struct bnxt *bp, u32 type)
+void bnxt_hwmon_notify_event(struct bnxt *bp)
 {
        u32 attr;
 
        if (!bp->hwmon_dev)
                return;
 
-       switch (type) {
+       switch (bp->thermal_threshold_type) {
        case ASYNC_EVENT_CMPL_ERROR_REPORT_THERMAL_EVENT_DATA1_THRESHOLD_TYPE_WARN:
                attr = hwmon_temp_max_alarm;
                break;
index 76d9f599ebc0496f6f5928c8f7004e0e84b893d5..de54a562e06a4fb1dd661f112f4e751d24e3a492 100644 (file)
 #define BNXT_HWMON_H
 
 #ifdef CONFIG_BNXT_HWMON
-void bnxt_hwmon_notify_event(struct bnxt *bp, u32 type);
+void bnxt_hwmon_notify_event(struct bnxt *bp);
 void bnxt_hwmon_uninit(struct bnxt *bp);
 void bnxt_hwmon_init(struct bnxt *bp);
 #else
-static inline void bnxt_hwmon_notify_event(struct bnxt *bp, u32 type)
+static inline void bnxt_hwmon_notify_event(struct bnxt *bp)
 {
 }