bnxt_en: Event handler for Thermal event
author Kalesh AP <kalesh-anakkur.purayil@broadcom.com>
Wed, 27 Sep 2023 03:57:32 +0000 (20:57 -0700)
committer David S. Miller <davem@davemloft.net>
Wed, 4 Oct 2023 10:23:01 +0000 (11:23 +0100)
Newer FW will send a new async event when it detects that
the chip's temperature has crossed the configured threshold value.
The driver will now notify hwmon and will log a warning message.

Link: https://lore.kernel.org/netdev/20230815045658.80494-13-michael.chan@broadcom.com/
Cc: Jean Delvare <jdelvare@suse.com>
Cc: Guenter Roeck <linux@roeck-us.net>
Cc: linux-hwmon@vger.kernel.org
Signed-off-by: Kalesh AP <kalesh-anakkur.purayil@broadcom.com>
Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Acked-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
drivers/net/ethernet/broadcom/bnxt/bnxt.c
drivers/net/ethernet/broadcom/bnxt/bnxt_hwmon.c
drivers/net/ethernet/broadcom/bnxt/bnxt_hwmon.h

index b83f8de0a015306844db1cd8d104e232442b408d..7104237272de42b45a0297136a4c484561058f9c 100644 (file)
@@ -2129,6 +2129,24 @@ static u16 bnxt_agg_ring_id_to_grp_idx(struct bnxt *bp, u16 ring_id)
        return INVALID_HW_RING_ID;
 }
 
+/* Current chip temperature field of a thermal event's data2 word. */
+#define BNXT_EVENT_THERMAL_CURRENT_TEMP(data2)                         \
+       ((data2) & ASYNC_EVENT_CMPL_ERROR_REPORT_THERMAL_EVENT_DATA2_CURRENT_TEMP_MASK)
+
+/* Threshold temperature field of data2: mask it out, then shift it down. */
+#define BNXT_EVENT_THERMAL_THRESHOLD_TEMP(data2)                       \
+       (((data2) & ASYNC_EVENT_CMPL_ERROR_REPORT_THERMAL_EVENT_DATA2_THRESHOLD_TEMP_MASK) >> \
+        ASYNC_EVENT_CMPL_ERROR_REPORT_THERMAL_EVENT_DATA2_THRESHOLD_TEMP_SFT)
+
+/* Threshold type field of data1 (compared against ..._THRESHOLD_TYPE_* values). */
+#define EVENT_DATA1_THERMAL_THRESHOLD_TYPE(data1)                      \
+       ((data1) & ASYNC_EVENT_CMPL_ERROR_REPORT_THERMAL_EVENT_DATA1_THRESHOLD_TYPE_MASK)
+
+/* Non-zero when the transition direction bits of data1 equal "increasing". */
+#define EVENT_DATA1_THERMAL_THRESHOLD_DIR_INCREASING(data1)            \
+       (((data1) & ASYNC_EVENT_CMPL_ERROR_REPORT_THERMAL_EVENT_DATA1_TRANSITION_DIR) == \
+        ASYNC_EVENT_CMPL_ERROR_REPORT_THERMAL_EVENT_DATA1_TRANSITION_DIR_INCREASING)
+
 static void bnxt_event_error_report(struct bnxt *bp, u32 data1, u32 data2)
 {
        u32 err_type = BNXT_EVENT_ERROR_REPORT_TYPE(data1);
@@ -2144,6 +2162,40 @@ static void bnxt_event_error_report(struct bnxt *bp, u32 data1, u32 data2)
        case ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_DOORBELL_DROP_THRESHOLD:
                netdev_warn(bp->dev, "One or more MMIO doorbells dropped by the device!\n");
                break;
+       case ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_THERMAL_THRESHOLD: {
+               u32 type = EVENT_DATA1_THERMAL_THRESHOLD_TYPE(data1);
+               char *threshold_type;
+               char *dir_str;
+
+               switch (type) {
+               case ASYNC_EVENT_CMPL_ERROR_REPORT_THERMAL_EVENT_DATA1_THRESHOLD_TYPE_WARN:
+                       threshold_type = "warning";
+                       break;
+               case ASYNC_EVENT_CMPL_ERROR_REPORT_THERMAL_EVENT_DATA1_THRESHOLD_TYPE_CRITICAL:
+                       threshold_type = "critical";
+                       break;
+               case ASYNC_EVENT_CMPL_ERROR_REPORT_THERMAL_EVENT_DATA1_THRESHOLD_TYPE_FATAL:
+                       threshold_type = "fatal";
+                       break;
+               case ASYNC_EVENT_CMPL_ERROR_REPORT_THERMAL_EVENT_DATA1_THRESHOLD_TYPE_SHUTDOWN:
+                       threshold_type = "shutdown";
+                       break;
+               default:
+                       netdev_err(bp->dev, "Unknown Thermal threshold type event\n");
+                       return;
+               }
+               if (EVENT_DATA1_THERMAL_THRESHOLD_DIR_INCREASING(data1))
+                       dir_str = "above";
+               else
+                       dir_str = "below";
+               netdev_warn(bp->dev, "Chip temperature has gone %s the %s thermal threshold!\n",
+                           dir_str, threshold_type);
+               netdev_warn(bp->dev, "Temperature (In Celsius), Current: %lu, threshold: %lu\n",
+                           BNXT_EVENT_THERMAL_CURRENT_TEMP(data2),
+                           BNXT_EVENT_THERMAL_THRESHOLD_TEMP(data2));
+               bnxt_hwmon_notify_event(bp, type);
+               break;
+       }
        default:
                netdev_err(bp->dev, "FW reported unknown error type %u\n",
                           err_type);
index 6d36158df26e4a62789de0b9ac0fd65afbce3b6d..e48094043c3b9bbad8acc4249d36604604bc6ce0 100644 (file)
 #include "bnxt_hwrm.h"
 #include "bnxt_hwmon.h"
 
+/* Look up the hwmon temperature alarm attribute that corresponds to a FW
+ * thermal threshold @type.  Stores it in @attr and returns true on a match;
+ * returns false (leaving @attr untouched) for any other @type.
+ */
+static bool bnxt_hwmon_thermal_attr(u32 type, u32 *attr)
+{
+       switch (type) {
+       case ASYNC_EVENT_CMPL_ERROR_REPORT_THERMAL_EVENT_DATA1_THRESHOLD_TYPE_WARN:
+               *attr = hwmon_temp_max_alarm;
+               return true;
+       case ASYNC_EVENT_CMPL_ERROR_REPORT_THERMAL_EVENT_DATA1_THRESHOLD_TYPE_CRITICAL:
+               *attr = hwmon_temp_crit_alarm;
+               return true;
+       /* Fatal and shutdown thresholds share the emergency alarm. */
+       case ASYNC_EVENT_CMPL_ERROR_REPORT_THERMAL_EVENT_DATA1_THRESHOLD_TYPE_FATAL:
+       case ASYNC_EVENT_CMPL_ERROR_REPORT_THERMAL_EVENT_DATA1_THRESHOLD_TYPE_SHUTDOWN:
+               *attr = hwmon_temp_emergency_alarm;
+               return true;
+       default:
+               return false;
+       }
+}
+
+/* Signal the hwmon temperature alarm matching the thermal threshold @type.
+ * Does nothing when bp->hwmon_dev is not set or @type has no matching alarm.
+ */
+void bnxt_hwmon_notify_event(struct bnxt *bp, u32 type)
+{
+       u32 alarm;
+
+       if (bp->hwmon_dev && bnxt_hwmon_thermal_attr(type, &alarm))
+               hwmon_notify_event(&bp->pdev->dev, hwmon_temp, alarm, 0);
+}
+
 static int bnxt_hwrm_temp_query(struct bnxt *bp, u8 *temp)
 {
        struct hwrm_temp_monitor_query_output *resp;
index af310066687c5e4fc394b826dd7b55919da38450..76d9f599ebc0496f6f5928c8f7004e0e84b893d5 100644 (file)
 #define BNXT_HWMON_H
 
 #ifdef CONFIG_BNXT_HWMON
+void bnxt_hwmon_notify_event(struct bnxt *bp, u32 type);
 void bnxt_hwmon_uninit(struct bnxt *bp);
 void bnxt_hwmon_init(struct bnxt *bp);
 #else
+static inline void bnxt_hwmon_notify_event(struct bnxt *bp, u32 type)
+{
+} /* no-op stub used when CONFIG_BNXT_HWMON is not defined */
+
 static inline void bnxt_hwmon_uninit(struct bnxt *bp)
 {
 }