From 5731b6e6f08a4a3adf944fd0436f77f3e9ce1725 Mon Sep 17 00:00:00 2001 From: Tal Cohen Date: Wed, 28 Sep 2022 18:33:19 +0300 Subject: [PATCH] habanalabs/gaudi2: add device unavailable notification Device unavailable notifies the user that there isn't an option to retrieve debug information from the device. When a critical device error occurs and the f/w performs the device reset, a device unavailable notification shall be sent to the user process. Signed-off-by: Tal Cohen Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/gaudi2/gaudi2.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/misc/habanalabs/gaudi2/gaudi2.c b/drivers/misc/habanalabs/gaudi2/gaudi2.c index cb048920ffc8a..e9c4ec429baee 100644 --- a/drivers/misc/habanalabs/gaudi2/gaudi2.c +++ b/drivers/misc/habanalabs/gaudi2/gaudi2.c @@ -8576,7 +8576,7 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent { u32 ctl, reset_flags = HL_DRV_RESET_HARD | HL_DRV_RESET_DELAY; struct gaudi2_device *gaudi2 = hdev->asic_specific; - bool reset_required = false, skip_reset = false; + bool reset_required = false, skip_reset = false, is_critical = false; int index, sbte_index; u64 event_mask = 0; u16 event_type; @@ -8602,6 +8602,7 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent reset_flags |= HL_DRV_RESET_FW_FATAL_ERR; event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; reset_required = gaudi2_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data); + is_critical = eq_entry->ecc_data.is_critical; break; case GAUDI2_EVENT_TPC0_QM ... GAUDI2_EVENT_PDMA1_QM: @@ -8976,9 +8977,16 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent return; reset_device: - if (hdev->hard_reset_on_fw_events) { + if (hdev->asic_prop.fw_security_enabled && is_critical) { + reset_flags = HL_DRV_RESET_HARD | HL_DRV_RESET_BYPASS_REQ_TO_FW; + + /* notify on device unavailable while the reset triggered by fw */ + event_mask |= (HL_NOTIFIER_EVENT_DEVICE_RESET | + HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE); hl_device_reset(hdev, reset_flags); + } else if (hdev->hard_reset_on_fw_events) { event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET; + hl_device_reset(hdev, reset_flags); } else { if (!gaudi2_irq_map_table[event_type].msg) hl_fw_unmask_irq(hdev, event_type); -- 2.30.2