accel/habanalabs: dump temperature threshold boot error
author Ofir Bitton <obitton@habana.ai>
Mon, 12 Jun 2023 11:51:15 +0000 (14:51 +0300)
committer Oded Gabbay <ogabbay@kernel.org>
Mon, 9 Oct 2023 09:37:19 +0000 (12:37 +0300)
Add a dump of an error reported by the f/w during boot time.
This error indicates a failure to set the temperature sensor threshold.

Signed-off-by: Ofir Bitton <obitton@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
drivers/accel/habanalabs/common/firmware_if.c

index 370508e98854aac671392a4c6712a5e7cbc64bdd..c7da69dbfa0ab55ba6ba98d8dc583c7c575e9393 100644 (file)
@@ -724,6 +724,11 @@ static bool fw_report_boot_dev0(struct hl_device *hdev, u32 err_val,
                err_exists = true;
        }
 
+       if (err_val & CPU_BOOT_ERR0_TMP_THRESH_INIT_FAIL) {
+               dev_err(hdev->dev, "Device boot error - Failed to set threshold for temperature sensor\n");
+               err_exists = true;
+       }
+
        if (err_val & CPU_BOOT_ERR0_DEVICE_UNUSABLE_FAIL) {
                /* Ignore this bit, don't prevent driver loading */
                dev_dbg(hdev->dev, "device unusable status is set\n");