From: Dafna Hirschfeld Date: Wed, 15 Feb 2023 10:15:57 +0000 (+0200) Subject: accel/habanalabs: assert return value of hw_fini X-Git-Url: http://git.maquefel.me/?a=commitdiff_plain;h=86b74d843897fdb9f2366bc515e9d52816e3d247;p=linux.git accel/habanalabs: assert return value of hw_fini Since hw_fini return error code for failure indication, we should check its return value. Currently it might only fail upon soft-reset from hl_device_reset. Later patch will add hw_fini failure in case of polling timeout in hard-reset. Signed-off-by: Dafna Hirschfeld Reviewed-by: Oded Gabbay Reviewed-by: Stanislaw Gruszka Signed-off-by: Oded Gabbay --- diff --git a/drivers/accel/habanalabs/common/device.c b/drivers/accel/habanalabs/common/device.c index 7ade32487138c..99e793dfb1261 100644 --- a/drivers/accel/habanalabs/common/device.c +++ b/drivers/accel/habanalabs/common/device.c @@ -1472,7 +1472,7 @@ int hl_device_reset(struct hl_device *hdev, u32 flags) schedule_hard_reset = false, delay_reset, from_dev_release, from_watchdog_thread; u64 idle_mask[HL_BUSY_ENGINES_MASK_EXT_SIZE] = {0}; struct hl_ctx *ctx; - int i, rc; + int i, rc, hw_fini_rc; if (!hdev->init_done) { dev_err(hdev->dev, "Can't reset before initialization is done\n"); @@ -1634,7 +1634,7 @@ kill_processes: } /* Reset the H/W. It will be in idle state after this returns */ - hdev->asic_funcs->hw_fini(hdev, hard_reset, fw_reset); + hw_fini_rc = hdev->asic_funcs->hw_fini(hdev, hard_reset, fw_reset); if (hard_reset) { hdev->fw_loader.fw_comp_loaded = FW_TYPE_NONE; @@ -1661,6 +1661,10 @@ kill_processes: hl_ctx_put(ctx); } + if (hw_fini_rc) { + rc = hw_fini_rc; + goto out_err; + } /* Finished tear-down, starting to re-initialize */ if (hard_reset) { @@ -2416,7 +2420,9 @@ void hl_device_fini(struct hl_device *hdev) hl_cb_pool_fini(hdev); /* Reset the H/W. It will be in idle state after this returns */ - hdev->asic_funcs->hw_fini(hdev, true, false); + rc = hdev->asic_funcs->hw_fini(hdev, true, false); + if (rc) + dev_err(hdev->dev, "hw_fini failed in device fini while removing device %d\n", rc); hdev->fw_loader.fw_comp_loaded = FW_TYPE_NONE; diff --git a/drivers/accel/habanalabs/gaudi/gaudi.c b/drivers/accel/habanalabs/gaudi/gaudi.c index cc4cd581c8ef0..d37c5d4893b94 100644 --- a/drivers/accel/habanalabs/gaudi/gaudi.c +++ b/drivers/accel/habanalabs/gaudi/gaudi.c @@ -868,13 +868,18 @@ pci_init: rc = hl_fw_read_preboot_status(hdev); if (rc) { if (hdev->reset_on_preboot_fail) + /* we are already on failure flow, so don't check if hw_fini fails. */ hdev->asic_funcs->hw_fini(hdev, true, false); goto pci_fini; } if (gaudi_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) { dev_dbg(hdev->dev, "H/W state is dirty, must reset before initializing\n"); - hdev->asic_funcs->hw_fini(hdev, true, false); + rc = hdev->asic_funcs->hw_fini(hdev, true, false); + if (rc) { + dev_err(hdev->dev, "failed to reset HW in dirty state (%d)\n", rc); + goto pci_fini; + } } return 0; diff --git a/drivers/accel/habanalabs/gaudi2/gaudi2.c b/drivers/accel/habanalabs/gaudi2/gaudi2.c index bf1d4ef6bbec2..c2af874e758c2 100644 --- a/drivers/accel/habanalabs/gaudi2/gaudi2.c +++ b/drivers/accel/habanalabs/gaudi2/gaudi2.c @@ -2886,13 +2886,18 @@ static int gaudi2_early_init(struct hl_device *hdev) rc = hl_fw_read_preboot_status(hdev); if (rc) { if (hdev->reset_on_preboot_fail) + /* we are already on failure flow, so don't check if hw_fini fails. */ hdev->asic_funcs->hw_fini(hdev, true, false); goto pci_fini; } if (gaudi2_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) { dev_dbg(hdev->dev, "H/W state is dirty, must reset before initializing\n"); - hdev->asic_funcs->hw_fini(hdev, true, false); + rc = hdev->asic_funcs->hw_fini(hdev, true, false); + if (rc) { + dev_err(hdev->dev, "failed to reset HW during early init (%d)\n", rc); + goto pci_fini; + } } return 0; diff --git a/drivers/accel/habanalabs/goya/goya.c b/drivers/accel/habanalabs/goya/goya.c index c869cfe44ce7a..e02de936b7b5c 100644 --- a/drivers/accel/habanalabs/goya/goya.c +++ b/drivers/accel/habanalabs/goya/goya.c @@ -669,13 +669,18 @@ pci_init: rc = hl_fw_read_preboot_status(hdev); if (rc) { if (hdev->reset_on_preboot_fail) + /* we are already on failure flow, so don't check if hw_fini fails. */ hdev->asic_funcs->hw_fini(hdev, true, false); goto pci_fini; } if (goya_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) { dev_dbg(hdev->dev, "H/W state is dirty, must reset before initializing\n"); - hdev->asic_funcs->hw_fini(hdev, true, false); + rc = hdev->asic_funcs->hw_fini(hdev, true, false); + if (rc) { + dev_err(hdev->dev, "failed to reset HW in dirty state (%d)\n", rc); + goto pci_fini; + } } if (!hdev->pldm) {