From 605e1ef3d58c348d34e8716b05194814681a432b Mon Sep 17 00:00:00 2001 From: Dafna Hirschfeld Date: Thu, 12 May 2022 15:20:55 +0300 Subject: [PATCH] habanalabs: move call to scrub_device_mem after ctx_fini In future ASICs, it would be possible to have a non-idle device when context is released. We thus need to postpone the scrubbing. Postpone it to hpriv release if reset is not executed or to device late init if reset is executed. Signed-off-by: Dafna Hirschfeld Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/context.c | 3 --- drivers/misc/habanalabs/common/device.c | 16 ++++++++++++++-- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/drivers/misc/habanalabs/common/context.c b/drivers/misc/habanalabs/common/context.c index 60e3e3125fbcf..a69c14405f418 100644 --- a/drivers/misc/habanalabs/common/context.c +++ b/drivers/misc/habanalabs/common/context.c @@ -106,9 +106,6 @@ static void hl_ctx_fini(struct hl_ctx *ctx) hl_vm_ctx_fini(ctx); hl_asid_free(hdev, ctx->asid); hl_encaps_sig_mgr_fini(hdev, &ctx->sig_mgr); - - /* Scrub both SRAM and DRAM */ - hdev->asic_funcs->scrub_device_mem(hdev); } else { dev_dbg(hdev->dev, "closing kernel context\n"); hdev->asic_funcs->ctx_fini(ctx); diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c index 0f804ecb6caa7..1a4f3eb941a9b 100644 --- a/drivers/misc/habanalabs/common/device.c +++ b/drivers/misc/habanalabs/common/device.c @@ -272,9 +272,15 @@ static void hpriv_release(struct kref *ref) list_del(&hpriv->dev_node); mutex_unlock(&hdev->fpriv_list_lock); - if ((hdev->reset_if_device_not_idle && !device_is_idle) - || hdev->reset_upon_device_release) + if ((hdev->reset_if_device_not_idle && !device_is_idle) || + hdev->reset_upon_device_release) { hl_device_reset(hdev, HL_DRV_RESET_DEV_RELEASE); + } else { + int rc = hdev->asic_funcs->scrub_device_mem(hdev); + + if (rc) + dev_err(hdev->dev, "failed to scrub memory from hpriv release (%d)\n", rc); + } /* Now we can mark the compute_ctx as not active. Even if a reset is running in a different * thread, we don't care because the in_reset is marked so if a user will try to open @@ -1459,6 +1465,12 @@ kill_processes: } } + rc = hdev->asic_funcs->scrub_device_mem(hdev); + if (rc) { + dev_err(hdev->dev, "scrub mem failed from device reset (%d)\n", rc); + return rc; + } + spin_lock(&hdev->reset_info.lock); hdev->reset_info.is_in_soft_reset = false; -- 2.30.2