habanalabs: move call to scrub_device_mem after ctx_fini
author: Dafna Hirschfeld <dhirschfeld@habana.ai>
Thu, 12 May 2022 12:20:55 +0000 (15:20 +0300)
committer: Oded Gabbay <ogabbay@kernel.org>
Tue, 12 Jul 2022 06:09:25 +0000 (09:09 +0300)
In future ASICs, the device may still be non-idle when the context is
released, so scrubbing the device memory from hl_ctx_fini() is too early.
Postpone the scrubbing: do it in hpriv release if no reset is executed,
or in device late init if a reset is executed.

Signed-off-by: Dafna Hirschfeld <dhirschfeld@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
drivers/misc/habanalabs/common/context.c
drivers/misc/habanalabs/common/device.c

index 60e3e3125fbcfb090a061bac1cd43d7b54f1efce..a69c14405f4188908dbd3bf1b0e0f39013a5aeb7 100644 (file)
@@ -106,9 +106,6 @@ static void hl_ctx_fini(struct hl_ctx *ctx)
                hl_vm_ctx_fini(ctx);
                hl_asid_free(hdev, ctx->asid);
                hl_encaps_sig_mgr_fini(hdev, &ctx->sig_mgr);
-
-               /* Scrub both SRAM and DRAM */
-               hdev->asic_funcs->scrub_device_mem(hdev);
        } else {
                dev_dbg(hdev->dev, "closing kernel context\n");
                hdev->asic_funcs->ctx_fini(ctx);
index 0f804ecb6caa7e92399ffa27dc8d6989b841a1b7..1a4f3eb941a9bbfed092c88311cbef0e67fb169e 100644 (file)
@@ -272,9 +272,15 @@ static void hpriv_release(struct kref *ref)
        list_del(&hpriv->dev_node);
        mutex_unlock(&hdev->fpriv_list_lock);
 
-       if ((hdev->reset_if_device_not_idle && !device_is_idle)
-                       || hdev->reset_upon_device_release)
+       if ((hdev->reset_if_device_not_idle && !device_is_idle) ||
+               hdev->reset_upon_device_release) {
                hl_device_reset(hdev, HL_DRV_RESET_DEV_RELEASE);
+       } else {
+               int rc = hdev->asic_funcs->scrub_device_mem(hdev);
+
+               if (rc)
+                       dev_err(hdev->dev, "failed to scrub memory from hpriv release (%d)\n", rc);
+       }
 
        /* Now we can mark the compute_ctx as not active. Even if a reset is running in a different
         * thread, we don't care because the in_reset is marked so if a user will try to open
@@ -1459,6 +1465,12 @@ kill_processes:
                }
        }
 
+       rc = hdev->asic_funcs->scrub_device_mem(hdev);
+       if (rc) {
+               dev_err(hdev->dev, "scrub mem failed from device reset (%d)\n", rc);
+               return rc;
+       }
+
        spin_lock(&hdev->reset_info.lock);
        hdev->reset_info.is_in_soft_reset = false;