habanalabs: rephrase error messages
author: Oded Gabbay <oded.gabbay@gmail.com>
Mon, 22 Jun 2020 06:52:22 +0000 (09:52 +0300)
committer: Oded Gabbay <oded.gabbay@gmail.com>
Fri, 24 Jul 2020 17:31:35 +0000 (20:31 +0300)
Rephrase some error/warning/notice messages to make them more accessible to
ordinary users.

There is no need to print the context ASID, as the driver currently doesn't
support multiple contexts.

Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
Reviewed-by: Tomer Tayar <ttayar@habana.ai>
drivers/misc/habanalabs/command_submission.c
drivers/misc/habanalabs/context.c
drivers/misc/habanalabs/firmware_if.c
drivers/misc/habanalabs/memory.c

index 62dab99dda98e395fe74a85a86113d388433851c..f81d6685e011aa9b434a6e3672ce9391fb360e1e 100644 (file)
@@ -373,9 +373,9 @@ static void cs_timedout(struct work_struct *work)
        hdev = cs->ctx->hdev;
        ctx_asid = cs->ctx->asid;
 
-       /* TODO: add information about last signaled seq and last emitted seq */
-       dev_err(hdev->dev, "User %d command submission %llu got stuck!\n",
-               ctx_asid, cs->sequence);
+       dev_err(hdev->dev,
+               "Command submission %llu has not finished in time!\n",
+               cs->sequence);
 
        cs_put(cs);
 
@@ -1130,7 +1130,7 @@ static long _hl_cs_wait_ioctl(struct hl_device *hdev,
                rc = PTR_ERR(fence);
                if (rc == -EINVAL)
                        dev_notice_ratelimited(hdev->dev,
-                               "Can't wait on seq %llu because current CS is at seq %llu\n",
+                               "Can't wait on CS %llu because current CS is at seq %llu\n",
                                seq, ctx->cs_sequence);
        } else if (fence) {
                rc = dma_fence_wait_timeout(fence, true, timeout);
@@ -1163,15 +1163,21 @@ int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)
        memset(args, 0, sizeof(*args));
 
        if (rc < 0) {
-               dev_err_ratelimited(hdev->dev,
-                               "Error %ld on waiting for CS handle %llu\n",
-                               rc, seq);
                if (rc == -ERESTARTSYS) {
+                       dev_err_ratelimited(hdev->dev,
+                               "user process got signal while waiting for CS handle %llu\n",
+                               seq);
                        args->out.status = HL_WAIT_CS_STATUS_INTERRUPTED;
                        rc = -EINTR;
                } else if (rc == -ETIMEDOUT) {
+                       dev_err_ratelimited(hdev->dev,
+                               "CS %llu has timed-out while user process is waiting for it\n",
+                               seq);
                        args->out.status = HL_WAIT_CS_STATUS_TIMEDOUT;
                } else if (rc == -EIO) {
+                       dev_err_ratelimited(hdev->dev,
+                               "CS %llu has been aborted while user process is waiting for it\n",
+                               seq);
                        args->out.status = HL_WAIT_CS_STATUS_ABORTED;
                }
                return rc;
index 1b96fefa4a65aea4ce4c404b376f926b3145d7b9..1e3e5b19ecd96324733eaab9679a7a1f1c149f04 100644 (file)
@@ -112,8 +112,7 @@ void hl_ctx_free(struct hl_device *hdev, struct hl_ctx *ctx)
                return;
 
        dev_warn(hdev->dev,
-               "Context %d closed or terminated but its CS are executing\n",
-               ctx->asid);
+               "user process released device but its command submissions are still executing\n");
 }
 
 int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx)
index 6900c01d060f2675e782e66b216e35742fb99e22..9e7f203a09d7f7a26074939c31515eb5ce5a19e6 100644 (file)
@@ -289,7 +289,7 @@ int hl_fw_armcp_info_get(struct hl_device *hdev)
                                        HL_ARMCP_INFO_TIMEOUT_USEC, &result);
        if (rc) {
                dev_err(hdev->dev,
-                       "Failed to send ArmCP info pkt, error %d\n", rc);
+                       "Failed to handle ArmCP info pkt, error %d\n", rc);
                goto out;
        }
 
@@ -340,7 +340,7 @@ int hl_fw_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size)
 
        if (rc) {
                dev_err(hdev->dev,
-                       "Failed to send ArmCP EEPROM packet, error %d\n", rc);
+                       "Failed to handle ArmCP EEPROM packet, error %d\n", rc);
                goto out;
        }
 
index 47da84a177197df9e9005040850f148e21974803..e4e1693e5c6c386b650a0cd8d555f877d1199b43 100644 (file)
@@ -1730,8 +1730,7 @@ void hl_vm_ctx_fini(struct hl_ctx *ctx)
         */
        if (!hdev->hard_reset_pending && !hash_empty(ctx->mem_hash))
                dev_notice(hdev->dev,
-                               "ctx %d is freed while it has va in use\n",
-                               ctx->asid);
+                       "user released device without removing its memory mappings\n");
 
        hash_for_each_safe(ctx->mem_hash, i, tmp_node, hnode, node) {
                dev_dbg(hdev->dev,