habanalabs: force user to set device debug mode
author Oded Gabbay <oded.gabbay@gmail.com>
Sat, 4 May 2019 14:36:06 +0000 (17:36 +0300)
committer Oded Gabbay <oded.gabbay@gmail.com>
Sat, 4 May 2019 14:36:06 +0000 (17:36 +0300)
This patch adds the implementation of the HL_DEBUG_OP_SET_MODE opcode in
the DEBUG IOCTL.

It forces a user who wants to debug the device to put the device into
debug mode before configuring the debug engines. The patch also makes
sure debug mode is disabled when the user releases the FD, in case the
user forgot to disable it.

Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
drivers/misc/habanalabs/context.c
drivers/misc/habanalabs/device.c
drivers/misc/habanalabs/habanalabs.h
drivers/misc/habanalabs/habanalabs_drv.c
drivers/misc/habanalabs/habanalabs_ioctl.c

index f4c92f110a721d045ecfdfa24bda5c0bc01ce8df..280f4625e313dda5daf8f98119bf22028648975c 100644 (file)
@@ -31,7 +31,9 @@ static void hl_ctx_fini(struct hl_ctx *ctx)
                 * Coresight might be still working by accessing addresses
                 * related to the stopped engines. Hence stop it explicitly.
                 */
-               hdev->asic_funcs->halt_coresight(hdev);
+               if (hdev->in_debug)
+                       hl_device_set_debug_mode(hdev, false);
+
                hl_vm_ctx_fini(ctx);
                hl_asid_free(hdev, ctx->asid);
        }
index 0b19d3eefb98f1a686cdf52ec123794bca6ebadb..640d24fcdec513f08f369b9b0443f411e2fce806 100644 (file)
@@ -231,6 +231,7 @@ static int device_early_init(struct hl_device *hdev)
 
        mutex_init(&hdev->fd_open_cnt_lock);
        mutex_init(&hdev->send_cpu_message_lock);
+       mutex_init(&hdev->debug_lock);
        mutex_init(&hdev->mmu_cache_lock);
        INIT_LIST_HEAD(&hdev->hw_queues_mirror_list);
        spin_lock_init(&hdev->hw_queues_mirror_lock);
@@ -262,6 +263,7 @@ early_fini:
 static void device_early_fini(struct hl_device *hdev)
 {
        mutex_destroy(&hdev->mmu_cache_lock);
+       mutex_destroy(&hdev->debug_lock);
        mutex_destroy(&hdev->send_cpu_message_lock);
 
        hl_cb_mgr_fini(hdev, &hdev->kernel_cb_mgr);
@@ -420,6 +422,52 @@ int hl_device_set_frequency(struct hl_device *hdev, enum hl_pll_frequency freq)
        return 1;
 }
 
+int hl_device_set_debug_mode(struct hl_device *hdev, bool enable)
+{
+       int rc = 0;
+       /* Toggle debug mode under debug_lock; returns 0 or negative errno. */
+       mutex_lock(&hdev->debug_lock);
+       /* Disable request: fails with -EFAULT if debug mode is not set. */
+       if (!enable) {
+               if (!hdev->in_debug) {
+                       dev_err(hdev->dev,
+                               "Failed to disable debug mode because device was not in debug mode\n");
+                       rc = -EFAULT;
+                       goto out;
+               }
+               /* Halt coresight first, then clear the in_debug flag. */
+               hdev->asic_funcs->halt_coresight(hdev);
+               hdev->in_debug = 0;
+
+               goto out;
+       }
+       /* Enable request: fails with -EFAULT if debug mode is already set. */
+       if (hdev->in_debug) {
+               dev_err(hdev->dev,
+                       "Failed to enable debug mode because device is already in debug mode\n");
+               rc = -EFAULT;
+               goto out;
+       }
+       /* fd_open_cnt_lock covers the fd_open_cnt read and the flag update. */
+       mutex_lock(&hdev->fd_open_cnt_lock);
+       /* More than one open FD rejects the request with -EPERM. */
+       if (atomic_read(&hdev->fd_open_cnt) > 1) {
+               dev_err(hdev->dev,
+                       "Failed to enable debug mode. More then a single user is using the device\n");
+               rc = -EPERM;
+               goto unlock_fd_open_lock;
+       }
+
+       hdev->in_debug = 1;
+
+unlock_fd_open_lock:
+       mutex_unlock(&hdev->fd_open_cnt_lock);
+out:
+       mutex_unlock(&hdev->debug_lock);
+
+       return rc;
+}
+
 /*
  * hl_device_suspend - initiate device suspend
  *
index 7ca97df65da2ea1dd89363c80d5d1c57bc61deba..f09029339d5eb3d2cffcc31c34eeba59784be240 100644 (file)
@@ -1117,6 +1117,7 @@ struct hl_device_reset_work {
  *                    lock here so we can flush user processes which are opening
  *                    the device while we are trying to hard reset it
  * @send_cpu_message_lock: enforces only one message in KMD <-> ArmCP queue.
+ * @debug_lock: protects critical section of setting debug mode for device
  * @asic_prop: ASIC specific immutable properties.
  * @asic_funcs: ASIC specific functions.
  * @asic_specific: ASIC specific information to use only from ASIC files.
@@ -1159,6 +1160,8 @@ struct hl_device_reset_work {
  * @mmu_enable: is MMU enabled.
  * @device_cpu_disabled: is the device CPU disabled (due to timeouts)
  * @dma_mask: the dma mask that was set for this device
+ * @in_debug: is device under debug. This, together with fd_open_cnt, enforces
+ *            that only a single user is configuring the debug infrastructure.
  */
 struct hl_device {
        struct pci_dev                  *pdev;
@@ -1188,6 +1191,7 @@ struct hl_device {
        /* TODO: remove fd_open_cnt_lock for multiple process support */
        struct mutex                    fd_open_cnt_lock;
        struct mutex                    send_cpu_message_lock;
+       struct mutex                    debug_lock;
        struct asic_fixed_properties    asic_prop;
        const struct hl_asic_funcs      *asic_funcs;
        void                            *asic_specific;
@@ -1230,6 +1234,7 @@ struct hl_device {
        u8                              init_done;
        u8                              device_cpu_disabled;
        u8                              dma_mask;
+       u8                              in_debug;
 
        /* Parameters for bring-up */
        u8                              mmu_enable;
@@ -1325,6 +1330,7 @@ static inline bool hl_mem_area_crosses_range(u64 address, u32 size,
 int hl_device_open(struct inode *inode, struct file *filp);
 bool hl_device_disabled_or_in_reset(struct hl_device *hdev);
 enum hl_device_status hl_device_status(struct hl_device *hdev);
+int hl_device_set_debug_mode(struct hl_device *hdev, bool enable);
 int create_hdev(struct hl_device **dev, struct pci_dev *pdev,
                enum hl_asic_type asic_type, int minor);
 void destroy_hdev(struct hl_device *hdev);
index d8b47bb4729c67cd699f7e263228893d35fccbff..42a8c0b7279ae9b535808cc283b3dde4a9398d3d 100644 (file)
@@ -105,6 +105,14 @@ int hl_device_open(struct inode *inode, struct file *filp)
                return -EPERM;
        }
 
+       if (hdev->in_debug) {
+               dev_err_ratelimited(hdev->dev,
+                       "Can't open %s because it is being debugged by another user\n",
+                       dev_name(hdev->dev));
+               mutex_unlock(&hdev->fd_open_cnt_lock);
+               return -EPERM;
+       }
+
        if (atomic_read(&hdev->fd_open_cnt)) {
                dev_info_ratelimited(hdev->dev,
                        "Can't open %s because another user is working on it\n",
index b7a0eecf6b6c6a29680a07d10ff012d3b8ecc889..678375117f3ba150b8c2853ffb2e6b2bb0bc2fdb 100644 (file)
@@ -254,10 +254,18 @@ static int hl_debug_ioctl(struct hl_fpriv *hpriv, void *data)
        case HL_DEBUG_OP_BMON:
        case HL_DEBUG_OP_SPMU:
        case HL_DEBUG_OP_TIMESTAMP:
+               if (!hdev->in_debug) {
+                       dev_err(hdev->dev,
+                               "Rejecting debug configuration request because device not in debug mode\n");
+                       return -EFAULT;
+               }
                args->input_size =
                        min(args->input_size, hl_debug_struct_size[args->op]);
                rc = debug_coresight(hdev, args);
                break;
+       case HL_DEBUG_OP_SET_MODE:
+               rc = hl_device_set_debug_mode(hdev, (bool) args->enable);
+               break;
        default:
                dev_err(hdev->dev, "Invalid request %d\n", args->op);
                rc = -ENOTTY;