habanalabs: add debugfs node for configuring CS timeout
authorOfir Bitton <obitton@habana.ai>
Thu, 2 Sep 2021 07:18:01 +0000 (10:18 +0300)
committerOded Gabbay <ogabbay@kernel.org>
Mon, 18 Oct 2021 09:05:46 +0000 (12:05 +0300)
Command submission timeout is currently determined during driver
loading time. As some environments requires this timeout to be
modified in runtime, we introduce a new debugfs node that controls
the timeout value without the need to reload the driver.

Signed-off-by: Ofir Bitton <obitton@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
Documentation/ABI/testing/debugfs-driver-habanalabs
drivers/misc/habanalabs/common/debugfs.c

index 284e2dfa61cd282ccf6e7119abd75402ae3650ea..63c46d9d538fd4e51014d6a1a870bac6a81397df 100644 (file)
@@ -226,6 +226,12 @@ Description:    Gets the state dump occurring on a CS timeout or failure.
                 Writing an integer X discards X state dumps, so that the
                 next read would return X+1-st newest state dump.
 
+What:           /sys/kernel/debug/habanalabs/hl<n>/timeout_locked
+Date:           Sep 2021
+KernelVersion:  5.16
+Contact:        obitton@habana.ai
+Description:    Sets the command submission timeout value in seconds.
+
 What:           /sys/kernel/debug/habanalabs/hl<n>/stop_on_err
 Date:           Mar 2020
 KernelVersion:  5.6
index 985f1f3dbd20f4fe9062995b19a8e338260243b0..1f2a3dc6c4e2f91166844162b791f22acc9a4362 100644 (file)
@@ -1167,6 +1167,45 @@ static ssize_t hl_state_dump_write(struct file *f, const char __user *buf,
        return count;
 }
 
+static ssize_t hl_timeout_locked_read(struct file *f, char __user *buf,
+                                       size_t count, loff_t *ppos)
+{
+       struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
+       struct hl_device *hdev = entry->hdev;
+       char tmp_buf[200];
+       ssize_t rc;
+
+       if (*ppos)
+               return 0;
+
+       sprintf(tmp_buf, "%d\n",
+               jiffies_to_msecs(hdev->timeout_jiffies) / 1000);
+       rc = simple_read_from_buffer(buf, strlen(tmp_buf) + 1, ppos, tmp_buf,
+                       strlen(tmp_buf) + 1);
+
+       return rc;
+}
+
+static ssize_t hl_timeout_locked_write(struct file *f, const char __user *buf,
+                                    size_t count, loff_t *ppos)
+{
+       struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
+       struct hl_device *hdev = entry->hdev;
+       u32 value;
+       ssize_t rc;
+
+       rc = kstrtouint_from_user(buf, count, 10, &value);
+       if (rc)
+               return rc;
+
+       if (value)
+               hdev->timeout_jiffies = msecs_to_jiffies(value * 1000);
+       else
+               hdev->timeout_jiffies = MAX_SCHEDULE_TIMEOUT;
+
+       return count;
+}
+
 static const struct file_operations hl_data32b_fops = {
        .owner = THIS_MODULE,
        .read = hl_data_read32,
@@ -1240,6 +1279,12 @@ static const struct file_operations hl_state_dump_fops = {
        .write = hl_state_dump_write
 };
 
+static const struct file_operations hl_timeout_locked_fops = {
+       .owner = THIS_MODULE,
+       .read = hl_timeout_locked_read,
+       .write = hl_timeout_locked_write
+};
+
 static const struct hl_info_list hl_debugfs_list[] = {
        {"command_buffers", command_buffers_show, NULL},
        {"command_submission", command_submission_show, NULL},
@@ -1421,6 +1466,12 @@ void hl_debugfs_add_device(struct hl_device *hdev)
                                dev_entry,
                                &hl_state_dump_fops);
 
+       debugfs_create_file("timeout_locked",
+                               0644,
+                               dev_entry->root,
+                               dev_entry,
+                               &hl_timeout_locked_fops);
+
        for (i = 0, entry = dev_entry->entry_arr ; i < count ; i++, entry++) {
                debugfs_create_file(hl_debugfs_list[i].name,
                                        0444,