habanalabs: expose reset counters via existing INFO IOCTL
author Moti Haimovski <mhaimovski@habana.ai>
Sun, 3 Nov 2019 14:26:44 +0000 (16:26 +0200)
committer Oded Gabbay <oded.gabbay@gmail.com>
Thu, 21 Nov 2019 09:35:47 +0000 (11:35 +0200)
Expose both soft and hard reset counts via INFO IOCTL.
This will allow system management applications to easily check
if the device has undergone reset.

Signed-off-by: Moti Haimovski <mhaimovski@habana.ai>
Reviewed-by: Oded Gabbay <oded.gabbay@gmail.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
drivers/misc/habanalabs/habanalabs_ioctl.c
include/uapi/misc/habanalabs.h

index 5d9c269d99db0712df2f0ee963ad76b4e53442ae..6474b868ef27f6a07f1850bec7648c5b2e996c58 100644 (file)
@@ -242,6 +242,22 @@ static int get_clk_rate(struct hl_device *hdev, struct hl_info_args *args)
                min((size_t) max_size, sizeof(clk_rate))) ? -EFAULT : 0;
 }
 
+static int get_reset_count(struct hl_device *hdev, struct hl_info_args *args)
+{      /* INFO IOCTL helper: report hard/soft reset counts to userspace */
+       struct hl_info_reset_count reset_count = {0};
+       u32 max_size = args->return_size;
+       void __user *out = (void __user *) (uintptr_t) args->return_pointer;
+       /* A zero-sized or NULL user buffer cannot receive the counters. */
+       if ((!max_size) || (!out))
+               return -EINVAL;
+       /* Copy the device's current reset counters into the reply struct. */
+       reset_count.hard_reset_cnt = hdev->hard_reset_cnt;
+       reset_count.soft_reset_cnt = hdev->soft_reset_cnt;
+       /* Copy no more than the caller's buffer holds; -EFAULT on failure. */
+       return copy_to_user(out, &reset_count,
+               min((size_t) max_size, sizeof(reset_count))) ? -EFAULT : 0;
+}
+
 static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data,
                                struct device *dev)
 {
@@ -260,6 +276,9 @@ static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data,
        case HL_INFO_DEVICE_STATUS:
                return device_status_info(hdev, args);
 
+       case HL_INFO_RESET_COUNT:
+               return get_reset_count(hdev, args);
+
        default:
                break;
        }
index 716e70750f23a600f7e32bb7ea626d24b27c4356..4faa2c9767e588018b5bdcbb60c80dfab6ccb844 100644 (file)
@@ -98,6 +98,9 @@ enum hl_device_status {
  * HL_INFO_CLK_RATE            - Retrieve the current and maximum clock rate
  *                               of the device in MHz. The maximum clock rate is
  *                               configurable via sysfs parameter
+ * HL_INFO_RESET_COUNT         - Retrieve the counts of the soft and hard reset
+ *                               operations performed on the device since the
+ *                               last time the driver was loaded.
  */
 #define HL_INFO_HW_IP_INFO             0
 #define HL_INFO_HW_EVENTS              1
@@ -107,6 +110,7 @@ enum hl_device_status {
 #define HL_INFO_DEVICE_UTILIZATION     6
 #define HL_INFO_HW_EVENTS_AGGREGATE    7
 #define HL_INFO_CLK_RATE               8
+#define HL_INFO_RESET_COUNT            9
 
 #define HL_INFO_VERSION_MAX_LEN        128
 #define HL_INFO_CARD_NAME_MAX_LEN      16
@@ -160,6 +164,11 @@ struct hl_info_clk_rate {
        __u32 max_clk_rate_mhz;
 };
 
+struct hl_info_reset_count {    /* reply payload for HL_INFO_RESET_COUNT */
+       __u32 hard_reset_cnt;   /* hard resets counted by the driver */
+       __u32 soft_reset_cnt;   /* soft resets counted by the driver */
+};
+
 struct hl_info_args {
        /* Location of relevant struct in userspace */
        __u64 return_pointer;