habanalabs: add uapi to retrieve aggregate H/W events
author Oded Gabbay <oded.gabbay@gmail.com>
Wed, 28 Aug 2019 18:51:52 +0000 (21:51 +0300)
committer Oded Gabbay <oded.gabbay@gmail.com>
Thu, 5 Sep 2019 11:55:27 +0000 (14:55 +0300)
Add a new opcode to the INFO IOCTL to retrieve aggregate H/W events, i.e.
event counters that are NOT cleared upon device reset but keep counting
from the time the driver was loaded.

Add the code to support it in the device event handling function.

Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
Reviewed-by: Omer Shpigelman <oshpigelman@habana.ai>
drivers/misc/habanalabs/goya/goya.c
drivers/misc/habanalabs/goya/goyaP.h
drivers/misc/habanalabs/habanalabs.h
drivers/misc/habanalabs/habanalabs_ioctl.c
include/uapi/misc/habanalabs.h

index 0dd0b4429feef67b8644841c2ef49423ba79c187..1267ec75b19f20787cf62528801eed6d2e51b50e 100644 (file)
@@ -4469,6 +4469,7 @@ void goya_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
        struct goya_device *goya = hdev->asic_specific;
 
        goya->events_stat[event_type]++;
+       goya->events_stat_aggregate[event_type]++;
 
        switch (event_type) {
        case GOYA_ASYNC_EVENT_ID_PCIE_IF:
@@ -4550,12 +4551,16 @@ void goya_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
        }
 }
 
-void *goya_get_events_stat(struct hl_device *hdev, u32 *size)
+void *goya_get_events_stat(struct hl_device *hdev, bool aggregate, u32 *size)
 {
        struct goya_device *goya = hdev->asic_specific;
 
-       *size = (u32) sizeof(goya->events_stat);
+       if (aggregate) {
+               *size = (u32) sizeof(goya->events_stat_aggregate);
+               return goya->events_stat_aggregate;
+       }
 
+       *size = (u32) sizeof(goya->events_stat);
        return goya->events_stat;
 }
 
index d7f48c9c41cdc7f6549b5a42441f3bfd1fb897e0..f830cfd5c04d6a7e5adac9284058d329962545e0 100644 (file)
@@ -162,6 +162,7 @@ struct goya_device {
 
        u64             ddr_bar_cur_addr;
        u32             events_stat[GOYA_ASYNC_EVENT_ID_SIZE];
+       u32             events_stat_aggregate[GOYA_ASYNC_EVENT_ID_SIZE];
        u32             hw_cap_initialized;
        u8              device_cpu_mmu_mappings_done;
 };
@@ -215,7 +216,7 @@ int goya_suspend(struct hl_device *hdev);
 int goya_resume(struct hl_device *hdev);
 
 void goya_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry);
-void *goya_get_events_stat(struct hl_device *hdev, u32 *size);
+void *goya_get_events_stat(struct hl_device *hdev, bool aggregate, u32 *size);
 
 void goya_add_end_of_cb_packets(struct hl_device *hdev, u64 kernel_address,
                                u32 len, u64 cq_addr, u32 cq_val, u32 msix_vec);
index 23b86b7f973202fb33e53cd99d2f0f0c42a8bfd4..aa7aaa710f127920c1b4bede71f0d5679738b4d2 100644 (file)
@@ -558,7 +558,8 @@ struct hl_asic_funcs {
                                struct hl_eq_entry *eq_entry);
        void (*set_pll_profile)(struct hl_device *hdev,
                        enum hl_pll_frequency freq);
-       void* (*get_events_stat)(struct hl_device *hdev, u32 *size);
+       void* (*get_events_stat)(struct hl_device *hdev, bool aggregate,
+                               u32 *size);
        u64 (*read_pte)(struct hl_device *hdev, u64 addr);
        void (*write_pte)(struct hl_device *hdev, u64 addr, u64 val);
        void (*mmu_invalidate_cache)(struct hl_device *hdev, bool is_hard);
index f958568f7996c18e4efc1ffe943e295dc05ceb62..66d9c710073c64491b04ae8c863d69ff6675919d 100644 (file)
@@ -75,7 +75,8 @@ static int hw_ip_info(struct hl_device *hdev, struct hl_info_args *args)
                min((size_t)size, sizeof(hw_ip))) ? -EFAULT : 0;
 }
 
-static int hw_events_info(struct hl_device *hdev, struct hl_info_args *args)
+static int hw_events_info(struct hl_device *hdev, bool aggregate,
+                       struct hl_info_args *args)
 {
        u32 size, max_size = args->return_size;
        void __user *out = (void __user *) (uintptr_t) args->return_pointer;
@@ -84,7 +85,7 @@ static int hw_events_info(struct hl_device *hdev, struct hl_info_args *args)
        if ((!max_size) || (!out))
                return -EINVAL;
 
-       arr = hdev->asic_funcs->get_events_stat(hdev, &size);
+       arr = hdev->asic_funcs->get_events_stat(hdev, aggregate, &size);
 
        return copy_to_user(out, arr, min(max_size, size)) ? -EFAULT : 0;
 }
@@ -251,7 +252,7 @@ static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data,
 
        switch (args->op) {
        case HL_INFO_HW_EVENTS:
-               rc = hw_events_info(hdev, args);
+               rc = hw_events_info(hdev, false, args);
                break;
 
        case HL_INFO_DRAM_USAGE:
@@ -266,6 +267,10 @@ static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data,
                rc = device_utilization(hdev, args);
                break;
 
+       case HL_INFO_HW_EVENTS_AGGREGATE:
+               rc = hw_events_info(hdev, true, args);
+               break;
+
        default:
                dev_err(dev, "Invalid request %d\n", args->op);
                rc = -ENOTTY;
index 73ee212d7fa6a6934bc320d23361d158e563a6f6..19f8039db2ea2440c872f5d4702dd8fcb50c7b8c 100644 (file)
@@ -93,6 +93,8 @@ enum hl_device_status {
  *                              The period can be between 100ms to 1s, in
  *                              resolution of 100ms. The return value is a
  *                              percentage of the utilization rate.
+ * HL_INFO_HW_EVENTS_AGGREGATE - Receive an array describing how many times each
+ *                               event occurred since the driver was loaded.
  */
 #define HL_INFO_HW_IP_INFO             0
 #define HL_INFO_HW_EVENTS              1
@@ -100,6 +102,7 @@ enum hl_device_status {
 #define HL_INFO_HW_IDLE                        3
 #define HL_INFO_DEVICE_STATUS          4
 #define HL_INFO_DEVICE_UTILIZATION     6
+#define HL_INFO_HW_EVENTS_AGGREGATE    7
 
 #define HL_INFO_VERSION_MAX_LEN        128