habanalabs: add page fault info uapi
authorDani Liberman <dliberman@habana.ai>
Sun, 18 Sep 2022 18:37:31 +0000 (21:37 +0300)
committerOded Gabbay <ogabbay@kernel.org>
Wed, 23 Nov 2022 14:13:40 +0000 (16:13 +0200)
Only the first page fault will be saved.
Besides the address which caused the page fault, the driver captures
all of the mmu user mappings.
User can retrieve this data via the new uapi (new opcode in INFO ioctl).

Signed-off-by: Dani Liberman <dliberman@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
drivers/misc/habanalabs/common/device.c
drivers/misc/habanalabs/common/habanalabs.h
drivers/misc/habanalabs/common/habanalabs_drv.c
drivers/misc/habanalabs/common/habanalabs_ioctl.c
drivers/misc/habanalabs/gaudi/gaudi.c
include/uapi/misc/habanalabs.h

index 30ddaaae67e51dc242e41ab139f28f996ae73431..5dc6c77b47211bb603663b322085eed5a6fa511a 100644 (file)
@@ -12,6 +12,7 @@
 
 #include <linux/pci.h>
 #include <linux/hwmon.h>
+#include <linux/vmalloc.h>
 
 #include <trace/events/habanalabs.h>
 
@@ -2199,6 +2200,8 @@ void hl_device_fini(struct hl_device *hdev)
 
        hl_mmu_fini(hdev);
 
+       vfree(hdev->captured_err_info.pgf_info.user_mappings);
+
        hl_eq_fini(hdev, &hdev->event_queue);
 
        kfree(hdev->shadow_cs_queue);
@@ -2275,3 +2278,58 @@ void hl_capture_razwi(struct hl_device *hdev, u64 addr, u16 *engine_id, u16 num_
                        num_of_engines * sizeof(u16));
        hdev->captured_err_info.razwi.flags = flags;
 }
+static void hl_capture_user_mappings(struct hl_device *hdev)
+{
+       struct page_fault_info *pgf_info = &hdev->captured_err_info.pgf_info;
+       struct hl_vm_hash_node *hnode;
+       struct hl_userptr *userptr;
+       struct hl_ctx *ctx;
+       u32 map_idx = 0;
+       int i;
+
+       ctx = hl_get_compute_ctx(hdev);
+       if (!ctx) {
+               dev_err(hdev->dev, "Can't get user context for user mappings\n");
+               return;
+       }
+
+       mutex_lock(&ctx->mem_hash_lock);
+       hash_for_each(ctx->mem_hash, i, hnode, node)
+       pgf_info->num_of_user_mappings++;
+
+       if (!pgf_info->num_of_user_mappings)
+               goto finish;
+
+       /* In case we already allocated in previous session, need to release it before
+        * allocating new buffer.
+        */
+       vfree(pgf_info->user_mappings);
+       pgf_info->user_mappings =
+                       vmalloc(pgf_info->num_of_user_mappings * sizeof(struct hl_user_mapping));
+       if (!pgf_info->user_mappings) {
+               pgf_info->num_of_user_mappings = 0;
+               goto finish;
+       }
+
+       hash_for_each(ctx->mem_hash, i, hnode, node) {
+               userptr = hnode->ptr;
+               pgf_info->user_mappings[map_idx].dev_va = hnode->vaddr;
+               pgf_info->user_mappings[map_idx].size = userptr->size;
+               map_idx++;
+       }
+finish:
+       mutex_unlock(&ctx->mem_hash_lock);
+       hl_ctx_put(ctx);
+}
+
+void hl_capture_page_fault(struct hl_device *hdev, u64 addr, u16 eng_id, bool is_pmmu)
+{
+       /* Capture only the first page fault */
+       if (atomic_cmpxchg(&hdev->captured_err_info.pgf_info_recorded, 0, 1))
+               return;
+
+       hdev->captured_err_info.pgf_info.pgf.timestamp = ktime_to_ns(ktime_get());
+       hdev->captured_err_info.pgf_info.pgf.addr = addr;
+       hdev->captured_err_info.pgf_info.pgf.engine_id = eng_id;
+       hl_capture_user_mappings(hdev);
+}
index f4b3fa4b097675215ed1fa779f0a1783eee70d11..1489240d5a3adcd0b02a0125d0e16fa5aadb3e25 100644 (file)
@@ -2957,19 +2957,38 @@ struct undefined_opcode_info {
        bool write_enable;
 };
 
+/**
+ * struct page_fault_info - info about page fault
+ * @pgf_info: page fault information.
+ * @user_mappings: buffer containing user mappings.
+ * @num_of_user_mappings: number of user mappings.
+ */
+struct page_fault_info {
+       struct hl_page_fault_info       pgf;
+       struct hl_user_mapping          *user_mappings;
+       u64                             num_of_user_mappings;
+};
+
 /**
  * struct hl_error_info - holds information collected during an error.
  * @cs_timeout: CS timeout error information.
  * @razwi: razwi information.
  * @razwi_info_recorded: if set writing to razwi information is enabled.
- *                otherwise - disabled, so the first (root cause) razwi will not be overwritten.
+ *                       otherwise - disabled, so the first (root cause) razwi will not be
+ *                       overwritten.
  * @undef_opcode: undefined opcode information
+ * @pgf_info: page fault information.
+ * @pgf_info_recorded: if set writing to page fault information is enabled.
+ *                     otherwise - disabled, so the first (root cause) page fault will not be
+ *                     overwritten.
  */
 struct hl_error_info {
        struct cs_timeout_info          cs_timeout;
        struct hl_info_razwi_event      razwi;
        atomic_t                        razwi_info_recorded;
        struct undefined_opcode_info    undef_opcode;
+       struct page_fault_info          pgf_info;
+       atomic_t                        pgf_info_recorded;
 };
 
 /**
@@ -3781,6 +3800,7 @@ hl_mmap_mem_buf_alloc(struct hl_mem_mgr *mmg,
 __printf(2, 3) void hl_engine_data_sprintf(struct engines_data *e, const char *fmt, ...);
 void hl_capture_razwi(struct hl_device *hdev, u64 addr, u16 *engine_id, u16 num_of_engines,
                        u8 flags);
+void hl_capture_page_fault(struct hl_device *hdev, u64 addr, u16 eng_id, bool is_pmmu);
 
 #ifdef CONFIG_DEBUG_FS
 
index d87434b9bc16486f263eb23ecaecfbeda67862e6..714994725224f7828bb03394ed14476e14288be9 100644 (file)
@@ -213,6 +213,7 @@ int hl_device_open(struct inode *inode, struct file *filp)
 
        atomic_set(&hdev->captured_err_info.cs_timeout.write_enable, 1);
        atomic_set(&hdev->captured_err_info.razwi_info_recorded, 0);
+       atomic_set(&hdev->captured_err_info.pgf_info_recorded, 0);
        hdev->captured_err_info.undef_opcode.write_enable = true;
 
        hdev->open_counter++;
index 6aef4e24d122d30ddad7fd985a68408d77df4105..cac2c7fb14f1b59386a1015c7ec0e7fb939de044 100644 (file)
@@ -778,6 +778,42 @@ static int engine_status_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
        return rc;
 }
 
+static int page_fault_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
+{
+       struct hl_device *hdev = hpriv->hdev;
+       u32 max_size = args->return_size;
+       struct hl_page_fault_info *info = &hdev->captured_err_info.pgf_info.pgf;
+       void __user *out = (void __user *) (uintptr_t) args->return_pointer;
+
+       if ((!max_size) || (!out))
+               return -EINVAL;
+
+       return copy_to_user(out, info, min_t(size_t, max_size, sizeof(struct hl_page_fault_info)))
+                               ? -EFAULT : 0;
+}
+
+static int user_mappings_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
+{
+       void __user *out = (void __user *) (uintptr_t) args->return_pointer;
+       u32 user_buf_size = args->return_size;
+       struct hl_device *hdev = hpriv->hdev;
+       struct page_fault_info *pgf_info;
+       u64 actual_size;
+
+       pgf_info = &hdev->captured_err_info.pgf_info;
+       args->array_size = pgf_info->num_of_user_mappings;
+
+       if (!out)
+               return -EINVAL;
+
+       actual_size = pgf_info->num_of_user_mappings * sizeof(struct hl_user_mapping);
+       if (user_buf_size < actual_size)
+               return -ENOMEM;
+
+       return copy_to_user(out, pgf_info->user_mappings, min_t(size_t, user_buf_size, actual_size))
+                               ? -EFAULT : 0;
+}
+
 static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data,
                                struct device *dev)
 {
@@ -837,6 +873,12 @@ static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data,
        case HL_INFO_GET_EVENTS:
                return events_info(hpriv, args);
 
+       case HL_INFO_PAGE_FAULT_EVENT:
+               return page_fault_info(hpriv, args);
+
+       case HL_INFO_USER_MAPPINGS:
+               return user_mappings_info(hpriv, args);
+
        default:
                break;
        }
index f856ac51fde1ca66f51f5d7605d38ff3eb1f9f32..1a99f7be8b609ccb0a67d73c9f09b304626c553c 100644 (file)
@@ -6755,6 +6755,8 @@ static void gaudi_print_and_get_mmu_error_info(struct hl_device *hdev, u64 *addr
                *addr |= RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE_VA);
 
                dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n", *addr);
+               hl_capture_page_fault(hdev, *addr, 0, true);
+
                WREG32(mmMMU_UP_PAGE_ERROR_CAPTURE, 0);
        }
 
index d6f84cb35e3d89c5714b2815b29147535aa7e178..2b794f54e2ed315b1412e0d96affdff8b7bc6cb2 100644 (file)
@@ -778,6 +778,9 @@ enum hl_server_type {
  * HL_INFO_UNREGISTER_EVENTFD - Unregister eventfd
  * HL_INFO_GET_EVENTS         - Retrieve the last occurred events
  * HL_INFO_UNDEFINED_OPCODE_EVENT - Retrieve last undefined opcode error information.
+ * HL_INFO_ENGINE_STATUS - Retrieve the status of all the h/w engines in the asic.
+ * HL_INFO_PAGE_FAULT_EVENT - Retrieve parameters of captured page fault.
+ * HL_INFO_USER_MAPPINGS - Retrieve user mappings, captured after page fault event.
  */
 #define HL_INFO_HW_IP_INFO                     0
 #define HL_INFO_HW_EVENTS                      1
@@ -809,6 +812,8 @@ enum hl_server_type {
 #define HL_INFO_GET_EVENTS                     30
 #define HL_INFO_UNDEFINED_OPCODE_EVENT         31
 #define HL_INFO_ENGINE_STATUS                  32
+#define HL_INFO_PAGE_FAULT_EVENT               33
+#define HL_INFO_USER_MAPPINGS                  34
 
 #define HL_INFO_VERSION_MAX_LEN                        128
 #define HL_INFO_CARD_NAME_MAX_LEN              16
@@ -1187,6 +1192,29 @@ struct hl_info_sec_attest {
        __u8 pad0[2];
 };
 
+/**
+ * struct hl_page_fault_info - page fault information.
+ * @timestamp: timestamp of page fault.
+ * @addr: address which accessing it caused page fault.
+ * @engine_id: engine id which caused the page fault, supported only in gaudi3.
+ */
+struct hl_page_fault_info {
+       __s64 timestamp;
+       __u64 addr;
+       __u16 engine_id;
+       __u8 pad[6];
+};
+
+/**
+ * struct hl_user_mapping - user mapping information.
+ * @dev_va: device virtual address.
+ * @size: virtual address mapping size.
+ */
+struct hl_user_mapping {
+       __u64 dev_va;
+       __u64 size;
+};
+
 enum gaudi_dcores {
        HL_GAUDI_WS_DCORE,
        HL_GAUDI_WN_DCORE,
@@ -1213,6 +1241,8 @@ enum gaudi_dcores {
  *                           needed, hence updating this variable so user will know the exact amount
  *                           of bytes copied by the kernel to the buffer.
  * @sec_attest_nonce: Nonce number used for attestation report.
+ * @array_size: Number of array members copied to user buffer.
+ *              Relevant for HL_INFO_USER_MAPPINGS info ioctl.
  * @pad: Padding to 64 bit.
  */
 struct hl_info_args {
@@ -1228,6 +1258,7 @@ struct hl_info_args {
                __u32 eventfd;
                __u32 user_buffer_actual_size;
                __u32 sec_attest_nonce;
+               __u32 array_size;
        };
 
        __u32 pad;