accel/habanalabs/gaudi2: add interrupt affinity for user interrupts
author: Dani Liberman <dliberman@habana.ai>
Thu, 21 Sep 2023 14:02:33 +0000 (17:02 +0300)
committer: Oded Gabbay <ogabbay@kernel.org>
Mon, 26 Feb 2024 07:30:40 +0000 (09:30 +0200)
User interrupts are MSI-X interrupts coming from Gaudi2 that have a
specific range of IDs and are reserved for the sole use of the user
process that opened the Gaudi2 device (reminder: there can be only
a single user process running on Gaudi2 at any given time).

The interrupts are allocated and managed by the driver; therefore,
the user expects the driver to initialize them properly, which
includes setting their affinity to the CPU cores of the device's
NUMA node in order to get maximum performance.

Signed-off-by: Dani Liberman <dliberman@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
drivers/accel/habanalabs/common/device.c
drivers/accel/habanalabs/common/habanalabs.h
drivers/accel/habanalabs/gaudi2/gaudi2.c

index a73bd4be94b156318aeb2faffd585ac4ef85fbd0..5eacbc73f1bb989483e816fe8a2072f82e1a6644 100644 (file)
@@ -2801,3 +2801,35 @@ void hl_enable_err_info_capture(struct hl_error_info *captured_err_info)
        atomic_set(&captured_err_info->cs_timeout.write_enable, 1);
        captured_err_info->undef_opcode.write_enable = true;
 }
+
+/**
+ * hl_init_cpu_for_irq() - populate the CPU mask used for interrupt affinity.
+ * @hdev: pointer to habanalabs device structure.
+ *
+ * For every online CPU that belongs to the device's NUMA node, the first CPU
+ * of its topology sibling mask is set in hdev->irq_affinity_mask. Bits are
+ * only ever set here, never cleared.
+ *
+ * The mask is left untouched when CONFIG_NUMA is not set, when the device
+ * reports no NUMA node (negative node id), or when none of the node's CPUs
+ * is online (an error is logged in that last case).
+ */
+void hl_init_cpu_for_irq(struct hl_device *hdev)
+{
+#ifdef CONFIG_NUMA
+       struct cpumask *available_mask = &hdev->irq_affinity_mask;
+       int numa_node = hdev->pdev->dev.numa_node, i;
+       const struct cpumask *node_mask;
+
+       if (numa_node < 0)
+               return;
+
+       node_mask = cpumask_of_node(numa_node);
+
+       if (!cpumask_intersects(node_mask, cpu_online_mask)) {
+               dev_err(hdev->dev, "No available affinities in current numa node\n");
+               return;
+       }
+
+       /*
+        * Remove HT siblings.
+        *
+        * The node/online intersection is walked directly rather than being
+        * staged in a function-level static cpumask, which would be shared by
+        * all devices and written without any locking.
+        */
+       for_each_cpu_and(i, node_mask, cpu_online_mask)
+               cpumask_set_cpu(cpumask_first(topology_sibling_cpumask(i)), available_mask);
+#endif
+}
+
+/**
+ * hl_set_irq_affinity() - apply the device's affinity mask to an interrupt.
+ * @hdev: pointer to habanalabs device structure.
+ * @irq: Linux IRQ number to configure.
+ *
+ * Does nothing except emit a debug message when hdev->irq_affinity_mask is
+ * empty. A failure to apply the mask is logged and otherwise ignored; no
+ * status is returned to the caller.
+ */
+void hl_set_irq_affinity(struct hl_device *hdev, int irq)
+{
+       if (cpumask_empty(&hdev->irq_affinity_mask)) {
+               dev_dbg(hdev->dev, "affinity mask is empty\n");
+               return;
+       }
+
+       /* irq_set_affinity_hint() is deprecated; use its direct replacement */
+       if (irq_set_affinity_and_hint(irq, &hdev->irq_affinity_mask))
+               dev_err(hdev->dev, "Failed setting irq %d affinity\n", irq);
+}
index 2a900c9941fee698fe225962de6f7e5715b7fee4..b1a7b229e16160b04a940d36cf781c8853a65bbe 100644 (file)
@@ -3257,6 +3257,7 @@ struct hl_reset_info {
  * @clk_throttling: holds information about current/previous clock throttling events
  * @captured_err_info: holds information about errors.
  * @reset_info: holds current device reset information.
+ * @irq_affinity_mask: mask of available CPU cores for user and decoder interrupt handling.
  * @stream_master_qid_arr: pointer to array with QIDs of master streams.
  * @fw_inner_major_ver: the major of current loaded preboot inner version.
  * @fw_inner_minor_ver: the minor of current loaded preboot inner version.
@@ -3446,6 +3447,8 @@ struct hl_device {
 
        struct hl_reset_info            reset_info;
 
+       cpumask_t                       irq_affinity_mask;
+
        u32                             *stream_master_qid_arr;
        u32                             fw_inner_major_ver;
        u32                             fw_inner_minor_ver;
@@ -4032,6 +4035,8 @@ void hl_handle_critical_hw_err(struct hl_device *hdev, u16 event_id, u64 *event_
 void hl_handle_fw_err(struct hl_device *hdev, struct hl_info_fw_err_info *info);
 void hl_capture_engine_err(struct hl_device *hdev, u16 engine_id, u16 error_count);
 void hl_enable_err_info_capture(struct hl_error_info *captured_err_info);
+void hl_init_cpu_for_irq(struct hl_device *hdev);
+void hl_set_irq_affinity(struct hl_device *hdev, int irq);
 
 #ifdef CONFIG_DEBUG_FS
 
index e0e5615ef9b0f63de08f56f721df13d6ea4b947a..fd01525b1ea204ddb00d3bf6e1522d03c00cbfec 100644 (file)
@@ -4254,6 +4254,8 @@ static int gaudi2_enable_msix(struct hl_device *hdev)
        if (gaudi2->hw_cap_initialized & HW_CAP_MSIX)
                return 0;
 
+       hl_init_cpu_for_irq(hdev);
+
        rc = pci_alloc_irq_vectors(hdev->pdev, GAUDI2_MSIX_ENTRIES, GAUDI2_MSIX_ENTRIES,
                                        PCI_IRQ_MSIX);
        if (rc < 0) {
@@ -4307,6 +4309,7 @@ static int gaudi2_enable_msix(struct hl_device *hdev)
                        i++, j++, user_irq_init_cnt++) {
 
                irq = pci_irq_vector(hdev->pdev, i);
+               hl_set_irq_affinity(hdev, irq);
                rc = request_irq(irq, hl_irq_user_interrupt_handler, 0, gaudi2_irq_name(i),
                                &hdev->user_interrupt[j]);
                if (rc) {
@@ -4333,6 +4336,7 @@ free_user_irq:
                        i < GAUDI2_IRQ_NUM_USER_FIRST + user_irq_init_cnt ; i++, j++) {
 
                irq = pci_irq_vector(hdev->pdev, i);
+               irq_set_affinity_hint(irq, NULL);
                free_irq(irq, &hdev->user_interrupt[j]);
        }
        irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_UNEXPECTED_ERROR);
@@ -4413,6 +4417,7 @@ static void gaudi2_disable_msix(struct hl_device *hdev)
                        k < hdev->asic_prop.user_interrupt_count ; i++, j++, k++) {
 
                irq = pci_irq_vector(hdev->pdev, i);
+               irq_set_affinity_hint(irq, NULL);
                free_irq(irq, &hdev->user_interrupt[j]);
        }