drm/amdkfd: Update interrupt handling for GFX 9.4.3
author: Mukul Joshi <mukul.joshi@amd.com>
Thu, 22 Jun 2023 19:24:32 +0000 (15:24 -0400)
committer: Alex Deucher <alexander.deucher@amd.com>
Fri, 30 Jun 2023 17:11:35 +0000 (13:11 -0400)
For GFX 9.4.3, interrupt handling needs to be updated for:
- Interrupt cookie will have a NodeId field. Each KFD
  node needs to check the NodeId before processing the
  interrupt.
- For CPX mode, there are additional checks of client ID
  needed to process the interrupt.
- Add NodeId to the process drain interrupt.

Signed-off-by: Mukul Joshi <mukul.joshi@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdkfd/kfd_device.c
drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
drivers/gpu/drm/amd/amdkfd/kfd_priv.h
drivers/gpu/drm/amd/amdkfd/kfd_process.c
drivers/gpu/drm/amd/amdkfd/soc15_int.h

index 226d2dd7fa493ae5b6149481f2064c239f434708..0b3dc754e06ba264bf25835f33c7607a6fae4dfe 100644 (file)
@@ -138,9 +138,12 @@ static void kfd_device_info_set_event_interrupt_class(struct kfd_dev *kfd)
        case IP_VERSION(9, 4, 0): /* VEGA20 */
        case IP_VERSION(9, 4, 1): /* ARCTURUS */
        case IP_VERSION(9, 4, 2): /* ALDEBARAN */
-       case IP_VERSION(9, 4, 3): /* GC 9.4.3 */
                kfd->device_info.event_interrupt_class = &event_interrupt_class_v9;
                break;
+       case IP_VERSION(9, 4, 3): /* GC 9.4.3 */
+               kfd->device_info.event_interrupt_class =
+                                               &event_interrupt_class_v9_4_3;
+               break;
        case IP_VERSION(10, 3, 1): /* VANGOGH */
        case IP_VERSION(10, 3, 3): /* YELLOW_CARP */
        case IP_VERSION(10, 3, 6): /* GC 10.3.6 */
@@ -599,6 +602,41 @@ static void kfd_cleanup_nodes(struct kfd_dev *kfd, unsigned int num_nodes)
        }
 }
 
+static void kfd_setup_interrupt_bitmap(struct kfd_node *node,
+                                      unsigned int kfd_node_idx)
+{
+       struct amdgpu_device *adev = node->adev;
+       uint32_t xcc_mask = node->xcc_mask;
+       uint32_t xcc, mapped_xcc;
+       /*
+        * Build node->interrupt_bitmap, which selects the interrupts
+        * from different XCDs and AIDs that this KFD node processes.
+        * Interrupt bitmap is defined as follows:
+        * 1. Bits 0-15 - correspond to the NodeId field.
+        *    Each bit corresponds to NodeId number. For example, if
+        *    a KFD node has interrupt bitmap set to 0x7, then this
+        *    KFD node will process interrupts with NodeId = 0, 1 and 2
+        *    in the IH cookie.
+        * 2. Bits 16-31 - unused.
+        *
+        * Please note that the kfd_node_idx argument passed to this
+        * function is not related to NodeId field received in the
+        * IH cookie. It is only used in the log message below.
+        *
+        * In CPX mode, a KFD node will process an interrupt if:
+        * - the Node Id matches the corresponding bit set in
+        *   Bits 0-15.
+        * - AND VMID reported in the interrupt lies within the
+        *   VMID range of the node.
+        *
+        * How the loop fills the bitmap: for every XCC instance in
+        * xcc_mask, the mapped instance number selects a 4-bit group
+        * (group index = mapped_xcc / 2, i.e. bits 4*group to
+        * 4*group + 3) and a pattern inside that group:
+        * - even mapped_xcc: 0x3 (first and second bit of the group)
+        * - odd mapped_xcc:  0x5 (first and third bit of the group)
+        * Both patterns contain the first bit of the group, so the two
+        * XCCs of a pair both accept NodeId 0, 4, 8 or 12 respectively.
+        *
+        * The bits are only OR-ed in; nothing here clears the bitmap.
+        * NOTE(review): this presumably relies on interrupt_bitmap
+        * being zero when the function is called - confirm.
+        * NOTE(review): adev is not referenced by name below; it is
+        * presumably consumed by the GET_INST() macro - confirm.
+        */
+       for_each_inst(xcc, xcc_mask) {
+               mapped_xcc = GET_INST(GC, xcc);
+               node->interrupt_bitmap |= (mapped_xcc % 2 ? 5 : 3) << (4 * (mapped_xcc / 2));
+       }
+       dev_info(kfd_device, "Node: %d, interrupt_bitmap: %x\n", kfd_node_idx,
+                                                       node->interrupt_bitmap);
+}
+
 bool kgd2kfd_device_init(struct kfd_dev *kfd,
                         const struct kgd2kfd_shared_resources *gpu_resources)
 {
@@ -798,6 +836,9 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
                amdgpu_amdkfd_get_local_mem_info(kfd->adev,
                                        &node->local_mem_info, node->xcp);
 
+               if (KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 3))
+                       kfd_setup_interrupt_bitmap(node, i);
+
                /* Initialize the KFD node */
                if (kfd_init_node(node)) {
                        dev_err(kfd_device, "Error initializing KFD node\n");
index d5c9f30552e3bd17e7621b33896ebe6a1f9b92ef..f0731a6a5306cd14b6ed2e3e660cb2a45ade3499 100644 (file)
@@ -446,7 +446,36 @@ static void event_interrupt_wq_v9(struct kfd_node *dev,
        }
 }
 
+static bool event_interrupt_isr_v9_4_3(struct kfd_node *node,
+                               const uint32_t *ih_ring_entry,
+                               uint32_t *patched_ihre,
+                               bool *patched_flag)
+{
+       uint16_t node_id, vmid;
+
+       /*
+        * For GFX 9.4.3, each KFD node filters interrupts before the
+        * common GFX9 handling runs:
+        * - The NodeId and VMID fields are extracted from the IH ring
+        *   entry.
+        * - kfd_irq_is_from_node() decides whether the entry belongs
+        *   to this node.
+        * - If it does, the entry is passed unchanged to
+        *   event_interrupt_isr_v9() and its result is returned.
+        * - Otherwise false is returned without calling the v9
+        *   handler.
+        *
+        * NOTE(review): kfd_irq_is_from_node() is defined elsewhere.
+        * Presumably it tests the NodeId bit in interrupt_bitmap
+        * Bits 0-15 and, in CPX mode, additionally checks that the
+        * VMID lies within the node's VMID range - confirm against
+        * its definition. Bits 16-31 of interrupt_bitmap are unused
+        * and the client ID is not examined here.
+        */
+       node_id = SOC15_NODEID_FROM_IH_ENTRY(ih_ring_entry);
+       vmid = SOC15_VMID_FROM_IH_ENTRY(ih_ring_entry);
+       if (kfd_irq_is_from_node(node, node_id, vmid))
+               return event_interrupt_isr_v9(node, ih_ring_entry,
+                                       patched_ihre, patched_flag);
+       return false;
+}
+
 const struct kfd_event_interrupt_class event_interrupt_class_v9 = { /* common GFX9 event interrupt handlers */
        .interrupt_isr = event_interrupt_isr_v9, /* ISR entry point, no NodeId filtering */
        .interrupt_wq = event_interrupt_wq_v9,
 };
+
+const struct kfd_event_interrupt_class event_interrupt_class_v9_4_3 = { /* GFX 9.4.3 event interrupt handlers */
+       .interrupt_isr = event_interrupt_isr_v9_4_3, /* filters on NodeId/VMID, then defers to the v9 ISR */
+       .interrupt_wq = event_interrupt_wq_v9, /* work-queue handler shared with the v9 class */
+};
index 7364a5d77c6ee065487df119c817bed2921821a5..d4c9ee3f99533acc685d63c13b624a9c6a408f6b 100644 (file)
@@ -1444,6 +1444,7 @@ uint64_t kfd_get_number_elems(struct kfd_dev *kfd);
 /* Events */
 extern const struct kfd_event_interrupt_class event_interrupt_class_cik;
 extern const struct kfd_event_interrupt_class event_interrupt_class_v9;
+extern const struct kfd_event_interrupt_class event_interrupt_class_v9_4_3;
 extern const struct kfd_event_interrupt_class event_interrupt_class_v10;
 extern const struct kfd_event_interrupt_class event_interrupt_class_v11;
 
index 3d3611705d4199b90c8cd9b249b9d5b594852e5f..a844e68211accf10dd188d5c6f44ebe4c3486ca0 100644 (file)
@@ -2142,6 +2142,7 @@ void kfd_flush_tlb(struct kfd_process_device *pdd, enum TLB_FLUSH_TYPE type)
 int kfd_process_drain_interrupts(struct kfd_process_device *pdd)
 {
        uint32_t irq_drain_fence[8];
+       uint8_t node_id = 0;
        int r = 0;
 
        if (!KFD_IS_SOC15(pdd->dev))
@@ -2154,6 +2155,14 @@ int kfd_process_drain_interrupts(struct kfd_process_device *pdd)
                                                        KFD_IRQ_FENCE_CLIENTID;
        irq_drain_fence[3] = pdd->process->pasid;
 
+       /*
+        * For GFX 9.4.3, send the NodeId also in IH cookie DW[3]
+        */
+       if (KFD_GC_VERSION(pdd->dev->kfd) == IP_VERSION(9, 4, 3)) {
+               node_id = ffs(pdd->dev->interrupt_bitmap) - 1;
+               irq_drain_fence[3] |= node_id << 16;
+       }
+
        /* ensure stale irqs scheduled KFD interrupts and send drain fence. */
        if (amdgpu_amdkfd_send_close_event_drain_irq(pdd->dev->adev,
                                                     irq_drain_fence)) {
index e3f3b0b93a59a426a27c2f464b8128e34301193a..10138676f27fd79503db9356ce4d436198bb0c70 100644 (file)
@@ -40,6 +40,7 @@
 #define SOC15_VMID_FROM_IH_ENTRY(entry) (le32_to_cpu(entry[0]) >> 24 & 0xf)
 #define SOC15_VMID_TYPE_FROM_IH_ENTRY(entry) (le32_to_cpu(entry[0]) >> 31 & 0x1)
 #define SOC15_PASID_FROM_IH_ENTRY(entry) (le32_to_cpu(entry[3]) & 0xffff)
+#define SOC15_NODEID_FROM_IH_ENTRY(entry) (le32_to_cpu(entry[3]) >> 16 & 0xff) /* NodeId: bits 23:16 of IH entry DW3 */
 #define SOC15_CONTEXT_ID0_FROM_IH_ENTRY(entry) (le32_to_cpu(entry[4]))
 #define SOC15_CONTEXT_ID1_FROM_IH_ENTRY(entry) (le32_to_cpu(entry[5]))
 #define SOC15_CONTEXT_ID2_FROM_IH_ENTRY(entry) (le32_to_cpu(entry[6]))