drm/amdgpu: retire gfx ras query_utcl2_poison_status
author Tao Zhou <tao.zhou1@amd.com>
Mon, 19 Feb 2024 07:55:24 +0000 (15:55 +0800)
committer Alex Deucher <alexander.deucher@amd.com>
Wed, 20 Mar 2024 17:37:36 +0000 (13:37 -0400)
Replace it with related interface in gfxhub functions.

v2: replace node id with xcc id.
    get node id for query_utcl2_poison_status

Signed-off-by: Tao Zhou <tao.zhou1@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c
drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c
drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c

index 35dd6effa9a34a1be9ce83f83ab7915a38f6b4e8..d5fde8adf19b096d4da70a6e0ee10cb8a970c4ee 100644 (file)
@@ -769,10 +769,11 @@ int amdgpu_amdkfd_send_close_event_drain_irq(struct amdgpu_device *adev,
        return 0;
 }
 
-bool amdgpu_amdkfd_ras_query_utcl2_poison_status(struct amdgpu_device *adev)
+bool amdgpu_amdkfd_ras_query_utcl2_poison_status(struct amdgpu_device *adev,
+                       int xcc_id)
 {
-       if (adev->gfx.ras && adev->gfx.ras->query_utcl2_poison_status)
-               return adev->gfx.ras->query_utcl2_poison_status(adev);
+       if (adev->gfxhub.funcs->query_utcl2_poison_status)
+               return adev->gfxhub.funcs->query_utcl2_poison_status(adev, xcc_id);
        else
                return false;
 }
index 0ef223c2affb6174a36d4d1e96dcbc017fcb51e2..caee36e52a09ba0535bdbb72680950395c815cf0 100644 (file)
@@ -341,7 +341,8 @@ bool amdgpu_amdkfd_is_fed(struct amdgpu_device *adev);
 bool amdgpu_amdkfd_bo_mapped_to_dev(struct amdgpu_device *adev, struct kgd_mem *mem);
 void amdgpu_amdkfd_block_mmu_notifications(void *p);
 int amdgpu_amdkfd_criu_resume(void *p);
-bool amdgpu_amdkfd_ras_query_utcl2_poison_status(struct amdgpu_device *adev);
+bool amdgpu_amdkfd_ras_query_utcl2_poison_status(struct amdgpu_device *adev,
+                       int xcc_id);
 int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
                uint64_t size, u32 alloc_flag, int8_t xcp_id);
 void amdgpu_amdkfd_unreserve_mem_limit(struct amdgpu_device *adev,
index 8fcf889ddce92b86a1a724b56ef2c493f43a326e..04a86dff71e635b367c06b2f66ec990dd485d97c 100644 (file)
@@ -259,7 +259,6 @@ struct amdgpu_cu_info {
 struct amdgpu_gfx_ras {
        struct amdgpu_ras_block_object  ras_block;
        void (*enable_watchdog_timer)(struct amdgpu_device *adev);
-       bool (*query_utcl2_poison_status)(struct amdgpu_device *adev);
        int (*rlc_gc_fed_irq)(struct amdgpu_device *adev,
                                struct amdgpu_irq_src *source,
                                struct amdgpu_iv_entry *entry);
index 065b2bd5f5a656dd0763655607c37d65a53c22ea..3f4fd2f08163def8957dea468e2b0eec025483d0 100644 (file)
@@ -1909,18 +1909,7 @@ static void gfx_v9_4_2_reset_sq_timeout_status(struct amdgpu_device *adev)
        mutex_unlock(&adev->grbm_idx_mutex);
 }
 
-static bool gfx_v9_4_2_query_uctl2_poison_status(struct amdgpu_device *adev)
-{
-       u32 status = 0;
-       struct amdgpu_vmhub *hub;
-
-       hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
-       status = RREG32(hub->vm_l2_pro_fault_status);
-       /* reset page fault status */
-       WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1);
 
-       return REG_GET_FIELD(status, VM_L2_PROTECTION_FAULT_STATUS, FED);
-}
 
 struct amdgpu_ras_block_hw_ops  gfx_v9_4_2_ras_ops = {
                .query_ras_error_count = &gfx_v9_4_2_query_ras_error_count,
@@ -1934,5 +1923,4 @@ struct amdgpu_gfx_ras gfx_v9_4_2_ras = {
                .hw_ops = &gfx_v9_4_2_ras_ops,
        },
        .enable_watchdog_timer = &gfx_v9_4_2_enable_watchdog_timer,
-       .query_utcl2_poison_status = gfx_v9_4_2_query_uctl2_poison_status,
 };
index 9a06c6fb6605851ae9c26ff4a81c66d358b9a69f..a8e76287dde0bbbe54c0de29979f7c3656d9a111 100644 (file)
@@ -367,10 +367,20 @@ static void event_interrupt_wq_v10(struct kfd_node *dev,
                   client_id == SOC15_IH_CLIENTID_UTCL2) {
                struct kfd_vm_fault_info info = {0};
                uint16_t ring_id = SOC15_RING_ID_FROM_IH_ENTRY(ih_ring_entry);
+               uint32_t node_id = SOC15_NODEID_FROM_IH_ENTRY(ih_ring_entry);
+               uint32_t vmid_type = SOC15_VMID_TYPE_FROM_IH_ENTRY(ih_ring_entry);
+               int xcc_id = 0;
                struct kfd_hsa_memory_exception_data exception_data;
 
-               if (client_id == SOC15_IH_CLIENTID_UTCL2 &&
-                               amdgpu_amdkfd_ras_query_utcl2_poison_status(dev->adev)) {
+               if (!vmid_type && dev->adev->gfx.funcs->ih_node_to_logical_xcc) {
+                       xcc_id = dev->adev->gfx.funcs->ih_node_to_logical_xcc(dev->adev,
+                               node_id);
+                       if (xcc_id < 0)
+                               xcc_id = 0;
+               }
+
+               if (client_id == SOC15_IH_CLIENTID_UTCL2 && !vmid_type &&
+                   amdgpu_amdkfd_ras_query_utcl2_poison_status(dev->adev, xcc_id)) {
                        event_interrupt_poison_consumption(dev, pasid, client_id);
                        return;
                }
index 91dd5e045b511d2aaa42aa4bd9b934e018c407ab..ff73923367957191b45d7aa669b4f103df5335b1 100644 (file)
@@ -413,10 +413,20 @@ static void event_interrupt_wq_v9(struct kfd_node *dev,
                   client_id == SOC15_IH_CLIENTID_UTCL2) {
                struct kfd_vm_fault_info info = {0};
                uint16_t ring_id = SOC15_RING_ID_FROM_IH_ENTRY(ih_ring_entry);
+               uint32_t node_id = SOC15_NODEID_FROM_IH_ENTRY(ih_ring_entry);
+               uint32_t vmid_type = SOC15_VMID_TYPE_FROM_IH_ENTRY(ih_ring_entry);
+               int xcc_id = 0;
                struct kfd_hsa_memory_exception_data exception_data;
 
-               if (client_id == SOC15_IH_CLIENTID_UTCL2 &&
-                   amdgpu_amdkfd_ras_query_utcl2_poison_status(dev->adev)) {
+               if (!vmid_type && dev->adev->gfx.funcs->ih_node_to_logical_xcc) {
+                       xcc_id = dev->adev->gfx.funcs->ih_node_to_logical_xcc(dev->adev,
+                               node_id);
+                       if (xcc_id < 0)
+                               xcc_id = 0;
+               }
+
+               if (client_id == SOC15_IH_CLIENTID_UTCL2 && !vmid_type &&
+                   amdgpu_amdkfd_ras_query_utcl2_poison_status(dev->adev, xcc_id)) {
                        event_interrupt_poison_consumption_v9(dev, pasid, client_id);
                        return;
                }