drm/amdgpu: move convert_error_address out of umc_ras
authorHawking Zhang <Hawking.Zhang@amd.com>
Fri, 14 Oct 2022 07:17:43 +0000 (15:17 +0800)
committerAlex Deucher <alexander.deucher@amd.com>
Mon, 17 Oct 2022 21:41:21 +0000 (17:41 -0400)
The RAS error address translation algorithm is common
across dGPU and A + A platforms as long as the SOC
integrates the same generation of UMC IP.

UMC RAS is managed by x86 MCA on A + A platform,
umc_ras in GPU driver is not initialized at all on
A + A platform. In such case, any umc_ras callback
implemented for dGPU config shouldn't be invoked
from A + A specific callback.

The change moves convert_error_address out of the dGPU
umc_ras structure and makes it shared between A + A
and dGPU configs.

Signed-off-by: Hawking Zhang <Hawking.Zhang@amd.com>
Reviewed-by: Stanley Yang <Stanley.Yang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
drivers/gpu/drm/amd/amdgpu/umc_v6_7.c
drivers/gpu/drm/amd/amdgpu/umc_v6_7.h

index a4b47e1bd111d518548294d4ee2aec52ebaf8553..21a47f2bb87bc9cacaf7bc479930fd3525ee3244 100644 (file)
@@ -36,6 +36,7 @@
 #include "ivsrcid/nbio/irqsrcs_nbif_7_4.h"
 #include "atom.h"
 #include "amdgpu_reset.h"
+#include "umc_v6_7.h"
 
 #ifdef CONFIG_X86_MCE_AMD
 #include <asm/mce.h>
@@ -2899,10 +2900,17 @@ static int amdgpu_bad_page_notifier(struct notifier_block *nb,
        /*
         * Translate UMC channel address to Physical address
         */
-       if (adev->umc.ras &&
-           adev->umc.ras->convert_ras_error_address)
-               adev->umc.ras->convert_ras_error_address(adev,
-                       &err_data, m->addr, ch_inst, umc_inst);
+       switch (adev->ip_versions[UMC_HWIP][0]) {
+       case IP_VERSION(6, 7, 0):
+               umc_v6_7_convert_error_address(adev,
+                               &err_data, m->addr, ch_inst, umc_inst);
+               break;
+       default:
+               dev_warn(adev->dev,
+                        "UMC address to Physical address translation is not supported\n");
+               kfree(err_data.err_addr);
+               return NOTIFY_DONE;
+       }
 
        if (amdgpu_bad_page_threshold != 0) {
                amdgpu_ras_add_bad_pages(adev, err_data.err_addr,
index e46439274f3a0911b0f3b36e1d6689826b2aac98..3629d8f292ef9272e4f352493d1613c5aae08fce 100644 (file)
@@ -51,9 +51,6 @@ struct amdgpu_umc_ras {
        struct amdgpu_ras_block_object ras_block;
        void (*err_cnt_init)(struct amdgpu_device *adev);
        bool (*query_ras_poison_mode)(struct amdgpu_device *adev);
-       void (*convert_ras_error_address)(struct amdgpu_device *adev,
-                               struct ras_err_data *err_data, uint64_t err_addr,
-                               uint32_t ch_inst, uint32_t umc_inst);
        void (*ecc_info_query_ras_error_count)(struct amdgpu_device *adev,
                                      void *ras_error_status);
        void (*ecc_info_query_ras_error_address)(struct amdgpu_device *adev,
index 5d5d031c9e7d09d60dcca406b32932cd36d42628..72fd963f178bc4a0aa3e0bdd13f4f28b2abca6c3 100644 (file)
@@ -187,9 +187,9 @@ static void umc_v6_7_ecc_info_query_ras_error_count(struct amdgpu_device *adev,
        }
 }
 
-static void umc_v6_7_convert_error_address(struct amdgpu_device *adev,
-                                       struct ras_err_data *err_data, uint64_t err_addr,
-                                       uint32_t ch_inst, uint32_t umc_inst)
+void umc_v6_7_convert_error_address(struct amdgpu_device *adev,
+                                   struct ras_err_data *err_data, uint64_t err_addr,
+                                   uint32_t ch_inst, uint32_t umc_inst)
 {
        uint32_t channel_index;
        uint64_t soc_pa, retired_page, column;
@@ -553,5 +553,4 @@ struct amdgpu_umc_ras umc_v6_7_ras = {
        .query_ras_poison_mode = umc_v6_7_query_ras_poison_mode,
        .ecc_info_query_ras_error_count = umc_v6_7_ecc_info_query_ras_error_count,
        .ecc_info_query_ras_error_address = umc_v6_7_ecc_info_query_ras_error_address,
-       .convert_ras_error_address = umc_v6_7_convert_error_address,
 };
index fe41ed2f5945119a357cee57d2a2052ed2746b69..105245d5b6e50e22d3b1abf2bb8b6ef19e404d74 100644 (file)
@@ -71,5 +71,7 @@ extern const uint32_t
        umc_v6_7_channel_idx_tbl_second[UMC_V6_7_UMC_INSTANCE_NUM][UMC_V6_7_CHANNEL_INSTANCE_NUM];
 extern const uint32_t
        umc_v6_7_channel_idx_tbl_first[UMC_V6_7_UMC_INSTANCE_NUM][UMC_V6_7_CHANNEL_INSTANCE_NUM];
-
+void umc_v6_7_convert_error_address(struct amdgpu_device *adev,
+                                    struct ras_err_data *err_data, uint64_t err_addr,
+                                    uint32_t ch_inst, uint32_t umc_inst);
 #endif