drm/amdgpu: Modify umc block to fit for the unified ras block data and ops
Author: yipechai <YiPeng.Chai@amd.com>
Date: Thu, 6 Jan 2022 06:07:44 +0000 (14:07 +0800)
Committer: Alex Deucher <alexander.deucher@amd.com>
Date: Fri, 14 Jan 2022 22:51:59 +0000 (17:51 -0500)
1. Modify the umc block to fit the unified ras block data and ops.
2. Change amdgpu_umc_ras_funcs to amdgpu_umc_ras, and remove the _funcs suffix from the corresponding variable names.
3. Remove the const qualifier from the umc ras variables so that the umc ras block can be inserted into the amdgpu device ras block linked list.
4. Invoke the amdgpu_ras_register_ras_block function to register the umc ras block into the amdgpu device ras block linked list.
5. Remove the redundant umc code in amdgpu_ras.c that is no longer needed after switching to the unified ras block.
6. Fill in the unified ras block .name, .block, .ras_late_init and .ras_fini fields for all umc versions. If .ras_late_init and .ras_fini have been defined by the selected umc version, those definitions take effect; if not, they default to amdgpu_umc_ras_late_init and amdgpu_umc_ras_fini.

Signed-off-by: yipechai <YiPeng.Chai@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Reviewed-by: John Clements <john.clements@amd.com>
Reviewed-by: Tao Zhou <tao.zhou1@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
12 files changed:
drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
drivers/gpu/drm/amd/amdgpu/umc_v6_1.c
drivers/gpu/drm/amd/amdgpu/umc_v6_1.h
drivers/gpu/drm/amd/amdgpu/umc_v6_7.c
drivers/gpu/drm/amd/amdgpu/umc_v6_7.h
drivers/gpu/drm/amd/amdgpu/umc_v8_7.c
drivers/gpu/drm/amd/amdgpu/umc_v8_7.h

index acf806c87673cf20f4f48dfd55e3baf34c5ecab9..de0ef21e15011f2a9544dca269e1d2b5d643801d 100644 (file)
@@ -440,9 +440,8 @@ int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev)
 {
        int r;
 
-       if (adev->umc.ras_funcs &&
-           adev->umc.ras_funcs->ras_late_init) {
-               r = adev->umc.ras_funcs->ras_late_init(adev);
+       if (adev->umc.ras && adev->umc.ras->ras_block.ras_late_init) {
+               r = adev->umc.ras->ras_block.ras_late_init(adev, NULL);
                if (r)
                        return r;
        }
@@ -496,9 +495,8 @@ int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev)
 
 void amdgpu_gmc_ras_fini(struct amdgpu_device *adev)
 {
-       if (adev->umc.ras_funcs &&
-           adev->umc.ras_funcs->ras_fini)
-               adev->umc.ras_funcs->ras_fini(adev);
+       if (adev->umc.ras && adev->umc.ras->ras_block.ras_fini)
+               adev->umc.ras->ras_block.ras_fini(adev);
 
        if (adev->mmhub.ras && adev->mmhub.ras->ras_block.ras_fini)
                adev->mmhub.ras->ras_block.ras_fini(adev);
index 6d1ca9e9795bb36fc5c596c3c6d93caa1dff903e..fba1c415a2a8331d3d84bf1effed4f35c9bcd634 100644 (file)
@@ -939,24 +939,24 @@ static void amdgpu_ras_get_ecc_info(struct amdgpu_device *adev, struct ras_err_d
         */
        ret = amdgpu_dpm_get_ecc_info(adev, (void *)&(ras->umc_ecc));
        if (ret == -EOPNOTSUPP) {
-               if (adev->umc.ras_funcs &&
-                       adev->umc.ras_funcs->query_ras_error_count)
-                       adev->umc.ras_funcs->query_ras_error_count(adev, err_data);
+               if (adev->umc.ras && adev->umc.ras->ras_block.hw_ops &&
+                       adev->umc.ras->ras_block.hw_ops->query_ras_error_count)
+                       adev->umc.ras->ras_block.hw_ops->query_ras_error_count(adev, err_data);
 
                /* umc query_ras_error_address is also responsible for clearing
                 * error status
                 */
-               if (adev->umc.ras_funcs &&
-                   adev->umc.ras_funcs->query_ras_error_address)
-                       adev->umc.ras_funcs->query_ras_error_address(adev, err_data);
+               if (adev->umc.ras && adev->umc.ras->ras_block.hw_ops &&
+                   adev->umc.ras->ras_block.hw_ops->query_ras_error_address)
+                       adev->umc.ras->ras_block.hw_ops->query_ras_error_address(adev, err_data);
        } else if (!ret) {
-               if (adev->umc.ras_funcs &&
-                       adev->umc.ras_funcs->ecc_info_query_ras_error_count)
-                       adev->umc.ras_funcs->ecc_info_query_ras_error_count(adev, err_data);
+               if (adev->umc.ras &&
+                       adev->umc.ras->ecc_info_query_ras_error_count)
+                       adev->umc.ras->ecc_info_query_ras_error_count(adev, err_data);
 
-               if (adev->umc.ras_funcs &&
-                       adev->umc.ras_funcs->ecc_info_query_ras_error_address)
-                       adev->umc.ras_funcs->ecc_info_query_ras_error_address(adev, err_data);
+               if (adev->umc.ras &&
+                       adev->umc.ras->ecc_info_query_ras_error_address)
+                       adev->umc.ras->ecc_info_query_ras_error_address(adev, err_data);
        }
 }
 
@@ -2412,12 +2412,12 @@ int amdgpu_ras_init(struct amdgpu_device *adev)
        }
        else if (adev->df.funcs &&
            adev->df.funcs->query_ras_poison_mode &&
-           adev->umc.ras_funcs &&
-           adev->umc.ras_funcs->query_ras_poison_mode) {
+           adev->umc.ras &&
+           adev->umc.ras->query_ras_poison_mode) {
                df_poison =
                        adev->df.funcs->query_ras_poison_mode(adev);
                umc_poison =
-                       adev->umc.ras_funcs->query_ras_poison_mode(adev);
+                       adev->umc.ras->query_ras_poison_mode(adev);
                /* Only poison is set in both DF and UMC, we can support it */
                if (df_poison && umc_poison)
                        con->poison_supported = true;
index e81ce465ff3abd85f74884f2160b4afd8e18f699..b4c68c09e0714f36312228332c41e7a4814ac3d2 100644 (file)
@@ -35,12 +35,12 @@ static int amdgpu_umc_do_page_retirement(struct amdgpu_device *adev,
        kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
        ret = amdgpu_dpm_get_ecc_info(adev, (void *)&(con->umc_ecc));
        if (ret == -EOPNOTSUPP) {
-               if (adev->umc.ras_funcs &&
-                   adev->umc.ras_funcs->query_ras_error_count)
-                   adev->umc.ras_funcs->query_ras_error_count(adev, ras_error_status);
+               if (adev->umc.ras && adev->umc.ras->ras_block.hw_ops &&
+                   adev->umc.ras->ras_block.hw_ops->query_ras_error_count)
+                   adev->umc.ras->ras_block.hw_ops->query_ras_error_count(adev, ras_error_status);
 
-               if (adev->umc.ras_funcs &&
-                   adev->umc.ras_funcs->query_ras_error_address &&
+               if (adev->umc.ras && adev->umc.ras->ras_block.hw_ops &&
+                   adev->umc.ras->ras_block.hw_ops->query_ras_error_address &&
                    adev->umc.max_ras_err_cnt_per_query) {
                        err_data->err_addr =
                                kcalloc(adev->umc.max_ras_err_cnt_per_query,
@@ -56,15 +56,15 @@ static int amdgpu_umc_do_page_retirement(struct amdgpu_device *adev,
                        /* umc query_ras_error_address is also responsible for clearing
                         * error status
                         */
-                       adev->umc.ras_funcs->query_ras_error_address(adev, ras_error_status);
+                       adev->umc.ras->ras_block.hw_ops->query_ras_error_address(adev, ras_error_status);
                }
        } else if (!ret) {
-               if (adev->umc.ras_funcs &&
-                   adev->umc.ras_funcs->ecc_info_query_ras_error_count)
-                   adev->umc.ras_funcs->ecc_info_query_ras_error_count(adev, ras_error_status);
+               if (adev->umc.ras &&
+                   adev->umc.ras->ecc_info_query_ras_error_count)
+                   adev->umc.ras->ecc_info_query_ras_error_count(adev, ras_error_status);
 
-               if (adev->umc.ras_funcs &&
-                   adev->umc.ras_funcs->ecc_info_query_ras_error_address &&
+               if (adev->umc.ras &&
+                   adev->umc.ras->ecc_info_query_ras_error_address &&
                    adev->umc.max_ras_err_cnt_per_query) {
                        err_data->err_addr =
                                kcalloc(adev->umc.max_ras_err_cnt_per_query,
@@ -80,7 +80,7 @@ static int amdgpu_umc_do_page_retirement(struct amdgpu_device *adev,
                        /* umc query_ras_error_address is also responsible for clearing
                         * error status
                         */
-                       adev->umc.ras_funcs->ecc_info_query_ras_error_address(adev, ras_error_status);
+                       adev->umc.ras->ecc_info_query_ras_error_address(adev, ras_error_status);
                }
        }
 
@@ -136,7 +136,7 @@ static int amdgpu_umc_process_ras_data_cb(struct amdgpu_device *adev,
        return amdgpu_umc_do_page_retirement(adev, ras_error_status, entry, true);
 }
 
-int amdgpu_umc_ras_late_init(struct amdgpu_device *adev)
+int amdgpu_umc_ras_late_init(struct amdgpu_device *adev, void *ras_info)
 {
        int r;
        struct ras_fs_if fs_info = {
@@ -172,9 +172,9 @@ int amdgpu_umc_ras_late_init(struct amdgpu_device *adev)
        }
 
        /* ras init of specific umc version */
-       if (adev->umc.ras_funcs &&
-           adev->umc.ras_funcs->err_cnt_init)
-               adev->umc.ras_funcs->err_cnt_init(adev);
+       if (adev->umc.ras &&
+           adev->umc.ras->err_cnt_init)
+               adev->umc.ras->err_cnt_init(adev);
 
        return 0;
 
index b72194e8bfe5310d2398d8feca0e41263bb3145d..195740a6d97d57ca40715c829cb65b36a573b6cd 100644 (file)
@@ -20,6 +20,7 @@
  */
 #ifndef __AMDGPU_UMC_H__
 #define __AMDGPU_UMC_H__
+#include "amdgpu_ras.h"
 
 /*
  * (addr / 256) * 4096, the higher 26 bits in ErrorAddr
 #define LOOP_UMC_CH_INST(ch_inst) for ((ch_inst) = 0; (ch_inst) < adev->umc.channel_inst_num; (ch_inst)++)
 #define LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) LOOP_UMC_INST((umc_inst)) LOOP_UMC_CH_INST((ch_inst))
 
-struct amdgpu_umc_ras_funcs {
+struct amdgpu_umc_ras {
+       struct amdgpu_ras_block_object ras_block;
        void (*err_cnt_init)(struct amdgpu_device *adev);
-       int (*ras_late_init)(struct amdgpu_device *adev);
-       void (*ras_fini)(struct amdgpu_device *adev);
-       void (*query_ras_error_count)(struct amdgpu_device *adev,
-                                     void *ras_error_status);
-       void (*query_ras_error_address)(struct amdgpu_device *adev,
-                                       void *ras_error_status);
        bool (*query_ras_poison_mode)(struct amdgpu_device *adev);
        void (*ecc_info_query_ras_error_count)(struct amdgpu_device *adev,
                                      void *ras_error_status);
@@ -73,10 +69,10 @@ struct amdgpu_umc {
        struct ras_common_if *ras_if;
 
        const struct amdgpu_umc_funcs *funcs;
-       const struct amdgpu_umc_ras_funcs *ras_funcs;
+       struct amdgpu_umc_ras *ras;
 };
 
-int amdgpu_umc_ras_late_init(struct amdgpu_device *adev);
+int amdgpu_umc_ras_late_init(struct amdgpu_device *adev, void *ras_info);
 void amdgpu_umc_ras_fini(struct amdgpu_device *adev);
 int amdgpu_umc_poison_handler(struct amdgpu_device *adev,
                void *ras_error_status,
index 38bb42727715d26948e4cee9dde57506a2832b68..5ef4ad28ab26ab707df15a908b644f7653e21c68 100644 (file)
@@ -664,11 +664,25 @@ static void gmc_v10_0_set_umc_funcs(struct amdgpu_device *adev)
                adev->umc.umc_inst_num = UMC_V8_7_UMC_INSTANCE_NUM;
                adev->umc.channel_offs = UMC_V8_7_PER_CHANNEL_OFFSET_SIENNA;
                adev->umc.channel_idx_tbl = &umc_v8_7_channel_idx_tbl[0][0];
-               adev->umc.ras_funcs = &umc_v8_7_ras_funcs;
+               adev->umc.ras = &umc_v8_7_ras;
                break;
        default:
                break;
        }
+       if (adev->umc.ras) {
+               amdgpu_ras_register_ras_block(adev, &adev->umc.ras->ras_block);
+
+               strcpy(adev->umc.ras->ras_block.name,"umc");
+               adev->umc.ras->ras_block.block = AMDGPU_RAS_BLOCK__UMC;
+
+               /* If don't define special ras_late_init function, use default ras_late_init */
+               if (!adev->umc.ras->ras_block.ras_late_init)
+                               adev->umc.ras->ras_block.ras_late_init = amdgpu_umc_ras_late_init;
+
+               /* If don't define special ras_fini function, use default ras_fini */
+               if (!adev->umc.ras->ras_block.ras_fini)
+                               adev->umc.ras->ras_block.ras_fini = amdgpu_umc_ras_fini;
+       }
 }
 
 
index 7506e198e6e1b4d5ffcae9adca2af7ed3308ae03..3965aae435f87a5df115f703aefbf5f9e998a5ba 100644 (file)
@@ -1202,7 +1202,7 @@ static void gmc_v9_0_set_umc_funcs(struct amdgpu_device *adev)
                adev->umc.umc_inst_num = UMC_V6_1_UMC_INSTANCE_NUM;
                adev->umc.channel_offs = UMC_V6_1_PER_CHANNEL_OFFSET_VG20;
                adev->umc.channel_idx_tbl = &umc_v6_1_channel_idx_tbl[0][0];
-               adev->umc.ras_funcs = &umc_v6_1_ras_funcs;
+               adev->umc.ras = &umc_v6_1_ras;
                break;
        case IP_VERSION(6, 1, 2):
                adev->umc.max_ras_err_cnt_per_query = UMC_V6_1_TOTAL_CHANNEL_NUM;
@@ -1210,7 +1210,7 @@ static void gmc_v9_0_set_umc_funcs(struct amdgpu_device *adev)
                adev->umc.umc_inst_num = UMC_V6_1_UMC_INSTANCE_NUM;
                adev->umc.channel_offs = UMC_V6_1_PER_CHANNEL_OFFSET_ARCT;
                adev->umc.channel_idx_tbl = &umc_v6_1_channel_idx_tbl[0][0];
-               adev->umc.ras_funcs = &umc_v6_1_ras_funcs;
+               adev->umc.ras = &umc_v6_1_ras;
                break;
        case IP_VERSION(6, 7, 0):
                adev->umc.max_ras_err_cnt_per_query = UMC_V6_7_TOTAL_CHANNEL_NUM;
@@ -1218,7 +1218,7 @@ static void gmc_v9_0_set_umc_funcs(struct amdgpu_device *adev)
                adev->umc.umc_inst_num = UMC_V6_7_UMC_INSTANCE_NUM;
                adev->umc.channel_offs = UMC_V6_7_PER_CHANNEL_OFFSET;
                if (!adev->gmc.xgmi.connected_to_cpu)
-                       adev->umc.ras_funcs = &umc_v6_7_ras_funcs;
+                       adev->umc.ras = &umc_v6_7_ras;
                if (1 & adev->smuio.funcs->get_die_id(adev))
                        adev->umc.channel_idx_tbl = &umc_v6_7_channel_idx_tbl_first[0][0];
                else
@@ -1227,6 +1227,21 @@ static void gmc_v9_0_set_umc_funcs(struct amdgpu_device *adev)
        default:
                break;
        }
+
+       if (adev->umc.ras) {
+               amdgpu_ras_register_ras_block(adev, &adev->umc.ras->ras_block);
+
+               strcpy(adev->umc.ras->ras_block.name,"umc");
+               adev->umc.ras->ras_block.block = AMDGPU_RAS_BLOCK__UMC;
+
+               /* If don't define special ras_late_init function, use default ras_late_init */
+               if (!adev->umc.ras->ras_block.ras_late_init)
+                               adev->umc.ras->ras_block.ras_late_init = amdgpu_umc_ras_late_init;
+
+               /* If don't define special ras_fini function, use default ras_fini */
+               if (!adev->umc.ras->ras_block.ras_fini)
+                               adev->umc.ras->ras_block.ras_fini = amdgpu_umc_ras_fini;
+       }
 }
 
 static void gmc_v9_0_set_mmhub_funcs(struct amdgpu_device *adev)
index 20b44983ac945fbbca71b80b97bbb1f551d61bc3..4776301972d49d3e4be511da5fa8ae550f046b3c 100644 (file)
@@ -465,10 +465,14 @@ static void umc_v6_1_err_cnt_init(struct amdgpu_device *adev)
                umc_v6_1_enable_umc_index_mode(adev);
 }
 
-const struct amdgpu_umc_ras_funcs umc_v6_1_ras_funcs = {
-       .err_cnt_init = umc_v6_1_err_cnt_init,
-       .ras_late_init = amdgpu_umc_ras_late_init,
-       .ras_fini = amdgpu_umc_ras_fini,
+const struct amdgpu_ras_block_hw_ops umc_v6_1_ras_hw_ops = {
        .query_ras_error_count = umc_v6_1_query_ras_error_count,
        .query_ras_error_address = umc_v6_1_query_ras_error_address,
 };
+
+struct amdgpu_umc_ras umc_v6_1_ras = {
+       .ras_block = {
+               .hw_ops = &umc_v6_1_ras_hw_ops,
+       },
+       .err_cnt_init = umc_v6_1_err_cnt_init,
+};
\ No newline at end of file
index 5dc36c730bb2a25d635042935f044a797b4799ee..50c632eb4cc6ef72a49c4df072b1863c0128e8f2 100644 (file)
@@ -45,7 +45,7 @@
 /* umc ce count initial value */
 #define UMC_V6_1_CE_CNT_INIT   (UMC_V6_1_CE_CNT_MAX - UMC_V6_1_CE_INT_THRESHOLD)
 
-extern const struct amdgpu_umc_ras_funcs umc_v6_1_ras_funcs;
+extern struct amdgpu_umc_ras umc_v6_1_ras;
 extern const uint32_t
        umc_v6_1_channel_idx_tbl[UMC_V6_1_UMC_INSTANCE_NUM][UMC_V6_1_CHANNEL_INSTANCE_NUM];
 
index 6dd1e19e8d43234231bfefc0b9bac7f600be6a86..6953426f0bedffbd57f4cbc8dd51b89c998d9bbc 100644 (file)
@@ -480,11 +480,15 @@ static bool umc_v6_7_query_ras_poison_mode(struct amdgpu_device *adev)
        return true;
 }
 
-const struct amdgpu_umc_ras_funcs umc_v6_7_ras_funcs = {
-       .ras_late_init = amdgpu_umc_ras_late_init,
-       .ras_fini = amdgpu_umc_ras_fini,
+const struct amdgpu_ras_block_hw_ops umc_v6_7_ras_hw_ops = {
        .query_ras_error_count = umc_v6_7_query_ras_error_count,
        .query_ras_error_address = umc_v6_7_query_ras_error_address,
+};
+
+struct amdgpu_umc_ras umc_v6_7_ras = {
+       .ras_block = {
+               .hw_ops = &umc_v6_7_ras_hw_ops,
+       },
        .query_ras_poison_mode = umc_v6_7_query_ras_poison_mode,
        .ecc_info_query_ras_error_count = umc_v6_7_ecc_info_query_ras_error_count,
        .ecc_info_query_ras_error_address = umc_v6_7_ecc_info_query_ras_error_address,
index 57f2557e7acab1b7c1431d4971a1ddc3b574e83b..1f2edf62537041710420abef15d731149360eacc 100644 (file)
@@ -43,7 +43,7 @@
 #define UMC_V6_7_TOTAL_CHANNEL_NUM     (UMC_V6_7_CHANNEL_INSTANCE_NUM * UMC_V6_7_UMC_INSTANCE_NUM)
 /* UMC regiser per channel offset */
 #define UMC_V6_7_PER_CHANNEL_OFFSET            0x400
-extern const struct amdgpu_umc_ras_funcs umc_v6_7_ras_funcs;
+extern struct amdgpu_umc_ras umc_v6_7_ras;
 extern const uint32_t
        umc_v6_7_channel_idx_tbl_second[UMC_V6_7_UMC_INSTANCE_NUM][UMC_V6_7_CHANNEL_INSTANCE_NUM];
 extern const uint32_t
index af59a35788e3eefe68d6b98137fe541f0cbf4b2d..ff9e1fac616d0498ca6a5c4b762588cccee9782f 100644 (file)
@@ -324,10 +324,14 @@ static void umc_v8_7_err_cnt_init(struct amdgpu_device *adev)
        }
 }
 
-const struct amdgpu_umc_ras_funcs umc_v8_7_ras_funcs = {
-       .err_cnt_init = umc_v8_7_err_cnt_init,
-       .ras_late_init = amdgpu_umc_ras_late_init,
-       .ras_fini = amdgpu_umc_ras_fini,
+const struct amdgpu_ras_block_hw_ops umc_v8_7_ras_hw_ops = {
        .query_ras_error_count = umc_v8_7_query_ras_error_count,
        .query_ras_error_address = umc_v8_7_query_ras_error_address,
 };
+
+struct amdgpu_umc_ras umc_v8_7_ras = {
+       .ras_block = {
+               .hw_ops = &umc_v8_7_ras_hw_ops,
+       },
+       .err_cnt_init = umc_v8_7_err_cnt_init,
+};
index 37e6dc7c28e0d963f035a21d649a20824c7415d8..dd4993f5f78f4d8aad5bdd5cd907631680dd793c 100644 (file)
@@ -44,7 +44,7 @@
 /* umc ce count initial value */
 #define UMC_V8_7_CE_CNT_INIT   (UMC_V8_7_CE_CNT_MAX - UMC_V8_7_CE_INT_THRESHOLD)
 
-extern const struct amdgpu_umc_ras_funcs umc_v8_7_ras_funcs;
+extern struct amdgpu_umc_ras umc_v8_7_ras;
 extern const uint32_t
        umc_v8_7_channel_idx_tbl[UMC_V8_7_UMC_INSTANCE_NUM][UMC_V8_7_CHANNEL_INSTANCE_NUM];