drm/amdgpu: Modify mmhub block to fit for the unified ras block data and ops
author yipechai <YiPeng.Chai@amd.com>
Tue, 4 Jan 2022 11:04:41 +0000 (19:04 +0800)
committer Alex Deucher <alexander.deucher@amd.com>
Fri, 14 Jan 2022 22:51:59 +0000 (17:51 -0500)
1.Modify mmhub block to fit for the unified ras block data and ops.
2.Rename amdgpu_mmhub_ras_funcs to amdgpu_mmhub_ras, and remove the _funcs suffix from the corresponding variable names.
3.Remove the const qualifier from the mmhub ras variables so that the mmhub ras block can be inserted into the amdgpu device ras block linked list.
4.Invoke the amdgpu_ras_register_ras_block function to register the mmhub ras block into the amdgpu device ras block linked list.
5.Remove the redundant code about mmhub in amdgpu_ras.c after using the unified ras block.
6.Fill in the unified ras block .name, .block, .ras_late_init and .ras_fini for all mmhub versions. If .ras_late_init and .ras_fini are already defined by the selected mmhub version, the defined functions take effect; otherwise they default to amdgpu_mmhub_ras_late_init and amdgpu_mmhub_ras_fini.

Signed-off-by: yipechai <YiPeng.Chai@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Reviewed-by: John Clements <john.clements@amd.com>
Reviewed-by: Tao Zhou <tao.zhou1@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
12 files changed:
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.c
drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.h
drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c
drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.h
drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c
drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.h

index 9230e4476d91a19e511de1873c9fc0ec95f7b3c7..71b814fe15f90e073535bc6787a6144b73baa031 100644 (file)
@@ -3307,9 +3307,9 @@ static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
                if (adev->asic_reset_res)
                        goto fail;
 
-               if (adev->mmhub.ras_funcs &&
-                   adev->mmhub.ras_funcs->reset_ras_error_count)
-                       adev->mmhub.ras_funcs->reset_ras_error_count(adev);
+               if (adev->mmhub.ras && adev->mmhub.ras->ras_block.hw_ops &&
+                   adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count)
+                       adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count(adev);
        } else {
 
                task_barrier_full(&hive->tb);
@@ -4656,9 +4656,9 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle,
 
        if (!r && amdgpu_ras_intr_triggered()) {
                list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
-                       if (tmp_adev->mmhub.ras_funcs &&
-                           tmp_adev->mmhub.ras_funcs->reset_ras_error_count)
-                               tmp_adev->mmhub.ras_funcs->reset_ras_error_count(tmp_adev);
+                       if (tmp_adev->mmhub.ras && tmp_adev->mmhub.ras->ras_block.hw_ops &&
+                           tmp_adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count)
+                               tmp_adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count(tmp_adev);
                }
 
                amdgpu_ras_intr_cleared();
index 58cc4dae1246e5a68164ca815541f82d292a7ab4..acf806c87673cf20f4f48dfd55e3baf34c5ecab9 100644 (file)
@@ -447,9 +447,8 @@ int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev)
                        return r;
        }
 
-       if (adev->mmhub.ras_funcs &&
-           adev->mmhub.ras_funcs->ras_late_init) {
-               r = adev->mmhub.ras_funcs->ras_late_init(adev);
+       if (adev->mmhub.ras && adev->mmhub.ras->ras_block.ras_late_init) {
+               r = adev->mmhub.ras->ras_block.ras_late_init(adev, NULL);
                if (r)
                        return r;
        }
@@ -501,9 +500,8 @@ void amdgpu_gmc_ras_fini(struct amdgpu_device *adev)
            adev->umc.ras_funcs->ras_fini)
                adev->umc.ras_funcs->ras_fini(adev);
 
-       if (adev->mmhub.ras_funcs &&
-           adev->mmhub.ras_funcs->ras_fini)
-               adev->mmhub.ras_funcs->ras_fini(adev);
+       if (adev->mmhub.ras && adev->mmhub.ras->ras_block.ras_fini)
+               adev->mmhub.ras->ras_block.ras_fini(adev);
 
        if (adev->gmc.xgmi.ras && adev->gmc.xgmi.ras->ras_block.ras_fini)
                adev->gmc.xgmi.ras->ras_block.ras_fini(adev);
index 24297dc51434b2a07ce1b1f90cec1261449ecad7..f9b5472a75d70307d46e5b49c0138831a7c4ec23 100644 (file)
@@ -24,7 +24,7 @@
 #include "amdgpu.h"
 #include "amdgpu_ras.h"
 
-int amdgpu_mmhub_ras_late_init(struct amdgpu_device *adev)
+int amdgpu_mmhub_ras_late_init(struct amdgpu_device *adev, void *ras_info)
 {
        int r;
        struct ras_ih_if ih_info = {
index b27fcbccce2b6f27dcb70ec99527323db075863d..7deda9a3b81eba2dcd9f63dc38d617367d114908 100644 (file)
 #ifndef __AMDGPU_MMHUB_H__
 #define __AMDGPU_MMHUB_H__
 
-struct amdgpu_mmhub_ras_funcs {
-       int (*ras_late_init)(struct amdgpu_device *adev);
-       void (*ras_fini)(struct amdgpu_device *adev);
-       void (*query_ras_error_count)(struct amdgpu_device *adev,
-                                     void *ras_error_status);
-       void (*query_ras_error_status)(struct amdgpu_device *adev);
-       void (*reset_ras_error_count)(struct amdgpu_device *adev);
-       void (*reset_ras_error_status)(struct amdgpu_device *adev);
+struct amdgpu_mmhub_ras {
+       struct amdgpu_ras_block_object ras_block;
 };
 
 struct amdgpu_mmhub_funcs {
@@ -50,10 +44,10 @@ struct amdgpu_mmhub_funcs {
 struct amdgpu_mmhub {
        struct ras_common_if *ras_if;
        const struct amdgpu_mmhub_funcs *funcs;
-       const struct amdgpu_mmhub_ras_funcs *ras_funcs;
+       struct amdgpu_mmhub_ras  *ras;
 };
 
-int amdgpu_mmhub_ras_late_init(struct amdgpu_device *adev);
+int amdgpu_mmhub_ras_late_init(struct amdgpu_device *adev, void *ras_info);
 void amdgpu_mmhub_ras_fini(struct amdgpu_device *adev);
 #endif
 
index c47a03252ec80494fada2859e27fcd672882d205..0ce26fd6abbdbe6aa6aacefc9e997460c3be0936 100644 (file)
@@ -986,6 +986,7 @@ int amdgpu_ras_query_error_status(struct amdgpu_device *adev,
                }
                break;
        case AMDGPU_RAS_BLOCK__GFX:
+       case AMDGPU_RAS_BLOCK__MMHUB:
                if (!block_obj || !block_obj->hw_ops)   {
                        dev_info(adev->dev, "%s doesn't config ras function \n",
                                                get_ras_block_str(&info->head));
@@ -997,15 +998,6 @@ int amdgpu_ras_query_error_status(struct amdgpu_device *adev,
                if (block_obj->hw_ops->query_ras_error_status)
                        block_obj->hw_ops->query_ras_error_status(adev);
                break;
-       case AMDGPU_RAS_BLOCK__MMHUB:
-               if (adev->mmhub.ras_funcs &&
-                   adev->mmhub.ras_funcs->query_ras_error_count)
-                       adev->mmhub.ras_funcs->query_ras_error_count(adev, &err_data);
-
-               if (adev->mmhub.ras_funcs &&
-                   adev->mmhub.ras_funcs->query_ras_error_status)
-                       adev->mmhub.ras_funcs->query_ras_error_status(adev);
-               break;
        case AMDGPU_RAS_BLOCK__PCIE_BIF:
                if (adev->nbio.ras_funcs &&
                    adev->nbio.ras_funcs->query_ras_error_count)
@@ -1089,6 +1081,7 @@ int amdgpu_ras_reset_error_status(struct amdgpu_device *adev,
 
        switch (block) {
        case AMDGPU_RAS_BLOCK__GFX:
+       case AMDGPU_RAS_BLOCK__MMHUB:
                if (!block_obj || !block_obj->hw_ops)   {
                        dev_info(adev->dev, "%s doesn't config ras function \n", ras_block_str(block));
                        return -EINVAL;
@@ -1100,15 +1093,6 @@ int amdgpu_ras_reset_error_status(struct amdgpu_device *adev,
                if (block_obj->hw_ops->reset_ras_error_status)
                        block_obj->hw_ops->reset_ras_error_status(adev);
                break;
-       case AMDGPU_RAS_BLOCK__MMHUB:
-               if (adev->mmhub.ras_funcs &&
-                   adev->mmhub.ras_funcs->reset_ras_error_count)
-                       adev->mmhub.ras_funcs->reset_ras_error_count(adev);
-
-               if (adev->mmhub.ras_funcs &&
-                   adev->mmhub.ras_funcs->reset_ras_error_status)
-                       adev->mmhub.ras_funcs->reset_ras_error_status(adev);
-               break;
        case AMDGPU_RAS_BLOCK__SDMA:
                if (adev->sdma.funcs->reset_ras_error_count)
                        adev->sdma.funcs->reset_ras_error_count(adev);
@@ -1825,24 +1809,19 @@ static void amdgpu_ras_error_status_query(struct amdgpu_device *adev,
         * Only two block need to query read/write
         * RspStatus at current state
         */
-       switch (info->head.block) {
-       case AMDGPU_RAS_BLOCK__GFX:
-               if (!block_obj || !block_obj->hw_ops)   {
-                       dev_info(adev->dev, "%s doesn't config ras function \n", get_ras_block_str(&info->head));
-                       return ;
-               }
+       if ((info->head.block != AMDGPU_RAS_BLOCK__GFX) &&
+               (info->head.block != AMDGPU_RAS_BLOCK__MMHUB))
+               return ;
 
-               if (block_obj->hw_ops->query_ras_error_status)
-                       block_obj->hw_ops->query_ras_error_status(adev);
-               break;
-       case AMDGPU_RAS_BLOCK__MMHUB:
-               if (adev->mmhub.ras_funcs &&
-                   adev->mmhub.ras_funcs->query_ras_error_status)
-                       adev->mmhub.ras_funcs->query_ras_error_status(adev);
-               break;
-       default:
-               break;
+       block_obj = amdgpu_ras_get_ras_block(adev, info->head.block, info->head.sub_block_index);
+       if (!block_obj || !block_obj->hw_ops) {
+               dev_info(adev->dev, "%s doesn't config ras function \n", get_ras_block_str(&info->head));
+               return ;
        }
+
+       if (block_obj->hw_ops->query_ras_error_status)
+       block_obj->hw_ops->query_ras_error_status(adev);
+
 }
 
 static void amdgpu_ras_query_err_status(struct amdgpu_device *adev)
index 16ab572219edce345333356cc547f6534d8bcf4c..7506e198e6e1b4d5ffcae9adca2af7ed3308ae03 100644 (file)
@@ -1248,18 +1248,33 @@ static void gmc_v9_0_set_mmhub_ras_funcs(struct amdgpu_device *adev)
 {
        switch (adev->ip_versions[MMHUB_HWIP][0]) {
        case IP_VERSION(9, 4, 0):
-               adev->mmhub.ras_funcs = &mmhub_v1_0_ras_funcs;
+               adev->mmhub.ras = &mmhub_v1_0_ras;
                break;
        case IP_VERSION(9, 4, 1):
-               adev->mmhub.ras_funcs = &mmhub_v9_4_ras_funcs;
+               adev->mmhub.ras = &mmhub_v9_4_ras;
                break;
        case IP_VERSION(9, 4, 2):
-               adev->mmhub.ras_funcs = &mmhub_v1_7_ras_funcs;
+               adev->mmhub.ras = &mmhub_v1_7_ras;
                break;
        default:
                /* mmhub ras is not available */
                break;
        }
+
+       if (adev->mmhub.ras) {
+               amdgpu_ras_register_ras_block(adev, &adev->mmhub.ras->ras_block);
+
+               strcpy(adev->mmhub.ras->ras_block.name,"mmhub");
+               adev->mmhub.ras->ras_block.block = AMDGPU_RAS_BLOCK__MMHUB;
+
+               /* If don't define special ras_late_init function, use default ras_late_init */
+               if (!adev->mmhub.ras->ras_block.ras_late_init)
+                       adev->mmhub.ras->ras_block.ras_late_init = amdgpu_mmhub_ras_late_init;
+
+               /* If don't define special ras_fini function, use default ras_fini */
+               if (!adev->mmhub.ras->ras_block.ras_fini)
+                       adev->mmhub.ras->ras_block.ras_fini = amdgpu_mmhub_ras_fini;
+       }
 }
 
 static void gmc_v9_0_set_gfxhub_funcs(struct amdgpu_device *adev)
@@ -1343,9 +1358,9 @@ static int gmc_v9_0_late_init(void *handle)
        }
 
        if (!amdgpu_persistent_edc_harvesting_supported(adev)) {
-               if (adev->mmhub.ras_funcs &&
-                   adev->mmhub.ras_funcs->reset_ras_error_count)
-                       adev->mmhub.ras_funcs->reset_ras_error_count(adev);
+               if (adev->mmhub.ras && adev->mmhub.ras->ras_block.hw_ops &&
+                   adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count)
+                       adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count(adev);
 
                if (adev->hdp.ras && adev->hdp.ras->ras_block.hw_ops &&
                    adev->hdp.ras->ras_block.hw_ops->reset_ras_error_count)
index 1da2ec692057ee98445620a24b1c9953604fbe2c..4c9f0c0f31168a61d74dc0f5222142e82ed6c60b 100644 (file)
@@ -774,13 +774,17 @@ static void mmhub_v1_0_reset_ras_error_count(struct amdgpu_device *adev)
        }
 }
 
-const struct amdgpu_mmhub_ras_funcs mmhub_v1_0_ras_funcs = {
-       .ras_late_init = amdgpu_mmhub_ras_late_init,
-       .ras_fini = amdgpu_mmhub_ras_fini,
+struct amdgpu_ras_block_hw_ops mmhub_v1_0_ras_hw_ops = {
        .query_ras_error_count = mmhub_v1_0_query_ras_error_count,
        .reset_ras_error_count = mmhub_v1_0_reset_ras_error_count,
 };
 
+struct amdgpu_mmhub_ras mmhub_v1_0_ras = {
+       .ras_block = {
+               .hw_ops = &mmhub_v1_0_ras_hw_ops,
+       },
+};
+
 const struct amdgpu_mmhub_funcs mmhub_v1_0_funcs = {
        .get_fb_location = mmhub_v1_0_get_fb_location,
        .init = mmhub_v1_0_init,
index 4661b094e00784d907f0efa22097fdb1a6452227..dae7ca48bd8b46d01b987fd11cd1f9534edce5c5 100644 (file)
@@ -24,6 +24,6 @@
 #define __MMHUB_V1_0_H__
 
 extern const struct amdgpu_mmhub_funcs mmhub_v1_0_funcs;
-extern const struct amdgpu_mmhub_ras_funcs mmhub_v1_0_ras_funcs;
+extern struct amdgpu_mmhub_ras mmhub_v1_0_ras;
 
 #endif
index f5f7181f9af5fd1302365a4522b573576b701b13..3b901f941627ef2d0163a99efaa8d1b7443aec13 100644 (file)
@@ -1321,15 +1321,19 @@ static void mmhub_v1_7_reset_ras_error_status(struct amdgpu_device *adev)
        }
 }
 
-const struct amdgpu_mmhub_ras_funcs mmhub_v1_7_ras_funcs = {
-       .ras_late_init = amdgpu_mmhub_ras_late_init,
-       .ras_fini = amdgpu_mmhub_ras_fini,
+struct amdgpu_ras_block_hw_ops mmhub_v1_7_ras_hw_ops = {
        .query_ras_error_count = mmhub_v1_7_query_ras_error_count,
        .reset_ras_error_count = mmhub_v1_7_reset_ras_error_count,
        .query_ras_error_status = mmhub_v1_7_query_ras_error_status,
        .reset_ras_error_status = mmhub_v1_7_reset_ras_error_status,
 };
 
+struct amdgpu_mmhub_ras mmhub_v1_7_ras = {
+       .ras_block = {
+               .hw_ops = &mmhub_v1_7_ras_hw_ops,
+       },
+};
+
 const struct amdgpu_mmhub_funcs mmhub_v1_7_funcs = {
        .get_fb_location = mmhub_v1_7_get_fb_location,
        .init = mmhub_v1_7_init,
index a7f9dfc2469725bb6231e8855a5ef9a826cf64a9..629f49052137127aebe90e3827d98f87efb3ff96 100644 (file)
@@ -24,6 +24,6 @@
 #define __MMHUB_V1_7_H__
 
 extern const struct amdgpu_mmhub_funcs mmhub_v1_7_funcs;
-extern const struct amdgpu_mmhub_ras_funcs mmhub_v1_7_ras_funcs;
+extern struct amdgpu_mmhub_ras mmhub_v1_7_ras;
 
 #endif
index ff49eeaf78824534c5593614ed1e93cafe8c4fa3..619106f7d23de9c42e36867759d03318ba5fe592 100644 (file)
@@ -1655,14 +1655,18 @@ static void mmhub_v9_4_query_ras_error_status(struct amdgpu_device *adev)
        }
 }
 
-const struct amdgpu_mmhub_ras_funcs mmhub_v9_4_ras_funcs = {
-       .ras_late_init = amdgpu_mmhub_ras_late_init,
-       .ras_fini = amdgpu_mmhub_ras_fini,
+const struct amdgpu_ras_block_hw_ops mmhub_v9_4_ras_hw_ops = {
        .query_ras_error_count = mmhub_v9_4_query_ras_error_count,
        .reset_ras_error_count = mmhub_v9_4_reset_ras_error_count,
        .query_ras_error_status = mmhub_v9_4_query_ras_error_status,
 };
 
+struct amdgpu_mmhub_ras mmhub_v9_4_ras = {
+       .ras_block = {
+               .hw_ops = &mmhub_v9_4_ras_hw_ops,
+       },
+};
+
 const struct amdgpu_mmhub_funcs mmhub_v9_4_funcs = {
        .get_fb_location = mmhub_v9_4_get_fb_location,
        .init = mmhub_v9_4_init,
index 90436efa92ef28c5a488b9bea0a32024a48215d0..a48329d95f71f8a894c0127f2a064b64fbe64709 100644 (file)
@@ -24,6 +24,6 @@
 #define __MMHUB_V9_4_H__
 
 extern const struct amdgpu_mmhub_funcs mmhub_v9_4_funcs;
-extern const struct amdgpu_mmhub_ras_funcs mmhub_v9_4_ras_funcs;
+extern struct amdgpu_mmhub_ras mmhub_v9_4_ras;
 
 #endif