1. Modify the umc block to fit the unified ras block data and ops.
2. Rename struct amdgpu_umc_ras_funcs to amdgpu_umc_ras, and drop the _funcs suffix from the corresponding variable name.
3. Remove the const qualifier from the umc ras variable so that the umc ras block can be inserted into the amdgpu device ras block linked list.
4. Call amdgpu_ras_register_ras_block() to register the umc ras block on the amdgpu device ras block linked list.
5. Remove the redundant umc code from amdgpu_ras.c now that the unified ras block is used.
6. Fill the unified ras block's .name, .block, .ras_late_init and .ras_fini for all umc versions. If the selected umc version already defines .ras_late_init or .ras_fini, its implementation takes effect; otherwise the fields default to amdgpu_umc_ras_late_init and amdgpu_umc_ras_fini. The resulting wiring is sketched below.
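For reference, a condensed sketch of the wiring this patch introduces, assembled from the hunks below (umc v8.7 shown; the helper name gmc_set_umc_ras_sketch is hypothetical and the surrounding switch cases are elided):

const struct amdgpu_ras_block_hw_ops umc_v8_7_ras_hw_ops = {
	/* hardware access routines move into the shared hw_ops table */
	.query_ras_error_count = umc_v8_7_query_ras_error_count,
	.query_ras_error_address = umc_v8_7_query_ras_error_address,
};

struct amdgpu_umc_ras umc_v8_7_ras = {
	/* the generic ras block object is embedded in the per-IP struct */
	.ras_block = {
		.hw_ops = &umc_v8_7_ras_hw_ops,
	},
	/* umc-only callbacks stay in amdgpu_umc_ras */
	.err_cnt_init = umc_v8_7_err_cnt_init,
};

/* hypothetical helper standing in for the gmc_v*_0_set_umc_funcs hunks below */
static void gmc_set_umc_ras_sketch(struct amdgpu_device *adev)
{
	adev->umc.ras = &umc_v8_7_ras;

	/* register the block on the device-wide ras block list */
	amdgpu_ras_register_ras_block(adev, &adev->umc.ras->ras_block);
	strcpy(adev->umc.ras->ras_block.name, "umc");
	adev->umc.ras->ras_block.block = AMDGPU_RAS_BLOCK__UMC;

	/* fall back to the common init/fini when the version defines none */
	if (!adev->umc.ras->ras_block.ras_late_init)
		adev->umc.ras->ras_block.ras_late_init = amdgpu_umc_ras_late_init;
	if (!adev->umc.ras->ras_block.ras_fini)
		adev->umc.ras->ras_block.ras_fini = amdgpu_umc_ras_fini;
}

With this layout, amdgpu_ras.c only walks the common ras block list and the generic hw_ops table instead of reaching into adev->umc.ras_funcs directly.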
Signed-off-by: yipechai <YiPeng.Chai@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Reviewed-by: John Clements <john.clements@amd.com>
Reviewed-by: Tao Zhou <tao.zhou1@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
{
int r;
- if (adev->umc.ras_funcs &&
- adev->umc.ras_funcs->ras_late_init) {
- r = adev->umc.ras_funcs->ras_late_init(adev);
+ if (adev->umc.ras && adev->umc.ras->ras_block.ras_late_init) {
+ r = adev->umc.ras->ras_block.ras_late_init(adev, NULL);
if (r)
return r;
}
void amdgpu_gmc_ras_fini(struct amdgpu_device *adev)
{
- if (adev->umc.ras_funcs &&
- adev->umc.ras_funcs->ras_fini)
- adev->umc.ras_funcs->ras_fini(adev);
+ if (adev->umc.ras && adev->umc.ras->ras_block.ras_fini)
+ adev->umc.ras->ras_block.ras_fini(adev);
if (adev->mmhub.ras && adev->mmhub.ras->ras_block.ras_fini)
adev->mmhub.ras->ras_block.ras_fini(adev);
*/
ret = amdgpu_dpm_get_ecc_info(adev, (void *)&(ras->umc_ecc));
if (ret == -EOPNOTSUPP) {
- if (adev->umc.ras_funcs &&
- adev->umc.ras_funcs->query_ras_error_count)
- adev->umc.ras_funcs->query_ras_error_count(adev, err_data);
+ if (adev->umc.ras && adev->umc.ras->ras_block.hw_ops &&
+ adev->umc.ras->ras_block.hw_ops->query_ras_error_count)
+ adev->umc.ras->ras_block.hw_ops->query_ras_error_count(adev, err_data);
/* umc query_ras_error_address is also responsible for clearing
* error status
*/
- if (adev->umc.ras_funcs &&
- adev->umc.ras_funcs->query_ras_error_address)
- adev->umc.ras_funcs->query_ras_error_address(adev, err_data);
+ if (adev->umc.ras && adev->umc.ras->ras_block.hw_ops &&
+ adev->umc.ras->ras_block.hw_ops->query_ras_error_address)
+ adev->umc.ras->ras_block.hw_ops->query_ras_error_address(adev, err_data);
} else if (!ret) {
- if (adev->umc.ras_funcs &&
- adev->umc.ras_funcs->ecc_info_query_ras_error_count)
- adev->umc.ras_funcs->ecc_info_query_ras_error_count(adev, err_data);
+ if (adev->umc.ras &&
+ adev->umc.ras->ecc_info_query_ras_error_count)
+ adev->umc.ras->ecc_info_query_ras_error_count(adev, err_data);
- if (adev->umc.ras_funcs &&
- adev->umc.ras_funcs->ecc_info_query_ras_error_address)
- adev->umc.ras_funcs->ecc_info_query_ras_error_address(adev, err_data);
+ if (adev->umc.ras &&
+ adev->umc.ras->ecc_info_query_ras_error_address)
+ adev->umc.ras->ecc_info_query_ras_error_address(adev, err_data);
}
}
}
else if (adev->df.funcs &&
adev->df.funcs->query_ras_poison_mode &&
- adev->umc.ras_funcs &&
- adev->umc.ras_funcs->query_ras_poison_mode) {
+ adev->umc.ras &&
+ adev->umc.ras->query_ras_poison_mode) {
df_poison =
adev->df.funcs->query_ras_poison_mode(adev);
umc_poison =
- adev->umc.ras_funcs->query_ras_poison_mode(adev);
+ adev->umc.ras->query_ras_poison_mode(adev);
/* Only poison is set in both DF and UMC, we can support it */
if (df_poison && umc_poison)
con->poison_supported = true;
kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
ret = amdgpu_dpm_get_ecc_info(adev, (void *)&(con->umc_ecc));
if (ret == -EOPNOTSUPP) {
- if (adev->umc.ras_funcs &&
- adev->umc.ras_funcs->query_ras_error_count)
- adev->umc.ras_funcs->query_ras_error_count(adev, ras_error_status);
+ if (adev->umc.ras && adev->umc.ras->ras_block.hw_ops &&
+ adev->umc.ras->ras_block.hw_ops->query_ras_error_count)
+ adev->umc.ras->ras_block.hw_ops->query_ras_error_count(adev, ras_error_status);
- if (adev->umc.ras_funcs &&
- adev->umc.ras_funcs->query_ras_error_address &&
+ if (adev->umc.ras && adev->umc.ras->ras_block.hw_ops &&
+ adev->umc.ras->ras_block.hw_ops->query_ras_error_address &&
adev->umc.max_ras_err_cnt_per_query) {
err_data->err_addr =
kcalloc(adev->umc.max_ras_err_cnt_per_query,
/* umc query_ras_error_address is also responsible for clearing
* error status
*/
- adev->umc.ras_funcs->query_ras_error_address(adev, ras_error_status);
+ adev->umc.ras->ras_block.hw_ops->query_ras_error_address(adev, ras_error_status);
}
} else if (!ret) {
- if (adev->umc.ras_funcs &&
- adev->umc.ras_funcs->ecc_info_query_ras_error_count)
- adev->umc.ras_funcs->ecc_info_query_ras_error_count(adev, ras_error_status);
+ if (adev->umc.ras &&
+ adev->umc.ras->ecc_info_query_ras_error_count)
+ adev->umc.ras->ecc_info_query_ras_error_count(adev, ras_error_status);
- if (adev->umc.ras_funcs &&
- adev->umc.ras_funcs->ecc_info_query_ras_error_address &&
+ if (adev->umc.ras &&
+ adev->umc.ras->ecc_info_query_ras_error_address &&
adev->umc.max_ras_err_cnt_per_query) {
err_data->err_addr =
kcalloc(adev->umc.max_ras_err_cnt_per_query,
/* umc query_ras_error_address is also responsible for clearing
* error status
*/
- adev->umc.ras_funcs->ecc_info_query_ras_error_address(adev, ras_error_status);
+ adev->umc.ras->ecc_info_query_ras_error_address(adev, ras_error_status);
}
}
return amdgpu_umc_do_page_retirement(adev, ras_error_status, entry, true);
}
-int amdgpu_umc_ras_late_init(struct amdgpu_device *adev)
+int amdgpu_umc_ras_late_init(struct amdgpu_device *adev, void *ras_info)
{
int r;
struct ras_fs_if fs_info = {
}
/* ras init of specific umc version */
- if (adev->umc.ras_funcs &&
- adev->umc.ras_funcs->err_cnt_init)
- adev->umc.ras_funcs->err_cnt_init(adev);
+ if (adev->umc.ras &&
+ adev->umc.ras->err_cnt_init)
+ adev->umc.ras->err_cnt_init(adev);
return 0;
*/
#ifndef __AMDGPU_UMC_H__
#define __AMDGPU_UMC_H__
+#include "amdgpu_ras.h"
/*
* (addr / 256) * 4096, the higher 26 bits in ErrorAddr
#define LOOP_UMC_CH_INST(ch_inst) for ((ch_inst) = 0; (ch_inst) < adev->umc.channel_inst_num; (ch_inst)++)
#define LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) LOOP_UMC_INST((umc_inst)) LOOP_UMC_CH_INST((ch_inst))
-struct amdgpu_umc_ras_funcs {
+struct amdgpu_umc_ras {
+ struct amdgpu_ras_block_object ras_block;
void (*err_cnt_init)(struct amdgpu_device *adev);
- int (*ras_late_init)(struct amdgpu_device *adev);
- void (*ras_fini)(struct amdgpu_device *adev);
- void (*query_ras_error_count)(struct amdgpu_device *adev,
- void *ras_error_status);
- void (*query_ras_error_address)(struct amdgpu_device *adev,
- void *ras_error_status);
bool (*query_ras_poison_mode)(struct amdgpu_device *adev);
void (*ecc_info_query_ras_error_count)(struct amdgpu_device *adev,
void *ras_error_status);
struct ras_common_if *ras_if;
const struct amdgpu_umc_funcs *funcs;
- const struct amdgpu_umc_ras_funcs *ras_funcs;
+ struct amdgpu_umc_ras *ras;
};
-int amdgpu_umc_ras_late_init(struct amdgpu_device *adev);
+int amdgpu_umc_ras_late_init(struct amdgpu_device *adev, void *ras_info);
void amdgpu_umc_ras_fini(struct amdgpu_device *adev);
int amdgpu_umc_poison_handler(struct amdgpu_device *adev,
void *ras_error_status,
adev->umc.umc_inst_num = UMC_V8_7_UMC_INSTANCE_NUM;
adev->umc.channel_offs = UMC_V8_7_PER_CHANNEL_OFFSET_SIENNA;
adev->umc.channel_idx_tbl = &umc_v8_7_channel_idx_tbl[0][0];
- adev->umc.ras_funcs = &umc_v8_7_ras_funcs;
+ adev->umc.ras = &umc_v8_7_ras;
break;
default:
break;
}
+ if (adev->umc.ras) {
+ amdgpu_ras_register_ras_block(adev, &adev->umc.ras->ras_block);
+
+ strcpy(adev->umc.ras->ras_block.name, "umc");
+ adev->umc.ras->ras_block.block = AMDGPU_RAS_BLOCK__UMC;
+
+ /* If no special ras_late_init function is defined, use the default ras_late_init */
+ if (!adev->umc.ras->ras_block.ras_late_init)
+ adev->umc.ras->ras_block.ras_late_init = amdgpu_umc_ras_late_init;
+
+ /* If no special ras_fini function is defined, use the default ras_fini */
+ if (!adev->umc.ras->ras_block.ras_fini)
+ adev->umc.ras->ras_block.ras_fini = amdgpu_umc_ras_fini;
+ }
}
adev->umc.umc_inst_num = UMC_V6_1_UMC_INSTANCE_NUM;
adev->umc.channel_offs = UMC_V6_1_PER_CHANNEL_OFFSET_VG20;
adev->umc.channel_idx_tbl = &umc_v6_1_channel_idx_tbl[0][0];
- adev->umc.ras_funcs = &umc_v6_1_ras_funcs;
+ adev->umc.ras = &umc_v6_1_ras;
break;
case IP_VERSION(6, 1, 2):
adev->umc.max_ras_err_cnt_per_query = UMC_V6_1_TOTAL_CHANNEL_NUM;
adev->umc.umc_inst_num = UMC_V6_1_UMC_INSTANCE_NUM;
adev->umc.channel_offs = UMC_V6_1_PER_CHANNEL_OFFSET_ARCT;
adev->umc.channel_idx_tbl = &umc_v6_1_channel_idx_tbl[0][0];
- adev->umc.ras_funcs = &umc_v6_1_ras_funcs;
+ adev->umc.ras = &umc_v6_1_ras;
break;
case IP_VERSION(6, 7, 0):
adev->umc.max_ras_err_cnt_per_query = UMC_V6_7_TOTAL_CHANNEL_NUM;
adev->umc.umc_inst_num = UMC_V6_7_UMC_INSTANCE_NUM;
adev->umc.channel_offs = UMC_V6_7_PER_CHANNEL_OFFSET;
if (!adev->gmc.xgmi.connected_to_cpu)
- adev->umc.ras_funcs = &umc_v6_7_ras_funcs;
+ adev->umc.ras = &umc_v6_7_ras;
if (1 & adev->smuio.funcs->get_die_id(adev))
adev->umc.channel_idx_tbl = &umc_v6_7_channel_idx_tbl_first[0][0];
else
default:
break;
}
+
+ if (adev->umc.ras) {
+ amdgpu_ras_register_ras_block(adev, &adev->umc.ras->ras_block);
+
+ strcpy(adev->umc.ras->ras_block.name, "umc");
+ adev->umc.ras->ras_block.block = AMDGPU_RAS_BLOCK__UMC;
+
+ /* If no special ras_late_init function is defined, use the default ras_late_init */
+ if (!adev->umc.ras->ras_block.ras_late_init)
+ adev->umc.ras->ras_block.ras_late_init = amdgpu_umc_ras_late_init;
+
+ /* If no special ras_fini function is defined, use the default ras_fini */
+ if (!adev->umc.ras->ras_block.ras_fini)
+ adev->umc.ras->ras_block.ras_fini = amdgpu_umc_ras_fini;
+ }
}
static void gmc_v9_0_set_mmhub_funcs(struct amdgpu_device *adev)
umc_v6_1_enable_umc_index_mode(adev);
}
-const struct amdgpu_umc_ras_funcs umc_v6_1_ras_funcs = {
- .err_cnt_init = umc_v6_1_err_cnt_init,
- .ras_late_init = amdgpu_umc_ras_late_init,
- .ras_fini = amdgpu_umc_ras_fini,
+const struct amdgpu_ras_block_hw_ops umc_v6_1_ras_hw_ops = {
.query_ras_error_count = umc_v6_1_query_ras_error_count,
.query_ras_error_address = umc_v6_1_query_ras_error_address,
};
+
+struct amdgpu_umc_ras umc_v6_1_ras = {
+ .ras_block = {
+ .hw_ops = &umc_v6_1_ras_hw_ops,
+ },
+ .err_cnt_init = umc_v6_1_err_cnt_init,
+};
\ No newline at end of file
/* umc ce count initial value */
#define UMC_V6_1_CE_CNT_INIT (UMC_V6_1_CE_CNT_MAX - UMC_V6_1_CE_INT_THRESHOLD)
-extern const struct amdgpu_umc_ras_funcs umc_v6_1_ras_funcs;
+extern struct amdgpu_umc_ras umc_v6_1_ras;
extern const uint32_t
umc_v6_1_channel_idx_tbl[UMC_V6_1_UMC_INSTANCE_NUM][UMC_V6_1_CHANNEL_INSTANCE_NUM];
return true;
}
-const struct amdgpu_umc_ras_funcs umc_v6_7_ras_funcs = {
- .ras_late_init = amdgpu_umc_ras_late_init,
- .ras_fini = amdgpu_umc_ras_fini,
+const struct amdgpu_ras_block_hw_ops umc_v6_7_ras_hw_ops = {
.query_ras_error_count = umc_v6_7_query_ras_error_count,
.query_ras_error_address = umc_v6_7_query_ras_error_address,
+};
+
+struct amdgpu_umc_ras umc_v6_7_ras = {
+ .ras_block = {
+ .hw_ops = &umc_v6_7_ras_hw_ops,
+ },
.query_ras_poison_mode = umc_v6_7_query_ras_poison_mode,
.ecc_info_query_ras_error_count = umc_v6_7_ecc_info_query_ras_error_count,
.ecc_info_query_ras_error_address = umc_v6_7_ecc_info_query_ras_error_address,
#define UMC_V6_7_TOTAL_CHANNEL_NUM (UMC_V6_7_CHANNEL_INSTANCE_NUM * UMC_V6_7_UMC_INSTANCE_NUM)
/* UMC regiser per channel offset */
#define UMC_V6_7_PER_CHANNEL_OFFSET 0x400
-extern const struct amdgpu_umc_ras_funcs umc_v6_7_ras_funcs;
+extern struct amdgpu_umc_ras umc_v6_7_ras;
extern const uint32_t
umc_v6_7_channel_idx_tbl_second[UMC_V6_7_UMC_INSTANCE_NUM][UMC_V6_7_CHANNEL_INSTANCE_NUM];
extern const uint32_t
}
}
-const struct amdgpu_umc_ras_funcs umc_v8_7_ras_funcs = {
- .err_cnt_init = umc_v8_7_err_cnt_init,
- .ras_late_init = amdgpu_umc_ras_late_init,
- .ras_fini = amdgpu_umc_ras_fini,
+const struct amdgpu_ras_block_hw_ops umc_v8_7_ras_hw_ops = {
.query_ras_error_count = umc_v8_7_query_ras_error_count,
.query_ras_error_address = umc_v8_7_query_ras_error_address,
};
+
+struct amdgpu_umc_ras umc_v8_7_ras = {
+ .ras_block = {
+ .hw_ops = &umc_v8_7_ras_hw_ops,
+ },
+ .err_cnt_init = umc_v8_7_err_cnt_init,
+};
/* umc ce count initial value */
#define UMC_V8_7_CE_CNT_INIT (UMC_V8_7_CE_CNT_MAX - UMC_V8_7_CE_INT_THRESHOLD)
-extern const struct amdgpu_umc_ras_funcs umc_v8_7_ras_funcs;
+extern struct amdgpu_umc_ras umc_v8_7_ras;
extern const uint32_t
umc_v8_7_channel_idx_tbl[UMC_V8_7_UMC_INSTANCE_NUM][UMC_V8_7_CHANNEL_INSTANCE_NUM];