From: Hawking Zhang Date: Mon, 2 Mar 2020 08:16:58 +0000 (+0800) Subject: drm/amdgpu: add reset_ras_error_count function for HDP X-Git-Url: http://git.maquefel.me/?a=commitdiff_plain;h=4a89ad9b39877037444caa8ab84badab1ba867dc;p=linux.git drm/amdgpu: add reset_ras_error_count function for HDP HDP ras error counters are dirty ones after cold reboot Read operation is needed to reset them to 0 Signed-off-by: Hawking Zhang Reviewed-by: Alex Deucher Reviewed-by: Tao Zhou Reviewed-by: Guchun Chen Signed-off-by: Alex Deucher --- diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 97dd9596d17a8..9e90cba9d5c6e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -579,6 +579,7 @@ struct amdgpu_asic_funcs { /* invalidate hdp read cache */ void (*invalidate_hdp)(struct amdgpu_device *adev, struct amdgpu_ring *ring); + void (*reset_hdp_ras_error_count)(struct amdgpu_device *adev); /* check if the asic needs a full reset of if soft reset will work */ bool (*need_full_reset)(struct amdgpu_device *adev); /* initialize doorbell layout for specific asic*/ diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 6e0817e86668c..9726ecb1ec61a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -4196,7 +4196,6 @@ static const struct soc15_reg_entry gfx_v9_0_edc_counter_regs[] = { { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 1, 16}, { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2}, { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6}, - { SOC15_REG_ENTRY(HDP, 0, mmHDP_EDC_CNT), 0, 1, 1}, }; static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index 2b488dfb2f21c..10833c505806b 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -831,6 +831,15 @@ static bool soc15_need_full_reset(struct amdgpu_device *adev) /* change this when we implement soft reset */ return true; } + +static void vega20_reset_hdp_ras_error_count(struct amdgpu_device *adev) +{ + if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__HDP)) + return; + /*read back hdp ras counter to reset it to 0 */ + RREG32_SOC15(HDP, 0, mmHDP_EDC_CNT); +} + static void soc15_get_pcie_usage(struct amdgpu_device *adev, uint64_t *count0, uint64_t *count1) { @@ -998,6 +1007,7 @@ static const struct amdgpu_asic_funcs vega20_asic_funcs = .get_config_memsize = &soc15_get_config_memsize, .flush_hdp = &soc15_flush_hdp, .invalidate_hdp = &soc15_invalidate_hdp, + .reset_hdp_ras_error_count = &vega20_reset_hdp_ras_error_count, .need_full_reset = &soc15_need_full_reset, .init_doorbell_index = &vega20_doorbell_index_init, .get_pcie_usage = &vega20_get_pcie_usage, @@ -1243,6 +1253,10 @@ static int soc15_common_late_init(void *handle) if (amdgpu_sriov_vf(adev)) xgpu_ai_mailbox_get_irq(adev); + if (adev->asic_funcs && + adev->asic_funcs->reset_hdp_ras_error_count) + adev->asic_funcs->reset_hdp_ras_error_count(adev); + if (adev->nbio.funcs->ras_late_init) r = adev->nbio.funcs->ras_late_init(adev);