drm/amdgpu: fix failure to disable ras feature when unloading driver v2
author: Stanley.Yang <Stanley.Yang@amd.com>
Fri, 26 Nov 2021 09:24:39 +0000 (17:24 +0800)
committer: Alex Deucher <alexander.deucher@amd.com>
Wed, 1 Dec 2021 21:03:22 +0000 (16:03 -0500)
v2:
    still need to call ras_disable_all_features to handle
    the ras initialization failure case.

Function amdgpu_device_fini_hw is called before amdgpu_device_fini_sw,
so the ras ta is unloaded before the ras disable command is sent; the ras
disable operation must therefore be performed before hw fini.

Signed-off-by: Stanley.Yang <Stanley.Yang@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c

index 788b254caaa524c44f050d951fb4e46792bbfa54..efa9ff5dcd26572b854d234898563fecd7fea2a2 100644 (file)
@@ -2733,8 +2733,6 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
        if (amdgpu_sriov_vf(adev) && adev->virt.ras_init_done)
                amdgpu_virt_release_ras_err_handler_data(adev);
 
-       amdgpu_ras_pre_fini(adev);
-
        if (adev->gmc.xgmi.num_physical_nodes > 1)
                amdgpu_xgmi_remove_device(adev);
 
@@ -3844,6 +3842,9 @@ void amdgpu_device_fini_hw(struct amdgpu_device *adev)
                amdgpu_ucode_sysfs_fini(adev);
        sysfs_remove_files(&adev->dev->kobj, amdgpu_dev_attributes);
 
+       /* disable ras feature must before hw fini */
+       amdgpu_ras_pre_fini(adev);
+
        amdgpu_device_ip_fini_early(adev);
 
        amdgpu_irq_fini_hw(adev);
index 46910e7b292777ce11b48bbfb9dd7186edffb2f5..3c623e589b79c65c7c6c137cbb2acfbfab8f8e0b 100644 (file)
@@ -2503,7 +2503,6 @@ void amdgpu_ras_late_fini(struct amdgpu_device *adev,
        amdgpu_ras_sysfs_remove(adev, ras_block);
        if (ih_info->cb)
                amdgpu_ras_interrupt_remove_handler(adev, ih_info);
-       amdgpu_ras_feature_enable(adev, ras_block, 0);
 }
 
 /* do some init work after IP late init as dependence.