From: Jingwen Chen Date: Thu, 25 Feb 2021 09:08:02 +0000 (+0800) Subject: drm/amd/amdgpu: move inc gpu_reset_counter after drm_sched_stop X-Git-Url: http://git.maquefel.me/?a=commitdiff_plain;h=8f8c80f430096726a5d8701ca80564bce06fa8da;p=linux.git drm/amd/amdgpu: move inc gpu_reset_counter after drm_sched_stop Move gpu_reset_counter after drm_sched_stop to avoid race condition caused by job submitted between reset_count +1 and drm_sched_stop. Signed-off-by: Jingwen Chen Reviewed-by: Christian König Signed-off-by: Alex Deucher --- diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 6447cd6ca5a8e..a0af65090e8dd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -4449,7 +4449,6 @@ static bool amdgpu_device_lock_adev(struct amdgpu_device *adev, down_write(&adev->reset_sem); } - atomic_inc(&adev->gpu_reset_counter); switch (amdgpu_asic_reset_method(adev)) { case AMD_RESET_METHOD_MODE1: adev->mp1_state = PP_MP1_STATE_SHUTDOWN; @@ -4710,6 +4709,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, if (need_emergency_restart) amdgpu_job_stop_all_jobs_on_sched(&ring->sched); } + atomic_inc(&tmp_adev->gpu_reset_counter); } if (need_emergency_restart) @@ -5052,6 +5052,7 @@ pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_sta drm_sched_stop(&ring->sched, NULL); } + atomic_inc(&adev->gpu_reset_counter); return PCI_ERS_RESULT_NEED_RESET; case pci_channel_io_perm_failure: /* Permanent error, prepare for device removal */