From: Ahmad Rehman Date: Tue, 16 Apr 2024 18:29:15 +0000 (-0500) Subject: drm/amdgpu: Skip the coredump collection on reset during driver reload X-Git-Url: http://git.maquefel.me/?a=commitdiff_plain;h=ea137071ada1591a05ce0366de350158bf8dd6c7;p=linux.git drm/amdgpu: Skip the coredump collection on reset during driver reload In passthrough environment, the driver triggers the mode-1 reset on reload. The reset causes the core dump collection which is delayed task and prevents driver from unloading until it is completed. Since we do not need to collect data on "reset on reload" case, we can skip core dump collection. v2: Use the same flag to avoid calling amdgpu_reset_reg_dumps as well. Signed-off-by: Ahmad Rehman Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher --- diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 1b2e177bc2d6b..f3b7cb18fd460 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -5357,7 +5357,9 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle, /* Try reset handler method first */ tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device, reset_list); - amdgpu_reset_reg_dumps(tmp_adev); + + if (!test_bit(AMDGPU_SKIP_COREDUMP, &reset_context->flags)) + amdgpu_reset_reg_dumps(tmp_adev); reset_context->reset_device_list = device_list_handle; r = amdgpu_reset_perform_reset(tmp_adev, reset_context); @@ -5430,7 +5432,8 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle, vram_lost = amdgpu_device_check_vram_lost(tmp_adev); - amdgpu_coredump(tmp_adev, vram_lost, reset_context); + if (!test_bit(AMDGPU_SKIP_COREDUMP, &reset_context->flags)) + amdgpu_coredump(tmp_adev, vram_lost, reset_context); if (vram_lost) { DRM_INFO("VRAM is lost due to GPU reset!\n"); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 6ea893ad9a362..c512f70b82723 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -2481,6 +2481,7 @@ static void amdgpu_drv_delayed_reset_work_handler(struct work_struct *work) /* Use a common context, just need to make sure full reset is done */ set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags); + set_bit(AMDGPU_SKIP_COREDUMP, &reset_context.flags); r = amdgpu_do_asic_reset(&device_list, &reset_context); if (r) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h index 66125d43cf217..b11d190ece535 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h @@ -32,6 +32,7 @@ enum AMDGPU_RESET_FLAGS { AMDGPU_NEED_FULL_RESET = 0, AMDGPU_SKIP_HW_RESET = 1, + AMDGPU_SKIP_COREDUMP = 2, }; struct amdgpu_reset_context {