From: Philip Yang Date: Tue, 20 Apr 2021 19:13:59 +0000 (-0400) Subject: drm/amdkfd: enable subsequent retry fault X-Git-Url: http://git.maquefel.me/?a=commitdiff_plain;h=b3dc91f973172dd71594076eb20484471d981a89;p=linux.git drm/amdkfd: enable subsequent retry fault After draining a stale retry fault, or after failing to validate the range for recovery, the fault address must be removed from the fault filter ring so that a subsequent retry interrupt on the same address can be handled. Otherwise the retry fault will not be processed for recovery until the timeout has passed. Signed-off-by: Philip Yang Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 00d759b257f43..d9111fea724b3 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -2363,8 +2363,10 @@ retry_write_locked: mutex_lock(&prange->migrate_mutex); - if (svm_range_skip_recover(prange)) + if (svm_range_skip_recover(prange)) { + amdgpu_gmc_filter_faults_remove(adev, addr, pasid); goto out_unlock_range; + } timestamp = ktime_to_us(ktime_get()) - prange->validate_timestamp; /* skip duplicate vm fault on different pages of same range */ @@ -2426,6 +2428,7 @@ out: if (r == -EAGAIN) { pr_debug("recover vm fault later\n"); + amdgpu_gmc_filter_faults_remove(adev, addr, pasid); r = 0; } return r;