drm/amdgpu: Increase tlb flush timeout for sriov
authorDusica Milinkovic <Dusica.Milinkovic@amd.com>
Wed, 10 Aug 2022 07:43:15 +0000 (09:43 +0200)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 5 Sep 2022 08:30:11 +0000 (10:30 +0200)
[ Upstream commit 373008bfc9cdb0f050258947fa5a095f0657e1bc ]

[Why]
During multi-vf executing benchmark (Luxmark) observed kiq error timeout.
It happenes because all of VFs do the tlb invalidation at the same time.
Although each VF has the invalidate register set, from hardware side
the invalidate requests are queue to execute.

[How]
In case of 12 VF increase timeout on 12*100ms

Signed-off-by: Dusica Milinkovic <Dusica.Milinkovic@amd.com>
Acked-by: Shaoyun Liu <shaoyun.liu@amd.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
drivers/gpu/drm/amd/amdgpu/amdgpu.h
drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c

index 5f95d03fd46a03a01892ec589a0cf0d94df00216..4f62f422bcb78f2c6ca545592cf219f30650f844 100644 (file)
@@ -312,7 +312,7 @@ enum amdgpu_kiq_irq {
        AMDGPU_CP_KIQ_IRQ_DRIVER0 = 0,
        AMDGPU_CP_KIQ_IRQ_LAST
 };
-
+#define SRIOV_USEC_TIMEOUT  1200000 /* wait 12 * 100ms for SRIOV */
 #define MAX_KIQ_REG_WAIT       5000 /* in usecs, 5ms */
 #define MAX_KIQ_REG_BAILOUT_INTERVAL   5 /* in msecs, 5ms */
 #define MAX_KIQ_REG_TRY 1000
index 93a4da4284ede07855ede90bcda33eaa24e663f1..9c07ec8b9732759aac4d747d9592317797b565ce 100644 (file)
@@ -414,6 +414,7 @@ static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
        uint32_t seq;
        uint16_t queried_pasid;
        bool ret;
+       u32 usec_timeout = amdgpu_sriov_vf(adev) ? SRIOV_USEC_TIMEOUT : adev->usec_timeout;
        struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
        struct amdgpu_kiq *kiq = &adev->gfx.kiq;
 
@@ -432,7 +433,7 @@ static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
 
                amdgpu_ring_commit(ring);
                spin_unlock(&adev->gfx.kiq.ring_lock);
-               r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
+               r = amdgpu_fence_wait_polling(ring, seq, usec_timeout);
                if (r < 1) {
                        dev_err(adev->dev, "wait for kiq fence error: %ld.\n", r);
                        return -ETIME;
index 0e731016921beb1a6674ae8ba4cd1bbdf5feb47f..70d24b522df8dc2cbd7d66eeba7883a7ca2f54b6 100644 (file)
@@ -863,6 +863,7 @@ static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
        uint32_t seq;
        uint16_t queried_pasid;
        bool ret;
+       u32 usec_timeout = amdgpu_sriov_vf(adev) ? SRIOV_USEC_TIMEOUT : adev->usec_timeout;
        struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
        struct amdgpu_kiq *kiq = &adev->gfx.kiq;
 
@@ -902,7 +903,7 @@ static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
 
                amdgpu_ring_commit(ring);
                spin_unlock(&adev->gfx.kiq.ring_lock);
-               r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
+               r = amdgpu_fence_wait_polling(ring, seq, usec_timeout);
                if (r < 1) {
                        dev_err(adev->dev, "wait for kiq fence error: %ld.\n", r);
                        up_read(&adev->reset_sem);