drm/amdgpu: skip bad page reservation once issuing from eeprom write
author Guchun Chen <guchun.chen@amd.com>
Thu, 23 Jul 2020 07:50:42 +0000 (15:50 +0800)
committer Alex Deucher <alexander.deucher@amd.com>
Tue, 4 Aug 2020 21:26:38 +0000 (17:26 -0400)
Once the ras recovery is issued from the eeprom write itself,
bad page reservation should be skipped; otherwise, recursive
calls to write to the eeprom would happen.

Signed-off-by: Guchun Chen <guchun.chen@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h

index 1a1652ea76b01aeade432c6d734c779071a60888..d081de232c6661842d333876dae1aad43f16818d 100644 (file)
@@ -62,8 +62,6 @@ const char *ras_block_string[] = {
 #define ras_err_str(i) (ras_error_string[ffs(i)])
 #define ras_block_str(i) (ras_block_string[i])
 
-#define AMDGPU_RAS_FLAG_INIT_BY_VBIOS          1
-#define AMDGPU_RAS_FLAG_INIT_NEED_RESET                2
 #define RAS_DEFAULT_FLAGS (AMDGPU_RAS_FLAG_INIT_BY_VBIOS)
 
 /* inject address is 52 bits */
index 4672649a9293c19352fbb99c1937c240342e61b8..cf9f60202334da2998175490a6589c55e00b7786 100644 (file)
 #include "ta_ras_if.h"
 #include "amdgpu_ras_eeprom.h"
 
+#define AMDGPU_RAS_FLAG_INIT_BY_VBIOS          (0x1 << 0)
+#define AMDGPU_RAS_FLAG_INIT_NEED_RESET                (0x1 << 1)
+#define AMDGPU_RAS_FLAG_SKIP_BAD_PAGE_RESV     (0x1 << 2)
+
 enum amdgpu_ras_block {
        AMDGPU_RAS_BLOCK__UMC = 0,
        AMDGPU_RAS_BLOCK__SDMA,
@@ -503,10 +507,14 @@ static inline int amdgpu_ras_reset_gpu(struct amdgpu_device *adev)
 {
        struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
 
-       /* save bad page to eeprom before gpu reset,
-        * i2c may be unstable in gpu reset
+       /*
+        * Save bad page to eeprom before gpu reset, i2c may be unstable
+        * in gpu reset.
+        *
+        * Also, exclude the case when ras recovery issuer is
+        * eeprom page write itself.
         */
-       if (in_task())
+       if (!(ras->flags & AMDGPU_RAS_FLAG_SKIP_BAD_PAGE_RESV) && in_task())
                amdgpu_ras_reserve_bad_pages(adev);
 
        if (atomic_cmpxchg(&ras->in_recovery, 0, 1) == 0)