drm/amdgpu: Fix ras features value calltrace
author Stanley.Yang <Stanley.Yang@amd.com>
Wed, 17 Jan 2024 07:23:41 +0000 (15:23 +0800)
committer Alex Deucher <alexander.deucher@amd.com>
Mon, 22 Jan 2024 22:13:25 +0000 (17:13 -0500)
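The high three bits of the RAS features mask hold the socket
ID, so they must be masked off when checking whether any RAS
features are still enabled before disabling all RAS features.
Otherwise a non-zero socket ID makes the mask look non-empty
and triggers a spurious "Feature mask is not cleared" warning.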

Signed-off-by: Stanley.Yang <Stanley.Yang@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h

index 7e7bb85341afc65743e42214e0954dcbfe67ff9f..e7aa274060f8aac3113172709877e96f5074a583 100644 (file)
@@ -3114,7 +3114,8 @@ int amdgpu_ras_init(struct amdgpu_device *adev)
        /* Packed socket_id to ras feature mask bits[31:29] */
        if (adev->smuio.funcs &&
            adev->smuio.funcs->get_socket_id)
-               con->features |= ((adev->smuio.funcs->get_socket_id(adev)) << 29);
+               con->features |= ((adev->smuio.funcs->get_socket_id(adev)) <<
+                                       AMDGPU_RAS_FEATURES_SOCKETID_SHIFT);
 
        /* Get RAS schema for particular SOC */
        con->schema = amdgpu_get_ras_schema(adev);
@@ -3320,7 +3321,7 @@ void amdgpu_ras_suspend(struct amdgpu_device *adev)
 
        amdgpu_ras_disable_all_features(adev, 0);
        /* Make sure all ras objects are disabled. */
-       if (con->features)
+       if (AMDGPU_RAS_GET_FEATURES(con->features))
                amdgpu_ras_disable_all_features(adev, 1);
 }
 
@@ -3370,7 +3371,7 @@ int amdgpu_ras_pre_fini(struct amdgpu_device *adev)
 
 
        /* Need disable ras on all IPs here before ip [hw/sw]fini */
-       if (con->features)
+       if (AMDGPU_RAS_GET_FEATURES(con->features))
                amdgpu_ras_disable_all_features(adev, 0);
        amdgpu_ras_recovery_fini(adev);
        return 0;
@@ -3403,9 +3404,9 @@ int amdgpu_ras_fini(struct amdgpu_device *adev)
        amdgpu_ras_fs_fini(adev);
        amdgpu_ras_interrupt_remove_all(adev);
 
-       WARN(con->features, "Feature mask is not cleared");
+       WARN(AMDGPU_RAS_GET_FEATURES(con->features), "Feature mask is not cleared");
 
-       if (con->features)
+       if (AMDGPU_RAS_GET_FEATURES(con->features))
                amdgpu_ras_disable_all_features(adev, 1);
 
        cancel_delayed_work_sync(&con->ras_counte_delay_work);
index 9c3df9985fadac182103fc8cc3ca25e974de7a23..72022e2c6655a039b83384491c13e37d8ecd0c5e 100644 (file)
@@ -53,6 +53,12 @@ struct amdgpu_iv_entry;
 #define AMDGPU_RAS_INST_MASK 0xfffff000
 #define AMDGPU_RAS_INST_SHIFT 0xc
 
+#define AMDGPU_RAS_FEATURES_SOCKETID_SHIFT 29
+#define AMDGPU_RAS_FEATURES_SOCKETID_MASK 0xe0000000
+
+/* The high three bits indicate the socket ID */
+#define AMDGPU_RAS_GET_FEATURES(val)  ((val) & ~AMDGPU_RAS_FEATURES_SOCKETID_MASK)
+
 enum amdgpu_ras_block {
        AMDGPU_RAS_BLOCK__UMC = 0,
        AMDGPU_RAS_BLOCK__SDMA,