drm/amdgpu: add UTCL2 RAS poison query for Aldebaran (v2)

author Tao Zhou <tao.zhou1@amd.com>

Tue, 15 Mar 2022 09:48:18 +0000 (17:48 +0800)

committer Alex Deucher <alexander.deucher@amd.com>

Fri, 25 Mar 2022 16:40:26 +0000 (12:40 -0400)
author Tao Zhou <tao.zhou1@amd.com>
Tue, 15 Mar 2022 09:48:18 +0000 (17:48 +0800)
committer Alex Deucher <alexander.deucher@amd.com>
Fri, 25 Mar 2022 16:40:26 +0000 (12:40 -0400)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c

index 6ca1db3c243f9c5291bcbc55fb78106eff0e51d5..c18c4be1e4acd3cab75e1c0cb8e302819ba5000d 100644 (file)
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -724,3 +724,11 @@ void amdgpu_amdkfd_ras_poison_consumption_handler(struct amdgpu_device *adev, bo
         else if (reset)
                 amdgpu_amdkfd_gpu_reset(adev);
  }
+
+bool amdgpu_amdkfd_ras_query_utcl2_poison_status(struct amdgpu_device *adev)
+{
+       /* Forward to the GFX RAS UTCL2 hook; false if gfx.ras (may be NULL) or the hook is absent. */
+       if (!adev->gfx.ras || !adev->gfx.ras->query_utcl2_poison_status)
+               return false;
+       return adev->gfx.ras->query_utcl2_poison_status(adev);
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h

index 4cb14c2fe53fbe3fda50094f1c668279c232ec72..0838926a8ef06cd8fa23ebf02464f26444732ad2 100644 (file)
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -301,6 +301,7 @@ void amdgpu_amdkfd_ras_poison_consumption_handler(struct amdgpu_device *adev,
  bool amdgpu_amdkfd_bo_mapped_to_dev(struct amdgpu_device *adev, struct kgd_mem *mem);
  void amdgpu_amdkfd_block_mmu_notifications(void *p);
  int amdgpu_amdkfd_criu_resume(void *p);
+bool amdgpu_amdkfd_ras_query_utcl2_poison_status(struct amdgpu_device *adev);
  
  #if IS_ENABLED(CONFIG_HSA_AMD)
  void amdgpu_amdkfd_gpuvm_init_mem_limits(void);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h

index dcb3c7871c73472e77a0711a36612cfb5c6d6f48..5ed9b8a4c571d49f6a98ef61f761fbcc57cf3e5d 100644 (file)
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
@@ -202,6 +202,7 @@ struct amdgpu_cu_info {
  struct amdgpu_gfx_ras {
         struct amdgpu_ras_block_object  ras_block;
         void (*enable_watchdog_timer)(struct amdgpu_device *adev);
+       bool (*query_utcl2_poison_status)(struct amdgpu_device *adev); /* optional; caller checks for NULL */
  };
  
  struct amdgpu_gfx_funcs {
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c

index 7653ebd0e67bd8513e167f18e95cfe11992bc428..3a797424579c5955ffe0f701cbec2344d7f2ef23 100644 (file)
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c
@@ -1930,6 +1930,19 @@ static void gfx_v9_4_2_reset_sq_timeout_status(struct amdgpu_device *adev)
         mutex_unlock(&adev->grbm_idx_mutex);
  }
  
+static bool gfx_v9_4_2_query_uctl2_poison_status(struct amdgpu_device *adev)
+{
+       struct amdgpu_vmhub *gfxhub = &adev->vmhub[AMDGPU_GFXHUB_0];
+       u32 fault_status;
+
+       /* Sample the GFX hub L2 protection fault status first ... */
+       fault_status = RREG32(gfxhub->vm_l2_pro_fault_status);
+       /* ... then reset page fault status. */
+       WREG32_P(gfxhub->vm_l2_pro_fault_cntl, 1, ~1);
+
+       return REG_GET_FIELD(fault_status, VM_L2_PROTECTION_FAULT_STATUS, FED);
+}
+
  struct amdgpu_ras_block_hw_ops  gfx_v9_4_2_ras_ops = {
                 .ras_error_inject = &gfx_v9_4_2_ras_error_inject,
                 .query_ras_error_count = &gfx_v9_4_2_query_ras_error_count,
@@ -1943,4 +1956,5 @@ struct amdgpu_gfx_ras gfx_v9_4_2_ras = {
                 .hw_ops = &gfx_v9_4_2_ras_ops,
         },
         .enable_watchdog_timer = &gfx_v9_4_2_enable_watchdog_timer,
+       .query_utcl2_poison_status = gfx_v9_4_2_query_uctl2_poison_status,
  };
author	Tao Zhou <tao.zhou1@amd.com>
	Tue, 15 Mar 2022 09:48:18 +0000 (17:48 +0800)
committer	Alex Deucher <alexander.deucher@amd.com>
	Fri, 25 Mar 2022 16:40:26 +0000 (12:40 -0400)
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c		patch | blob | history
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h		patch | blob | history
drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h		patch | blob | history
drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c		patch | blob | history