drm/lima: save task info dump when task fail

author Qiang Yu <yuq825@gmail.com>

Sat, 7 Mar 2020 13:44:23 +0000 (21:44 +0800)

committer Qiang Yu <yuq825@gmail.com>

Sun, 22 Mar 2020 01:35:19 +0000 (09:35 +0800)
author Qiang Yu <yuq825@gmail.com>
Sat, 7 Mar 2020 13:44:23 +0000 (21:44 +0800)
committer Qiang Yu <yuq825@gmail.com>
Sun, 22 Mar 2020 01:35:19 +0000 (09:35 +0800)
diff --git a/drivers/gpu/drm/lima/lima_device.c b/drivers/gpu/drm/lima/lima_device.c

index 19829b5430242f64d3fb88f6eb88548da9e5faf9..42a00171fea5a23bbb89303c8c72e13dc94e169f 100644 (file)
--- a/drivers/gpu/drm/lima/lima_device.c
+++ b/drivers/gpu/drm/lima/lima_device.c
@@ -344,6 +344,12 @@ int lima_device_init(struct lima_device *ldev)
         if (err)
                 goto err_out5;
  
+       ldev->dump.magic = LIMA_DUMP_MAGIC;
+       ldev->dump.version_major = LIMA_DUMP_MAJOR;
+       ldev->dump.version_minor = LIMA_DUMP_MINOR;
+       INIT_LIST_HEAD(&ldev->error_task_list);
+       mutex_init(&ldev->error_task_list_lock);
+
         dev_info(ldev->dev, "bus rate = %lu\n", clk_get_rate(ldev->clk_bus));
         dev_info(ldev->dev, "mod rate = %lu", clk_get_rate(ldev->clk_gpu));
  
@@ -370,6 +376,13 @@ err_out0:
  void lima_device_fini(struct lima_device *ldev)
  {
         int i;
+       struct lima_sched_error_task *et, *tmp;
+
+       list_for_each_entry_safe(et, tmp, &ldev->error_task_list, list) {
+               list_del(&et->list);
+               kvfree(et);
+       }
+       mutex_destroy(&ldev->error_task_list_lock);
  
         lima_fini_pp_pipe(ldev);
         lima_fini_gp_pipe(ldev);
diff --git a/drivers/gpu/drm/lima/lima_device.h b/drivers/gpu/drm/lima/lima_device.h

index 31158d86271c2b326b0c9b9e7a8917235e6b80ea..f17173f47f2652fdca88ff247f32d63c8cb26462 100644 (file)
--- a/drivers/gpu/drm/lima/lima_device.h
+++ b/drivers/gpu/drm/lima/lima_device.h
@@ -6,8 +6,11 @@
  
  #include <drm/drm_device.h>
  #include <linux/delay.h>
+#include <linux/list.h>
+#include <linux/mutex.h>
  
  #include "lima_sched.h"
+#include "lima_dump.h"
  
  enum lima_gpu_id {
         lima_gpu_mali400 = 0,
@@ -94,6 +97,11 @@ struct lima_device {
  
         u32 *dlbu_cpu;
         dma_addr_t dlbu_dma;
+
+       /* debug info */
+       struct lima_dump_head dump;
+       struct list_head error_task_list;
+       struct mutex error_task_list_lock;
  };
  
  static inline struct lima_device *
diff --git a/drivers/gpu/drm/lima/lima_dump.h b/drivers/gpu/drm/lima/lima_dump.h

new file mode 100644 (file)

index 0000000..ca243d9
--- /dev/null
+++ b/drivers/gpu/drm/lima/lima_dump.h
@@ -0,0 +1,77 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
+/* Copyright 2020 Qiang Yu <yuq825@gmail.com> */
+
+#ifndef __LIMA_DUMP_H__
+#define __LIMA_DUMP_H__
+
+#include <linux/types.h>
+
+/**
+ * dump file format for all the information to start a lima task
+ *
+ * top level format
+ * | magic code "LIMA" | format version | num tasks | data size |
+ * | reserved | reserved | reserved | reserved |
+ * | task 1 ID | task 1 size | num chunks | reserved | task 1 data |
+ * | task 2 ID | task 2 size | num chunks | reserved | task 2 data |
+ * ...
+ *
+ * task data format
+ * | chunk 1 ID | chunk 1 size | reserved | reserved | chunk 1 data |
+ * | chunk 2 ID | chunk 2 size | reserved | reserved | chunk 2 data |
+ * ...
+ *
+ */
+
+#define LIMA_DUMP_MAJOR 1
+#define LIMA_DUMP_MINOR 0
+
+#define LIMA_DUMP_MAGIC 0x414d494c
+
+struct lima_dump_head { /* top-level dump header; one instance is embedded in struct lima_device as 'dump' */
+       __u32 magic; /* set to LIMA_DUMP_MAGIC by lima_device_init() */
+       __u16 version_major; /* set to LIMA_DUMP_MAJOR by lima_device_init() */
+       __u16 version_minor; /* set to LIMA_DUMP_MINOR by lima_device_init() */
+       __u32 num_tasks; /* incremented for every error task that was saved successfully */
+       __u32 size; /* running sum of the saved tasks' sizes (each includes its lima_dump_task header) */
+       __u32 reserved[4]; /* not written by any code visible in this patch */
+};
+
+#define LIMA_DUMP_TASK_GP   0
+#define LIMA_DUMP_TASK_PP   1
+#define LIMA_DUMP_TASK_NUM  2
+
+struct lima_dump_task { /* header placed in front of the chunks of one saved task */
+       __u32 id; /* writer stores the pipe id (lima_pipe_gp/lima_pipe_pp); presumably equal to LIMA_DUMP_TASK_* - confirm */
+       __u32 size; /* bytes of chunk data following this header; the header itself is not counted */
+       __u32 num_chunks; /* number of chunks that follow this header */
+       __u32 reserved; /* left zero by the writer's memset() of the header */
+};
+
+#define LIMA_DUMP_CHUNK_FRAME         0
+#define LIMA_DUMP_CHUNK_BUFFER        1
+#define LIMA_DUMP_CHUNK_PROCESS_NAME  2
+#define LIMA_DUMP_CHUNK_PROCESS_ID    3
+#define LIMA_DUMP_CHUNK_NUM           4
+
+struct lima_dump_chunk { /* generic chunk header; the payload starts directly after it */
+       __u32 id; /* one of LIMA_DUMP_CHUNK_* */
+       __u32 size; /* payload bytes following this header; the header itself is not counted */
+       __u32 reserved[2]; /* left zero by the writer's memset() of the header */
+};
+
+struct lima_dump_chunk_buffer { /* LIMA_DUMP_CHUNK_BUFFER header; four __u32 like struct lima_dump_chunk */
+       __u32 id; /* LIMA_DUMP_CHUNK_BUFFER */
+       __u32 size; /* bytes of buffer content copied after this header */
+       __u32 va; /* value returned by lima_vm_get_va() for this BO in the task's VM */
+       __u32 reserved; /* left zero by the writer's memset() of the header */
+};
+
+struct lima_dump_chunk_pid { /* LIMA_DUMP_CHUNK_PROCESS_ID header; four __u32 like struct lima_dump_chunk */
+       __u32 id; /* LIMA_DUMP_CHUNK_PROCESS_ID */
+       __u32 size; /* the writer leaves this 0: the pid is carried in the header, no payload follows */
+       __u32 pid; /* copied from ctx->pid of the submitting lima_ctx */
+       __u32 reserved; /* left zero by the writer's memset() of the header */
+};
+
+#endif
diff --git a/drivers/gpu/drm/lima/lima_sched.c b/drivers/gpu/drm/lima/lima_sched.c

index 3886999b453303a58b5b2935d92f30f10bab4ee7..86192422a68982b6f22be3e86e87b0145b86955c 100644 (file)
--- a/drivers/gpu/drm/lima/lima_sched.c
+++ b/drivers/gpu/drm/lima/lima_sched.c
@@ -4,6 +4,7 @@
  #include <linux/kthread.h>
  #include <linux/slab.h>
  #include <linux/xarray.h>
+#include <linux/vmalloc.h>
  
  #include "lima_drv.h"
  #include "lima_sched.h"
@@ -256,6 +257,133 @@ static struct dma_fence *lima_sched_run_job(struct drm_sched_job *job)
         return task->fence;
  }
  
+static void lima_sched_build_error_task_list(struct lima_sched_task *task)
+{ /* Snapshot @task (frame, process name, pid, BO contents) into one allocation and queue it on dev->error_task_list; called from lima_sched_timedout_job(). Best effort: returns nothing, failures only skip the dump. */
+       struct lima_sched_error_task *et;
+       struct lima_sched_pipe *pipe = to_lima_pipe(task->base.sched);
+       struct lima_ip *ip = pipe->processor[0];
+       int pipe_id = ip->id == lima_ip_gp ? lima_pipe_gp : lima_pipe_pp; /* any IP other than the GP is treated as PP */
+       struct lima_device *dev = ip->dev;
+       struct lima_sched_context *sched_ctx =
+               container_of(task->base.entity,
+                            struct lima_sched_context, base);
+       struct lima_ctx *ctx =
+               container_of(sched_ctx, struct lima_ctx, context[pipe_id]); /* sched_ctx is the context[pipe_id] member of its lima_ctx */
+       struct lima_dump_task *dt;
+       struct lima_dump_chunk *chunk;
+       struct lima_dump_chunk_pid *pid_chunk;
+       struct lima_dump_chunk_buffer *buffer_chunk;
+       u32 size, task_size, mem_size;
+       int i;
+
+       mutex_lock(&dev->error_task_list_lock); /* held until 'out:'; protects the list and the dev->dump counters */
+
+       if (dev->dump.num_tasks >= lima_max_error_tasks) { /* cap on how many failed tasks are retained */
+               dev_info(dev->dev, "fail to save task state: error task list is full\n");
+               goto out;
+       }
+
+       /* frame chunk: header + pipe->frame_size bytes copied from task->frame */
+       size = sizeof(struct lima_dump_chunk) + pipe->frame_size;
+       /* process name chunk: header + the whole ctx->pname array */
+       size += sizeof(struct lima_dump_chunk) + sizeof(ctx->pname);
+       /* pid chunk: header only, the pid is stored inside struct lima_dump_chunk_pid */
+       size += sizeof(struct lima_dump_chunk);
+       /* buffer chunks: one header + content per BO; must mirror the copy loop below */
+       for (i = 0; i < task->num_bos; i++) {
+               struct lima_bo *bo = task->bos[i];
+
+               size += sizeof(struct lima_dump_chunk); /* stands in for struct lima_dump_chunk_buffer, which is also four __u32 */
+               size += bo->heap_size ? bo->heap_size : lima_bo_size(bo); /* a non-zero heap_size takes precedence over lima_bo_size() */
+       }
+
+       task_size = size + sizeof(struct lima_dump_task); /* chunks plus the task header */
+       mem_size = task_size + sizeof(*et); /* plus the bookkeeping struct at the front of the allocation */
+       et = kvmalloc(mem_size, GFP_KERNEL);
+       if (!et) {
+               dev_err(dev->dev, "fail to alloc task dump buffer of size %x\n",
+                       mem_size);
+               goto out;
+       }
+
+       et->data = et + 1; /* dump payload lives right behind *et in the same allocation */
+       et->size = task_size;
+
+       dt = et->data;
+       memset(dt, 0, sizeof(*dt)); /* zeroes num_chunks and reserved; the buffer from kvmalloc() is not pre-zeroed */
+       dt->id = pipe_id;
+       dt->size = size; /* excludes the lima_dump_task header itself */
+
+       chunk = (struct lima_dump_chunk *)(dt + 1); /* first chunk follows the task header */
+       memset(chunk, 0, sizeof(*chunk));
+       chunk->id = LIMA_DUMP_CHUNK_FRAME;
+       chunk->size = pipe->frame_size;
+       memcpy(chunk + 1, task->frame, pipe->frame_size);
+       dt->num_chunks++;
+
+       chunk = (void *)(chunk + 1) + chunk->size; /* step over header + payload to the next chunk slot */
+       memset(chunk, 0, sizeof(*chunk));
+       chunk->id = LIMA_DUMP_CHUNK_PROCESS_NAME;
+       chunk->size = sizeof(ctx->pname);
+       memcpy(chunk + 1, ctx->pname, sizeof(ctx->pname));
+       dt->num_chunks++;
+
+       pid_chunk = (void *)(chunk + 1) + chunk->size;
+       memset(pid_chunk, 0, sizeof(*pid_chunk)); /* size stays 0: this chunk has no payload */
+       pid_chunk->id = LIMA_DUMP_CHUNK_PROCESS_ID;
+       pid_chunk->pid = ctx->pid;
+       dt->num_chunks++;
+
+       buffer_chunk = (void *)(pid_chunk + 1) + pid_chunk->size; /* pid_chunk->size is 0, so this is the slot right after the pid header */
+       for (i = 0; i < task->num_bos; i++) {
+               struct lima_bo *bo = task->bos[i];
+               void *data;
+
+               memset(buffer_chunk, 0, sizeof(*buffer_chunk));
+               buffer_chunk->id = LIMA_DUMP_CHUNK_BUFFER;
+               buffer_chunk->va = lima_vm_get_va(task->vm, bo);
+
+               if (bo->heap_size) { /* same heap_size test as in the size calculation above */
+                       buffer_chunk->size = bo->heap_size;
+
+                       data = vmap(bo->base.pages, bo->heap_size >> PAGE_SHIFT,
+                                   VM_MAP, pgprot_writecombine(PAGE_KERNEL)); /* temporary mapping of only the first heap_size bytes' worth of pages */
+                       if (!data) {
+                               kvfree(et); /* et is not on the list yet; the dump is dropped and nothing is logged */
+                               goto out;
+                       }
+
+                       memcpy(buffer_chunk + 1, data, buffer_chunk->size);
+
+                       vunmap(data);
+               } else {
+                       buffer_chunk->size = lima_bo_size(bo);
+
+                       data = drm_gem_shmem_vmap(&bo->base.base);
+                       if (IS_ERR_OR_NULL(data)) {
+                               kvfree(et); /* et is not on the list yet; the dump is dropped and nothing is logged */
+                               goto out;
+                       }
+
+                       memcpy(buffer_chunk + 1, data, buffer_chunk->size);
+
+                       drm_gem_shmem_vunmap(&bo->base.base, data);
+               }
+
+               buffer_chunk = (void *)(buffer_chunk + 1) + buffer_chunk->size; /* advance past this header + content */
+               dt->num_chunks++;
+       }
+
+       list_add(&et->list, &dev->error_task_list); /* publish only once the dump is complete */
+       dev->dump.size += et->size; /* keep the device-wide header totals in step with the list */
+       dev->dump.num_tasks++;
+
+       dev_info(dev->dev, "save error task state success\n");
+
+out:
+       mutex_unlock(&dev->error_task_list_lock);
+}
+
  static void lima_sched_timedout_job(struct drm_sched_job *job)
  {
         struct lima_sched_pipe *pipe = to_lima_pipe(job->sched);
@@ -268,6 +396,8 @@ static void lima_sched_timedout_job(struct drm_sched_job *job)
  
         drm_sched_increase_karma(&task->base);
  
+       lima_sched_build_error_task_list(task);
+
         pipe->task_error(pipe);
  
         if (pipe->bcast_mmu)
diff --git a/drivers/gpu/drm/lima/lima_sched.h b/drivers/gpu/drm/lima/lima_sched.h

index d64393fb50a9b667ab05eca7fb4ec3712daf5824..a1496cb7bc4164ba109d588d999fabdb6e2feccf 100644 (file)
--- a/drivers/gpu/drm/lima/lima_sched.h
+++ b/drivers/gpu/drm/lima/lima_sched.h
@@ -5,9 +5,16 @@
  #define __LIMA_SCHED_H__
  
  #include <drm/gpu_scheduler.h>
+#include <linux/list.h>
  
  struct lima_vm;
  
+struct lima_sched_error_task { /* bookkeeping for one saved dump of a failed task */
+       struct list_head list; /* links the entry into lima_device.error_task_list */
+       void *data; /* dump payload; lima_sched_build_error_task_list() points it just past this struct, in the same allocation */
+       u32 size; /* bytes at @data: the lima_dump_task header plus all chunks */
+};
+
  struct lima_sched_task {
         struct drm_sched_job base;
author	Qiang Yu <yuq825@gmail.com>
	Sat, 7 Mar 2020 13:44:23 +0000 (21:44 +0800)
committer	Qiang Yu <yuq825@gmail.com>
	Sun, 22 Mar 2020 01:35:19 +0000 (09:35 +0800)
drivers/gpu/drm/lima/lima_device.c		patch \| blob \| history
drivers/gpu/drm/lima/lima_device.h		patch \| blob \| history
drivers/gpu/drm/lima/lima_dump.h	[new file with mode: 0644]	patch \| blob
drivers/gpu/drm/lima/lima_sched.c		patch \| blob \| history
drivers/gpu/drm/lima/lima_sched.h		patch \| blob \| history