drm/v3d: Expose the total GPU usage stats on sysfs
author: Maíra Canal <mcanal@igalia.com>
Tue, 5 Sep 2023 21:06:35 +0000 (18:06 -0300)
committer: Maíra Canal <mcanal@igalia.com>
Mon, 6 Nov 2023 13:09:29 +0000 (10:09 -0300)
The previous patch exposed the accumulated amount of active time per
client for each V3D queue. But this doesn't provide a global notion of
the GPU usage.

Therefore, provide the accumulated amount of active time for each V3D
queue (BIN, RENDER, CSD, TFU and CACHE_CLEAN), considering all the jobs
submitted to the queue, independent of the client.

This data is exposed through the sysfs interface, so that by querying the
interface at two different points in time, the usage percentage of each
queue can be calculated.

Co-developed-by: Jose Maria Casanova Crespo <jmcasanova@igalia.com>
Signed-off-by: Jose Maria Casanova Crespo <jmcasanova@igalia.com>
Signed-off-by: Maíra Canal <mcanal@igalia.com>
Acked-by: Jose Maria Casanova Crespo <jmcasanova@igalia.com>
Reviewed-by: Melissa Wen <mwen@igalia.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20230905213416.1290219-3-mcanal@igalia.com
drivers/gpu/drm/v3d/Makefile
drivers/gpu/drm/v3d/v3d_drv.c
drivers/gpu/drm/v3d/v3d_drv.h
drivers/gpu/drm/v3d/v3d_gem.c
drivers/gpu/drm/v3d/v3d_irq.c
drivers/gpu/drm/v3d/v3d_sched.c
drivers/gpu/drm/v3d/v3d_sysfs.c [new file with mode: 0644]

index e8b31413702075993c41e0ae831d732b7e6f3b17..4b21b20e49981f0bd1279b81d56673b10dab8aeb 100644 (file)
@@ -11,7 +11,8 @@ v3d-y := \
        v3d_mmu.o \
        v3d_perfmon.o \
        v3d_trace_points.o \
-       v3d_sched.o
+       v3d_sched.o \
+       v3d_sysfs.o
 
 v3d-$(CONFIG_DEBUG_FS) += v3d_debugfs.o
 
index 82984b4df2a2ca10da02711758ebaa79d788f74f..44a1ca57d6a447282ce8d4455dacf8d584e99e03 100644 (file)
@@ -316,8 +316,14 @@ static int v3d_platform_drm_probe(struct platform_device *pdev)
        if (ret)
                goto irq_disable;
 
+       ret = v3d_sysfs_init(dev);
+       if (ret)
+               goto drm_unregister;
+
        return 0;
 
+drm_unregister:
+       drm_dev_unregister(drm);
 irq_disable:
        v3d_irq_disable(v3d);
 gem_destroy:
@@ -331,6 +337,9 @@ static void v3d_platform_drm_remove(struct platform_device *pdev)
 {
        struct drm_device *drm = platform_get_drvdata(pdev);
        struct v3d_dev *v3d = to_v3d_dev(drm);
+       struct device *dev = &pdev->dev;
+
+       v3d_sysfs_destroy(dev);
 
        drm_dev_unregister(drm);
 
index 8f9d93239a174d4322a346ad8e56cc238754f911..4c59fefaa0b4b18e27886e5d3335452b6f5cae9f 100644 (file)
@@ -38,6 +38,10 @@ struct v3d_queue_state {
 
        u64 fence_context;
        u64 emit_seqno;
+
+       u64 start_ns;
+       u64 enabled_ns;
+       u64 jobs_sent;
 };
 
 /* Performance monitor object. The perform lifetime is controlled by userspace
@@ -441,3 +445,7 @@ int v3d_perfmon_destroy_ioctl(struct drm_device *dev, void *data,
                              struct drm_file *file_priv);
 int v3d_perfmon_get_values_ioctl(struct drm_device *dev, void *data,
                                 struct drm_file *file_priv);
+
+/* v3d_sysfs.c */
+int v3d_sysfs_init(struct device *dev);
+void v3d_sysfs_destroy(struct device *dev);
index a33e90f29bd52f86c7d77f5042abeb75ad245f62..712675134c048dcf7baf7d5d45d81f5c671ce3f0 100644 (file)
@@ -1014,8 +1014,12 @@ v3d_gem_init(struct drm_device *dev)
        u32 pt_size = 4096 * 1024;
        int ret, i;
 
-       for (i = 0; i < V3D_MAX_QUEUES; i++)
+       for (i = 0; i < V3D_MAX_QUEUES; i++) {
                v3d->queue[i].fence_context = dma_fence_context_alloc(1);
+               v3d->queue[i].start_ns = 0;
+               v3d->queue[i].enabled_ns = 0;
+               v3d->queue[i].jobs_sent = 0;
+       }
 
        spin_lock_init(&v3d->mm_lock);
        spin_lock_init(&v3d->job_lock);
index ba390b782103ac1d585797a610b83a1f85efaa76..afc76390a197a03fcfdd7804241eb60c6de1c9e6 100644 (file)
@@ -103,10 +103,17 @@ v3d_irq(int irq, void *arg)
                struct v3d_fence *fence =
                        to_v3d_fence(v3d->bin_job->base.irq_fence);
                struct v3d_file_priv *file = v3d->bin_job->base.file->driver_priv;
+               u64 runtime = local_clock() - file->start_ns[V3D_BIN];
 
                file->enabled_ns[V3D_BIN] += local_clock() - file->start_ns[V3D_BIN];
                file->jobs_sent[V3D_BIN]++;
+               v3d->queue[V3D_BIN].jobs_sent++;
+
                file->start_ns[V3D_BIN] = 0;
+               v3d->queue[V3D_BIN].start_ns = 0;
+
+               file->enabled_ns[V3D_BIN] += runtime;
+               v3d->queue[V3D_BIN].enabled_ns += runtime;
 
                trace_v3d_bcl_irq(&v3d->drm, fence->seqno);
                dma_fence_signal(&fence->base);
@@ -117,10 +124,17 @@ v3d_irq(int irq, void *arg)
                struct v3d_fence *fence =
                        to_v3d_fence(v3d->render_job->base.irq_fence);
                struct v3d_file_priv *file = v3d->render_job->base.file->driver_priv;
+               u64 runtime = local_clock() - file->start_ns[V3D_RENDER];
 
                file->enabled_ns[V3D_RENDER] += local_clock() - file->start_ns[V3D_RENDER];
                file->jobs_sent[V3D_RENDER]++;
+               v3d->queue[V3D_RENDER].jobs_sent++;
+
                file->start_ns[V3D_RENDER] = 0;
+               v3d->queue[V3D_RENDER].start_ns = 0;
+
+               file->enabled_ns[V3D_RENDER] += runtime;
+               v3d->queue[V3D_RENDER].enabled_ns += runtime;
 
                trace_v3d_rcl_irq(&v3d->drm, fence->seqno);
                dma_fence_signal(&fence->base);
@@ -131,10 +145,17 @@ v3d_irq(int irq, void *arg)
                struct v3d_fence *fence =
                        to_v3d_fence(v3d->csd_job->base.irq_fence);
                struct v3d_file_priv *file = v3d->csd_job->base.file->driver_priv;
+               u64 runtime = local_clock() - file->start_ns[V3D_CSD];
 
                file->enabled_ns[V3D_CSD] += local_clock() - file->start_ns[V3D_CSD];
                file->jobs_sent[V3D_CSD]++;
+               v3d->queue[V3D_CSD].jobs_sent++;
+
                file->start_ns[V3D_CSD] = 0;
+               v3d->queue[V3D_CSD].start_ns = 0;
+
+               file->enabled_ns[V3D_CSD] += runtime;
+               v3d->queue[V3D_CSD].enabled_ns += runtime;
 
                trace_v3d_csd_irq(&v3d->drm, fence->seqno);
                dma_fence_signal(&fence->base);
@@ -172,10 +193,17 @@ v3d_hub_irq(int irq, void *arg)
                struct v3d_fence *fence =
                        to_v3d_fence(v3d->tfu_job->base.irq_fence);
                struct v3d_file_priv *file = v3d->tfu_job->base.file->driver_priv;
+               u64 runtime = local_clock() - file->start_ns[V3D_TFU];
 
                file->enabled_ns[V3D_TFU] += local_clock() - file->start_ns[V3D_TFU];
                file->jobs_sent[V3D_TFU]++;
+               v3d->queue[V3D_TFU].jobs_sent++;
+
                file->start_ns[V3D_TFU] = 0;
+               v3d->queue[V3D_TFU].start_ns = 0;
+
+               file->enabled_ns[V3D_TFU] += runtime;
+               v3d->queue[V3D_TFU].enabled_ns += runtime;
 
                trace_v3d_tfu_irq(&v3d->drm, fence->seqno);
                dma_fence_signal(&fence->base);
index 19afceacda837f1037f6d9125744dcf0eb417ad8..fccbea2a5f2ebd5f5b6b100d710db1476544b16e 100644 (file)
@@ -110,6 +110,7 @@ static struct dma_fence *v3d_bin_job_run(struct drm_sched_job *sched_job)
                            job->start, job->end);
 
        file->start_ns[V3D_BIN] = local_clock();
+       v3d->queue[V3D_BIN].start_ns = file->start_ns[V3D_BIN];
 
        v3d_switch_perfmon(v3d, &job->base);
 
@@ -164,6 +165,7 @@ static struct dma_fence *v3d_render_job_run(struct drm_sched_job *sched_job)
                            job->start, job->end);
 
        file->start_ns[V3D_RENDER] = local_clock();
+       v3d->queue[V3D_RENDER].start_ns = file->start_ns[V3D_RENDER];
 
        v3d_switch_perfmon(v3d, &job->base);
 
@@ -199,6 +201,7 @@ v3d_tfu_job_run(struct drm_sched_job *sched_job)
        trace_v3d_submit_tfu(dev, to_v3d_fence(fence)->seqno);
 
        file->start_ns[V3D_TFU] = local_clock();
+       v3d->queue[V3D_TFU].start_ns = file->start_ns[V3D_TFU];
 
        V3D_WRITE(V3D_TFU_IIA(v3d->ver), job->args.iia);
        V3D_WRITE(V3D_TFU_IIS(v3d->ver), job->args.iis);
@@ -245,6 +248,7 @@ v3d_csd_job_run(struct drm_sched_job *sched_job)
        trace_v3d_submit_csd(dev, to_v3d_fence(fence)->seqno);
 
        file->start_ns[V3D_CSD] = local_clock();
+       v3d->queue[V3D_CSD].start_ns = file->start_ns[V3D_CSD];
 
        v3d_switch_perfmon(v3d, &job->base);
 
@@ -264,14 +268,23 @@ v3d_cache_clean_job_run(struct drm_sched_job *sched_job)
        struct v3d_job *job = to_v3d_job(sched_job);
        struct v3d_dev *v3d = job->v3d;
        struct v3d_file_priv *file = job->file->driver_priv;
+       u64 runtime;
 
        file->start_ns[V3D_CACHE_CLEAN] = local_clock();
+       v3d->queue[V3D_CACHE_CLEAN].start_ns = file->start_ns[V3D_CACHE_CLEAN];
 
        v3d_clean_caches(v3d);
 
-       file->enabled_ns[V3D_CACHE_CLEAN] += local_clock() - file->start_ns[V3D_CACHE_CLEAN];
+       runtime = local_clock() - file->start_ns[V3D_CACHE_CLEAN];
+
+       file->enabled_ns[V3D_CACHE_CLEAN] += runtime;
+       v3d->queue[V3D_CACHE_CLEAN].enabled_ns += runtime;
+
        file->jobs_sent[V3D_CACHE_CLEAN]++;
+       v3d->queue[V3D_CACHE_CLEAN].jobs_sent++;
+
        file->start_ns[V3D_CACHE_CLEAN] = 0;
+       v3d->queue[V3D_CACHE_CLEAN].start_ns = 0;
 
        return NULL;
 }
diff --git a/drivers/gpu/drm/v3d/v3d_sysfs.c b/drivers/gpu/drm/v3d/v3d_sysfs.c
new file mode 100644 (file)
index 0000000..d106845
--- /dev/null
@@ -0,0 +1,95 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Igalia S.L.
+ */
+
+#include <linux/sched/clock.h>
+#include <linux/sysfs.h>
+
+#include "v3d_drv.h"
+
+/*
+ * gpu_stats_show() - sysfs show callback for the "gpu_stats" attribute
+ *
+ * Emits a header followed by one line per V3D queue with the queue name, the
+ * timestamp, the number of jobs sent to that queue and the runtime, as can be
+ * seen here:
+ *
+ * queue        timestamp       jobs    runtime
+ * bin          239043069420    22620   17438164056
+ * render       239043069420    22619   27284814161
+ * tfu          239043069420    8763    394592566
+ * csd          239043069420    3168    10787905530
+ * cache_clean  239043069420    6127    237375940
+ *
+ * The runtime is the accumulated enabled_ns of the queue plus the time the
+ * job that is currently running, if any, has been active.
+ *
+ * Returns the total length emitted into @buf, as reported by sysfs_emit()
+ * and sysfs_emit_at().
+ */
+static ssize_t
+gpu_stats_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+       struct drm_device *drm = dev_get_drvdata(dev);
+       struct v3d_dev *v3d = to_v3d_dev(drm);
+       enum v3d_queue queue;
+       u64 timestamp = local_clock();
+       ssize_t len;
+
+       len = sysfs_emit(buf, "queue\ttimestamp\tjobs\truntime\n");
+
+       for (queue = 0; queue < V3D_MAX_QUEUES; queue++) {
+               /* Take a single snapshot of start_ns: the job completion
+                * paths reset it to 0, so testing and then re-reading the
+                * field could subtract 0 and report a bogus runtime.
+                * NOTE(review): this is still a lockless read; confirm
+                * whether READ_ONCE() or a seqcount is wanted here.
+                */
+               u64 start_ns = v3d->queue[queue].start_ns;
+               u64 active_runtime = 0;
+
+               /* A job started after the timestamp was sampled must not
+                * make the unsigned subtraction wrap around.
+                */
+               if (start_ns && timestamp > start_ns)
+                       active_runtime = timestamp - start_ns;
+
+               len += sysfs_emit_at(buf, len, "%s\t%llu\t%llu\t%llu\n",
+                                    v3d_queue_to_string(queue),
+                                    timestamp,
+                                    v3d->queue[queue].jobs_sent,
+                                    v3d->queue[queue].enabled_ns + active_runtime);
+       }
+
+       return len;
+}
+static DEVICE_ATTR_RO(gpu_stats);
+
+static struct attribute *v3d_sysfs_entries[] = {
+       &dev_attr_gpu_stats.attr,
+       NULL,
+};
+
+/* Nothing in this file writes to the group, so it can be const. */
+static const struct attribute_group v3d_sysfs_attr_group = {
+       .attrs = v3d_sysfs_entries,
+};
+
+/*
+ * Create the V3D sysfs attribute group on the kobject of @dev.
+ *
+ * Returns the result of sysfs_create_group().
+ */
+int
+v3d_sysfs_init(struct device *dev)
+{
+       return sysfs_create_group(&dev->kobj, &v3d_sysfs_attr_group);
+}
+
+/* Remove the V3D sysfs attribute group from the kobject of @dev. */
+void
+v3d_sysfs_destroy(struct device *dev)
+{
+       sysfs_remove_group(&dev->kobj, &v3d_sysfs_attr_group);
+}