drm/amdgpu: Convert to common fdinfo format v5
author		Christian König <christian.koenig@amd.com>
		Wed, 11 May 2022 09:06:26 +0000 (11:06 +0200)
committer	Alex Deucher <alexander.deucher@amd.com>
		Thu, 26 May 2022 18:56:34 +0000 (14:56 -0400)
Convert the fdinfo format to the one documented in drm-usage-stats.rst.

It turned out that the existing implementation was actually complete
nonsense. The calculated percentages did represent the usage of the
engine, but were computed over varying time slices.

So 10% usage for application A could mean something completely different
than 10% usage for application B.

Completely nuke that and just use the now standardized nanosecond
interface.
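For illustration, the output in the new format looks roughly like this
(field names as emitted by the code below; the values, the pdev and the
set of engines are made up):

  pasid:	32770
  drm-driver:	amdgpu
  drm-pdev:	0000:0b:00.0
  drm-memory-vram:	4096 KiB
  drm-memory-gtt:	2048 KiB
  drm-memory-cpu:	512 KiB
  drm-engine-gfx:	25662044 ns
  drm-engine-compute:	1056853 ns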

v2: drop the documentation change for now, nuke percentage calculation
v3: only account per hw_ip, move the time_spend counters to the ctx mgr.
v4: move general ctx changes into separate patch, rework the fdinfo to
    ctx_mgr interface so that all usages are calculated at once, drop
    some unnecessary and dangerous refcount dance.
v5: add one more comment on how we calculate the time spent

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Shashank Sharma <shashank.sharma@amd.com>
Cc: Daniel Vetter <daniel@ffwll.ch>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c

index a61e4c83a545a141fa87c3e021b1e97e65e6fb2d..7dc92ef36b2b06fa444e71c6962f71f32433128f 100644
@@ -162,17 +162,50 @@ static unsigned int amdgpu_ctx_get_hw_prio(struct amdgpu_ctx *ctx, u32 hw_ip)
        return hw_prio;
 }
 
+/* Calculate the time spent on the hw */
+static ktime_t amdgpu_ctx_fence_time(struct dma_fence *fence)
+{
+       struct drm_sched_fence *s_fence;
+
+       if (!fence)
+               return ns_to_ktime(0);
+
+       /* When the fence is not even scheduled it can't have spent time */
+       s_fence = to_drm_sched_fence(fence);
+       if (!test_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &s_fence->scheduled.flags))
+               return ns_to_ktime(0);
+
+       /* When it is still running, account for the time already spent */
+       if (!test_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &s_fence->finished.flags))
+               return ktime_sub(ktime_get(), s_fence->scheduled.timestamp);
+
+       return ktime_sub(s_fence->finished.timestamp,
+                        s_fence->scheduled.timestamp);
+}
+
+static ktime_t amdgpu_ctx_entity_time(struct amdgpu_ctx *ctx,
+                                     struct amdgpu_ctx_entity *centity)
+{
+       ktime_t res = ns_to_ktime(0);
+       uint32_t i;
+
+       spin_lock(&ctx->ring_lock);
+       for (i = 0; i < amdgpu_sched_jobs; i++) {
+               res = ktime_add(res, amdgpu_ctx_fence_time(centity->fences[i]));
+       }
+       spin_unlock(&ctx->ring_lock);
+       return res;
+}
 
 static int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, u32 hw_ip,
                                  const u32 ring)
 {
+       struct drm_gpu_scheduler **scheds = NULL, *sched = NULL;
        struct amdgpu_device *adev = ctx->mgr->adev;
        struct amdgpu_ctx_entity *entity;
-       struct drm_gpu_scheduler **scheds = NULL, *sched = NULL;
-       unsigned num_scheds = 0;
-       int32_t ctx_prio;
-       unsigned int hw_prio;
        enum drm_sched_priority drm_prio;
+       unsigned int hw_prio, num_scheds;
+       int32_t ctx_prio;
        int r;
 
        entity = kzalloc(struct_size(entity, fences, amdgpu_sched_jobs),
@@ -182,6 +215,7 @@ static int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, u32 hw_ip,
 
        ctx_prio = (ctx->override_priority == AMDGPU_CTX_PRIORITY_UNSET) ?
                        ctx->init_priority : ctx->override_priority;
+       entity->hw_ip = hw_ip;
        entity->sequence = 1;
        hw_prio = amdgpu_ctx_get_hw_prio(ctx, hw_ip);
        drm_prio = amdgpu_ctx_to_drm_sched_prio(ctx_prio);
@@ -220,6 +254,23 @@ error_free_entity:
        return r;
 }
 
+static ktime_t amdgpu_ctx_fini_entity(struct amdgpu_ctx_entity *entity)
+{
+       ktime_t res = ns_to_ktime(0);
+       int i;
+
+       if (!entity)
+               return res;
+
+       for (i = 0; i < amdgpu_sched_jobs; ++i) {
+               res = ktime_add(res, amdgpu_ctx_fence_time(entity->fences[i]));
+               dma_fence_put(entity->fences[i]);
+       }
+
+       kfree(entity);
+       return res;
+}
+
 static int amdgpu_ctx_init(struct amdgpu_ctx_mgr *mgr, int32_t priority,
                           struct drm_file *filp, struct amdgpu_ctx *ctx)
 {
@@ -246,20 +297,6 @@ static int amdgpu_ctx_init(struct amdgpu_ctx_mgr *mgr, int32_t priority,
        return 0;
 }
 
-static void amdgpu_ctx_fini_entity(struct amdgpu_ctx_entity *entity)
-{
-
-       int i;
-
-       if (!entity)
-               return;
-
-       for (i = 0; i < amdgpu_sched_jobs; ++i)
-               dma_fence_put(entity->fences[i]);
-
-       kfree(entity);
-}
-
 static int amdgpu_ctx_get_stable_pstate(struct amdgpu_ctx *ctx,
                                        u32 *stable_pstate)
 {
@@ -351,8 +388,10 @@ static void amdgpu_ctx_fini(struct kref *ref)
 
        for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
                for (j = 0; j < AMDGPU_MAX_ENTITY_NUM; ++j) {
-                       amdgpu_ctx_fini_entity(ctx->entities[i][j]);
-                       ctx->entities[i][j] = NULL;
+                       ktime_t spend;
+
+                       spend = amdgpu_ctx_fini_entity(ctx->entities[i][j]);
+                       atomic64_add(ktime_to_ns(spend), &mgr->time_spend[i]);
                }
        }
 
@@ -689,6 +728,9 @@ uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx,
        centity->sequence++;
        spin_unlock(&ctx->ring_lock);
 
+       atomic64_add(ktime_to_ns(amdgpu_ctx_fence_time(other)),
+                    &ctx->mgr->time_spend[centity->hw_ip]);
+
        dma_fence_put(other);
        return seq;
 }
@@ -795,9 +837,14 @@ int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx,
 void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr,
                         struct amdgpu_device *adev)
 {
+       unsigned int i;
+
        mgr->adev = adev;
        mutex_init(&mgr->lock);
        idr_init(&mgr->ctx_handles);
+
+       for (i = 0; i < AMDGPU_HW_IP_NUM; ++i)
+               atomic64_set(&mgr->time_spend[i], 0);
 }
 
 long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout)
@@ -873,80 +920,38 @@ void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr)
        mutex_destroy(&mgr->lock);
 }
 
-static void amdgpu_ctx_fence_time(struct amdgpu_ctx *ctx,
-               struct amdgpu_ctx_entity *centity, ktime_t *total, ktime_t *max)
-{
-       ktime_t now, t1;
-       uint32_t i;
-
-       *total = *max = 0;
-
-       now = ktime_get();
-       for (i = 0; i < amdgpu_sched_jobs; i++) {
-               struct dma_fence *fence;
-               struct drm_sched_fence *s_fence;
-
-               spin_lock(&ctx->ring_lock);
-               fence = dma_fence_get(centity->fences[i]);
-               spin_unlock(&ctx->ring_lock);
-               if (!fence)
-                       continue;
-               s_fence = to_drm_sched_fence(fence);
-               if (!dma_fence_is_signaled(&s_fence->scheduled)) {
-                       dma_fence_put(fence);
-                       continue;
-               }
-               t1 = s_fence->scheduled.timestamp;
-               if (!ktime_before(t1, now)) {
-                       dma_fence_put(fence);
-                       continue;
-               }
-               if (dma_fence_is_signaled(&s_fence->finished) &&
-                       s_fence->finished.timestamp < now)
-                       *total += ktime_sub(s_fence->finished.timestamp, t1);
-               else
-                       *total += ktime_sub(now, t1);
-               t1 = ktime_sub(now, t1);
-               dma_fence_put(fence);
-               *max = max(t1, *max);
-       }
-}
-
-ktime_t amdgpu_ctx_mgr_fence_usage(struct amdgpu_ctx_mgr *mgr, uint32_t hwip,
-               uint32_t idx, uint64_t *elapsed)
+void amdgpu_ctx_mgr_usage(struct amdgpu_ctx_mgr *mgr,
+                         ktime_t usage[AMDGPU_HW_IP_NUM])
 {
-       struct idr *idp;
        struct amdgpu_ctx *ctx;
+       unsigned int hw_ip, i;
        uint32_t id;
-       struct amdgpu_ctx_entity *centity;
-       ktime_t total = 0, max = 0;
 
-       if (idx >= AMDGPU_MAX_ENTITY_NUM)
-               return 0;
-       idp = &mgr->ctx_handles;
+       /*
+        * This is a little bit racy because it can be that a ctx or a fence
+        * is destroyed just at the moment we try to account it. But that is
+        * ok since exactly that case is explicitly allowed by the interface.
+        */
        mutex_lock(&mgr->lock);
-       idr_for_each_entry(idp, ctx, id) {
-               ktime_t ttotal, tmax;
+       for (hw_ip = 0; hw_ip < AMDGPU_HW_IP_NUM; ++hw_ip) {
+               uint64_t ns = atomic64_read(&mgr->time_spend[hw_ip]);
 
-               if (!ctx->entities[hwip][idx])
-                       continue;
+               usage[hw_ip] = ns_to_ktime(ns);
+       }
 
-               centity = ctx->entities[hwip][idx];
-               amdgpu_ctx_fence_time(ctx, centity, &ttotal, &tmax);
+       idr_for_each_entry(&mgr->ctx_handles, ctx, id) {
+               for (hw_ip = 0; hw_ip < AMDGPU_HW_IP_NUM; ++hw_ip) {
+                       for (i = 0; i < amdgpu_ctx_num_entities[hw_ip]; ++i) {
+                               struct amdgpu_ctx_entity *centity;
+                               ktime_t spend;
 
-               /* Harmonic mean approximation diverges for very small
-                * values. If ratio < 0.01% ignore
-                */
-               if (AMDGPU_CTX_FENCE_USAGE_MIN_RATIO(tmax, ttotal))
-                       continue;
-
-               total = ktime_add(total, ttotal);
-               max = ktime_after(tmax, max) ? tmax : max;
+                               centity = ctx->entities[hw_ip][i];
+                               if (!centity)
+                                       continue;
+                               spend = amdgpu_ctx_entity_time(ctx, centity);
+                               usage[hw_ip] = ktime_add(usage[hw_ip], spend);
+                       }
+               }
        }
-
        mutex_unlock(&mgr->lock);
-       if (elapsed)
-               *elapsed = max;
-
-       return total;
 }
index 681050bc828c3741675f07085f9b30a00a0e247d..cc7c8afff4144ccac981315a4e2657974e2d3ba3 100644
 #ifndef __AMDGPU_CTX_H__
 #define __AMDGPU_CTX_H__
 
+#include <linux/ktime.h>
+#include <linux/types.h>
+
 #include "amdgpu_ring.h"
 
 struct drm_device;
 struct drm_file;
 struct amdgpu_fpriv;
+struct amdgpu_ctx_mgr;
 
 #define AMDGPU_MAX_ENTITY_NUM 4
-#define AMDGPU_CTX_FENCE_USAGE_MIN_RATIO(max, total) ((max) > 16384ULL*(total))
 
 struct amdgpu_ctx_entity {
+       uint32_t                hw_ip;
        uint64_t                sequence;
        struct drm_sched_entity entity;
        struct dma_fence        *fences[];
@@ -61,6 +65,7 @@ struct amdgpu_ctx_mgr {
        struct mutex            lock;
        /* protected by lock */
        struct idr              ctx_handles;
+       atomic64_t              time_spend[AMDGPU_HW_IP_NUM];
 };
 
 extern const unsigned int amdgpu_ctx_num_entities[AMDGPU_HW_IP_NUM];
@@ -90,6 +95,7 @@ void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr,
 void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr);
 long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout);
 void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr);
-ktime_t amdgpu_ctx_mgr_fence_usage(struct amdgpu_ctx_mgr *mgr, uint32_t hwip,
-               uint32_t idx, uint64_t *elapsed);
+void amdgpu_ctx_mgr_usage(struct amdgpu_ctx_mgr *mgr,
+                         ktime_t usage[AMDGPU_HW_IP_NUM]);
+
 #endif
index 5a6857c44bb6698f1618618ac2f150d03be9579e..4d453845235cc1011dc7414dcc997568e2cabd61 100644
@@ -32,6 +32,7 @@
 
 #include <drm/amdgpu_drm.h>
 #include <drm/drm_debugfs.h>
+#include <drm/drm_drv.h>
 
 #include "amdgpu.h"
 #include "amdgpu_vm.h"
@@ -54,26 +55,23 @@ static const char *amdgpu_ip_name[AMDGPU_HW_IP_NUM] = {
 
 void amdgpu_show_fdinfo(struct seq_file *m, struct file *f)
 {
-       struct amdgpu_fpriv *fpriv;
-       uint32_t bus, dev, fn, i, domain;
-       uint64_t vram_mem = 0, gtt_mem = 0, cpu_mem = 0;
        struct drm_file *file = f->private_data;
        struct amdgpu_device *adev = drm_to_adev(file->minor->dev);
+       struct amdgpu_fpriv *fpriv = file->driver_priv;
+
+       uint64_t vram_mem = 0, gtt_mem = 0, cpu_mem = 0;
+       ktime_t usage[AMDGPU_HW_IP_NUM];
+       uint32_t bus, dev, fn, domain;
        struct amdgpu_bo *root;
+       unsigned int hw_ip;
        int ret;
 
-       ret = amdgpu_file_to_fpriv(f, &fpriv);
-       if (ret)
-               return;
        bus = adev->pdev->bus->number;
        domain = pci_domain_nr(adev->pdev->bus);
        dev = PCI_SLOT(adev->pdev->devfn);
        fn = PCI_FUNC(adev->pdev->devfn);
 
-       root = amdgpu_bo_ref(fpriv->vm.root.bo);
-       if (!root)
-               return;
-
+       root = fpriv->vm.root.bo;
        ret = amdgpu_bo_reserve(root, false);
        if (ret) {
                DRM_ERROR("Fail to reserve bo\n");
@@ -81,31 +79,26 @@ void amdgpu_show_fdinfo(struct seq_file *m, struct file *f)
        }
        amdgpu_vm_get_memory(&fpriv->vm, &vram_mem, &gtt_mem, &cpu_mem);
        amdgpu_bo_unreserve(root);
-       amdgpu_bo_unref(&root);
 
-       seq_printf(m, "pdev:\t%04x:%02x:%02x.%d\npasid:\t%u\n", domain, bus,
-                       dev, fn, fpriv->vm.pasid);
-       seq_printf(m, "vram mem:\t%llu kB\n", vram_mem/1024UL);
-       seq_printf(m, "gtt mem:\t%llu kB\n", gtt_mem/1024UL);
-       seq_printf(m, "cpu mem:\t%llu kB\n", cpu_mem/1024UL);
-       for (i = 0; i < AMDGPU_HW_IP_NUM; i++) {
-               uint32_t count = amdgpu_ctx_num_entities[i];
-               int idx = 0;
-               uint64_t total = 0, min = 0;
-               uint32_t perc, frac;
+       amdgpu_ctx_mgr_usage(&fpriv->ctx_mgr, usage);
 
-               for (idx = 0; idx < count; idx++) {
-                       total = amdgpu_ctx_mgr_fence_usage(&fpriv->ctx_mgr,
-                               i, idx, &min);
-                       if ((total == 0) || (min == 0))
-                               continue;
+       /*
+        * ******************************************************************
+        * For a description of the text output format see drm-usage-stats.rst!
+        * ******************************************************************
+        * ******************************************************************
+        */
 
-                       perc = div64_u64(10000 * total, min);
-                       frac = perc % 100;
+       seq_printf(m, "pasid:\t%u\n", fpriv->vm.pasid);
+       seq_printf(m, "drm-driver:\t%s\n", file->minor->dev->driver->name);
+       seq_printf(m, "drm-pdev:\t%04x:%02x:%02x.%d\n", domain, bus, dev, fn);
+       seq_printf(m, "drm-memory-vram:\t%llu KiB\n", vram_mem/1024UL);
+       seq_printf(m, "drm-memory-gtt: \t%llu KiB\n", gtt_mem/1024UL);
+       seq_printf(m, "drm-memory-cpu: \t%llu KiB\n", cpu_mem/1024UL);
+       for (hw_ip = 0; hw_ip < AMDGPU_HW_IP_NUM; ++hw_ip) {
+               if (!usage[hw_ip])
+                       continue;
 
-                       seq_printf(m, "%s%d:\t%d.%d%%\n",
-                                       amdgpu_ip_name[i],
-                                       idx, perc/100, frac);
-               }
+               seq_printf(m, "drm-engine-%s:\t%Ld ns\n", amdgpu_ip_name[hw_ip],
+                          ktime_to_ns(usage[hw_ip]));
        }
 }
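
The gain from the nanosecond interface is that the percentage
computation moves to userspace, where the sampling period is under the
tool's control. A minimal sketch, not part of this patch, of how a
monitoring tool could turn two fdinfo samples into a well-defined
percentage; the helper names and the parsing details are assumptions:

/*
 * Hypothetical userspace helpers: read a drm-engine-<name> counter out
 * of a /proc/<pid>/fdinfo/<fd> dump and derive utilization over a
 * wall-clock period chosen by the sampling tool.
 */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Find the "drm-engine-<engine>:" line and return its value in ns. */
static uint64_t parse_engine_ns(FILE *fdinfo, const char *engine)
{
	char key[64], line[256];
	uint64_t ns = 0;

	snprintf(key, sizeof(key), "drm-engine-%s:", engine);
	while (fgets(line, sizeof(line), fdinfo)) {
		if (!strncmp(line, key, strlen(key))) {
			sscanf(line + strlen(key), " %" SCNu64, &ns);
			break;
		}
	}
	return ns;
}

/* Busy percentage of one engine over period_ns of wall-clock time. */
static double engine_busy_percent(uint64_t ns_before, uint64_t ns_after,
				  uint64_t period_ns)
{
	/* Guard against a zero period and racy, non-advancing samples. */
	if (!period_ns || ns_after < ns_before)
		return 0.0;
	return 100.0 * (double)(ns_after - ns_before) / (double)period_ns;
}

Because both samples come from the same steadily accumulating
per-client counter, 10% for application A now means exactly the same as
10% for application B, independent of how long each context's fence
ring happens to be.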