habanalabs: count dropped CS because max CS in-flight
author: Oded Gabbay <oded.gabbay@gmail.com>
Tue, 1 Sep 2020 08:22:05 +0000 (11:22 +0300)
committer: Oded Gabbay <oded.gabbay@gmail.com>
Tue, 22 Sep 2020 15:49:52 +0000 (18:49 +0300)
There is a case where the user reaches the maximum number of CS in-flight.
In that case, the driver rejects the new CS of the user with EAGAIN. Count
that event so the user can query the driver later to see if it happened.

Reviewed-by: Tomer Tayar <ttayar@habana.ai>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
drivers/misc/habanalabs/common/command_submission.c
include/uapi/misc/habanalabs.h

index a811a9fdf13b735965f5eb897664baffefcd5e9c..470bffbe9bdc7afb2e1b119447b9c785ec4dcf68 100644 (file)
@@ -252,6 +252,8 @@ static void cs_counters_aggregate(struct hl_device *hdev, struct hl_ctx *ctx)
                        ctx->cs_counters.parsing_drop_cnt;
        hdev->aggregated_cs_counters.queue_full_drop_cnt +=
                        ctx->cs_counters.queue_full_drop_cnt;
+       hdev->aggregated_cs_counters.max_cs_in_flight_drop_cnt +=
+                       ctx->cs_counters.max_cs_in_flight_drop_cnt;
 }
 
 static void cs_do_release(struct kref *ref)
@@ -431,8 +433,9 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
                                (hdev->asic_prop.max_pending_cs - 1)];
 
        if (other && !completion_done(&other->completion)) {
-               dev_dbg(hdev->dev,
+               dev_dbg_ratelimited(hdev->dev,
                        "Rejecting CS because of too many in-flights CS\n");
+               ctx->cs_counters.max_cs_in_flight_drop_cnt++;
                rc = -EAGAIN;
                goto free_fence;
        }
index a2dcad29340feb4a8635d3469dbac8b03f07f8b0..69fb44d352922f98b678fcd946156ffb7db3b2b4 100644 (file)
@@ -401,12 +401,14 @@ struct hl_info_sync_manager {
  * @parsing_drop_cnt: dropped due to error in packet parsing
  * @queue_full_drop_cnt: dropped due to queue full
  * @device_in_reset_drop_cnt: dropped due to device in reset
+ * @max_cs_in_flight_drop_cnt: dropped due to maximum CS in-flight
  */
 struct hl_cs_counters {
        __u64 out_of_mem_drop_cnt;
        __u64 parsing_drop_cnt;
        __u64 queue_full_drop_cnt;
        __u64 device_in_reset_drop_cnt;
+       __u64 max_cs_in_flight_drop_cnt;
 };
 
 struct hl_info_cs_counters {