habanalabs: Increase queues depth
author Ofir Bitton <obitton@habana.ai>
Mon, 15 Jun 2020 09:09:50 +0000 (12:09 +0300)
committer Oded Gabbay <oded.gabbay@gmail.com>
Fri, 24 Jul 2020 17:31:35 +0000 (20:31 +0300)
After the recent increase in the number of concurrent command submissions
(CS), the queue depths must also be increased, since much more work can
now be done concurrently. All external queue depths were increased to
4096, and Gaudi's internal queue depths were increased to 1024.

Signed-off-by: Ofir Bitton <obitton@habana.ai>
Reviewed-by: Oded Gabbay <oded.gabbay@gmail.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
drivers/misc/habanalabs/gaudi/gaudiP.h
drivers/misc/habanalabs/habanalabs.h
drivers/misc/habanalabs/hw_queue.c
drivers/misc/habanalabs/irq.c

index 3958fe38c8ee0e47b76e78dffd841efd4796bc77..bdc5f96085a7783c60a7e320216611098fda5d5e 100644 (file)
 
 /* Internal QMANs PQ sizes */
 
-#define MME_QMAN_LENGTH                        64
+#define MME_QMAN_LENGTH                        1024
 #define MME_QMAN_SIZE_IN_BYTES         (MME_QMAN_LENGTH * QMAN_PQ_ENTRY_SIZE)
 
-#define HBM_DMA_QMAN_LENGTH            64
+#define HBM_DMA_QMAN_LENGTH            1024
 #define HBM_DMA_QMAN_SIZE_IN_BYTES     \
                                (HBM_DMA_QMAN_LENGTH * QMAN_PQ_ENTRY_SIZE)
 
-#define TPC_QMAN_LENGTH                        64
+#define TPC_QMAN_LENGTH                        1024
 #define TPC_QMAN_SIZE_IN_BYTES         (TPC_QMAN_LENGTH * QMAN_PQ_ENTRY_SIZE)
 
 #define SRAM_USER_BASE_OFFSET  GAUDI_DRIVER_SRAM_RESERVED_SIZE_FROM_START
index 4e68a41cce77fd4b04fce83171d9771ca411ba8e..e4d6f7c9119400074c1cb2c386d0609a848a1d57 100644 (file)
@@ -378,38 +378,15 @@ struct hl_cb {
 
 struct hl_cs_job;
 
-/*
- * Currently, there are two limitations on the maximum length of a queue:
- *
- * 1. The memory footprint of the queue. The current allocated space for the
- *    queue is PAGE_SIZE. Because each entry in the queue is HL_BD_SIZE,
- *    the maximum length of the queue can be PAGE_SIZE / HL_BD_SIZE,
- *    which currently is 4096/16 = 256 entries.
- *
- *    To increase that, we need either to decrease the size of the
- *    BD (difficult), or allocate more than a single page (easier).
- *
- * 2. Because the size of the JOB handle field in the BD CTL / completion queue
- *    is 10-bit, we can have up to 1024 open jobs per hardware queue.
- *    Therefore, each queue can hold up to 1024 entries.
- *
- * HL_QUEUE_LENGTH is in units of struct hl_bd.
- * HL_QUEUE_LENGTH * sizeof(struct hl_bd) should be <= HL_PAGE_SIZE
- */
-
-#define HL_PAGE_SIZE                   4096 /* minimum page size */
-/* Must be power of 2 (HL_PAGE_SIZE / HL_BD_SIZE) */
-#define HL_QUEUE_LENGTH                        256
+/* Queue length of external and HW queues */
+#define HL_QUEUE_LENGTH                        4096
 #define HL_QUEUE_SIZE_IN_BYTES         (HL_QUEUE_LENGTH * HL_BD_SIZE)
 
-/*
- * HL_CQ_LENGTH is in units of struct hl_cq_entry.
- * HL_CQ_LENGTH should be <= HL_PAGE_SIZE
- */
+/* HL_CQ_LENGTH is in units of struct hl_cq_entry */
 #define HL_CQ_LENGTH                   HL_QUEUE_LENGTH
 #define HL_CQ_SIZE_IN_BYTES            (HL_CQ_LENGTH * HL_CQ_ENTRY_SIZE)
 
-/* Must be power of 2 (HL_PAGE_SIZE / HL_EQ_ENTRY_SIZE) */
+/* Must be power of 2 */
 #define HL_EQ_LENGTH                   64
 #define HL_EQ_SIZE_IN_BYTES            (HL_EQ_LENGTH * HL_EQ_ENTRY_SIZE)
 
index 27f0c34b63b9b03e5862c7a1740a5e4a25d68c95..f5a10a5ac3003435652da1131a491bf44b2dccfb 100644 (file)
@@ -780,8 +780,6 @@ static int queue_init(struct hl_device *hdev, struct hl_hw_queue *q,
 {
        int rc;
 
-       BUILD_BUG_ON(HL_QUEUE_SIZE_IN_BYTES > HL_PAGE_SIZE);
-
        q->hw_queue_id = hw_queue_id;
 
        switch (q->queue_type) {
index 6981d67153b11c2118e3412a45b90f656a935500..7a4878edb1a3e1040c3d388471ab033b39cf977a 100644 (file)
@@ -220,8 +220,6 @@ int hl_cq_init(struct hl_device *hdev, struct hl_cq *q, u32 hw_queue_id)
 {
        void *p;
 
-       BUILD_BUG_ON(HL_CQ_SIZE_IN_BYTES > HL_PAGE_SIZE);
-
        p = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, HL_CQ_SIZE_IN_BYTES,
                                &q->bus_address, GFP_KERNEL | __GFP_ZERO);
        if (!p)
@@ -282,8 +280,6 @@ int hl_eq_init(struct hl_device *hdev, struct hl_eq *q)
 {
        void *p;
 
-       BUILD_BUG_ON(HL_EQ_SIZE_IN_BYTES > HL_PAGE_SIZE);
-
        p = hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev,
                                                        HL_EQ_SIZE_IN_BYTES,
                                                        &q->bus_address);