accel/habanalabs: add event queue extra validation
author Ofir Bitton <obitton@habana.ai>
Sun, 21 May 2023 07:24:13 +0000 (10:24 +0300)
committer Oded Gabbay <ogabbay@kernel.org>
Thu, 8 Jun 2023 09:35:56 +0000 (12:35 +0300)
To increase the reliability of the event queue interface, apply to
Gaudi2 the same mechanism that is already used in Gaudi1.
The extra validation checks that the index of each received event
matches the expected index, i.e. the previous event's index plus one,
wrapped by the event-queue index mask.

Signed-off-by: Ofir Bitton <obitton@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
drivers/accel/habanalabs/common/irq.c
drivers/accel/habanalabs/gaudi2/gaudi2.c

index c67895b1cdeb18b91222bff22647d03a8d2d81e7..b1010d206c2ef1fa95914fd1aa92b2d1772e7b0a 100644 (file)
@@ -430,7 +430,7 @@ irqreturn_t hl_irq_handler_eq(int irq, void *arg)
                cur_eqe_index = FIELD_GET(EQ_CTL_INDEX_MASK, cur_eqe);
                if ((hdev->event_queue.check_eqe_index) &&
                                (((eq->prev_eqe_index + 1) & EQ_CTL_INDEX_MASK) != cur_eqe_index)) {
-                       dev_dbg(hdev->dev,
+                       dev_err(hdev->dev,
                                "EQE %#x in queue is ready but index does not match %d!=%d",
                                cur_eqe,
                                ((eq->prev_eqe_index + 1) & EQ_CTL_INDEX_MASK),
index 0d41adf4792cd6395acd7bee33a7f5671e96cda9..20c4583f12b0d20cfc58207e2ba67bcef08e5ba1 100644 (file)
@@ -3619,6 +3619,12 @@ static int gaudi2_sw_init(struct hl_device *hdev)
 
        prop->supports_compute_reset = true;
 
+       /* Event queue sanity check added in FW version 1.11 */
+       if (hl_is_fw_sw_ver_below(hdev, 1, 11))
+               hdev->event_queue.check_eqe_index = false;
+       else
+               hdev->event_queue.check_eqe_index = true;
+
        hdev->asic_funcs->set_pci_memory_regions(hdev);
 
        rc = gaudi2_special_blocks_iterator_config(hdev);