From fac91dd54f3b6f5dbd20c1cb26e59eceb128ca42 Mon Sep 17 00:00:00 2001 From: Ofir Bitton Date: Sun, 21 May 2023 10:24:13 +0300 Subject: [PATCH] accel/habanalabs: add event queue extra validation In order to increase reliability of the event queue interface, we apply to Gaudi2 the same mechanism we have in Gaudi1. The extra validation is basically checking that the received event index matches the expected index. Signed-off-by: Ofir Bitton Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/accel/habanalabs/common/irq.c | 2 +- drivers/accel/habanalabs/gaudi2/gaudi2.c | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/accel/habanalabs/common/irq.c b/drivers/accel/habanalabs/common/irq.c index c67895b1cdeb1..b1010d206c2ef 100644 --- a/drivers/accel/habanalabs/common/irq.c +++ b/drivers/accel/habanalabs/common/irq.c @@ -430,7 +430,7 @@ irqreturn_t hl_irq_handler_eq(int irq, void *arg) cur_eqe_index = FIELD_GET(EQ_CTL_INDEX_MASK, cur_eqe); if ((hdev->event_queue.check_eqe_index) && (((eq->prev_eqe_index + 1) & EQ_CTL_INDEX_MASK) != cur_eqe_index)) { - dev_dbg(hdev->dev, + dev_err(hdev->dev, "EQE %#x in queue is ready but index does not match %d!=%d", cur_eqe, ((eq->prev_eqe_index + 1) & EQ_CTL_INDEX_MASK), diff --git a/drivers/accel/habanalabs/gaudi2/gaudi2.c b/drivers/accel/habanalabs/gaudi2/gaudi2.c index 0d41adf4792cd..20c4583f12b0d 100644 --- a/drivers/accel/habanalabs/gaudi2/gaudi2.c +++ b/drivers/accel/habanalabs/gaudi2/gaudi2.c @@ -3619,6 +3619,12 @@ static int gaudi2_sw_init(struct hl_device *hdev) prop->supports_compute_reset = true; + /* Event queue sanity check added in FW version 1.11 */ + if (hl_is_fw_sw_ver_below(hdev, 1, 11)) + hdev->event_queue.check_eqe_index = false; + else + hdev->event_queue.check_eqe_index = true; + hdev->asic_funcs->set_pci_memory_regions(hdev); rc = gaudi2_special_blocks_iterator_config(hdev); -- 2.30.2