accel/habanalabs: fix bug in decoder wait for cs completion
authorfarah kassabri <fkassabri@habana.ai>
Wed, 20 Sep 2023 09:22:02 +0000 (12:22 +0300)
committerOded Gabbay <ogabbay@kernel.org>
Mon, 9 Oct 2023 09:37:24 +0000 (12:37 +0300)
The decoder interrupts are handled in the interrupt context
same as all user interrupts.
In such case, the wait list should be protected by
spin_lock_irqsave in order to avoid deadlock that might happen
with the user submission flow.

Signed-off-by: farah kassabri <fkassabri@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
drivers/accel/habanalabs/common/command_submission.c

index 4f7b70d9754c314a7945adecae6833af89a5ef77..3aa6eeef443b4174466e8e397e30795da293091a 100644 (file)
@@ -3526,7 +3526,7 @@ static int _hl_interrupt_wait_ioctl_user_addr(struct hl_device *hdev, struct hl_
                                u64 *timestamp)
 {
        struct hl_user_pending_interrupt *pend;
-       unsigned long timeout;
+       unsigned long timeout, flags;
        u64 completion_value;
        long completion_rc;
        int rc = 0;
@@ -3546,9 +3546,9 @@ static int _hl_interrupt_wait_ioctl_user_addr(struct hl_device *hdev, struct hl_
        /* Add pending user interrupt to relevant list for the interrupt
         * handler to monitor
         */
-       spin_lock(&interrupt->wait_list_lock);
+       spin_lock_irqsave(&interrupt->wait_list_lock, flags);
        list_add_tail(&pend->list_node, &interrupt->wait_list_head);
-       spin_unlock(&interrupt->wait_list_lock);
+       spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
 
        /* We check for completion value as interrupt could have been received
         * before we added the node to the wait list
@@ -3579,14 +3579,14 @@ wait_again:
         * If comparison fails, keep waiting until timeout expires
         */
        if (completion_rc > 0) {
-               spin_lock(&interrupt->wait_list_lock);
+               spin_lock_irqsave(&interrupt->wait_list_lock, flags);
                /* reinit_completion must be called before we check for user
                 * completion value, otherwise, if interrupt is received after
                 * the comparison and before the next wait_for_completion,
                 * we will reach timeout and fail
                 */
                reinit_completion(&pend->fence.completion);
-               spin_unlock(&interrupt->wait_list_lock);
+               spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
 
                if (copy_from_user(&completion_value, u64_to_user_ptr(user_address), 8)) {
                        dev_err(hdev->dev, "Failed to copy completion value from user\n");
@@ -3623,9 +3623,9 @@ wait_again:
        }
 
 remove_pending_user_interrupt:
-       spin_lock(&interrupt->wait_list_lock);
+       spin_lock_irqsave(&interrupt->wait_list_lock, flags);
        list_del(&pend->list_node);
-       spin_unlock(&interrupt->wait_list_lock);
+       spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
 
        *timestamp = ktime_to_ns(pend->fence.timestamp);