Bluetooth: Add ncmd=0 recovery handling
author Manish Mandlik <mmandlik@google.com>
Thu, 29 Apr 2021 17:24:22 +0000 (10:24 -0700)
committer Marcel Holtmann <marcel@holtmann.org>
Sat, 26 Jun 2021 05:12:27 +0000 (07:12 +0200)
In a command status or command complete event, the controller may set
ncmd=0, indicating that it is not accepting any more commands. In such a
case, the host holds off sending any more commands to the controller. If
the controller doesn't recover from this condition, the host will wait
forever, or until the user decides that Bluetooth is broken and power
cycles it.

This patch triggers a hardware error to reset the controller and
driver when it gets into such a state, as there is no other way out.

Reviewed-by: Abhishek Pandit-Subedi <abhishekpandit@chromium.org>
Signed-off-by: Manish Mandlik <mmandlik@google.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
include/net/bluetooth/hci.h
include/net/bluetooth/hci_core.h
net/bluetooth/hci_core.c
net/bluetooth/hci_event.c

index ea4ae551c426872b1ac2ce6b8a0d0a375d8b8225..c4b0650fb9ae85b41eaa9aaac245af8ae4ec7997 100644 (file)
@@ -339,6 +339,7 @@ enum {
 #define HCI_PAIRING_TIMEOUT    msecs_to_jiffies(60000) /* 60 seconds */
 #define HCI_INIT_TIMEOUT       msecs_to_jiffies(10000) /* 10 seconds */
 #define HCI_CMD_TIMEOUT                msecs_to_jiffies(2000)  /* 2 seconds */
+#define HCI_NCMD_TIMEOUT       msecs_to_jiffies(4000)  /* 4 seconds */
 #define HCI_ACL_TX_TIMEOUT     msecs_to_jiffies(45000) /* 45 seconds */
 #define HCI_AUTO_OFF_TIMEOUT   msecs_to_jiffies(2000)  /* 2 seconds */
 #define HCI_POWER_OFF_TIMEOUT  msecs_to_jiffies(5000)  /* 5 seconds */
index 8f5f390363f54107c39e9e1e93f537425e279660..43b08bebae74e312b1693450a82ff20ccae20474 100644 (file)
@@ -470,6 +470,7 @@ struct hci_dev {
        struct delayed_work     service_cache;
 
        struct delayed_work     cmd_timer;
+       struct delayed_work     ncmd_timer;
 
        struct work_struct      rx_work;
        struct work_struct      cmd_work;
index 25484bb0773d5276e4f866905411935a077ff4e7..572f2362ddb72caab603a8a5ebf7ca8114c8f43c 100644 (file)
@@ -1730,6 +1730,7 @@ int hci_dev_do_close(struct hci_dev *hdev)
        }
 
        cancel_delayed_work(&hdev->power_off);
+       cancel_delayed_work(&hdev->ncmd_timer);
 
        hci_request_cancel_all(hdev);
        hci_req_sync_lock(hdev);
@@ -2777,6 +2778,24 @@ static void hci_cmd_timeout(struct work_struct *work)
        queue_work(hdev->workqueue, &hdev->cmd_work);
 }
 
+/* ncmd_timer work handler: runs when ncmd = 0 outlasted HCI_NCMD_TIMEOUT */
+static void hci_ncmd_timeout(struct work_struct *work)
+{
+       struct hci_dev *hdev = container_of(work, struct hci_dev,
+                                           ncmd_timer.work);
+
+       bt_dev_err(hdev, "Controller not accepting commands anymore: ncmd = 0");
+
+       /* Only log while HCI_INIT is set: no event may be injected during
+        * init, and that procedure has its own timeout handling.
+        */
+       if (test_bit(HCI_INIT, &hdev->flags))
+               return;
+
+       /* Irrecoverable: have hci_reset_dev() inject a Hardware Error event */
+       hci_reset_dev(hdev);
+}
+
 struct oob_data *hci_find_remote_oob_data(struct hci_dev *hdev,
                                          bdaddr_t *bdaddr, u8 bdaddr_type)
 {
@@ -3841,6 +3860,7 @@ struct hci_dev *hci_alloc_dev(void)
        init_waitqueue_head(&hdev->suspend_wait_q);
 
        INIT_DELAYED_WORK(&hdev->cmd_timer, hci_cmd_timeout);
+       INIT_DELAYED_WORK(&hdev->ncmd_timer, hci_ncmd_timeout);
 
        hci_request_setup(hdev);
 
@@ -4078,6 +4098,8 @@ int hci_reset_dev(struct hci_dev *hdev)
        hci_skb_pkt_type(skb) = HCI_EVENT_PKT;
        skb_put_data(skb, hw_err, 3);
 
+       bt_dev_err(hdev, "Injecting HCI hardware error event");
+
        /* Send Hardware Error to upper stack */
        return hci_recv_frame(hdev, skb);
 }
index 59c5329354e122d6d66809c0e0e390e284961713..18339ebc595976b70221ebf1defdb9a24babb3c0 100644 (file)
@@ -3268,6 +3268,23 @@ unlock:
        hci_dev_unlock(hdev);
 }
 
+static inline void handle_cmd_cnt_and_timer(struct hci_dev *hdev,
+                                           u16 opcode, u8 ncmd)
+{      /* Shared by the Command Complete and Command Status handlers */
+       if (opcode != HCI_OP_NOP)       /* cmd_timer keeps running on NOP */
+               cancel_delayed_work(&hdev->cmd_timer);
+
+       if (!test_bit(HCI_RESET, &hdev->flags)) { /* untouched during reset */
+               if (ncmd) {     /* nonzero ncmd: stop timer, cmd_cnt = 1 */
+                       cancel_delayed_work(&hdev->ncmd_timer);
+                       atomic_set(&hdev->cmd_cnt, 1);
+               } else {        /* ncmd = 0: arm the recovery timer */
+                       schedule_delayed_work(&hdev->ncmd_timer,
+                                             HCI_NCMD_TIMEOUT);
+               }
+       }
+}
+
 static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb,
                                 u16 *opcode, u8 *status,
                                 hci_req_complete_t *req_complete,
@@ -3630,11 +3647,7 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb,
                break;
        }
 
-       if (*opcode != HCI_OP_NOP)
-               cancel_delayed_work(&hdev->cmd_timer);
-
-       if (ev->ncmd && !test_bit(HCI_RESET, &hdev->flags))
-               atomic_set(&hdev->cmd_cnt, 1);
+       handle_cmd_cnt_and_timer(hdev, *opcode, ev->ncmd);
 
        hci_req_cmd_complete(hdev, *opcode, *status, req_complete,
                             req_complete_skb);
@@ -3735,11 +3748,7 @@ static void hci_cmd_status_evt(struct hci_dev *hdev, struct sk_buff *skb,
                break;
        }
 
-       if (*opcode != HCI_OP_NOP)
-               cancel_delayed_work(&hdev->cmd_timer);
-
-       if (ev->ncmd && !test_bit(HCI_RESET, &hdev->flags))
-               atomic_set(&hdev->cmd_cnt, 1);
+       handle_cmd_cnt_and_timer(hdev, *opcode, ev->ncmd);
 
        /* Indicate request completion if the command failed. Also, if
         * we're not waiting for a special event and we get a success