struct ath10k_htc *htc = &ar->htc;
        struct ath10k_sdio_rx_data *pkt;
        struct ath10k_htc_ep *ep;
+       struct ath10k_skb_rxcb *cb;
        enum ath10k_htc_ep_id id;
        int ret, i, *n_lookahead_local;
        u32 *lookaheads_local;
                if (ret)
                        goto out;
 
-               if (!pkt->trailer_only)
-                       ep->ep_ops.ep_rx_complete(ar_sdio->ar, pkt->skb);
-               else
+               if (!pkt->trailer_only) {
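+                       /* Record the destination endpoint in the skb's
+                        * control block so the deferred worker knows which
+                        * HTC endpoint the packet belongs to.
+                        */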
+                       cb = ATH10K_SKB_RXCB(pkt->skb);
+                       cb->eid = id;
+
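+                       /* Defer delivery: queue the skb and kick the rx
+                        * worker, so the SDIO bus can keep fetching packets
+                        * while HTC processes the completed ones.
+                        */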
+                       skb_queue_tail(&ar_sdio->rx_head, pkt->skb);
+                       queue_work(ar->workqueue_aux,
+                                  &ar_sdio->async_work_rx);
+               } else {
                        kfree_skb(pkt->skb);
+               }
 
-               /* The RX complete handler now owns the skb...*/
+               /* The skb is now owned by the rx queue (or was freed above) */
                pkt->skb = NULL;
        ath10k_sdio_free_bus_req(ar, req);
 }
 
+/* To improve throughput, use a workqueue to deliver rx packets to the
+ * HTC layer; this way the SDIO bus is utilised much better.
+ */
+static void ath10k_rx_indication_async_work(struct work_struct *work)
+{
+       struct ath10k_sdio *ar_sdio = container_of(work, struct ath10k_sdio,
+                                                  async_work_rx);
+       struct ath10k *ar = ar_sdio->ar;
+       struct ath10k_htc_ep *ep;
+       struct ath10k_skb_rxcb *cb;
+       struct sk_buff *skb;
+
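+       /* Drain all queued skbs; skb_dequeue() returns NULL once the queue
+        * is empty, and the queue's internal lock makes this safe against
+        * concurrent skb_queue_tail() calls from the rx path.
+        */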
+       while (true) {
+               skb = skb_dequeue(&ar_sdio->rx_head);
+               if (!skb)
+                       break;
+               cb = ATH10K_SKB_RXCB(skb);
+               ep = &ar->htc.endpoint[cb->eid];
+               ep->ep_ops.ep_rx_complete(ar, skb);
+       }
+}
+
 static void ath10k_sdio_write_async_work(struct work_struct *work)
 {
        struct ath10k_sdio *ar_sdio = container_of(work, struct ath10k_sdio,
        for (i = 0; i < ATH10K_SDIO_BUS_REQUEST_MAX_NUM; i++)
                ath10k_sdio_free_bus_req(ar, &ar_sdio->bus_req[i]);
 
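+       /* Initialise the deferred rx queue and its worker. */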
+       skb_queue_head_init(&ar_sdio->rx_head);
+       INIT_WORK(&ar_sdio->async_work_rx, ath10k_rx_indication_async_work);
+
        dev_id_base = FIELD_GET(QCA_MANUFACTURER_ID_BASE, id->device);
        switch (dev_id_base) {
        case QCA_MANUFACTURER_ID_AR6005_BASE:
 
        struct ath10k_sdio_bus_request bus_req[ATH10K_SDIO_BUS_REQUEST_MAX_NUM];
        /* free list of bus requests */
        struct list_head bus_req_freeq;
+
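+       /* rx packets awaiting delivery to the HTC layer */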
+       struct sk_buff_head rx_head;
+
        /* protects access to bus_req_freeq */
        spinlock_t lock;
 
        struct list_head wr_asyncq;
        /* protects access to wr_asyncq */
        spinlock_t wr_async_lock;
+
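+       /* work for deferred delivery of rx packets to the HTC layer */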
+       struct work_struct async_work_rx;
 };
 
 static inline struct ath10k_sdio *ath10k_sdio_priv(struct ath10k *ar)