#define HE_PREP(f, m, v)       le16_encode_bits(le32_get_bits(v, MT_CRXV_HE_##m),\
                                                 IEEE80211_RADIOTAP_HE_##f)
 
+/*
+ * Advance the packet id stored in @wcid and return the new value.
+ *
+ * The id is incremented and masked with MT_PACKET_ID_MASK. The values
+ * MT_PACKET_ID_NO_ACK and MT_PACKET_ID_NO_SKB are never handed out: when
+ * the counter lands on either of them it is reset to MT_PACKET_ID_FIRST.
+ * @dev is not used.
+ */
+static u8
+mt7921_next_pid(struct mt7921_dev *dev, struct mt76_wcid *wcid)
+{
+       wcid->packet_id++;
+       wcid->packet_id &= MT_PACKET_ID_MASK;
+
+       if (wcid->packet_id != MT_PACKET_ID_NO_ACK &&
+           wcid->packet_id != MT_PACKET_ID_NO_SKB)
+               return wcid->packet_id;
+
+       /* landed on one of the two skipped values: restart the sequence */
+       wcid->packet_id = MT_PACKET_ID_FIRST;
+       return wcid->packet_id;
+}
+
+/*
+ * Set the station's next tx-status timestamp to @timeout milliseconds after
+ * the current jiffies value and return the timestamp that was stored.
+ *
+ * NOTE(review): container_of() is only valid when @wcid is the member
+ * embedded in a struct mt7921_sta - confirm no caller passes a standalone
+ * wcid. @dev is not used.
+ */
+static unsigned long
+mt7921_next_txs_set(struct mt7921_dev *dev, struct mt76_wcid *wcid,
+                   u32 timeout)
+{
+       struct mt7921_sta *msta = container_of(wcid, struct mt7921_sta, wcid);
+
+       msta->next_txs_ts = jiffies + msecs_to_jiffies(timeout);
+
+       return msta->next_txs_ts;
+}
+
+/*
+ * Return true once jiffies has moved past the station's next tx-status
+ * timestamp (msta->next_txs_ts), false otherwise.
+ *
+ * NOTE(review): container_of() is only valid when @wcid is the member
+ * embedded in a struct mt7921_sta - confirm no caller passes a standalone
+ * wcid. @dev is not used.
+ */
+static bool
+mt7921_next_txs_timeout(struct mt7921_dev *dev, struct mt76_wcid *wcid)
+{
+       struct mt7921_sta *msta = container_of(wcid, struct mt7921_sta, wcid);
+
+       /* same comparison as time_is_before_jiffies(msta->next_txs_ts) */
+       return time_after(jiffies, msta->next_txs_ts);
+}
+
 static struct mt76_wcid *mt7921_rx_get_wcid(struct mt7921_dev *dev,
                                            u16 idx, bool unicast)
 {
        struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
        struct ieee80211_vif *vif = info->control.vif;
        struct mt76_phy *mphy = &dev->mphy;
-       u8 p_fmt, q_idx, omac_idx = 0, wmm_idx = 0;
+       u8 pid, p_fmt, q_idx, omac_idx = 0, wmm_idx = 0;
        bool is_8023 = info->flags & IEEE80211_TX_CTL_HW_80211_ENCAP;
        u16 tx_count = 15;
        u32 val;
                txwi[6] |= cpu_to_le32(val);
                txwi[3] |= cpu_to_le32(MT_TXD3_BA_DISABLE);
        }
+
+       if ((FIELD_GET(MT_TXD2_FRAME_TYPE, txwi[2]) &
+               (IEEE80211_FTYPE_DATA >> 2)) &&
+               mt7921_next_txs_timeout(dev, wcid)) {
+               mt7921_next_txs_set(dev, wcid, 250);
+               pid = mt7921_next_pid(dev, wcid);
+               val = MT_TXD5_TX_STATUS_MCU | FIELD_PREP(MT_TXD5_PID, pid);
+               txwi[5] |= cpu_to_le32(val);
+       }
 }
 
 static void
        }
 }
 
-static void
-mt7921_mac_sta_stats_work(struct mt7921_phy *phy)
-{
-       struct mt7921_dev *dev = phy->dev;
-       struct mt7921_sta *msta;
-       LIST_HEAD(list);
-
-       spin_lock_bh(&dev->sta_poll_lock);
-       list_splice_init(&phy->stats_list, &list);
-
-       while (!list_empty(&list)) {
-               msta = list_first_entry(&list, struct mt7921_sta, stats_list);
-               list_del_init(&msta->stats_list);
-               spin_unlock_bh(&dev->sta_poll_lock);
-
-               /* query wtbl info to report tx rate for further devices */
-               mt7921_get_wtbl_info(dev, msta->wcid.idx);
-
-               spin_lock_bh(&dev->sta_poll_lock);
-       }
-
-       spin_unlock_bh(&dev->sta_poll_lock);
-}
-
 void mt7921_mac_work(struct work_struct *work)
 {
        struct mt7921_phy *phy;
 
                mt7921_mac_update_mib_stats(phy);
        }
-       if (++phy->sta_work_count == 4) {
-               phy->sta_work_count = 0;
-               mt7921_mac_sta_stats_work(phy);
-       }
 
        mt7921_mutex_release(phy->dev);
        ieee80211_queue_delayed_work(phy->mt76->hw, &mphy->mac_work,
 
        trace_lp_event(dev, event->state);
 }
 
+/*
+ * Handle an MCU_EVENT_TX_DONE event.
+ *
+ * The MCU rx descriptor is stripped from @skb and the remaining payload is
+ * read as a struct mt7921_mcu_tx_done_event. The station on the phy's
+ * stats_list whose wcid index equals event->wlan_idx is unlinked from the
+ * list and its msta->stats.tx_rate is refreshed from event->tx_rate.
+ *
+ * Only the matching station is removed from stats_list; every other entry
+ * stays queued. The list is searched in place instead of being spliced
+ * onto an on-stack head, so no entry is ever left linked to a list head
+ * that disappears when this function returns.
+ *
+ * If no station matches, or the wcid has no ieee80211_sta behind it, the
+ * event is ignored.
+ */
+static void
+mt7921_mcu_tx_done_event(struct mt7921_dev *dev, struct sk_buff *skb)
+{
+       struct mt7921_mcu_tx_done_event *event;
+       struct mt7921_sta *msta = NULL, *iter;
+       struct mt7921_phy *mphy = &dev->phy;
+       struct mt7921_mcu_peer_cap peer;
+       struct ieee80211_sta *sta;
+
+       skb_pull(skb, sizeof(struct mt7921_mcu_rxd));
+       event = (struct mt7921_mcu_tx_done_event *)skb->data;
+
+       spin_lock_bh(&dev->sta_poll_lock);
+       list_for_each_entry(iter, &mphy->stats_list, stats_list) {
+               if (iter->wcid.idx != event->wlan_idx)
+                       continue;
+
+               /* leaving the loop right after the unlink keeps this safe */
+               list_del_init(&iter->stats_list);
+               msta = iter;
+               break;
+       }
+       spin_unlock_bh(&dev->sta_poll_lock);
+
+       if (!msta)
+               return;
+
+       /* the rate parsing below runs without sta_poll_lock held */
+       sta = wcid_to_sta(&msta->wcid);
+       if (!sta)
+               return;
+
+       /* peer config based on IEEE SPEC */
+       memset(&peer, 0x0, sizeof(peer));
+       peer.bw = event->bw;
+       peer.g2 = !!(sta->ht_cap.cap & IEEE80211_HT_CAP_SGI_20);
+       peer.g4 = !!(sta->ht_cap.cap & IEEE80211_HT_CAP_SGI_40);
+       peer.g8 = !!(sta->vht_cap.cap & IEEE80211_VHT_CAP_SHORT_GI_80);
+       peer.g16 = !!(sta->vht_cap.cap & IEEE80211_VHT_CAP_SHORT_GI_160);
+       mt7921_mcu_tx_rate_parse(mphy->mt76, &peer,
+                                &msta->stats.tx_rate, event->tx_rate);
+}
+
 static void
 mt7921_mcu_rx_unsolicited_event(struct mt7921_dev *dev, struct sk_buff *skb)
 {
        case MCU_EVENT_LP_INFO:
                mt7921_mcu_low_power_event(dev, skb);
                break;
+       case MCU_EVENT_TX_DONE:
+               mt7921_mcu_tx_done_event(dev, skb);
+               break;
        default:
                break;
        }
            rxd->eid == MCU_EVENT_SCHED_SCAN_DONE ||
            rxd->eid == MCU_EVENT_BSS_ABSENCE ||
            rxd->eid == MCU_EVENT_SCAN_DONE ||
+           rxd->eid == MCU_EVENT_TX_DONE ||
            rxd->eid == MCU_EVENT_DBG_MSG ||
            rxd->eid == MCU_EVENT_COREDUMP ||
            rxd->eid == MCU_EVENT_LP_INFO ||