ice: xsk: Tx multi-buffer support
authorMaciej Fijalkowski <maciej.fijalkowski@intel.com>
Wed, 19 Jul 2023 13:24:12 +0000 (15:24 +0200)
committerAlexei Starovoitov <ast@kernel.org>
Wed, 19 Jul 2023 16:56:50 +0000 (09:56 -0700)
Most of this patch is about actually supporting XDP_TX action. Pure Tx
ZC support is only about looking at XDP_PKT_CONTD presence at options
field and based on that generating EOP bit on Tx HW descriptor. This is
that simple due to the implementation on
xsk_tx_peek_release_desc_batch() where we are making sure that last
produced descriptor is an EOP one.

Overwrite xdp_zc_max_segs with a value that defines max scatter-gatter
count on Tx side that HW can handle.

Signed-off-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Link: https://lore.kernel.org/r/20230719132421.584801-16-maciej.fijalkowski@intel.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
drivers/net/ethernet/intel/ice/ice_main.c
drivers/net/ethernet/intel/ice/ice_xsk.c

index 19a5e7f3a075ee3c1a4462b1a826f63c33eb3db4..ca83379b2de12b14a6b4ea02bb3287332eba3a59 100644 (file)
@@ -3392,6 +3392,7 @@ static void ice_set_ops(struct ice_vsi *vsi)
        netdev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT |
                               NETDEV_XDP_ACT_XSK_ZEROCOPY |
                               NETDEV_XDP_ACT_RX_SG;
+       netdev->xdp_zc_max_segs = ICE_MAX_BUF_TXD;
 }
 
 /**
index 91cdd5e4790dd856abc32bc55191aad7a512148f..2a3f0834e13917018ff67561301e559093033785 100644 (file)
@@ -613,7 +613,7 @@ out:
  * ice_clean_xdp_irq_zc - produce AF_XDP descriptors to CQ
  * @xdp_ring: XDP Tx ring
  */
-static void ice_clean_xdp_irq_zc(struct ice_tx_ring *xdp_ring)
+static u32 ice_clean_xdp_irq_zc(struct ice_tx_ring *xdp_ring)
 {
        u16 ntc = xdp_ring->next_to_clean;
        struct ice_tx_desc *tx_desc;
@@ -635,7 +635,7 @@ static void ice_clean_xdp_irq_zc(struct ice_tx_ring *xdp_ring)
        }
 
        if (!completed_frames)
-               return;
+               return 0;
 
        if (likely(!xdp_ring->xdp_tx_active)) {
                xsk_frames = completed_frames;
@@ -665,6 +665,8 @@ skip:
                xdp_ring->next_to_clean -= cnt;
        if (xsk_frames)
                xsk_tx_completed(xdp_ring->xsk_pool, xsk_frames);
+
+       return completed_frames;
 }
 
 /**
@@ -682,37 +684,72 @@ skip:
 static int ice_xmit_xdp_tx_zc(struct xdp_buff *xdp,
                              struct ice_tx_ring *xdp_ring)
 {
+       struct skb_shared_info *sinfo = NULL;
        u32 size = xdp->data_end - xdp->data;
        u32 ntu = xdp_ring->next_to_use;
        struct ice_tx_desc *tx_desc;
        struct ice_tx_buf *tx_buf;
-       dma_addr_t dma;
+       struct xdp_buff *head;
+       u32 nr_frags = 0;
+       u32 free_space;
+       u32 frag = 0;
 
-       if (ICE_DESC_UNUSED(xdp_ring) < ICE_RING_QUARTER(xdp_ring)) {
-               ice_clean_xdp_irq_zc(xdp_ring);
-               if (!ICE_DESC_UNUSED(xdp_ring)) {
-                       xdp_ring->ring_stats->tx_stats.tx_busy++;
-                       return ICE_XDP_CONSUMED;
-               }
-       }
+       free_space = ICE_DESC_UNUSED(xdp_ring);
+       if (free_space < ICE_RING_QUARTER(xdp_ring))
+               free_space += ice_clean_xdp_irq_zc(xdp_ring);
 
-       dma = xsk_buff_xdp_get_dma(xdp);
-       xsk_buff_raw_dma_sync_for_device(xdp_ring->xsk_pool, dma, size);
+       if (unlikely(!free_space))
+               goto busy;
+
+       if (unlikely(xdp_buff_has_frags(xdp))) {
+               sinfo = xdp_get_shared_info_from_buff(xdp);
+               nr_frags = sinfo->nr_frags;
+               if (free_space < nr_frags + 1)
+                       goto busy;
+       }
 
-       tx_buf = &xdp_ring->tx_buf[ntu];
-       tx_buf->xdp = xdp;
-       tx_buf->type = ICE_TX_BUF_XSK_TX;
        tx_desc = ICE_TX_DESC(xdp_ring, ntu);
-       tx_desc->buf_addr = cpu_to_le64(dma);
-       tx_desc->cmd_type_offset_bsz = ice_build_ctob(ICE_TX_DESC_CMD_EOP,
-                                                     0, size, 0);
-       xdp_ring->xdp_tx_active++;
+       tx_buf = &xdp_ring->tx_buf[ntu];
+       head = xdp;
+
+       for (;;) {
+               dma_addr_t dma;
+
+               dma = xsk_buff_xdp_get_dma(xdp);
+               xsk_buff_raw_dma_sync_for_device(xdp_ring->xsk_pool, dma, size);
+
+               tx_buf->xdp = xdp;
+               tx_buf->type = ICE_TX_BUF_XSK_TX;
+               tx_desc->buf_addr = cpu_to_le64(dma);
+               tx_desc->cmd_type_offset_bsz = ice_build_ctob(0, 0, size, 0);
+               /* account for each xdp_buff from xsk_buff_pool */
+               xdp_ring->xdp_tx_active++;
+
+               if (++ntu == xdp_ring->count)
+                       ntu = 0;
+
+               if (frag == nr_frags)
+                       break;
+
+               tx_desc = ICE_TX_DESC(xdp_ring, ntu);
+               tx_buf = &xdp_ring->tx_buf[ntu];
+
+               xdp = xsk_buff_get_frag(head);
+               size = skb_frag_size(&sinfo->frags[frag]);
+               frag++;
+       }
 
-       if (++ntu == xdp_ring->count)
-               ntu = 0;
        xdp_ring->next_to_use = ntu;
+       /* update last descriptor from a frame with EOP */
+       tx_desc->cmd_type_offset_bsz |=
+               cpu_to_le64(ICE_TX_DESC_CMD_EOP << ICE_TXD_QW1_CMD_S);
 
        return ICE_XDP_TX;
+
+busy:
+       xdp_ring->ring_stats->tx_stats.tx_busy++;
+
+       return ICE_XDP_CONSUMED;
 }
 
 /**
@@ -960,7 +997,7 @@ static void ice_xmit_pkt(struct ice_tx_ring *xdp_ring, struct xdp_desc *desc,
 
        tx_desc = ICE_TX_DESC(xdp_ring, xdp_ring->next_to_use++);
        tx_desc->buf_addr = cpu_to_le64(dma);
-       tx_desc->cmd_type_offset_bsz = ice_build_ctob(ICE_TX_DESC_CMD_EOP,
+       tx_desc->cmd_type_offset_bsz = ice_build_ctob(xsk_is_eop_desc(desc),
                                                      0, desc->len, 0);
 
        *total_bytes += desc->len;
@@ -987,7 +1024,7 @@ static void ice_xmit_pkt_batch(struct ice_tx_ring *xdp_ring, struct xdp_desc *de
 
                tx_desc = ICE_TX_DESC(xdp_ring, ntu++);
                tx_desc->buf_addr = cpu_to_le64(dma);
-               tx_desc->cmd_type_offset_bsz = ice_build_ctob(ICE_TX_DESC_CMD_EOP,
+               tx_desc->cmd_type_offset_bsz = ice_build_ctob(xsk_is_eop_desc(&descs[i]),
                                                              0, descs[i].len, 0);
 
                *total_bytes += descs[i].len;