static int ice_xdp_alloc_setup_rings(struct ice_vsi *vsi)
{
struct device *dev = ice_pf_to_dev(vsi->back);
- int i;
+ struct ice_tx_desc *tx_desc;
+ int i, j;
for (i = 0; i < vsi->num_xdp_txq; i++) {
u16 xdp_q_idx = vsi->alloc_txq + i;
xdp_ring->reg_idx = vsi->txq_map[xdp_q_idx];
xdp_ring->vsi = vsi;
xdp_ring->netdev = NULL;
+ xdp_ring->next_dd = ICE_TX_THRESH - 1;
+ xdp_ring->next_rs = ICE_TX_THRESH - 1;
xdp_ring->dev = dev;
xdp_ring->count = vsi->num_tx_desc;
WRITE_ONCE(vsi->xdp_rings[i], xdp_ring);
goto free_xdp_rings;
ice_set_ring_xdp(xdp_ring);
xdp_ring->xsk_pool = ice_tx_xsk_pool(xdp_ring);
+ for (j = 0; j < xdp_ring->count; j++) {
+ tx_desc = ICE_TX_DESC(xdp_ring, j);
+ tx_desc->cmd_type_offset_bsz = cpu_to_le64(ICE_TX_DESC_DTYPE_DESC_DONE);
+ }
}
ice_for_each_rxq(vsi, i)
total_bytes += tx_buf->bytecount;
total_pkts += tx_buf->gso_segs;
- if (ice_ring_is_xdp(tx_ring))
- page_frag_free(tx_buf->raw_buf);
- else
- /* free the skb */
- napi_consume_skb(tx_buf->skb, napi_budget);
+ /* free the skb */
+ napi_consume_skb(tx_buf->skb, napi_budget);
/* unmap skb header data */
dma_unmap_single(tx_ring->dev,
ice_update_tx_ring_stats(tx_ring, total_pkts, total_bytes);
- if (ice_ring_is_xdp(tx_ring))
- return !!budget;
-
netdev_tx_completed_queue(txring_txq(tx_ring), total_pkts,
total_bytes);
* budget and be more aggressive about cleaning up the Tx descriptors.
*/
ice_for_each_tx_ring(tx_ring, q_vector->tx) {
- bool wd = tx_ring->xsk_pool ?
- ice_clean_tx_irq_zc(tx_ring, budget) :
- ice_clean_tx_irq(tx_ring, budget);
+ bool wd;
+
+ if (tx_ring->xsk_pool)
+ wd = ice_clean_tx_irq_zc(tx_ring, budget);
+ else if (ice_ring_is_xdp(tx_ring))
+ wd = true;
+ else
+ wd = ice_clean_tx_irq(tx_ring, budget);
if (!wd)
clean_complete = false;
#define ICE_MAX_CHAINED_RX_BUFS 5
#define ICE_MAX_BUF_TXD 8
#define ICE_MIN_TX_LEN 17
+#define ICE_TX_THRESH 32
/* The size limit for a transmit buffer in a descriptor is (16K - 1).
* In order to align with the read requests we will align the value to
struct ice_vsi *vsi; /* Backreference to associated VSI */
/* CL2 - 2nd cacheline starts here */
dma_addr_t dma; /* physical address of ring */
+ struct xsk_buff_pool *xsk_pool;
u16 next_to_use;
u16 next_to_clean;
+ u16 next_rs;
+ u16 next_dd;
+ u16 q_handle; /* Queue handle per TC */
+ u16 reg_idx; /* HW register index of the ring */
u16 count; /* Number of descriptors */
u16 q_index; /* Queue number of ring */
- struct xsk_buff_pool *xsk_pool;
-
/* stats structs */
struct ice_q_stats stats;
struct u64_stats_sync syncp;
DECLARE_BITMAP(xps_state, ICE_TX_NBITS); /* XPS Config State */
struct ice_ptp_tx *tx_tstamps;
u32 txq_teid; /* Added Tx queue TEID */
- u16 q_handle; /* Queue handle per TC */
- u16 reg_idx; /* HW register index of the ring */
#define ICE_TX_FLAGS_RING_XDP BIT(0)
u8 flags;
u8 dcb_tc; /* Traffic class of ring */
#include "ice_txrx_lib.h"
#include "ice_eswitch.h"
+#include "ice_lib.h"
/**
* ice_release_rx_desc - Store the new tail and head values
napi_gro_receive(&rx_ring->q_vector->napi, skb);
}
+/**
+ * ice_clean_xdp_irq - Reclaim resources after transmit completes on XDP ring
+ * @xdp_ring: XDP ring to clean
+ *
+ * Tests the descriptor at index @xdp_ring->next_dd against
+ * ICE_TX_DESC_DTYPE_DESC_DONE. If the test fails, nothing is cleaned and the
+ * ring state is left untouched. Otherwise a fixed batch of ICE_TX_THRESH Tx
+ * buffers, starting at @xdp_ring->next_to_clean, is released: each buffer's
+ * page fragment is freed and its DMA mapping is torn down. Afterwards
+ * next_dd is advanced by ICE_TX_THRESH (wrapping back to ICE_TX_THRESH - 1),
+ * next_to_clean is stored back and the ring's Tx statistics are updated.
+ */
+static void ice_clean_xdp_irq(struct ice_tx_ring *xdp_ring)
+{
+	unsigned int total_bytes = 0, total_pkts = 0;
+	u16 ntc = xdp_ring->next_to_clean;
+	struct ice_tx_desc *next_dd_desc;
+	u16 next_dd = xdp_ring->next_dd;
+	struct ice_tx_buf *tx_buf;
+	int i;
+
+	/* Only the descriptor at next_dd is inspected; bail out if it does
+	 * not carry the done marker yet.
+	 */
+	next_dd_desc = ICE_TX_DESC(xdp_ring, next_dd);
+	if (!(next_dd_desc->cmd_type_offset_bsz &
+	      cpu_to_le64(ICE_TX_DESC_DTYPE_DESC_DONE)))
+		return;
+
+	for (i = 0; i < ICE_TX_THRESH; i++) {
+		tx_buf = &xdp_ring->tx_buf[ntc];
+
+		total_bytes += tx_buf->bytecount;
+		/* tx_buf->gso_segs is not consulted here: every buffer on
+		 * this ring is accounted as exactly one packet.
+		 */
+		total_pkts++;
+
+		page_frag_free(tx_buf->raw_buf);
+		dma_unmap_single(xdp_ring->dev, dma_unmap_addr(tx_buf, dma),
+				 dma_unmap_len(tx_buf, len), DMA_TO_DEVICE);
+		dma_unmap_len_set(tx_buf, len, 0);
+		tx_buf->raw_buf = NULL;
+
+		ntc++;
+		if (ntc >= xdp_ring->count)
+			ntc = 0;
+	}
+
+	/* Zero the descriptor that was just checked before moving next_dd on
+	 * (presumably so a stale done marker is not observed again - confirm).
+	 */
+	next_dd_desc->cmd_type_offset_bsz = 0;
+	next_dd += ICE_TX_THRESH;
+	/* Valid descriptor indices are 0..count - 1, so an index equal to
+	 * count must wrap as well; hence >= rather than >.
+	 */
+	if (next_dd >= xdp_ring->count)
+		next_dd = ICE_TX_THRESH - 1;
+	xdp_ring->next_dd = next_dd;
+	xdp_ring->next_to_clean = ntc;
+	ice_update_tx_ring_stats(xdp_ring, total_pkts, total_bytes);
+}
+
/**
* ice_xmit_xdp_ring - submit single packet to XDP ring for transmission
* @data: packet data pointer
struct ice_tx_buf *tx_buf;
dma_addr_t dma;
+ if (ICE_DESC_UNUSED(xdp_ring) < ICE_TX_THRESH)
+ ice_clean_xdp_irq(xdp_ring);
+
if (!unlikely(ICE_DESC_UNUSED(xdp_ring))) {
xdp_ring->tx_stats.tx_busy++;
return ICE_XDP_CONSUMED;
tx_desc = ICE_TX_DESC(xdp_ring, i);
tx_desc->buf_addr = cpu_to_le64(dma);
- tx_desc->cmd_type_offset_bsz = ice_build_ctob(ICE_TXD_LAST_DESC_CMD, 0,
+ tx_desc->cmd_type_offset_bsz = ice_build_ctob(ICE_TX_DESC_CMD_EOP, 0,
size, 0);
- /* Make certain all of the status bits have been updated
- * before next_to_watch is written.
- */
- smp_wmb();
-
i++;
- if (i == xdp_ring->count)
+ if (i == xdp_ring->count) {
i = 0;
-
- tx_buf->next_to_watch = tx_desc;
+ tx_desc = ICE_TX_DESC(xdp_ring, xdp_ring->next_rs);
+ tx_desc->cmd_type_offset_bsz |=
+ cpu_to_le64(ICE_TX_DESC_CMD_RS << ICE_TXD_QW1_CMD_S);
+ xdp_ring->next_rs = ICE_TX_THRESH - 1;
+ }
xdp_ring->next_to_use = i;
+ if (i > xdp_ring->next_rs) {
+ tx_desc = ICE_TX_DESC(xdp_ring, xdp_ring->next_rs);
+ tx_desc->cmd_type_offset_bsz |=
+ cpu_to_le64(ICE_TX_DESC_CMD_RS << ICE_TXD_QW1_CMD_S);
+ xdp_ring->next_rs += ICE_TX_THRESH;
+ }
+
return ICE_XDP_TX;
}