From: Jesse Brandeburg <jesse.brandeburg@intel.com>
Date: Sat, 15 Sep 2018 00:37:48 +0000 (-0700)
Subject: iavf: move i40evf files to new name
X-Git-Url: http://git.maquefel.me/?a=commitdiff_plain;h=5ec8b7d11470f742214708902d8903047855727e;p=linux.git

iavf: move i40evf files to new name

Simply move the i40evf files to the new name, updating the #includes
to track the new names, and updating the Makefile as well. A future
patch will remove the i40e references (after the code removal patches
later in this series).

Signed-off-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---

diff --git a/drivers/net/ethernet/intel/iavf/Makefile b/drivers/net/ethernet/intel/iavf/Makefile
index 1b050d9d5f493..c1e4b91de4b3a 100644
--- a/drivers/net/ethernet/intel/iavf/Makefile
+++ b/drivers/net/ethernet/intel/iavf/Makefile
@@ -11,5 +11,5 @@ subdir-ccflags-y += -I$(src)
 
 obj-$(CONFIG_IAVF) += iavf.o
 
-iavf-objs := i40evf_main.o i40evf_ethtool.o i40evf_virtchnl.o \
-	     i40e_txrx.o i40e_common.o i40e_adminq.o i40evf_client.o
+iavf-objs := iavf_main.o iavf_ethtool.o iavf_virtchnl.o \
+	     iavf_txrx.o i40e_common.o i40e_adminq.o iavf_client.o
diff --git a/drivers/net/ethernet/intel/iavf/i40e_txrx.c b/drivers/net/ethernet/intel/iavf/i40e_txrx.c
deleted file mode 100644
index 6d8f848824ad8..0000000000000
--- a/drivers/net/ethernet/intel/iavf/i40e_txrx.c
+++ /dev/null
@@ -1,2508 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/* Copyright(c) 2013 - 2018 Intel Corporation. */
-
-#include <linux/prefetch.h>
-#include <net/busy_poll.h>
-
-#include "i40evf.h"
-#include "i40e_trace.h"
-#include "i40e_prototype.h"
-
-static inline __le64 build_ctob(u32 td_cmd, u32 td_offset, unsigned int size,
-				u32 td_tag)
-{
-	return cpu_to_le64(I40E_TX_DESC_DTYPE_DATA |
-			   ((u64)td_cmd << I40E_TXD_QW1_CMD_SHIFT) |
-			   ((u64)td_offset << I40E_TXD_QW1_OFFSET_SHIFT) |
-			   ((u64)size << I40E_TXD_QW1_TX_BUF_SZ_SHIFT) |
-			   ((u64)td_tag << I40E_TXD_QW1_L2TAG1_SHIFT));
-}
-
-#define I40E_TXD_CMD (I40E_TX_DESC_CMD_EOP | I40E_TX_DESC_CMD_RS)
-
-/**
- * i40e_unmap_and_free_tx_resource - Release a Tx buffer
- * @ring: the ring that owns the buffer
- * @tx_buffer: the buffer to free
- **/
-static void i40e_unmap_and_free_tx_resource(struct i40e_ring *ring,
-					    struct i40e_tx_buffer *tx_buffer)
-{
-	if (tx_buffer->skb) {
-		if (tx_buffer->tx_flags & I40E_TX_FLAGS_FD_SB)
-			kfree(tx_buffer->raw_buf);
-		else
-			dev_kfree_skb_any(tx_buffer->skb);
-		if (dma_unmap_len(tx_buffer, len))
-			dma_unmap_single(ring->dev,
-					 dma_unmap_addr(tx_buffer, dma),
-					 dma_unmap_len(tx_buffer, len),
-					 DMA_TO_DEVICE);
-	} else if (dma_unmap_len(tx_buffer, len)) {
-		dma_unmap_page(ring->dev,
-			       dma_unmap_addr(tx_buffer, dma),
-			       dma_unmap_len(tx_buffer, len),
-			       DMA_TO_DEVICE);
-	}
-
-	tx_buffer->next_to_watch = NULL;
-	tx_buffer->skb = NULL;
-	dma_unmap_len_set(tx_buffer, len, 0);
-	/* tx_buffer must be completely set up in the transmit path */
-}
-
-/**
- * iavf_clean_tx_ring - Free any empty Tx buffers
- * @tx_ring: ring to be cleaned
- **/
-void iavf_clean_tx_ring(struct i40e_ring *tx_ring)
-{
-	unsigned long bi_size;
-	u16 i;
-
-	/* ring already cleared, nothing to do */
-	if (!tx_ring->tx_bi)
-		return;
-
-	/* Free all the Tx ring sk_buffs */
-	for (i = 0; i < tx_ring->count; i++)
-		i40e_unmap_and_free_tx_resource(tx_ring, &tx_ring->tx_bi[i]);
-
-	bi_size = sizeof(struct i40e_tx_buffer) * tx_ring->count;
-	memset(tx_ring->tx_bi, 0, bi_size);
-
-	/* Zero out the descriptor ring */
-	memset(tx_ring->desc, 0, tx_ring->size);
-
-	tx_ring->next_to_use = 0;
-	tx_ring->next_to_clean = 0;
-
-	if (!tx_ring->netdev)
-		return;
-
-	/* cleanup Tx
queue statistics */ - netdev_tx_reset_queue(txring_txq(tx_ring)); -} - -/** - * iavf_free_tx_resources - Free Tx resources per queue - * @tx_ring: Tx descriptor ring for a specific queue - * - * Free all transmit software resources - **/ -void iavf_free_tx_resources(struct i40e_ring *tx_ring) -{ - iavf_clean_tx_ring(tx_ring); - kfree(tx_ring->tx_bi); - tx_ring->tx_bi = NULL; - - if (tx_ring->desc) { - dma_free_coherent(tx_ring->dev, tx_ring->size, - tx_ring->desc, tx_ring->dma); - tx_ring->desc = NULL; - } -} - -/** - * iavf_get_tx_pending - how many Tx descriptors not processed - * @ring: the ring of descriptors - * @in_sw: is tx_pending being checked in SW or HW - * - * Since there is no access to the ring head register - * in XL710, we need to use our local copies - **/ -u32 iavf_get_tx_pending(struct i40e_ring *ring, bool in_sw) -{ - u32 head, tail; - - head = ring->next_to_clean; - tail = readl(ring->tail); - - if (head != tail) - return (head < tail) ? - tail - head : (tail + ring->count - head); - - return 0; -} - -/** - * iavf_detect_recover_hung - Function to detect and recover hung_queues - * @vsi: pointer to vsi struct with tx queues - * - * VSI has netdev and netdev has TX queues. This function is to check each of - * those TX queues if they are hung, trigger recovery by issuing SW interrupt. - **/ -void iavf_detect_recover_hung(struct i40e_vsi *vsi) -{ - struct i40e_ring *tx_ring = NULL; - struct net_device *netdev; - unsigned int i; - int packets; - - if (!vsi) - return; - - if (test_bit(__I40E_VSI_DOWN, vsi->state)) - return; - - netdev = vsi->netdev; - if (!netdev) - return; - - if (!netif_carrier_ok(netdev)) - return; - - for (i = 0; i < vsi->back->num_active_queues; i++) { - tx_ring = &vsi->back->tx_rings[i]; - if (tx_ring && tx_ring->desc) { - /* If packet counter has not changed the queue is - * likely stalled, so force an interrupt for this - * queue. - * - * prev_pkt_ctr would be negative if there was no - * pending work. - */ - packets = tx_ring->stats.packets & INT_MAX; - if (tx_ring->tx_stats.prev_pkt_ctr == packets) { - iavf_force_wb(vsi, tx_ring->q_vector); - continue; - } - - /* Memory barrier between read of packet count and call - * to iavf_get_tx_pending() - */ - smp_rmb(); - tx_ring->tx_stats.prev_pkt_ctr = - iavf_get_tx_pending(tx_ring, true) ? packets : -1; - } - } -} - -#define WB_STRIDE 4 - -/** - * i40e_clean_tx_irq - Reclaim resources after transmit completes - * @vsi: the VSI we care about - * @tx_ring: Tx ring to clean - * @napi_budget: Used to determine if we are in netpoll - * - * Returns true if there's any budget left (e.g. 
the clean is finished) - **/ -static bool i40e_clean_tx_irq(struct i40e_vsi *vsi, - struct i40e_ring *tx_ring, int napi_budget) -{ - u16 i = tx_ring->next_to_clean; - struct i40e_tx_buffer *tx_buf; - struct i40e_tx_desc *tx_desc; - unsigned int total_bytes = 0, total_packets = 0; - unsigned int budget = vsi->work_limit; - - tx_buf = &tx_ring->tx_bi[i]; - tx_desc = I40E_TX_DESC(tx_ring, i); - i -= tx_ring->count; - - do { - struct i40e_tx_desc *eop_desc = tx_buf->next_to_watch; - - /* if next_to_watch is not set then there is no work pending */ - if (!eop_desc) - break; - - /* prevent any other reads prior to eop_desc */ - smp_rmb(); - - i40e_trace(clean_tx_irq, tx_ring, tx_desc, tx_buf); - /* if the descriptor isn't done, no work yet to do */ - if (!(eop_desc->cmd_type_offset_bsz & - cpu_to_le64(I40E_TX_DESC_DTYPE_DESC_DONE))) - break; - - /* clear next_to_watch to prevent false hangs */ - tx_buf->next_to_watch = NULL; - - /* update the statistics for this packet */ - total_bytes += tx_buf->bytecount; - total_packets += tx_buf->gso_segs; - - /* free the skb */ - napi_consume_skb(tx_buf->skb, napi_budget); - - /* unmap skb header data */ - dma_unmap_single(tx_ring->dev, - dma_unmap_addr(tx_buf, dma), - dma_unmap_len(tx_buf, len), - DMA_TO_DEVICE); - - /* clear tx_buffer data */ - tx_buf->skb = NULL; - dma_unmap_len_set(tx_buf, len, 0); - - /* unmap remaining buffers */ - while (tx_desc != eop_desc) { - i40e_trace(clean_tx_irq_unmap, - tx_ring, tx_desc, tx_buf); - - tx_buf++; - tx_desc++; - i++; - if (unlikely(!i)) { - i -= tx_ring->count; - tx_buf = tx_ring->tx_bi; - tx_desc = I40E_TX_DESC(tx_ring, 0); - } - - /* unmap any remaining paged data */ - if (dma_unmap_len(tx_buf, len)) { - dma_unmap_page(tx_ring->dev, - dma_unmap_addr(tx_buf, dma), - dma_unmap_len(tx_buf, len), - DMA_TO_DEVICE); - dma_unmap_len_set(tx_buf, len, 0); - } - } - - /* move us one more past the eop_desc for start of next pkt */ - tx_buf++; - tx_desc++; - i++; - if (unlikely(!i)) { - i -= tx_ring->count; - tx_buf = tx_ring->tx_bi; - tx_desc = I40E_TX_DESC(tx_ring, 0); - } - - prefetch(tx_desc); - - /* update budget accounting */ - budget--; - } while (likely(budget)); - - i += tx_ring->count; - tx_ring->next_to_clean = i; - u64_stats_update_begin(&tx_ring->syncp); - tx_ring->stats.bytes += total_bytes; - tx_ring->stats.packets += total_packets; - u64_stats_update_end(&tx_ring->syncp); - tx_ring->q_vector->tx.total_bytes += total_bytes; - tx_ring->q_vector->tx.total_packets += total_packets; - - if (tx_ring->flags & I40E_TXR_FLAGS_WB_ON_ITR) { - /* check to see if there are < 4 descriptors - * waiting to be written back, then kick the hardware to force - * them to be written back in case we stay in NAPI. - * In this mode on X722 we do not enable Interrupt. - */ - unsigned int j = iavf_get_tx_pending(tx_ring, false); - - if (budget && - ((j / WB_STRIDE) == 0) && (j > 0) && - !test_bit(__I40E_VSI_DOWN, vsi->state) && - (I40E_DESC_UNUSED(tx_ring) != tx_ring->count)) - tx_ring->arm_wb = true; - } - - /* notify netdev of completed buffers */ - netdev_tx_completed_queue(txring_txq(tx_ring), - total_packets, total_bytes); - -#define TX_WAKE_THRESHOLD ((s16)(DESC_NEEDED * 2)) - if (unlikely(total_packets && netif_carrier_ok(tx_ring->netdev) && - (I40E_DESC_UNUSED(tx_ring) >= TX_WAKE_THRESHOLD))) { - /* Make sure that anybody stopping the queue after this - * sees the new next_to_clean. 
- */ - smp_mb(); - if (__netif_subqueue_stopped(tx_ring->netdev, - tx_ring->queue_index) && - !test_bit(__I40E_VSI_DOWN, vsi->state)) { - netif_wake_subqueue(tx_ring->netdev, - tx_ring->queue_index); - ++tx_ring->tx_stats.restart_queue; - } - } - - return !!budget; -} - -/** - * iavf_enable_wb_on_itr - Arm hardware to do a wb, interrupts are not enabled - * @vsi: the VSI we care about - * @q_vector: the vector on which to enable writeback - * - **/ -static void i40e_enable_wb_on_itr(struct i40e_vsi *vsi, - struct i40e_q_vector *q_vector) -{ - u16 flags = q_vector->tx.ring[0].flags; - u32 val; - - if (!(flags & I40E_TXR_FLAGS_WB_ON_ITR)) - return; - - if (q_vector->arm_wb_state) - return; - - val = I40E_VFINT_DYN_CTLN1_WB_ON_ITR_MASK | - I40E_VFINT_DYN_CTLN1_ITR_INDX_MASK; /* set noitr */ - - wr32(&vsi->back->hw, - I40E_VFINT_DYN_CTLN1(q_vector->reg_idx), val); - q_vector->arm_wb_state = true; -} - -/** - * iavf_force_wb - Issue SW Interrupt so HW does a wb - * @vsi: the VSI we care about - * @q_vector: the vector on which to force writeback - * - **/ -void iavf_force_wb(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector) -{ - u32 val = I40E_VFINT_DYN_CTLN1_INTENA_MASK | - I40E_VFINT_DYN_CTLN1_ITR_INDX_MASK | /* set noitr */ - I40E_VFINT_DYN_CTLN1_SWINT_TRIG_MASK | - I40E_VFINT_DYN_CTLN1_SW_ITR_INDX_ENA_MASK - /* allow 00 to be written to the index */; - - wr32(&vsi->back->hw, - I40E_VFINT_DYN_CTLN1(q_vector->reg_idx), - val); -} - -static inline bool i40e_container_is_rx(struct i40e_q_vector *q_vector, - struct i40e_ring_container *rc) -{ - return &q_vector->rx == rc; -} - -static inline unsigned int i40e_itr_divisor(struct i40e_q_vector *q_vector) -{ - unsigned int divisor; - - switch (q_vector->adapter->link_speed) { - case I40E_LINK_SPEED_40GB: - divisor = I40E_ITR_ADAPTIVE_MIN_INC * 1024; - break; - case I40E_LINK_SPEED_25GB: - case I40E_LINK_SPEED_20GB: - divisor = I40E_ITR_ADAPTIVE_MIN_INC * 512; - break; - default: - case I40E_LINK_SPEED_10GB: - divisor = I40E_ITR_ADAPTIVE_MIN_INC * 256; - break; - case I40E_LINK_SPEED_1GB: - case I40E_LINK_SPEED_100MB: - divisor = I40E_ITR_ADAPTIVE_MIN_INC * 32; - break; - } - - return divisor; -} - -/** - * i40e_update_itr - update the dynamic ITR value based on statistics - * @q_vector: structure containing interrupt and ring information - * @rc: structure containing ring performance data - * - * Stores a new ITR value based on packets and byte - * counts during the last interrupt. The advantage of per interrupt - * computation is faster updates and more accurate ITR for the current - * traffic pattern. Constants in this function were computed - * based on theoretical maximum wire speed and thresholds were set based - * on testing data as well as attempting to minimize response time - * while increasing bulk throughput. - **/ -static void i40e_update_itr(struct i40e_q_vector *q_vector, - struct i40e_ring_container *rc) -{ - unsigned int avg_wire_size, packets, bytes, itr; - unsigned long next_update = jiffies; - - /* If we don't have any rings just leave ourselves set for maximum - * possible latency so we take ourselves out of the equation. - */ - if (!rc->ring || !ITR_IS_DYNAMIC(rc->ring->itr_setting)) - return; - - /* For Rx we want to push the delay up and default to low latency. - * for Tx we want to pull the delay down and default to high latency. - */ - itr = i40e_container_is_rx(q_vector, rc) ? 
- I40E_ITR_ADAPTIVE_MIN_USECS | I40E_ITR_ADAPTIVE_LATENCY : - I40E_ITR_ADAPTIVE_MAX_USECS | I40E_ITR_ADAPTIVE_LATENCY; - - /* If we didn't update within up to 1 - 2 jiffies we can assume - * that either packets are coming in so slow there hasn't been - * any work, or that there is so much work that NAPI is dealing - * with interrupt moderation and we don't need to do anything. - */ - if (time_after(next_update, rc->next_update)) - goto clear_counts; - - /* If itr_countdown is set it means we programmed an ITR within - * the last 4 interrupt cycles. This has a side effect of us - * potentially firing an early interrupt. In order to work around - * this we need to throw out any data received for a few - * interrupts following the update. - */ - if (q_vector->itr_countdown) { - itr = rc->target_itr; - goto clear_counts; - } - - packets = rc->total_packets; - bytes = rc->total_bytes; - - if (i40e_container_is_rx(q_vector, rc)) { - /* If Rx there are 1 to 4 packets and bytes are less than - * 9000 assume insufficient data to use bulk rate limiting - * approach unless Tx is already in bulk rate limiting. We - * are likely latency driven. - */ - if (packets && packets < 4 && bytes < 9000 && - (q_vector->tx.target_itr & I40E_ITR_ADAPTIVE_LATENCY)) { - itr = I40E_ITR_ADAPTIVE_LATENCY; - goto adjust_by_size; - } - } else if (packets < 4) { - /* If we have Tx and Rx ITR maxed and Tx ITR is running in - * bulk mode and we are receiving 4 or fewer packets just - * reset the ITR_ADAPTIVE_LATENCY bit for latency mode so - * that the Rx can relax. - */ - if (rc->target_itr == I40E_ITR_ADAPTIVE_MAX_USECS && - (q_vector->rx.target_itr & I40E_ITR_MASK) == - I40E_ITR_ADAPTIVE_MAX_USECS) - goto clear_counts; - } else if (packets > 32) { - /* If we have processed over 32 packets in a single interrupt - * for Tx assume we need to switch over to "bulk" mode. - */ - rc->target_itr &= ~I40E_ITR_ADAPTIVE_LATENCY; - } - - /* We have no packets to actually measure against. This means - * either one of the other queues on this vector is active or - * we are a Tx queue doing TSO with too high of an interrupt rate. - * - * Between 4 and 56 we can assume that our current interrupt delay - * is only slightly too low. As such we should increase it by a small - * fixed amount. - */ - if (packets < 56) { - itr = rc->target_itr + I40E_ITR_ADAPTIVE_MIN_INC; - if ((itr & I40E_ITR_MASK) > I40E_ITR_ADAPTIVE_MAX_USECS) { - itr &= I40E_ITR_ADAPTIVE_LATENCY; - itr += I40E_ITR_ADAPTIVE_MAX_USECS; - } - goto clear_counts; - } - - if (packets <= 256) { - itr = min(q_vector->tx.current_itr, q_vector->rx.current_itr); - itr &= I40E_ITR_MASK; - - /* Between 56 and 112 is our "goldilocks" zone where we are - * working out "just right". Just report that our current - * ITR is good for us. - */ - if (packets <= 112) - goto clear_counts; - - /* If packet count is 128 or greater we are likely looking - * at a slight overrun of the delay we want. Try halving - * our delay to see if that will cut the number of packets - * in half per interrupt. - */ - itr /= 2; - itr &= I40E_ITR_MASK; - if (itr < I40E_ITR_ADAPTIVE_MIN_USECS) - itr = I40E_ITR_ADAPTIVE_MIN_USECS; - - goto clear_counts; - } - - /* The paths below assume we are dealing with a bulk ITR since - * number of packets is greater than 256. We are just going to have - * to compute a value and try to bring the count under control, - * though for smaller packet sizes there isn't much we can do as - * NAPI polling will likely be kicking in sooner rather than later. 
- */
-	itr = I40E_ITR_ADAPTIVE_BULK;
-
-adjust_by_size:
-	/* If packet counts are 256 or greater we can assume we have a gross
-	 * overestimation of what the rate should be. Instead of trying to fine
-	 * tune it just use the formula below to try and dial in an exact value
-	 * given the current packet size of the frame.
-	 */
-	avg_wire_size = bytes / packets;
-
-	/* The following is a crude approximation of:
-	 *  wmem_default / (size + overhead) = desired_pkts_per_int
-	 *  rate / bits_per_byte / (size + ethernet overhead) = pkt_rate
-	 *  (desired_pkt_rate / pkt_rate) * usecs_per_sec = ITR value
-	 *
-	 * Assuming wmem_default is 212992 and overhead is 640 bytes per
-	 * packet, (256 skb, 64 headroom, 320 shared info), we can reduce the
-	 * formula down to
-	 *
-	 *  (170 * (size + 24)) / (size + 640) = ITR
-	 *
-	 * We first do some math on the packet size and then finally bitshift
-	 * by 8 after rounding up. We also have to account for PCIe link speed
-	 * difference as ITR scales based on this.
-	 */
-	if (avg_wire_size <= 60) {
-		/* Start at 250k ints/sec */
-		avg_wire_size = 4096;
-	} else if (avg_wire_size <= 380) {
-		/* 250K ints/sec to 60K ints/sec */
-		avg_wire_size *= 40;
-		avg_wire_size += 1696;
-	} else if (avg_wire_size <= 1084) {
-		/* 60K ints/sec to 36K ints/sec */
-		avg_wire_size *= 15;
-		avg_wire_size += 11452;
-	} else if (avg_wire_size <= 1980) {
-		/* 36K ints/sec to 30K ints/sec */
-		avg_wire_size *= 5;
-		avg_wire_size += 22420;
-	} else {
-		/* plateau at a limit of 30K ints/sec */
-		avg_wire_size = 32256;
-	}
-
-	/* If we are in low latency mode halve our delay which doubles the
-	 * rate to somewhere between 100K to 16K ints/sec
-	 */
-	if (itr & I40E_ITR_ADAPTIVE_LATENCY)
-		avg_wire_size /= 2;
-
-	/* Resultant value is 256 times larger than it needs to be. This
-	 * gives us room to adjust the value as needed to either increase
-	 * or decrease the value based on link speeds of 10G, 2.5G, 1G, etc.
-	 *
-	 * Use addition as we have already recorded the new latency flag
-	 * for the ITR value.
- */ - itr += DIV_ROUND_UP(avg_wire_size, i40e_itr_divisor(q_vector)) * - I40E_ITR_ADAPTIVE_MIN_INC; - - if ((itr & I40E_ITR_MASK) > I40E_ITR_ADAPTIVE_MAX_USECS) { - itr &= I40E_ITR_ADAPTIVE_LATENCY; - itr += I40E_ITR_ADAPTIVE_MAX_USECS; - } - -clear_counts: - /* write back value */ - rc->target_itr = itr; - - /* next update should occur within next jiffy */ - rc->next_update = next_update + 1; - - rc->total_bytes = 0; - rc->total_packets = 0; -} - -/** - * iavf_setup_tx_descriptors - Allocate the Tx descriptors - * @tx_ring: the tx ring to set up - * - * Return 0 on success, negative on error - **/ -int iavf_setup_tx_descriptors(struct i40e_ring *tx_ring) -{ - struct device *dev = tx_ring->dev; - int bi_size; - - if (!dev) - return -ENOMEM; - - /* warn if we are about to overwrite the pointer */ - WARN_ON(tx_ring->tx_bi); - bi_size = sizeof(struct i40e_tx_buffer) * tx_ring->count; - tx_ring->tx_bi = kzalloc(bi_size, GFP_KERNEL); - if (!tx_ring->tx_bi) - goto err; - - /* round up to nearest 4K */ - tx_ring->size = tx_ring->count * sizeof(struct i40e_tx_desc); - tx_ring->size = ALIGN(tx_ring->size, 4096); - tx_ring->desc = dma_alloc_coherent(dev, tx_ring->size, - &tx_ring->dma, GFP_KERNEL); - if (!tx_ring->desc) { - dev_info(dev, "Unable to allocate memory for the Tx descriptor ring, size=%d\n", - tx_ring->size); - goto err; - } - - tx_ring->next_to_use = 0; - tx_ring->next_to_clean = 0; - tx_ring->tx_stats.prev_pkt_ctr = -1; - return 0; - -err: - kfree(tx_ring->tx_bi); - tx_ring->tx_bi = NULL; - return -ENOMEM; -} - -/** - * iavf_clean_rx_ring - Free Rx buffers - * @rx_ring: ring to be cleaned - **/ -void iavf_clean_rx_ring(struct i40e_ring *rx_ring) -{ - unsigned long bi_size; - u16 i; - - /* ring already cleared, nothing to do */ - if (!rx_ring->rx_bi) - return; - - if (rx_ring->skb) { - dev_kfree_skb(rx_ring->skb); - rx_ring->skb = NULL; - } - - /* Free all the Rx ring sk_buffs */ - for (i = 0; i < rx_ring->count; i++) { - struct i40e_rx_buffer *rx_bi = &rx_ring->rx_bi[i]; - - if (!rx_bi->page) - continue; - - /* Invalidate cache lines that may have been written to by - * device so that we avoid corrupting memory. 
- */ - dma_sync_single_range_for_cpu(rx_ring->dev, - rx_bi->dma, - rx_bi->page_offset, - rx_ring->rx_buf_len, - DMA_FROM_DEVICE); - - /* free resources associated with mapping */ - dma_unmap_page_attrs(rx_ring->dev, rx_bi->dma, - i40e_rx_pg_size(rx_ring), - DMA_FROM_DEVICE, - I40E_RX_DMA_ATTR); - - __page_frag_cache_drain(rx_bi->page, rx_bi->pagecnt_bias); - - rx_bi->page = NULL; - rx_bi->page_offset = 0; - } - - bi_size = sizeof(struct i40e_rx_buffer) * rx_ring->count; - memset(rx_ring->rx_bi, 0, bi_size); - - /* Zero out the descriptor ring */ - memset(rx_ring->desc, 0, rx_ring->size); - - rx_ring->next_to_alloc = 0; - rx_ring->next_to_clean = 0; - rx_ring->next_to_use = 0; -} - -/** - * iavf_free_rx_resources - Free Rx resources - * @rx_ring: ring to clean the resources from - * - * Free all receive software resources - **/ -void iavf_free_rx_resources(struct i40e_ring *rx_ring) -{ - iavf_clean_rx_ring(rx_ring); - kfree(rx_ring->rx_bi); - rx_ring->rx_bi = NULL; - - if (rx_ring->desc) { - dma_free_coherent(rx_ring->dev, rx_ring->size, - rx_ring->desc, rx_ring->dma); - rx_ring->desc = NULL; - } -} - -/** - * iavf_setup_rx_descriptors - Allocate Rx descriptors - * @rx_ring: Rx descriptor ring (for a specific queue) to setup - * - * Returns 0 on success, negative on failure - **/ -int iavf_setup_rx_descriptors(struct i40e_ring *rx_ring) -{ - struct device *dev = rx_ring->dev; - int bi_size; - - /* warn if we are about to overwrite the pointer */ - WARN_ON(rx_ring->rx_bi); - bi_size = sizeof(struct i40e_rx_buffer) * rx_ring->count; - rx_ring->rx_bi = kzalloc(bi_size, GFP_KERNEL); - if (!rx_ring->rx_bi) - goto err; - - u64_stats_init(&rx_ring->syncp); - - /* Round up to nearest 4K */ - rx_ring->size = rx_ring->count * sizeof(union i40e_32byte_rx_desc); - rx_ring->size = ALIGN(rx_ring->size, 4096); - rx_ring->desc = dma_alloc_coherent(dev, rx_ring->size, - &rx_ring->dma, GFP_KERNEL); - - if (!rx_ring->desc) { - dev_info(dev, "Unable to allocate memory for the Rx descriptor ring, size=%d\n", - rx_ring->size); - goto err; - } - - rx_ring->next_to_alloc = 0; - rx_ring->next_to_clean = 0; - rx_ring->next_to_use = 0; - - return 0; -err: - kfree(rx_ring->rx_bi); - rx_ring->rx_bi = NULL; - return -ENOMEM; -} - -/** - * i40e_release_rx_desc - Store the new tail and head values - * @rx_ring: ring to bump - * @val: new head index - **/ -static inline void i40e_release_rx_desc(struct i40e_ring *rx_ring, u32 val) -{ - rx_ring->next_to_use = val; - - /* update next to alloc since we have filled the ring */ - rx_ring->next_to_alloc = val; - - /* Force memory writes to complete before letting h/w - * know there are new descriptors to fetch. (Only - * applicable for weak-ordered memory model archs, - * such as IA-64). - */ - wmb(); - writel(val, rx_ring->tail); -} - -/** - * i40e_rx_offset - Return expected offset into page to access data - * @rx_ring: Ring we are requesting offset of - * - * Returns the offset value for ring into the data buffer. - */ -static inline unsigned int i40e_rx_offset(struct i40e_ring *rx_ring) -{ - return ring_uses_build_skb(rx_ring) ? I40E_SKB_PAD : 0; -} - -/** - * i40e_alloc_mapped_page - recycle or make a new page - * @rx_ring: ring to use - * @bi: rx_buffer struct to modify - * - * Returns true if the page was successfully allocated or - * reused. 
- **/ -static bool i40e_alloc_mapped_page(struct i40e_ring *rx_ring, - struct i40e_rx_buffer *bi) -{ - struct page *page = bi->page; - dma_addr_t dma; - - /* since we are recycling buffers we should seldom need to alloc */ - if (likely(page)) { - rx_ring->rx_stats.page_reuse_count++; - return true; - } - - /* alloc new page for storage */ - page = dev_alloc_pages(i40e_rx_pg_order(rx_ring)); - if (unlikely(!page)) { - rx_ring->rx_stats.alloc_page_failed++; - return false; - } - - /* map page for use */ - dma = dma_map_page_attrs(rx_ring->dev, page, 0, - i40e_rx_pg_size(rx_ring), - DMA_FROM_DEVICE, - I40E_RX_DMA_ATTR); - - /* if mapping failed free memory back to system since - * there isn't much point in holding memory we can't use - */ - if (dma_mapping_error(rx_ring->dev, dma)) { - __free_pages(page, i40e_rx_pg_order(rx_ring)); - rx_ring->rx_stats.alloc_page_failed++; - return false; - } - - bi->dma = dma; - bi->page = page; - bi->page_offset = i40e_rx_offset(rx_ring); - - /* initialize pagecnt_bias to 1 representing we fully own page */ - bi->pagecnt_bias = 1; - - return true; -} - -/** - * i40e_receive_skb - Send a completed packet up the stack - * @rx_ring: rx ring in play - * @skb: packet to send up - * @vlan_tag: vlan tag for packet - **/ -static void i40e_receive_skb(struct i40e_ring *rx_ring, - struct sk_buff *skb, u16 vlan_tag) -{ - struct i40e_q_vector *q_vector = rx_ring->q_vector; - - if ((rx_ring->netdev->features & NETIF_F_HW_VLAN_CTAG_RX) && - (vlan_tag & VLAN_VID_MASK)) - __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tag); - - napi_gro_receive(&q_vector->napi, skb); -} - -/** - * iavf_alloc_rx_buffers - Replace used receive buffers - * @rx_ring: ring to place buffers on - * @cleaned_count: number of buffers to replace - * - * Returns false if all allocations were successful, true if any fail - **/ -bool iavf_alloc_rx_buffers(struct i40e_ring *rx_ring, u16 cleaned_count) -{ - u16 ntu = rx_ring->next_to_use; - union i40e_rx_desc *rx_desc; - struct i40e_rx_buffer *bi; - - /* do nothing if no valid netdev defined */ - if (!rx_ring->netdev || !cleaned_count) - return false; - - rx_desc = I40E_RX_DESC(rx_ring, ntu); - bi = &rx_ring->rx_bi[ntu]; - - do { - if (!i40e_alloc_mapped_page(rx_ring, bi)) - goto no_buffers; - - /* sync the buffer for use by the device */ - dma_sync_single_range_for_device(rx_ring->dev, bi->dma, - bi->page_offset, - rx_ring->rx_buf_len, - DMA_FROM_DEVICE); - - /* Refresh the desc even if buffer_addrs didn't change - * because each write-back erases this info. 
- */ - rx_desc->read.pkt_addr = cpu_to_le64(bi->dma + bi->page_offset); - - rx_desc++; - bi++; - ntu++; - if (unlikely(ntu == rx_ring->count)) { - rx_desc = I40E_RX_DESC(rx_ring, 0); - bi = rx_ring->rx_bi; - ntu = 0; - } - - /* clear the status bits for the next_to_use descriptor */ - rx_desc->wb.qword1.status_error_len = 0; - - cleaned_count--; - } while (cleaned_count); - - if (rx_ring->next_to_use != ntu) - i40e_release_rx_desc(rx_ring, ntu); - - return false; - -no_buffers: - if (rx_ring->next_to_use != ntu) - i40e_release_rx_desc(rx_ring, ntu); - - /* make sure to come back via polling to try again after - * allocation failure - */ - return true; -} - -/** - * i40e_rx_checksum - Indicate in skb if hw indicated a good cksum - * @vsi: the VSI we care about - * @skb: skb currently being received and modified - * @rx_desc: the receive descriptor - **/ -static inline void i40e_rx_checksum(struct i40e_vsi *vsi, - struct sk_buff *skb, - union i40e_rx_desc *rx_desc) -{ - struct i40e_rx_ptype_decoded decoded; - u32 rx_error, rx_status; - bool ipv4, ipv6; - u8 ptype; - u64 qword; - - qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len); - ptype = (qword & I40E_RXD_QW1_PTYPE_MASK) >> I40E_RXD_QW1_PTYPE_SHIFT; - rx_error = (qword & I40E_RXD_QW1_ERROR_MASK) >> - I40E_RXD_QW1_ERROR_SHIFT; - rx_status = (qword & I40E_RXD_QW1_STATUS_MASK) >> - I40E_RXD_QW1_STATUS_SHIFT; - decoded = decode_rx_desc_ptype(ptype); - - skb->ip_summed = CHECKSUM_NONE; - - skb_checksum_none_assert(skb); - - /* Rx csum enabled and ip headers found? */ - if (!(vsi->netdev->features & NETIF_F_RXCSUM)) - return; - - /* did the hardware decode the packet and checksum? */ - if (!(rx_status & BIT(I40E_RX_DESC_STATUS_L3L4P_SHIFT))) - return; - - /* both known and outer_ip must be set for the below code to work */ - if (!(decoded.known && decoded.outer_ip)) - return; - - ipv4 = (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP) && - (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV4); - ipv6 = (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP) && - (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6); - - if (ipv4 && - (rx_error & (BIT(I40E_RX_DESC_ERROR_IPE_SHIFT) | - BIT(I40E_RX_DESC_ERROR_EIPE_SHIFT)))) - goto checksum_fail; - - /* likely incorrect csum if alternate IP extension headers found */ - if (ipv6 && - rx_status & BIT(I40E_RX_DESC_STATUS_IPV6EXADD_SHIFT)) - /* don't increment checksum err here, non-fatal err */ - return; - - /* there was some L4 error, count error and punt packet to the stack */ - if (rx_error & BIT(I40E_RX_DESC_ERROR_L4E_SHIFT)) - goto checksum_fail; - - /* handle packets that were not able to be checksummed due - * to arrival speed, in this case the stack can compute - * the csum. 
- */
-	if (rx_error & BIT(I40E_RX_DESC_ERROR_PPRS_SHIFT))
-		return;
-
-	/* Only report checksum unnecessary for TCP, UDP, or SCTP */
-	switch (decoded.inner_prot) {
-	case I40E_RX_PTYPE_INNER_PROT_TCP:
-	case I40E_RX_PTYPE_INNER_PROT_UDP:
-	case I40E_RX_PTYPE_INNER_PROT_SCTP:
-		skb->ip_summed = CHECKSUM_UNNECESSARY;
-		/* fall through */
-	default:
-		break;
-	}
-
-	return;
-
-checksum_fail:
-	vsi->back->hw_csum_rx_error++;
-}
-
-/**
- * i40e_ptype_to_htype - get a hash type
- * @ptype: the ptype value from the descriptor
- *
- * Returns a hash type to be used by skb_set_hash
- **/
-static inline int i40e_ptype_to_htype(u8 ptype)
-{
-	struct i40e_rx_ptype_decoded decoded = decode_rx_desc_ptype(ptype);
-
-	if (!decoded.known)
-		return PKT_HASH_TYPE_NONE;
-
-	if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
-	    decoded.payload_layer == I40E_RX_PTYPE_PAYLOAD_LAYER_PAY4)
-		return PKT_HASH_TYPE_L4;
-	else if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
-		 decoded.payload_layer == I40E_RX_PTYPE_PAYLOAD_LAYER_PAY3)
-		return PKT_HASH_TYPE_L3;
-	else
-		return PKT_HASH_TYPE_L2;
-}
-
-/**
- * i40e_rx_hash - set the hash value in the skb
- * @ring: descriptor ring
- * @rx_desc: specific descriptor
- * @skb: skb currently being received and modified
- * @rx_ptype: Rx packet type
- **/
-static inline void i40e_rx_hash(struct i40e_ring *ring,
-				union i40e_rx_desc *rx_desc,
-				struct sk_buff *skb,
-				u8 rx_ptype)
-{
-	u32 hash;
-	const __le64 rss_mask =
-		cpu_to_le64((u64)I40E_RX_DESC_FLTSTAT_RSS_HASH <<
-			    I40E_RX_DESC_STATUS_FLTSTAT_SHIFT);
-
-	if (!(ring->netdev->features & NETIF_F_RXHASH))
-		return;
-
-	if ((rx_desc->wb.qword1.status_error_len & rss_mask) == rss_mask) {
-		hash = le32_to_cpu(rx_desc->wb.qword0.hi_dword.rss);
-		skb_set_hash(skb, hash, i40e_ptype_to_htype(rx_ptype));
-	}
-}
-
-/**
- * iavf_process_skb_fields - Populate skb header fields from Rx descriptor
- * @rx_ring: rx descriptor ring packet is being transacted on
- * @rx_desc: pointer to the EOP Rx descriptor
- * @skb: pointer to current skb being populated
- * @rx_ptype: the packet type decoded by hardware
- *
- * This function checks the ring, descriptor, and packet information in
- * order to populate the hash, checksum, VLAN, protocol, and
- * other fields within the skb.
- **/
-static inline
-void iavf_process_skb_fields(struct i40e_ring *rx_ring,
-			     union i40e_rx_desc *rx_desc, struct sk_buff *skb,
-			     u8 rx_ptype)
-{
-	i40e_rx_hash(rx_ring, rx_desc, skb, rx_ptype);
-
-	i40e_rx_checksum(rx_ring->vsi, skb, rx_desc);
-
-	skb_record_rx_queue(skb, rx_ring->queue_index);
-
-	/* modifies the skb - consumes the enet header */
-	skb->protocol = eth_type_trans(skb, rx_ring->netdev);
-}
-
-/**
- * i40e_cleanup_headers - Correct empty headers
- * @rx_ring: rx descriptor ring packet is being transacted on
- * @skb: pointer to current skb being fixed
- *
- * Also address the case where we are pulling data in on pages only
- * and as such no data is present in the skb header.
- *
- * In addition if skb is not at least 60 bytes we need to pad it so that
- * it is large enough to qualify as a valid Ethernet frame.
- *
- * Returns true if an error was encountered and skb was freed.
- **/ -static bool i40e_cleanup_headers(struct i40e_ring *rx_ring, struct sk_buff *skb) -{ - /* if eth_skb_pad returns an error the skb was freed */ - if (eth_skb_pad(skb)) - return true; - - return false; -} - -/** - * i40e_reuse_rx_page - page flip buffer and store it back on the ring - * @rx_ring: rx descriptor ring to store buffers on - * @old_buff: donor buffer to have page reused - * - * Synchronizes page for reuse by the adapter - **/ -static void i40e_reuse_rx_page(struct i40e_ring *rx_ring, - struct i40e_rx_buffer *old_buff) -{ - struct i40e_rx_buffer *new_buff; - u16 nta = rx_ring->next_to_alloc; - - new_buff = &rx_ring->rx_bi[nta]; - - /* update, and store next to alloc */ - nta++; - rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0; - - /* transfer page from old buffer to new buffer */ - new_buff->dma = old_buff->dma; - new_buff->page = old_buff->page; - new_buff->page_offset = old_buff->page_offset; - new_buff->pagecnt_bias = old_buff->pagecnt_bias; -} - -/** - * i40e_page_is_reusable - check if any reuse is possible - * @page: page struct to check - * - * A page is not reusable if it was allocated under low memory - * conditions, or it's not in the same NUMA node as this CPU. - */ -static inline bool i40e_page_is_reusable(struct page *page) -{ - return (page_to_nid(page) == numa_mem_id()) && - !page_is_pfmemalloc(page); -} - -/** - * i40e_can_reuse_rx_page - Determine if this page can be reused by - * the adapter for another receive - * - * @rx_buffer: buffer containing the page - * - * If page is reusable, rx_buffer->page_offset is adjusted to point to - * an unused region in the page. - * - * For small pages, @truesize will be a constant value, half the size - * of the memory at page. We'll attempt to alternate between high and - * low halves of the page, with one half ready for use by the hardware - * and the other half being consumed by the stack. We use the page - * ref count to determine whether the stack has finished consuming the - * portion of this page that was passed up with a previous packet. If - * the page ref count is >1, we'll assume the "other" half page is - * still busy, and this page cannot be reused. - * - * For larger pages, @truesize will be the actual space used by the - * received packet (adjusted upward to an even multiple of the cache - * line size). This will advance through the page by the amount - * actually consumed by the received packets while there is still - * space for a buffer. Each region of larger pages will be used at - * most once, after which the page will not be reused. - * - * In either case, if the page is reusable its refcount is increased. - **/ -static bool i40e_can_reuse_rx_page(struct i40e_rx_buffer *rx_buffer) -{ - unsigned int pagecnt_bias = rx_buffer->pagecnt_bias; - struct page *page = rx_buffer->page; - - /* Is any reuse possible? */ - if (unlikely(!i40e_page_is_reusable(page))) - return false; - -#if (PAGE_SIZE < 8192) - /* if we are only owner of page we can reuse it */ - if (unlikely((page_count(page) - pagecnt_bias) > 1)) - return false; -#else -#define I40E_LAST_OFFSET \ - (SKB_WITH_OVERHEAD(PAGE_SIZE) - I40E_RXBUFFER_2048) - if (rx_buffer->page_offset > I40E_LAST_OFFSET) - return false; -#endif - - /* If we have drained the page fragment pool we need to update - * the pagecnt_bias and page count so that we fully restock the - * number of references the driver holds. 
- */ - if (unlikely(!pagecnt_bias)) { - page_ref_add(page, USHRT_MAX); - rx_buffer->pagecnt_bias = USHRT_MAX; - } - - return true; -} - -/** - * i40e_add_rx_frag - Add contents of Rx buffer to sk_buff - * @rx_ring: rx descriptor ring to transact packets on - * @rx_buffer: buffer containing page to add - * @skb: sk_buff to place the data into - * @size: packet length from rx_desc - * - * This function will add the data contained in rx_buffer->page to the skb. - * It will just attach the page as a frag to the skb. - * - * The function will then update the page offset. - **/ -static void i40e_add_rx_frag(struct i40e_ring *rx_ring, - struct i40e_rx_buffer *rx_buffer, - struct sk_buff *skb, - unsigned int size) -{ -#if (PAGE_SIZE < 8192) - unsigned int truesize = i40e_rx_pg_size(rx_ring) / 2; -#else - unsigned int truesize = SKB_DATA_ALIGN(size + i40e_rx_offset(rx_ring)); -#endif - - skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_buffer->page, - rx_buffer->page_offset, size, truesize); - - /* page is being used so we must update the page offset */ -#if (PAGE_SIZE < 8192) - rx_buffer->page_offset ^= truesize; -#else - rx_buffer->page_offset += truesize; -#endif -} - -/** - * i40e_get_rx_buffer - Fetch Rx buffer and synchronize data for use - * @rx_ring: rx descriptor ring to transact packets on - * @size: size of buffer to add to skb - * - * This function will pull an Rx buffer from the ring and synchronize it - * for use by the CPU. - */ -static struct i40e_rx_buffer *i40e_get_rx_buffer(struct i40e_ring *rx_ring, - const unsigned int size) -{ - struct i40e_rx_buffer *rx_buffer; - - rx_buffer = &rx_ring->rx_bi[rx_ring->next_to_clean]; - prefetchw(rx_buffer->page); - - /* we are reusing so sync this buffer for CPU use */ - dma_sync_single_range_for_cpu(rx_ring->dev, - rx_buffer->dma, - rx_buffer->page_offset, - size, - DMA_FROM_DEVICE); - - /* We have pulled a buffer for use, so decrement pagecnt_bias */ - rx_buffer->pagecnt_bias--; - - return rx_buffer; -} - -/** - * i40e_construct_skb - Allocate skb and populate it - * @rx_ring: rx descriptor ring to transact packets on - * @rx_buffer: rx buffer to pull data from - * @size: size of buffer to add to skb - * - * This function allocates an skb. It then populates it with the page - * data from the current receive descriptor, taking care to set up the - * skb correctly. 
- */ -static struct sk_buff *i40e_construct_skb(struct i40e_ring *rx_ring, - struct i40e_rx_buffer *rx_buffer, - unsigned int size) -{ - void *va = page_address(rx_buffer->page) + rx_buffer->page_offset; -#if (PAGE_SIZE < 8192) - unsigned int truesize = i40e_rx_pg_size(rx_ring) / 2; -#else - unsigned int truesize = SKB_DATA_ALIGN(size); -#endif - unsigned int headlen; - struct sk_buff *skb; - - /* prefetch first cache line of first page */ - prefetch(va); -#if L1_CACHE_BYTES < 128 - prefetch(va + L1_CACHE_BYTES); -#endif - - /* allocate a skb to store the frags */ - skb = __napi_alloc_skb(&rx_ring->q_vector->napi, - I40E_RX_HDR_SIZE, - GFP_ATOMIC | __GFP_NOWARN); - if (unlikely(!skb)) - return NULL; - - /* Determine available headroom for copy */ - headlen = size; - if (headlen > I40E_RX_HDR_SIZE) - headlen = eth_get_headlen(va, I40E_RX_HDR_SIZE); - - /* align pull length to size of long to optimize memcpy performance */ - memcpy(__skb_put(skb, headlen), va, ALIGN(headlen, sizeof(long))); - - /* update all of the pointers */ - size -= headlen; - if (size) { - skb_add_rx_frag(skb, 0, rx_buffer->page, - rx_buffer->page_offset + headlen, - size, truesize); - - /* buffer is used by skb, update page_offset */ -#if (PAGE_SIZE < 8192) - rx_buffer->page_offset ^= truesize; -#else - rx_buffer->page_offset += truesize; -#endif - } else { - /* buffer is unused, reset bias back to rx_buffer */ - rx_buffer->pagecnt_bias++; - } - - return skb; -} - -/** - * i40e_build_skb - Build skb around an existing buffer - * @rx_ring: Rx descriptor ring to transact packets on - * @rx_buffer: Rx buffer to pull data from - * @size: size of buffer to add to skb - * - * This function builds an skb around an existing Rx buffer, taking care - * to set up the skb correctly and avoid any memcpy overhead. - */ -static struct sk_buff *i40e_build_skb(struct i40e_ring *rx_ring, - struct i40e_rx_buffer *rx_buffer, - unsigned int size) -{ - void *va = page_address(rx_buffer->page) + rx_buffer->page_offset; -#if (PAGE_SIZE < 8192) - unsigned int truesize = i40e_rx_pg_size(rx_ring) / 2; -#else - unsigned int truesize = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) + - SKB_DATA_ALIGN(I40E_SKB_PAD + size); -#endif - struct sk_buff *skb; - - /* prefetch first cache line of first page */ - prefetch(va); -#if L1_CACHE_BYTES < 128 - prefetch(va + L1_CACHE_BYTES); -#endif - /* build an skb around the page buffer */ - skb = build_skb(va - I40E_SKB_PAD, truesize); - if (unlikely(!skb)) - return NULL; - - /* update pointers within the skb to store the data */ - skb_reserve(skb, I40E_SKB_PAD); - __skb_put(skb, size); - - /* buffer is used by skb, update page_offset */ -#if (PAGE_SIZE < 8192) - rx_buffer->page_offset ^= truesize; -#else - rx_buffer->page_offset += truesize; -#endif - - return skb; -} - -/** - * i40e_put_rx_buffer - Clean up used buffer and either recycle or free - * @rx_ring: rx descriptor ring to transact packets on - * @rx_buffer: rx buffer to pull data from - * - * This function will clean up the contents of the rx_buffer. It will - * either recycle the buffer or unmap it and free the associated resources. 
- */ -static void i40e_put_rx_buffer(struct i40e_ring *rx_ring, - struct i40e_rx_buffer *rx_buffer) -{ - if (i40e_can_reuse_rx_page(rx_buffer)) { - /* hand second half of page back to the ring */ - i40e_reuse_rx_page(rx_ring, rx_buffer); - rx_ring->rx_stats.page_reuse_count++; - } else { - /* we are not reusing the buffer so unmap it */ - dma_unmap_page_attrs(rx_ring->dev, rx_buffer->dma, - i40e_rx_pg_size(rx_ring), - DMA_FROM_DEVICE, I40E_RX_DMA_ATTR); - __page_frag_cache_drain(rx_buffer->page, - rx_buffer->pagecnt_bias); - } - - /* clear contents of buffer_info */ - rx_buffer->page = NULL; -} - -/** - * i40e_is_non_eop - process handling of non-EOP buffers - * @rx_ring: Rx ring being processed - * @rx_desc: Rx descriptor for current buffer - * @skb: Current socket buffer containing buffer in progress - * - * This function updates next to clean. If the buffer is an EOP buffer - * this function exits returning false, otherwise it will place the - * sk_buff in the next buffer to be chained and return true indicating - * that this is in fact a non-EOP buffer. - **/ -static bool i40e_is_non_eop(struct i40e_ring *rx_ring, - union i40e_rx_desc *rx_desc, - struct sk_buff *skb) -{ - u32 ntc = rx_ring->next_to_clean + 1; - - /* fetch, update, and store next to clean */ - ntc = (ntc < rx_ring->count) ? ntc : 0; - rx_ring->next_to_clean = ntc; - - prefetch(I40E_RX_DESC(rx_ring, ntc)); - - /* if we are the last buffer then there is nothing else to do */ -#define I40E_RXD_EOF BIT(I40E_RX_DESC_STATUS_EOF_SHIFT) - if (likely(i40e_test_staterr(rx_desc, I40E_RXD_EOF))) - return false; - - rx_ring->rx_stats.non_eop_descs++; - - return true; -} - -/** - * i40e_clean_rx_irq - Clean completed descriptors from Rx ring - bounce buf - * @rx_ring: rx descriptor ring to transact packets on - * @budget: Total limit on number of packets to process - * - * This function provides a "bounce buffer" approach to Rx interrupt - * processing. The advantage to this is that on systems that have - * expensive overhead for IOMMU access this provides a means of avoiding - * it by maintaining the mapping of the page to the system. - * - * Returns amount of work completed - **/ -static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget) -{ - unsigned int total_rx_bytes = 0, total_rx_packets = 0; - struct sk_buff *skb = rx_ring->skb; - u16 cleaned_count = I40E_DESC_UNUSED(rx_ring); - bool failure = false; - - while (likely(total_rx_packets < (unsigned int)budget)) { - struct i40e_rx_buffer *rx_buffer; - union i40e_rx_desc *rx_desc; - unsigned int size; - u16 vlan_tag; - u8 rx_ptype; - u64 qword; - - /* return some buffers to hardware, one at a time is too slow */ - if (cleaned_count >= I40E_RX_BUFFER_WRITE) { - failure = failure || - iavf_alloc_rx_buffers(rx_ring, cleaned_count); - cleaned_count = 0; - } - - rx_desc = I40E_RX_DESC(rx_ring, rx_ring->next_to_clean); - - /* status_error_len will always be zero for unused descriptors - * because it's cleared in cleanup, and overlaps with hdr_addr - * which is always zero because packet split isn't used, if the - * hardware wrote DD then the length will be non-zero - */ - qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len); - - /* This memory barrier is needed to keep us from reading - * any other fields out of the rx_desc until we have - * verified the descriptor has been written back. 
- */
-		dma_rmb();
-
-		size = (qword & I40E_RXD_QW1_LENGTH_PBUF_MASK) >>
-		       I40E_RXD_QW1_LENGTH_PBUF_SHIFT;
-		if (!size)
-			break;
-
-		i40e_trace(clean_rx_irq, rx_ring, rx_desc, skb);
-		rx_buffer = i40e_get_rx_buffer(rx_ring, size);
-
-		/* retrieve a buffer from the ring */
-		if (skb)
-			i40e_add_rx_frag(rx_ring, rx_buffer, skb, size);
-		else if (ring_uses_build_skb(rx_ring))
-			skb = i40e_build_skb(rx_ring, rx_buffer, size);
-		else
-			skb = i40e_construct_skb(rx_ring, rx_buffer, size);
-
-		/* exit if we failed to retrieve a buffer */
-		if (!skb) {
-			rx_ring->rx_stats.alloc_buff_failed++;
-			rx_buffer->pagecnt_bias++;
-			break;
-		}
-
-		i40e_put_rx_buffer(rx_ring, rx_buffer);
-		cleaned_count++;
-
-		if (i40e_is_non_eop(rx_ring, rx_desc, skb))
-			continue;
-
-		/* ERR_MASK will only have valid bits if EOP set, and
-		 * what we are doing here is actually checking
-		 * I40E_RX_DESC_ERROR_RXE_SHIFT, since it is the zeroth bit in
-		 * the error field
-		 */
-		if (unlikely(i40e_test_staterr(rx_desc, BIT(I40E_RXD_QW1_ERROR_SHIFT)))) {
-			dev_kfree_skb_any(skb);
-			skb = NULL;
-			continue;
-		}
-
-		if (i40e_cleanup_headers(rx_ring, skb)) {
-			skb = NULL;
-			continue;
-		}
-
-		/* probably a little skewed due to removing CRC */
-		total_rx_bytes += skb->len;
-
-		qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
-		rx_ptype = (qword & I40E_RXD_QW1_PTYPE_MASK) >>
-			   I40E_RXD_QW1_PTYPE_SHIFT;
-
-		/* populate checksum, VLAN, and protocol */
-		iavf_process_skb_fields(rx_ring, rx_desc, skb, rx_ptype);
-
-		vlan_tag = (qword & BIT(I40E_RX_DESC_STATUS_L2TAG1P_SHIFT)) ?
-			   le16_to_cpu(rx_desc->wb.qword0.lo_dword.l2tag1) : 0;
-
-		i40e_trace(clean_rx_irq_rx, rx_ring, rx_desc, skb);
-		i40e_receive_skb(rx_ring, skb, vlan_tag);
-		skb = NULL;
-
-		/* update budget accounting */
-		total_rx_packets++;
-	}
-
-	rx_ring->skb = skb;
-
-	u64_stats_update_begin(&rx_ring->syncp);
-	rx_ring->stats.packets += total_rx_packets;
-	rx_ring->stats.bytes += total_rx_bytes;
-	u64_stats_update_end(&rx_ring->syncp);
-	rx_ring->q_vector->rx.total_packets += total_rx_packets;
-	rx_ring->q_vector->rx.total_bytes += total_rx_bytes;
-
-	/* guarantee a trip back through this routine if there was a failure */
-	return failure ? budget : (int)total_rx_packets;
-}
-
-static inline u32 i40e_buildreg_itr(const int type, u16 itr)
-{
-	u32 val;
-
-	/* We don't bother with setting the CLEARPBA bit as the data sheet
-	 * points out doing so is "meaningless since it was already
-	 * auto-cleared". The auto-clearing happens when the interrupt is
-	 * asserted.
-	 *
-	 * Hardware errata 28 also indicates that writing to a
-	 * xxINT_DYN_CTLx CSR with INTENA_MSK (bit 31) set to 0 will clear
-	 * an event in the PBA anyway so we need to rely on the automask
-	 * to hold pending events for us until the interrupt is re-enabled
-	 *
-	 * The itr value is reported in microseconds, and the register
-	 * value is recorded in 2 microsecond units. For this reason we
-	 * only need to shift by the interval shift - 1 instead of the
-	 * full value.
-	 */
-	itr &= I40E_ITR_MASK;
-
-	val = I40E_VFINT_DYN_CTLN1_INTENA_MASK |
-	      (type << I40E_VFINT_DYN_CTLN1_ITR_INDX_SHIFT) |
-	      (itr << (I40E_VFINT_DYN_CTLN1_INTERVAL_SHIFT - 1));
-
-	return val;
-}
-
-/* a small macro to shorten up some long lines */
-#define INTREG I40E_VFINT_DYN_CTLN1
-
-/* The act of updating the ITR will cause it to immediately trigger. In order
- * to prevent this from throwing off adaptive update statistics we defer the
- * update so that it can only happen so often.
So after either Tx or Rx are - * updated we make the adaptive scheme wait until either the ITR completely - * expires via the next_update expiration or we have been through at least - * 3 interrupts. - */ -#define ITR_COUNTDOWN_START 3 - -/** - * i40e_update_enable_itr - Update itr and re-enable MSIX interrupt - * @vsi: the VSI we care about - * @q_vector: q_vector for which itr is being updated and interrupt enabled - * - **/ -static inline void i40e_update_enable_itr(struct i40e_vsi *vsi, - struct i40e_q_vector *q_vector) -{ - struct i40e_hw *hw = &vsi->back->hw; - u32 intval; - - /* These will do nothing if dynamic updates are not enabled */ - i40e_update_itr(q_vector, &q_vector->tx); - i40e_update_itr(q_vector, &q_vector->rx); - - /* This block of logic allows us to get away with only updating - * one ITR value with each interrupt. The idea is to perform a - * pseudo-lazy update with the following criteria. - * - * 1. Rx is given higher priority than Tx if both are in same state - * 2. If we must reduce an ITR that is given highest priority. - * 3. We then give priority to increasing ITR based on amount. - */ - if (q_vector->rx.target_itr < q_vector->rx.current_itr) { - /* Rx ITR needs to be reduced, this is highest priority */ - intval = i40e_buildreg_itr(I40E_RX_ITR, - q_vector->rx.target_itr); - q_vector->rx.current_itr = q_vector->rx.target_itr; - q_vector->itr_countdown = ITR_COUNTDOWN_START; - } else if ((q_vector->tx.target_itr < q_vector->tx.current_itr) || - ((q_vector->rx.target_itr - q_vector->rx.current_itr) < - (q_vector->tx.target_itr - q_vector->tx.current_itr))) { - /* Tx ITR needs to be reduced, this is second priority - * Tx ITR needs to be increased more than Rx, fourth priority - */ - intval = i40e_buildreg_itr(I40E_TX_ITR, - q_vector->tx.target_itr); - q_vector->tx.current_itr = q_vector->tx.target_itr; - q_vector->itr_countdown = ITR_COUNTDOWN_START; - } else if (q_vector->rx.current_itr != q_vector->rx.target_itr) { - /* Rx ITR needs to be increased, third priority */ - intval = i40e_buildreg_itr(I40E_RX_ITR, - q_vector->rx.target_itr); - q_vector->rx.current_itr = q_vector->rx.target_itr; - q_vector->itr_countdown = ITR_COUNTDOWN_START; - } else { - /* No ITR update, lowest priority */ - intval = i40e_buildreg_itr(I40E_ITR_NONE, 0); - if (q_vector->itr_countdown) - q_vector->itr_countdown--; - } - - if (!test_bit(__I40E_VSI_DOWN, vsi->state)) - wr32(hw, INTREG(q_vector->reg_idx), intval); -} - -/** - * iavf_napi_poll - NAPI polling Rx/Tx cleanup routine - * @napi: napi struct with our devices info in it - * @budget: amount of work driver is allowed to do this pass, in packets - * - * This function will clean all queues associated with a q_vector. - * - * Returns the amount of work done - **/ -int iavf_napi_poll(struct napi_struct *napi, int budget) -{ - struct i40e_q_vector *q_vector = - container_of(napi, struct i40e_q_vector, napi); - struct i40e_vsi *vsi = q_vector->vsi; - struct i40e_ring *ring; - bool clean_complete = true; - bool arm_wb = false; - int budget_per_ring; - int work_done = 0; - - if (test_bit(__I40E_VSI_DOWN, vsi->state)) { - napi_complete(napi); - return 0; - } - - /* Since the actual Tx work is minimal, we can give the Tx a larger - * budget and be more aggressive about cleaning up the Tx descriptors. 
- */ - i40e_for_each_ring(ring, q_vector->tx) { - if (!i40e_clean_tx_irq(vsi, ring, budget)) { - clean_complete = false; - continue; - } - arm_wb |= ring->arm_wb; - ring->arm_wb = false; - } - - /* Handle case where we are called by netpoll with a budget of 0 */ - if (budget <= 0) - goto tx_only; - - /* We attempt to distribute budget to each Rx queue fairly, but don't - * allow the budget to go below 1 because that would exit polling early. - */ - budget_per_ring = max(budget/q_vector->num_ringpairs, 1); - - i40e_for_each_ring(ring, q_vector->rx) { - int cleaned = i40e_clean_rx_irq(ring, budget_per_ring); - - work_done += cleaned; - /* if we clean as many as budgeted, we must not be done */ - if (cleaned >= budget_per_ring) - clean_complete = false; - } - - /* If work not completed, return budget and polling will return */ - if (!clean_complete) { - int cpu_id = smp_processor_id(); - - /* It is possible that the interrupt affinity has changed but, - * if the cpu is pegged at 100%, polling will never exit while - * traffic continues and the interrupt will be stuck on this - * cpu. We check to make sure affinity is correct before we - * continue to poll, otherwise we must stop polling so the - * interrupt can move to the correct cpu. - */ - if (!cpumask_test_cpu(cpu_id, &q_vector->affinity_mask)) { - /* Tell napi that we are done polling */ - napi_complete_done(napi, work_done); - - /* Force an interrupt */ - iavf_force_wb(vsi, q_vector); - - /* Return budget-1 so that polling stops */ - return budget - 1; - } -tx_only: - if (arm_wb) { - q_vector->tx.ring[0].tx_stats.tx_force_wb++; - i40e_enable_wb_on_itr(vsi, q_vector); - } - return budget; - } - - if (vsi->back->flags & I40E_TXR_FLAGS_WB_ON_ITR) - q_vector->arm_wb_state = false; - - /* Work is done so exit the polling mode and re-enable the interrupt */ - napi_complete_done(napi, work_done); - - i40e_update_enable_itr(vsi, q_vector); - - return min(work_done, budget - 1); -} - -/** - * iavf_tx_prepare_vlan_flags - prepare generic TX VLAN tagging flags for HW - * @skb: send buffer - * @tx_ring: ring to send buffer on - * @flags: the tx flags to be set - * - * Checks the skb and set up correspondingly several generic transmit flags - * related to VLAN tagging for the HW, such as VLAN, DCB, etc. - * - * Returns error code indicate the frame should be dropped upon error and the - * otherwise returns 0 to indicate the flags has been set properly. - **/ -static inline int iavf_tx_prepare_vlan_flags(struct sk_buff *skb, - struct i40e_ring *tx_ring, - u32 *flags) -{ - __be16 protocol = skb->protocol; - u32 tx_flags = 0; - - if (protocol == htons(ETH_P_8021Q) && - !(tx_ring->netdev->features & NETIF_F_HW_VLAN_CTAG_TX)) { - /* When HW VLAN acceleration is turned off by the user the - * stack sets the protocol to 8021q so that the driver - * can take any steps required to support the SW only - * VLAN handling. In our case the driver doesn't need - * to take any further steps so just set the protocol - * to the encapsulated ethertype. 
- */ - skb->protocol = vlan_get_protocol(skb); - goto out; - } - - /* if we have a HW VLAN tag being added, default to the HW one */ - if (skb_vlan_tag_present(skb)) { - tx_flags |= skb_vlan_tag_get(skb) << I40E_TX_FLAGS_VLAN_SHIFT; - tx_flags |= I40E_TX_FLAGS_HW_VLAN; - /* else if it is a SW VLAN, check the next protocol and store the tag */ - } else if (protocol == htons(ETH_P_8021Q)) { - struct vlan_hdr *vhdr, _vhdr; - - vhdr = skb_header_pointer(skb, ETH_HLEN, sizeof(_vhdr), &_vhdr); - if (!vhdr) - return -EINVAL; - - protocol = vhdr->h_vlan_encapsulated_proto; - tx_flags |= ntohs(vhdr->h_vlan_TCI) << I40E_TX_FLAGS_VLAN_SHIFT; - tx_flags |= I40E_TX_FLAGS_SW_VLAN; - } - -out: - *flags = tx_flags; - return 0; -} - -/** - * i40e_tso - set up the tso context descriptor - * @first: pointer to first Tx buffer for xmit - * @hdr_len: ptr to the size of the packet header - * @cd_type_cmd_tso_mss: Quad Word 1 - * - * Returns 0 if no TSO can happen, 1 if tso is going, or error - **/ -static int i40e_tso(struct i40e_tx_buffer *first, u8 *hdr_len, - u64 *cd_type_cmd_tso_mss) -{ - struct sk_buff *skb = first->skb; - u64 cd_cmd, cd_tso_len, cd_mss; - union { - struct iphdr *v4; - struct ipv6hdr *v6; - unsigned char *hdr; - } ip; - union { - struct tcphdr *tcp; - struct udphdr *udp; - unsigned char *hdr; - } l4; - u32 paylen, l4_offset; - u16 gso_segs, gso_size; - int err; - - if (skb->ip_summed != CHECKSUM_PARTIAL) - return 0; - - if (!skb_is_gso(skb)) - return 0; - - err = skb_cow_head(skb, 0); - if (err < 0) - return err; - - ip.hdr = skb_network_header(skb); - l4.hdr = skb_transport_header(skb); - - /* initialize outer IP header fields */ - if (ip.v4->version == 4) { - ip.v4->tot_len = 0; - ip.v4->check = 0; - } else { - ip.v6->payload_len = 0; - } - - if (skb_shinfo(skb)->gso_type & (SKB_GSO_GRE | - SKB_GSO_GRE_CSUM | - SKB_GSO_IPXIP4 | - SKB_GSO_IPXIP6 | - SKB_GSO_UDP_TUNNEL | - SKB_GSO_UDP_TUNNEL_CSUM)) { - if (!(skb_shinfo(skb)->gso_type & SKB_GSO_PARTIAL) && - (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM)) { - l4.udp->len = 0; - - /* determine offset of outer transport header */ - l4_offset = l4.hdr - skb->data; - - /* remove payload length from outer checksum */ - paylen = skb->len - l4_offset; - csum_replace_by_diff(&l4.udp->check, - (__force __wsum)htonl(paylen)); - } - - /* reset pointers to inner headers */ - ip.hdr = skb_inner_network_header(skb); - l4.hdr = skb_inner_transport_header(skb); - - /* initialize inner IP header fields */ - if (ip.v4->version == 4) { - ip.v4->tot_len = 0; - ip.v4->check = 0; - } else { - ip.v6->payload_len = 0; - } - } - - /* determine offset of inner transport header */ - l4_offset = l4.hdr - skb->data; - - /* remove payload length from inner checksum */ - paylen = skb->len - l4_offset; - csum_replace_by_diff(&l4.tcp->check, (__force __wsum)htonl(paylen)); - - /* compute length of segmentation header */ - *hdr_len = (l4.tcp->doff * 4) + l4_offset; - - /* pull values out of skb_shinfo */ - gso_size = skb_shinfo(skb)->gso_size; - gso_segs = skb_shinfo(skb)->gso_segs; - - /* update GSO size and bytecount with header size */ - first->gso_segs = gso_segs; - first->bytecount += (first->gso_segs - 1) * *hdr_len; - - /* find the field values */ - cd_cmd = I40E_TX_CTX_DESC_TSO; - cd_tso_len = skb->len - *hdr_len; - cd_mss = gso_size; - *cd_type_cmd_tso_mss |= (cd_cmd << I40E_TXD_CTX_QW1_CMD_SHIFT) | - (cd_tso_len << I40E_TXD_CTX_QW1_TSO_LEN_SHIFT) | - (cd_mss << I40E_TXD_CTX_QW1_MSS_SHIFT); - return 1; -} - -/** - * i40e_tx_enable_csum - Enable Tx checksum 
offloads - * @skb: send buffer - * @tx_flags: pointer to Tx flags currently set - * @td_cmd: Tx descriptor command bits to set - * @td_offset: Tx descriptor header offsets to set - * @tx_ring: Tx descriptor ring - * @cd_tunneling: ptr to context desc bits - **/ -static int i40e_tx_enable_csum(struct sk_buff *skb, u32 *tx_flags, - u32 *td_cmd, u32 *td_offset, - struct i40e_ring *tx_ring, - u32 *cd_tunneling) -{ - union { - struct iphdr *v4; - struct ipv6hdr *v6; - unsigned char *hdr; - } ip; - union { - struct tcphdr *tcp; - struct udphdr *udp; - unsigned char *hdr; - } l4; - unsigned char *exthdr; - u32 offset, cmd = 0; - __be16 frag_off; - u8 l4_proto = 0; - - if (skb->ip_summed != CHECKSUM_PARTIAL) - return 0; - - ip.hdr = skb_network_header(skb); - l4.hdr = skb_transport_header(skb); - - /* compute outer L2 header size */ - offset = ((ip.hdr - skb->data) / 2) << I40E_TX_DESC_LENGTH_MACLEN_SHIFT; - - if (skb->encapsulation) { - u32 tunnel = 0; - /* define outer network header type */ - if (*tx_flags & I40E_TX_FLAGS_IPV4) { - tunnel |= (*tx_flags & I40E_TX_FLAGS_TSO) ? - I40E_TX_CTX_EXT_IP_IPV4 : - I40E_TX_CTX_EXT_IP_IPV4_NO_CSUM; - - l4_proto = ip.v4->protocol; - } else if (*tx_flags & I40E_TX_FLAGS_IPV6) { - tunnel |= I40E_TX_CTX_EXT_IP_IPV6; - - exthdr = ip.hdr + sizeof(*ip.v6); - l4_proto = ip.v6->nexthdr; - if (l4.hdr != exthdr) - ipv6_skip_exthdr(skb, exthdr - skb->data, - &l4_proto, &frag_off); - } - - /* define outer transport */ - switch (l4_proto) { - case IPPROTO_UDP: - tunnel |= I40E_TXD_CTX_UDP_TUNNELING; - *tx_flags |= I40E_TX_FLAGS_VXLAN_TUNNEL; - break; - case IPPROTO_GRE: - tunnel |= I40E_TXD_CTX_GRE_TUNNELING; - *tx_flags |= I40E_TX_FLAGS_VXLAN_TUNNEL; - break; - case IPPROTO_IPIP: - case IPPROTO_IPV6: - *tx_flags |= I40E_TX_FLAGS_VXLAN_TUNNEL; - l4.hdr = skb_inner_network_header(skb); - break; - default: - if (*tx_flags & I40E_TX_FLAGS_TSO) - return -1; - - skb_checksum_help(skb); - return 0; - } - - /* compute outer L3 header size */ - tunnel |= ((l4.hdr - ip.hdr) / 4) << - I40E_TXD_CTX_QW0_EXT_IPLEN_SHIFT; - - /* switch IP header pointer from outer to inner header */ - ip.hdr = skb_inner_network_header(skb); - - /* compute tunnel header size */ - tunnel |= ((ip.hdr - l4.hdr) / 2) << - I40E_TXD_CTX_QW0_NATLEN_SHIFT; - - /* indicate if we need to offload outer UDP header */ - if ((*tx_flags & I40E_TX_FLAGS_TSO) && - !(skb_shinfo(skb)->gso_type & SKB_GSO_PARTIAL) && - (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM)) - tunnel |= I40E_TXD_CTX_QW0_L4T_CS_MASK; - - /* record tunnel offload values */ - *cd_tunneling |= tunnel; - - /* switch L4 header pointer from outer to inner */ - l4.hdr = skb_inner_transport_header(skb); - l4_proto = 0; - - /* reset type as we transition from outer to inner headers */ - *tx_flags &= ~(I40E_TX_FLAGS_IPV4 | I40E_TX_FLAGS_IPV6); - if (ip.v4->version == 4) - *tx_flags |= I40E_TX_FLAGS_IPV4; - if (ip.v6->version == 6) - *tx_flags |= I40E_TX_FLAGS_IPV6; - } - - /* Enable IP checksum offloads */ - if (*tx_flags & I40E_TX_FLAGS_IPV4) { - l4_proto = ip.v4->protocol; - /* the stack computes the IP header already, the only time we - * need the hardware to recompute it is in the case of TSO. - */ - cmd |= (*tx_flags & I40E_TX_FLAGS_TSO) ? 
-					I40E_TX_DESC_CMD_IIPT_IPV4_CSUM :
-					I40E_TX_DESC_CMD_IIPT_IPV4;
-	} else if (*tx_flags & I40E_TX_FLAGS_IPV6) {
-		cmd |= I40E_TX_DESC_CMD_IIPT_IPV6;
-
-		exthdr = ip.hdr + sizeof(*ip.v6);
-		l4_proto = ip.v6->nexthdr;
-		if (l4.hdr != exthdr)
-			ipv6_skip_exthdr(skb, exthdr - skb->data,
-					 &l4_proto, &frag_off);
-	}
-
-	/* compute inner L3 header size */
-	offset |= ((l4.hdr - ip.hdr) / 4) << I40E_TX_DESC_LENGTH_IPLEN_SHIFT;
-
-	/* Enable L4 checksum offloads */
-	switch (l4_proto) {
-	case IPPROTO_TCP:
-		/* enable checksum offloads */
-		cmd |= I40E_TX_DESC_CMD_L4T_EOFT_TCP;
-		offset |= l4.tcp->doff << I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
-		break;
-	case IPPROTO_SCTP:
-		/* enable SCTP checksum offload */
-		cmd |= I40E_TX_DESC_CMD_L4T_EOFT_SCTP;
-		offset |= (sizeof(struct sctphdr) >> 2) <<
-			  I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
-		break;
-	case IPPROTO_UDP:
-		/* enable UDP checksum offload */
-		cmd |= I40E_TX_DESC_CMD_L4T_EOFT_UDP;
-		offset |= (sizeof(struct udphdr) >> 2) <<
-			  I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
-		break;
-	default:
-		if (*tx_flags & I40E_TX_FLAGS_TSO)
-			return -1;
-		skb_checksum_help(skb);
-		return 0;
-	}
-
-	*td_cmd |= cmd;
-	*td_offset |= offset;
-
-	return 1;
-}
-
-/**
- * i40e_create_tx_ctx - Build the Tx context descriptor
- * @tx_ring: ring to create the descriptor on
- * @cd_type_cmd_tso_mss: Quad Word 1
- * @cd_tunneling: Quad Word 0 - bits 0-31
- * @cd_l2tag2: Quad Word 0 - bits 32-63
- **/
-static void i40e_create_tx_ctx(struct i40e_ring *tx_ring,
-			       const u64 cd_type_cmd_tso_mss,
-			       const u32 cd_tunneling, const u32 cd_l2tag2)
-{
-	struct i40e_tx_context_desc *context_desc;
-	int i = tx_ring->next_to_use;
-
-	if ((cd_type_cmd_tso_mss == I40E_TX_DESC_DTYPE_CONTEXT) &&
-	    !cd_tunneling && !cd_l2tag2)
-		return;
-
-	/* grab the next descriptor */
-	context_desc = I40E_TX_CTXTDESC(tx_ring, i);
-
-	i++;
-	tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;
-
-	/* cpu_to_le32 and assign to struct fields */
-	context_desc->tunneling_params = cpu_to_le32(cd_tunneling);
-	context_desc->l2tag2 = cpu_to_le16(cd_l2tag2);
-	context_desc->rsvd = cpu_to_le16(0);
-	context_desc->type_cmd_tso_mss = cpu_to_le64(cd_type_cmd_tso_mss);
-}
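[Editor's aside: the __iavf_chk_linearize() helper that follows enforces the hardware's 8-descriptor-per-packet limit with a sliding window over fragment sizes. This is a minimal userspace model of that test, assuming plain size arrays instead of skb fragments and omitting the oversized-fragment (>16K) correction the real code applies; the names needs_linearize and MAX_BUFFER_TXD are hypothetical stand-ins, not driver symbols.]

#include <stdbool.h>

#define MAX_BUFFER_TXD 8	/* HW limit mirrored from I40E_MAX_BUFFER_TXD */

/* true when some window of 6 consecutive fragments sums to less than
 * gso_size, i.e. a TSO segment could need more than 8 descriptors and
 * the skb would have to be linearized
 */
static bool needs_linearize(const int *frag_size, int nr_frags, int gso_size)
{
	const int *frag = frag_size, *stale = frag_size;
	int sum, i;

	if (nr_frags < MAX_BUFFER_TXD - 1)	/* 7+ fragments only */
		return false;

	nr_frags -= MAX_BUFFER_TXD - 2;

	/* seed the window with the first five fragments */
	sum = 1 - gso_size;
	for (i = 0; i < 5; i++)
		sum += *frag++;

	/* slide: add the newest fragment, drop the stalest */
	for (;; stale++) {
		sum += *frag++;
		if (sum < 0)		/* window smaller than gso_size */
			return true;
		if (!nr_frags--)
			break;
		sum -= *stale;
	}
	return false;
}

int main(void)
{
	int frags[7] = { 1, 1, 1, 1, 1, 1, 1 };	/* seven tiny fragments */

	return needs_linearize(frags, 7, 1500) ? 0 : 1;	/* 0: would linearize */
}

[The real helper additionally splits any fragment larger than 16K-1 into aligned 12K chunks before subtracting it, which the model above skips.]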
-/**
- * __iavf_chk_linearize - Check if there are more than 8 buffers per packet
- * @skb: send buffer
- *
- * Note: Our HW can't DMA more than 8 buffers to build a packet on the wire
- * and so we need to figure out the cases where we need to linearize the skb.
- *
- * For TSO we need to count the TSO header and segment payload separately.
- * As such we need to check cases where we have 7 fragments or more as we
- * can potentially require 9 DMA transactions, 1 for the TSO header, 1 for
- * the segment payload in the first descriptor, and another 7 for the
- * fragments.
- **/
-bool __iavf_chk_linearize(struct sk_buff *skb)
-{
-	const struct skb_frag_struct *frag, *stale;
-	int nr_frags, sum;
-
-	/* no need to check if number of frags is less than 7 */
-	nr_frags = skb_shinfo(skb)->nr_frags;
-	if (nr_frags < (I40E_MAX_BUFFER_TXD - 1))
-		return false;
-
-	/* We need to walk through the list and validate that each group
-	 * of 6 fragments totals at least gso_size.
-	 */
-	nr_frags -= I40E_MAX_BUFFER_TXD - 2;
-	frag = &skb_shinfo(skb)->frags[0];
-
-	/* Initialize size to the negative value of gso_size minus 1. We
-	 * use this as the worst case scenario in which the frag ahead
-	 * of us only provides one byte which is why we are limited to 6
-	 * descriptors for a single transmit as the header and previous
-	 * fragment are already consuming 2 descriptors.
-	 */
-	sum = 1 - skb_shinfo(skb)->gso_size;
-
-	/* Add size of frags 0 through 4 to create our initial sum */
-	sum += skb_frag_size(frag++);
-	sum += skb_frag_size(frag++);
-	sum += skb_frag_size(frag++);
-	sum += skb_frag_size(frag++);
-	sum += skb_frag_size(frag++);
-
-	/* Walk through fragments adding latest fragment, testing it, and
-	 * then removing stale fragments from the sum.
-	 */
-	for (stale = &skb_shinfo(skb)->frags[0];; stale++) {
-		int stale_size = skb_frag_size(stale);
-
-		sum += skb_frag_size(frag++);
-
-		/* The stale fragment may present us with a smaller
-		 * descriptor than the actual fragment size. To account
-		 * for that we need to remove all the data on the front and
-		 * figure out what the remainder would be in the last
-		 * descriptor associated with the fragment.
-		 */
-		if (stale_size > I40E_MAX_DATA_PER_TXD) {
-			int align_pad = -(stale->page_offset) &
-					(I40E_MAX_READ_REQ_SIZE - 1);
-
-			sum -= align_pad;
-			stale_size -= align_pad;
-
-			do {
-				sum -= I40E_MAX_DATA_PER_TXD_ALIGNED;
-				stale_size -= I40E_MAX_DATA_PER_TXD_ALIGNED;
-			} while (stale_size > I40E_MAX_DATA_PER_TXD);
-		}
-
-		/* if sum is negative we failed to make sufficient progress */
-		if (sum < 0)
-			return true;
-
-		if (!nr_frags--)
-			break;
-
-		sum -= stale_size;
-	}
-
-	return false;
-}
-
-/**
- * __iavf_maybe_stop_tx - 2nd level check for tx stop conditions
- * @tx_ring: the ring to be checked
- * @size: the size buffer we want to assure is available
- *
- * Returns -EBUSY if a stop is needed, else 0
- **/
-int __iavf_maybe_stop_tx(struct i40e_ring *tx_ring, int size)
-{
-	netif_stop_subqueue(tx_ring->netdev, tx_ring->queue_index);
-	/* Memory barrier before checking head and tail */
-	smp_mb();
-
-	/* Check again in case another CPU has just made room available. */
-	if (likely(I40E_DESC_UNUSED(tx_ring) < size))
-		return -EBUSY;
-
-	/* A reprieve!
- use start_queue because it doesn't call schedule */ - netif_start_subqueue(tx_ring->netdev, tx_ring->queue_index); - ++tx_ring->tx_stats.restart_queue; - return 0; -} - -/** - * iavf_tx_map - Build the Tx descriptor - * @tx_ring: ring to send buffer on - * @skb: send buffer - * @first: first buffer info buffer to use - * @tx_flags: collected send information - * @hdr_len: size of the packet header - * @td_cmd: the command field in the descriptor - * @td_offset: offset for checksum or crc - **/ -static inline void iavf_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb, - struct i40e_tx_buffer *first, u32 tx_flags, - const u8 hdr_len, u32 td_cmd, u32 td_offset) -{ - unsigned int data_len = skb->data_len; - unsigned int size = skb_headlen(skb); - struct skb_frag_struct *frag; - struct i40e_tx_buffer *tx_bi; - struct i40e_tx_desc *tx_desc; - u16 i = tx_ring->next_to_use; - u32 td_tag = 0; - dma_addr_t dma; - - if (tx_flags & I40E_TX_FLAGS_HW_VLAN) { - td_cmd |= I40E_TX_DESC_CMD_IL2TAG1; - td_tag = (tx_flags & I40E_TX_FLAGS_VLAN_MASK) >> - I40E_TX_FLAGS_VLAN_SHIFT; - } - - first->tx_flags = tx_flags; - - dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE); - - tx_desc = I40E_TX_DESC(tx_ring, i); - tx_bi = first; - - for (frag = &skb_shinfo(skb)->frags[0];; frag++) { - unsigned int max_data = I40E_MAX_DATA_PER_TXD_ALIGNED; - - if (dma_mapping_error(tx_ring->dev, dma)) - goto dma_error; - - /* record length, and DMA address */ - dma_unmap_len_set(tx_bi, len, size); - dma_unmap_addr_set(tx_bi, dma, dma); - - /* align size to end of page */ - max_data += -dma & (I40E_MAX_READ_REQ_SIZE - 1); - tx_desc->buffer_addr = cpu_to_le64(dma); - - while (unlikely(size > I40E_MAX_DATA_PER_TXD)) { - tx_desc->cmd_type_offset_bsz = - build_ctob(td_cmd, td_offset, - max_data, td_tag); - - tx_desc++; - i++; - - if (i == tx_ring->count) { - tx_desc = I40E_TX_DESC(tx_ring, 0); - i = 0; - } - - dma += max_data; - size -= max_data; - - max_data = I40E_MAX_DATA_PER_TXD_ALIGNED; - tx_desc->buffer_addr = cpu_to_le64(dma); - } - - if (likely(!data_len)) - break; - - tx_desc->cmd_type_offset_bsz = build_ctob(td_cmd, td_offset, - size, td_tag); - - tx_desc++; - i++; - - if (i == tx_ring->count) { - tx_desc = I40E_TX_DESC(tx_ring, 0); - i = 0; - } - - size = skb_frag_size(frag); - data_len -= size; - - dma = skb_frag_dma_map(tx_ring->dev, frag, 0, size, - DMA_TO_DEVICE); - - tx_bi = &tx_ring->tx_bi[i]; - } - - netdev_tx_sent_queue(txring_txq(tx_ring), first->bytecount); - - i++; - if (i == tx_ring->count) - i = 0; - - tx_ring->next_to_use = i; - - i40e_maybe_stop_tx(tx_ring, DESC_NEEDED); - - /* write last descriptor with RS and EOP bits */ - td_cmd |= I40E_TXD_CMD; - tx_desc->cmd_type_offset_bsz = - build_ctob(td_cmd, td_offset, size, td_tag); - - /* Force memory writes to complete before letting h/w know there - * are new descriptors to fetch. - * - * We also use this memory barrier to make certain all of the - * status bits have been updated before next_to_watch is written. 
- */ - wmb(); - - /* set next_to_watch value indicating a packet is present */ - first->next_to_watch = tx_desc; - - /* notify HW of packet */ - if (netif_xmit_stopped(txring_txq(tx_ring)) || !skb->xmit_more) { - writel(i, tx_ring->tail); - - /* we need this if more than one processor can write to our tail - * at a time, it synchronizes IO on IA64/Altix systems - */ - mmiowb(); - } - - return; - -dma_error: - dev_info(tx_ring->dev, "TX DMA map failed\n"); - - /* clear dma mappings for failed tx_bi map */ - for (;;) { - tx_bi = &tx_ring->tx_bi[i]; - i40e_unmap_and_free_tx_resource(tx_ring, tx_bi); - if (tx_bi == first) - break; - if (i == 0) - i = tx_ring->count; - i--; - } - - tx_ring->next_to_use = i; -} - -/** - * i40e_xmit_frame_ring - Sends buffer on Tx ring - * @skb: send buffer - * @tx_ring: ring to send buffer on - * - * Returns NETDEV_TX_OK if sent, else an error code - **/ -static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb, - struct i40e_ring *tx_ring) -{ - u64 cd_type_cmd_tso_mss = I40E_TX_DESC_DTYPE_CONTEXT; - u32 cd_tunneling = 0, cd_l2tag2 = 0; - struct i40e_tx_buffer *first; - u32 td_offset = 0; - u32 tx_flags = 0; - __be16 protocol; - u32 td_cmd = 0; - u8 hdr_len = 0; - int tso, count; - - /* prefetch the data, we'll need it later */ - prefetch(skb->data); - - i40e_trace(xmit_frame_ring, skb, tx_ring); - - count = i40e_xmit_descriptor_count(skb); - if (i40e_chk_linearize(skb, count)) { - if (__skb_linearize(skb)) { - dev_kfree_skb_any(skb); - return NETDEV_TX_OK; - } - count = i40e_txd_use_count(skb->len); - tx_ring->tx_stats.tx_linearize++; - } - - /* need: 1 descriptor per page * PAGE_SIZE/I40E_MAX_DATA_PER_TXD, - * + 1 desc for skb_head_len/I40E_MAX_DATA_PER_TXD, - * + 4 desc gap to avoid the cache line where head is, - * + 1 desc for context descriptor, - * otherwise try next time - */ - if (i40e_maybe_stop_tx(tx_ring, count + 4 + 1)) { - tx_ring->tx_stats.tx_busy++; - return NETDEV_TX_BUSY; - } - - /* record the location of the first descriptor for this packet */ - first = &tx_ring->tx_bi[tx_ring->next_to_use]; - first->skb = skb; - first->bytecount = skb->len; - first->gso_segs = 1; - - /* prepare the xmit flags */ - if (iavf_tx_prepare_vlan_flags(skb, tx_ring, &tx_flags)) - goto out_drop; - - /* obtain protocol of skb */ - protocol = vlan_get_protocol(skb); - - /* setup IPv4/IPv6 offloads */ - if (protocol == htons(ETH_P_IP)) - tx_flags |= I40E_TX_FLAGS_IPV4; - else if (protocol == htons(ETH_P_IPV6)) - tx_flags |= I40E_TX_FLAGS_IPV6; - - tso = i40e_tso(first, &hdr_len, &cd_type_cmd_tso_mss); - - if (tso < 0) - goto out_drop; - else if (tso) - tx_flags |= I40E_TX_FLAGS_TSO; - - /* Always offload the checksum, since it's in the data descriptor */ - tso = i40e_tx_enable_csum(skb, &tx_flags, &td_cmd, &td_offset, - tx_ring, &cd_tunneling); - if (tso < 0) - goto out_drop; - - skb_tx_timestamp(skb); - - /* always enable CRC insertion offload */ - td_cmd |= I40E_TX_DESC_CMD_ICRC; - - i40e_create_tx_ctx(tx_ring, cd_type_cmd_tso_mss, - cd_tunneling, cd_l2tag2); - - iavf_tx_map(tx_ring, skb, first, tx_flags, hdr_len, - td_cmd, td_offset); - - return NETDEV_TX_OK; - -out_drop: - i40e_trace(xmit_frame_ring_drop, first->skb, tx_ring); - dev_kfree_skb_any(first->skb); - first->skb = NULL; - return NETDEV_TX_OK; -} - -/** - * iavf_xmit_frame - Selects the correct VSI and Tx queue to send buffer - * @skb: send buffer - * @netdev: network interface device structure - * - * Returns NETDEV_TX_OK if sent, else an error code - **/ -netdev_tx_t iavf_xmit_frame(struct sk_buff *skb, 
struct net_device *netdev)
-{
-	struct iavf_adapter *adapter = netdev_priv(netdev);
-	struct i40e_ring *tx_ring = &adapter->tx_rings[skb->queue_mapping];
-
-	/* hardware can't handle really short frames, hardware padding works
-	 * beyond this point
-	 */
-	if (unlikely(skb->len < I40E_MIN_TX_LEN)) {
-		if (skb_pad(skb, I40E_MIN_TX_LEN - skb->len))
-			return NETDEV_TX_OK;
-		skb->len = I40E_MIN_TX_LEN;
-		skb_set_tail_pointer(skb, I40E_MIN_TX_LEN);
-	}
-
-	return i40e_xmit_frame_ring(skb, tx_ring);
-}
diff --git a/drivers/net/ethernet/intel/iavf/i40e_txrx.h b/drivers/net/ethernet/intel/iavf/i40e_txrx.h
deleted file mode 100644
index 5dbb1cd52febd..0000000000000
--- a/drivers/net/ethernet/intel/iavf/i40e_txrx.h
+++ /dev/null
@@ -1,524 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright(c) 2013 - 2018 Intel Corporation. */
-
-#ifndef _I40E_TXRX_H_
-#define _I40E_TXRX_H_
-
-/* Interrupt Throttling and Rate Limiting Goodies */
-#define I40E_DEFAULT_IRQ_WORK	256
-
-/* The datasheet for the X710 and XL710 indicates that the maximum value for
- * the ITR is 8160usec which is then called out as 0xFF0 with a 2usec
- * resolution. 8160 is 0x1FE0 when written out in hex. So instead of storing
- * the register value which is divided by 2 let's use the actual values and
- * avoid an excessive amount of translation.
- */
-#define I40E_ITR_DYNAMIC	0x8000	/* use top bit as a flag */
-#define I40E_ITR_MASK		0x1FFE	/* mask for ITR register value */
-#define I40E_MIN_ITR		2	/* reg uses 2 usec resolution */
-#define I40E_ITR_100K		10	/* all values below must be even */
-#define I40E_ITR_50K		20
-#define I40E_ITR_20K		50
-#define I40E_ITR_18K		60
-#define I40E_ITR_8K		122
-#define I40E_MAX_ITR		8160	/* maximum value as per datasheet */
-#define ITR_TO_REG(setting) ((setting) & ~I40E_ITR_DYNAMIC)
-#define ITR_REG_ALIGN(setting) __ALIGN_MASK(setting, ~I40E_ITR_MASK)
-#define ITR_IS_DYNAMIC(setting) (!!((setting) & I40E_ITR_DYNAMIC))
-
-#define I40E_ITR_RX_DEF		(I40E_ITR_20K | I40E_ITR_DYNAMIC)
-#define I40E_ITR_TX_DEF		(I40E_ITR_20K | I40E_ITR_DYNAMIC)
-
-/* 0x40 is the enable bit for interrupt rate limiting, and must be set if
- * the value of the rate limit is non-zero
- */
-#define INTRL_ENA		BIT(6)
-#define I40E_MAX_INTRL		0x3B	/* reg uses 4 usec resolution */
-#define INTRL_REG_TO_USEC(intrl) ((intrl & ~INTRL_ENA) << 2)
-#define INTRL_USEC_TO_REG(set) ((set) ? ((set) >> 2) | INTRL_ENA : 0)
-#define I40E_INTRL_8K		125	/* 8000 ints/sec */
-#define I40E_INTRL_62K		16	/* 62500 ints/sec */
-#define I40E_INTRL_83K		12	/* 83333 ints/sec */
-
-#define I40E_QUEUE_END_OF_LIST 0x7FF
-
-/* this enum matches hardware bits and is meant to be used by DYN_CTLN
- * registers and QINT registers or more generally anywhere in the manual
- * mentioning ITR_INDX. ITR_NONE cannot be used as an index 'n' into any
- * register but instead is a special value meaning "don't update" ITR0/1/2.
- */ -enum i40e_dyn_idx_t { - I40E_IDX_ITR0 = 0, - I40E_IDX_ITR1 = 1, - I40E_IDX_ITR2 = 2, - I40E_ITR_NONE = 3 /* ITR_NONE must not be used as an index */ -}; - -/* these are indexes into ITRN registers */ -#define I40E_RX_ITR I40E_IDX_ITR0 -#define I40E_TX_ITR I40E_IDX_ITR1 -#define I40E_PE_ITR I40E_IDX_ITR2 - -/* Supported RSS offloads */ -#define I40E_DEFAULT_RSS_HENA ( \ - BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV4_UDP) | \ - BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV4_SCTP) | \ - BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV4_TCP) | \ - BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV4_OTHER) | \ - BIT_ULL(I40E_FILTER_PCTYPE_FRAG_IPV4) | \ - BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV6_UDP) | \ - BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV6_TCP) | \ - BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV6_SCTP) | \ - BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV6_OTHER) | \ - BIT_ULL(I40E_FILTER_PCTYPE_FRAG_IPV6) | \ - BIT_ULL(I40E_FILTER_PCTYPE_L2_PAYLOAD)) - -#define I40E_DEFAULT_RSS_HENA_EXPANDED (I40E_DEFAULT_RSS_HENA | \ - BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV4_TCP_SYN_NO_ACK) | \ - BIT_ULL(I40E_FILTER_PCTYPE_NONF_UNICAST_IPV4_UDP) | \ - BIT_ULL(I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV4_UDP) | \ - BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV6_TCP_SYN_NO_ACK) | \ - BIT_ULL(I40E_FILTER_PCTYPE_NONF_UNICAST_IPV6_UDP) | \ - BIT_ULL(I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV6_UDP)) - -/* Supported Rx Buffer Sizes (a multiple of 128) */ -#define I40E_RXBUFFER_256 256 -#define I40E_RXBUFFER_1536 1536 /* 128B aligned standard Ethernet frame */ -#define I40E_RXBUFFER_2048 2048 -#define I40E_RXBUFFER_3072 3072 /* Used for large frames w/ padding */ -#define I40E_MAX_RXBUFFER 9728 /* largest size for single descriptor */ - -/* NOTE: netdev_alloc_skb reserves up to 64 bytes, NET_IP_ALIGN means we - * reserve 2 more, and skb_shared_info adds an additional 384 bytes more, - * this adds up to 512 bytes of extra data meaning the smallest allocation - * we could have is 1K. - * i.e. RXBUFFER_256 --> 960 byte skb (size-1024 slab) - * i.e. RXBUFFER_512 --> 1216 byte skb (size-2048 slab) - */ -#define I40E_RX_HDR_SIZE I40E_RXBUFFER_256 -#define I40E_PACKET_HDR_PAD (ETH_HLEN + ETH_FCS_LEN + (VLAN_HLEN * 2)) -#define i40e_rx_desc i40e_32byte_rx_desc - -#define I40E_RX_DMA_ATTR \ - (DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING) - -/* Attempt to maximize the headroom available for incoming frames. We - * use a 2K buffer for receives and need 1536/1534 to store the data for - * the frame. This leaves us with 512 bytes of room. From that we need - * to deduct the space needed for the shared info and the padding needed - * to IP align the frame. - * - * Note: For cache line sizes 256 or larger this value is going to end - * up negative. In these cases we should fall back to the legacy - * receive path. - */ -#if (PAGE_SIZE < 8192) -#define I40E_2K_TOO_SMALL_WITH_PADDING \ -((NET_SKB_PAD + I40E_RXBUFFER_1536) > SKB_WITH_OVERHEAD(I40E_RXBUFFER_2048)) - -static inline int i40e_compute_pad(int rx_buf_len) -{ - int page_size, pad_size; - - page_size = ALIGN(rx_buf_len, PAGE_SIZE / 2); - pad_size = SKB_WITH_OVERHEAD(page_size) - rx_buf_len; - - return pad_size; -} - -static inline int i40e_skb_pad(void) -{ - int rx_buf_len; - - /* If a 2K buffer cannot handle a standard Ethernet frame then - * optimize padding for a 3K buffer instead of a 1.5K buffer. - * - * For a 3K buffer we need to add enough padding to allow for - * tailroom due to NET_IP_ALIGN possibly shifting us out of - * cache-line alignment. 
- */ - if (I40E_2K_TOO_SMALL_WITH_PADDING) - rx_buf_len = I40E_RXBUFFER_3072 + SKB_DATA_ALIGN(NET_IP_ALIGN); - else - rx_buf_len = I40E_RXBUFFER_1536; - - /* if needed make room for NET_IP_ALIGN */ - rx_buf_len -= NET_IP_ALIGN; - - return i40e_compute_pad(rx_buf_len); -} - -#define I40E_SKB_PAD i40e_skb_pad() -#else -#define I40E_2K_TOO_SMALL_WITH_PADDING false -#define I40E_SKB_PAD (NET_SKB_PAD + NET_IP_ALIGN) -#endif - -/** - * i40e_test_staterr - tests bits in Rx descriptor status and error fields - * @rx_desc: pointer to receive descriptor (in le64 format) - * @stat_err_bits: value to mask - * - * This function does some fast chicanery in order to return the - * value of the mask which is really only used for boolean tests. - * The status_error_len doesn't need to be shifted because it begins - * at offset zero. - */ -static inline bool i40e_test_staterr(union i40e_rx_desc *rx_desc, - const u64 stat_err_bits) -{ - return !!(rx_desc->wb.qword1.status_error_len & - cpu_to_le64(stat_err_bits)); -} - -/* How many Rx Buffers do we bundle into one write to the hardware ? */ -#define I40E_RX_BUFFER_WRITE 32 /* Must be power of 2 */ -#define I40E_RX_INCREMENT(r, i) \ - do { \ - (i)++; \ - if ((i) == (r)->count) \ - i = 0; \ - r->next_to_clean = i; \ - } while (0) - -#define I40E_RX_NEXT_DESC(r, i, n) \ - do { \ - (i)++; \ - if ((i) == (r)->count) \ - i = 0; \ - (n) = I40E_RX_DESC((r), (i)); \ - } while (0) - -#define I40E_RX_NEXT_DESC_PREFETCH(r, i, n) \ - do { \ - I40E_RX_NEXT_DESC((r), (i), (n)); \ - prefetch((n)); \ - } while (0) - -#define I40E_MAX_BUFFER_TXD 8 -#define I40E_MIN_TX_LEN 17 - -/* The size limit for a transmit buffer in a descriptor is (16K - 1). - * In order to align with the read requests we will align the value to - * the nearest 4K which represents our maximum read request size. - */ -#define I40E_MAX_READ_REQ_SIZE 4096 -#define I40E_MAX_DATA_PER_TXD (16 * 1024 - 1) -#define I40E_MAX_DATA_PER_TXD_ALIGNED \ - (I40E_MAX_DATA_PER_TXD & ~(I40E_MAX_READ_REQ_SIZE - 1)) - -/** - * i40e_txd_use_count - estimate the number of descriptors needed for Tx - * @size: transmit request size in bytes - * - * Due to hardware alignment restrictions (4K alignment), we need to - * assume that we can have no more than 12K of data per descriptor, even - * though each descriptor can take up to 16K - 1 bytes of aligned memory. - * Thus, we need to divide by 12K. But division is slow! Instead, - * we decompose the operation into shifts and one relatively cheap - * multiply operation. - * - * To divide by 12K, we first divide by 4K, then divide by 3: - * To divide by 4K, shift right by 12 bits - * To divide by 3, multiply by 85, then divide by 256 - * (Divide by 256 is done by shifting right by 8 bits) - * Finally, we add one to round up. Because 256 isn't an exact multiple of - * 3, we'll underestimate near each multiple of 12K. This is actually more - * accurate as we have 4K - 1 of wiggle room that we can fit into the last - * segment. For our purposes this is accurate out to 1M which is orders of - * magnitude greater than our largest possible GSO size. 
- * - * This would then be implemented as: - * return (((size >> 12) * 85) >> 8) + 1; - * - * Since multiplication and division are commutative, we can reorder - * operations into: - * return ((size * 85) >> 20) + 1; - */ -static inline unsigned int i40e_txd_use_count(unsigned int size) -{ - return ((size * 85) >> 20) + 1; -} - -/* Tx Descriptors needed, worst case */ -#define DESC_NEEDED (MAX_SKB_FRAGS + 6) -#define I40E_MIN_DESC_PENDING 4 - -#define I40E_TX_FLAGS_HW_VLAN BIT(1) -#define I40E_TX_FLAGS_SW_VLAN BIT(2) -#define I40E_TX_FLAGS_TSO BIT(3) -#define I40E_TX_FLAGS_IPV4 BIT(4) -#define I40E_TX_FLAGS_IPV6 BIT(5) -#define I40E_TX_FLAGS_FCCRC BIT(6) -#define I40E_TX_FLAGS_FSO BIT(7) -#define I40E_TX_FLAGS_FD_SB BIT(9) -#define I40E_TX_FLAGS_VXLAN_TUNNEL BIT(10) -#define I40E_TX_FLAGS_VLAN_MASK 0xffff0000 -#define I40E_TX_FLAGS_VLAN_PRIO_MASK 0xe0000000 -#define I40E_TX_FLAGS_VLAN_PRIO_SHIFT 29 -#define I40E_TX_FLAGS_VLAN_SHIFT 16 - -struct i40e_tx_buffer { - struct i40e_tx_desc *next_to_watch; - union { - struct sk_buff *skb; - void *raw_buf; - }; - unsigned int bytecount; - unsigned short gso_segs; - - DEFINE_DMA_UNMAP_ADDR(dma); - DEFINE_DMA_UNMAP_LEN(len); - u32 tx_flags; -}; - -struct i40e_rx_buffer { - dma_addr_t dma; - struct page *page; -#if (BITS_PER_LONG > 32) || (PAGE_SIZE >= 65536) - __u32 page_offset; -#else - __u16 page_offset; -#endif - __u16 pagecnt_bias; -}; - -struct i40e_queue_stats { - u64 packets; - u64 bytes; -}; - -struct i40e_tx_queue_stats { - u64 restart_queue; - u64 tx_busy; - u64 tx_done_old; - u64 tx_linearize; - u64 tx_force_wb; - int prev_pkt_ctr; - u64 tx_lost_interrupt; -}; - -struct i40e_rx_queue_stats { - u64 non_eop_descs; - u64 alloc_page_failed; - u64 alloc_buff_failed; - u64 page_reuse_count; - u64 realloc_count; -}; - -enum i40e_ring_state_t { - __I40E_TX_FDIR_INIT_DONE, - __I40E_TX_XPS_INIT_DONE, - __I40E_RING_STATE_NBITS /* must be last */ -}; - -/* some useful defines for virtchannel interface, which - * is the only remaining user of header split - */ -#define I40E_RX_DTYPE_NO_SPLIT 0 -#define I40E_RX_DTYPE_HEADER_SPLIT 1 -#define I40E_RX_DTYPE_SPLIT_ALWAYS 2 -#define I40E_RX_SPLIT_L2 0x1 -#define I40E_RX_SPLIT_IP 0x2 -#define I40E_RX_SPLIT_TCP_UDP 0x4 -#define I40E_RX_SPLIT_SCTP 0x8 - -/* struct that defines a descriptor ring, associated with a VSI */ -struct i40e_ring { - struct i40e_ring *next; /* pointer to next ring in q_vector */ - void *desc; /* Descriptor ring memory */ - struct device *dev; /* Used for DMA mapping */ - struct net_device *netdev; /* netdev ring maps to */ - union { - struct i40e_tx_buffer *tx_bi; - struct i40e_rx_buffer *rx_bi; - }; - DECLARE_BITMAP(state, __I40E_RING_STATE_NBITS); - u16 queue_index; /* Queue number of ring */ - u8 dcb_tc; /* Traffic class of ring */ - u8 __iomem *tail; - - /* high bit set means dynamic, use accessors routines to read/write. - * hardware only supports 2us resolution for the ITR registers. - * these values always store the USER setting, and must be converted - * before programming to a register. 
- */ - u16 itr_setting; - - u16 count; /* Number of descriptors */ - u16 reg_idx; /* HW register index of the ring */ - u16 rx_buf_len; - - /* used in interrupt processing */ - u16 next_to_use; - u16 next_to_clean; - - u8 atr_sample_rate; - u8 atr_count; - - bool ring_active; /* is ring online or not */ - bool arm_wb; /* do something to arm write back */ - u8 packet_stride; - - u16 flags; -#define I40E_TXR_FLAGS_WB_ON_ITR BIT(0) -#define I40E_RXR_FLAGS_BUILD_SKB_ENABLED BIT(1) - - /* stats structs */ - struct i40e_queue_stats stats; - struct u64_stats_sync syncp; - union { - struct i40e_tx_queue_stats tx_stats; - struct i40e_rx_queue_stats rx_stats; - }; - - unsigned int size; /* length of descriptor ring in bytes */ - dma_addr_t dma; /* physical address of ring */ - - struct i40e_vsi *vsi; /* Backreference to associated VSI */ - struct i40e_q_vector *q_vector; /* Backreference to associated vector */ - - struct rcu_head rcu; /* to avoid race on free */ - u16 next_to_alloc; - struct sk_buff *skb; /* When iavf_clean_rx_ring_irq() must - * return before it sees the EOP for - * the current packet, we save that skb - * here and resume receiving this - * packet the next time - * iavf_clean_rx_ring_irq() is called - * for this ring. - */ -} ____cacheline_internodealigned_in_smp; - -static inline bool ring_uses_build_skb(struct i40e_ring *ring) -{ - return !!(ring->flags & I40E_RXR_FLAGS_BUILD_SKB_ENABLED); -} - -static inline void set_ring_build_skb_enabled(struct i40e_ring *ring) -{ - ring->flags |= I40E_RXR_FLAGS_BUILD_SKB_ENABLED; -} - -static inline void clear_ring_build_skb_enabled(struct i40e_ring *ring) -{ - ring->flags &= ~I40E_RXR_FLAGS_BUILD_SKB_ENABLED; -} - -#define I40E_ITR_ADAPTIVE_MIN_INC 0x0002 -#define I40E_ITR_ADAPTIVE_MIN_USECS 0x0002 -#define I40E_ITR_ADAPTIVE_MAX_USECS 0x007e -#define I40E_ITR_ADAPTIVE_LATENCY 0x8000 -#define I40E_ITR_ADAPTIVE_BULK 0x0000 -#define ITR_IS_BULK(x) (!((x) & I40E_ITR_ADAPTIVE_LATENCY)) - -struct i40e_ring_container { - struct i40e_ring *ring; /* pointer to linked list of ring(s) */ - unsigned long next_update; /* jiffies value of next update */ - unsigned int total_bytes; /* total bytes processed this int */ - unsigned int total_packets; /* total packets processed this int */ - u16 count; - u16 target_itr; /* target ITR setting for ring(s) */ - u16 current_itr; /* current ITR setting for ring(s) */ -}; - -/* iterator for handling rings in ring container */ -#define i40e_for_each_ring(pos, head) \ - for (pos = (head).ring; pos != NULL; pos = pos->next) - -static inline unsigned int i40e_rx_pg_order(struct i40e_ring *ring) -{ -#if (PAGE_SIZE < 8192) - if (ring->rx_buf_len > (PAGE_SIZE / 2)) - return 1; -#endif - return 0; -} - -#define i40e_rx_pg_size(_ring) (PAGE_SIZE << i40e_rx_pg_order(_ring)) - -bool iavf_alloc_rx_buffers(struct i40e_ring *rxr, u16 cleaned_count); -netdev_tx_t iavf_xmit_frame(struct sk_buff *skb, struct net_device *netdev); -void iavf_clean_tx_ring(struct i40e_ring *tx_ring); -void iavf_clean_rx_ring(struct i40e_ring *rx_ring); -int iavf_setup_tx_descriptors(struct i40e_ring *tx_ring); -int iavf_setup_rx_descriptors(struct i40e_ring *rx_ring); -void iavf_free_tx_resources(struct i40e_ring *tx_ring); -void iavf_free_rx_resources(struct i40e_ring *rx_ring); -int iavf_napi_poll(struct napi_struct *napi, int budget); -void iavf_force_wb(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector); -u32 iavf_get_tx_pending(struct i40e_ring *ring, bool in_sw); -void iavf_detect_recover_hung(struct i40e_vsi *vsi); -int 
__iavf_maybe_stop_tx(struct i40e_ring *tx_ring, int size);
-bool __iavf_chk_linearize(struct sk_buff *skb);
-
-/**
- * i40e_xmit_descriptor_count - calculate number of Tx descriptors needed
- * @skb: send buffer
- *
- * Returns the number of data descriptors needed for this skb. Returns 0 to
- * indicate there are not enough descriptors available in this ring since we
- * need at least one descriptor.
- **/
-static inline int i40e_xmit_descriptor_count(struct sk_buff *skb)
-{
-	const struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0];
-	unsigned int nr_frags = skb_shinfo(skb)->nr_frags;
-	int count = 0, size = skb_headlen(skb);
-
-	for (;;) {
-		count += i40e_txd_use_count(size);
-
-		if (!nr_frags--)
-			break;
-
-		size = skb_frag_size(frag++);
-	}
-
-	return count;
-}
-
-/**
- * i40e_maybe_stop_tx - 1st level check for Tx stop conditions
- * @tx_ring: the ring to be checked
- * @size: the size buffer we want to assure is available
- *
- * Returns 0 if stop is not needed
- **/
-static inline int i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size)
-{
-	if (likely(I40E_DESC_UNUSED(tx_ring) >= size))
-		return 0;
-	return __iavf_maybe_stop_tx(tx_ring, size);
-}
-
-/**
- * i40e_chk_linearize - Check if there are more than 8 fragments per packet
- * @skb: send buffer
- * @count: number of buffers used
- *
- * Note: Our HW can't scatter-gather more than 8 fragments to build
- * a packet on the wire and so we need to figure out the cases where we
- * need to linearize the skb.
- **/
-static inline bool i40e_chk_linearize(struct sk_buff *skb, int count)
-{
-	/* Both TSO and single send will work if count is less than 8 */
-	if (likely(count < I40E_MAX_BUFFER_TXD))
-		return false;
-
-	if (skb_is_gso(skb))
-		return __iavf_chk_linearize(skb);
-
-	/* we can support up to 8 data buffers for a single send */
-	return count != I40E_MAX_BUFFER_TXD;
-}
-
-/**
- * txring_txq - Find the netdev queue that a Tx ring maps to
- * @ring: Tx ring to find the netdev equivalent of
- **/
-static inline struct netdev_queue *txring_txq(const struct i40e_ring *ring)
-{
-	return netdev_get_tx_queue(ring->netdev, ring->queue_index);
-}
-#endif /* _I40E_TXRX_H_ */
diff --git a/drivers/net/ethernet/intel/iavf/i40evf.h b/drivers/net/ethernet/intel/iavf/i40evf.h
deleted file mode 100644
index 3be326499b8c6..0000000000000
--- a/drivers/net/ethernet/intel/iavf/i40evf.h
+++ /dev/null
@@ -1,427 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright(c) 2013 - 2018 Intel Corporation. */
-
-#ifndef _IAVF_H_
-#define _IAVF_H_
-
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-
-#include "i40e_type.h"
-#include
-#include "i40e_txrx.h"
-
-#define DEFAULT_DEBUG_LEVEL_SHIFT 3
-#define PFX "iavf: "
-
-/* VSI state flags shared with common code */
-enum iavf_vsi_state_t {
-	__I40E_VSI_DOWN,
-	/* This must be last as it determines the size of the BITMAP */
-	__I40E_VSI_STATE_SIZE__,
-};
-
-/* dummy struct to make common code less painful */
-struct i40e_vsi {
-	struct iavf_adapter *back;
-	struct net_device *netdev;
-	unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)];
-	u16 seid;
-	u16 id;
-	DECLARE_BITMAP(state, __I40E_VSI_STATE_SIZE__);
-	int base_vector;
-	u16 work_limit;
-	u16 qs_handle;
-	void *priv;	/* client driver data reference. */
-};
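[Editor's aside: i40e_txd_use_count() in the header above replaces a divide by 12K with ((size * 85) >> 20) + 1. A hypothetical standalone harness, not part of this patch, can confirm the shortcut stays within one descriptor of the exact divide-by-12K reference out to the 1M bound the comment claims:]

#include <assert.h>
#include <stdio.h>

/* mirror of the driver's i40e_txd_use_count() arithmetic */
static unsigned int txd_use_count(unsigned int size)
{
	return ((size * 85) >> 20) + 1;
}

int main(void)
{
	unsigned int size;

	for (size = 1; size <= (1u << 20); size++) {
		/* reference: divide by 12K, then add one to round up */
		unsigned int ref = size / (12 * 1024) + 1;
		unsigned int est = txd_use_count(size);

		/* the shortcut may undershoot by exactly one descriptor
		 * near multiples of 12K, which the 4K-1 of per-descriptor
		 * slack absorbs; it must never overshoot
		 */
		assert(est == ref || est + 1 == ref);
	}
	printf("multiply-and-shift matches divide-by-12K up to 1M\n");
	return 0;
}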
-/* How many Rx Buffers do we bundle into one write to the hardware ? */
-#define IAVF_RX_BUFFER_WRITE	16	/* Must be power of 2 */
-#define IAVF_DEFAULT_TXD	512
-#define IAVF_DEFAULT_RXD	512
-#define IAVF_MAX_TXD		4096
-#define IAVF_MIN_TXD		64
-#define IAVF_MAX_RXD		4096
-#define IAVF_MIN_RXD		64
-#define IAVF_REQ_DESCRIPTOR_MULTIPLE	32
-#define IAVF_MAX_AQ_BUF_SIZE	4096
-#define IAVF_AQ_LEN		32
-#define IAVF_AQ_MAX_ERR		20	/* times to try before resetting AQ */
-
-#define MAXIMUM_ETHERNET_VLAN_SIZE (VLAN_ETH_FRAME_LEN + ETH_FCS_LEN)
-
-#define I40E_RX_DESC(R, i) (&(((union i40e_32byte_rx_desc *)((R)->desc))[i]))
-#define I40E_TX_DESC(R, i) (&(((struct i40e_tx_desc *)((R)->desc))[i]))
-#define I40E_TX_CTXTDESC(R, i) \
-	(&(((struct i40e_tx_context_desc *)((R)->desc))[i]))
-#define IAVF_MAX_REQ_QUEUES 4
-
-#define IAVF_HKEY_ARRAY_SIZE ((I40E_VFQF_HKEY_MAX_INDEX + 1) * 4)
-#define IAVF_HLUT_ARRAY_SIZE ((I40E_VFQF_HLUT_MAX_INDEX + 1) * 4)
-#define IAVF_MBPS_DIVISOR	125000	/* divisor to convert to Mbps */
-
-/* MAX_MSIX_Q_VECTORS of these are allocated,
- * but we only use one per queue-specific vector.
- */
-struct i40e_q_vector {
-	struct iavf_adapter *adapter;
-	struct i40e_vsi *vsi;
-	struct napi_struct napi;
-	struct i40e_ring_container rx;
-	struct i40e_ring_container tx;
-	u32 ring_mask;
-	u8 itr_countdown;	/* when 0 should adjust adaptive ITR */
-	u8 num_ringpairs;	/* total number of ring pairs in vector */
-	u16 v_idx;		/* index in the vsi->q_vector array. */
-	u16 reg_idx;		/* register index of the interrupt */
-	char name[IFNAMSIZ + 15];
-	bool arm_wb_state;
-	cpumask_t affinity_mask;
-	struct irq_affinity_notify affinity_notify;
-};
-
-/* Helper macros to switch between ints/sec and what the register uses.
- * And yes, it's the same math going both ways. The lowest value
- * supported by all of the i40e hardware is 8.
- */
-#define EITR_INTS_PER_SEC_TO_REG(_eitr) \
-	((_eitr) ? (1000000000 / ((_eitr) * 256)) : 8)
-#define EITR_REG_TO_INTS_PER_SEC EITR_INTS_PER_SEC_TO_REG
-
-#define IAVF_DESC_UNUSED(R) \
-	((((R)->next_to_clean > (R)->next_to_use) ?
0 : (R)->count) + \ - (R)->next_to_clean - (R)->next_to_use - 1) - -#define IAVF_RX_DESC_ADV(R, i) \ - (&(((union i40e_adv_rx_desc *)((R).desc))[i])) -#define IAVF_TX_DESC_ADV(R, i) \ - (&(((union i40e_adv_tx_desc *)((R).desc))[i])) -#define IAVF_TX_CTXTDESC_ADV(R, i) \ - (&(((struct i40e_adv_tx_context_desc *)((R).desc))[i])) - -#define OTHER_VECTOR 1 -#define NONQ_VECS (OTHER_VECTOR) - -#define MIN_MSIX_Q_VECTORS 1 -#define MIN_MSIX_COUNT (MIN_MSIX_Q_VECTORS + NONQ_VECS) - -#define IAVF_QUEUE_END_OF_LIST 0x7FF -#define IAVF_FREE_VECTOR 0x7FFF -struct iavf_mac_filter { - struct list_head list; - u8 macaddr[ETH_ALEN]; - bool remove; /* filter needs to be removed */ - bool add; /* filter needs to be added */ -}; - -struct iavf_vlan_filter { - struct list_head list; - u16 vlan; - bool remove; /* filter needs to be removed */ - bool add; /* filter needs to be added */ -}; - -#define IAVF_MAX_TRAFFIC_CLASS 4 -/* State of traffic class creation */ -enum iavf_tc_state_t { - __IAVF_TC_INVALID, /* no traffic class, default state */ - __IAVF_TC_RUNNING, /* traffic classes have been created */ -}; - -/* channel info */ -struct iavf_channel_config { - struct virtchnl_channel_info ch_info[IAVF_MAX_TRAFFIC_CLASS]; - enum iavf_tc_state_t state; - u8 total_qps; -}; - -/* State of cloud filter */ -enum iavf_cloud_filter_state_t { - __IAVF_CF_INVALID, /* cloud filter not added */ - __IAVF_CF_ADD_PENDING, /* cloud filter pending add by the PF */ - __IAVF_CF_DEL_PENDING, /* cloud filter pending del by the PF */ - __IAVF_CF_ACTIVE, /* cloud filter is active */ -}; - -/* Driver state. The order of these is important! */ -enum iavf_state_t { - __IAVF_STARTUP, /* driver loaded, probe complete */ - __IAVF_REMOVE, /* driver is being unloaded */ - __IAVF_INIT_VERSION_CHECK, /* aq msg sent, awaiting reply */ - __IAVF_INIT_GET_RESOURCES, /* aq msg sent, awaiting reply */ - __IAVF_INIT_SW, /* got resources, setting up structs */ - __IAVF_RESETTING, /* in reset */ - /* Below here, watchdog is running */ - __IAVF_DOWN, /* ready, can be opened */ - __IAVF_DOWN_PENDING, /* descending, waiting for watchdog */ - __IAVF_TESTING, /* in ethtool self-test */ - __IAVF_RUNNING, /* opened, working */ -}; - -enum iavf_critical_section_t { - __IAVF_IN_CRITICAL_TASK, /* cannot be interrupted */ - __IAVF_IN_CLIENT_TASK, - __IAVF_IN_REMOVE_TASK, /* device being removed */ -}; - -#define IAVF_CLOUD_FIELD_OMAC 0x01 -#define IAVF_CLOUD_FIELD_IMAC 0x02 -#define IAVF_CLOUD_FIELD_IVLAN 0x04 -#define IAVF_CLOUD_FIELD_TEN_ID 0x08 -#define IAVF_CLOUD_FIELD_IIP 0x10 - -#define IAVF_CF_FLAGS_OMAC IAVF_CLOUD_FIELD_OMAC -#define IAVF_CF_FLAGS_IMAC IAVF_CLOUD_FIELD_IMAC -#define IAVF_CF_FLAGS_IMAC_IVLAN (IAVF_CLOUD_FIELD_IMAC |\ - IAVF_CLOUD_FIELD_IVLAN) -#define IAVF_CF_FLAGS_IMAC_TEN_ID (IAVF_CLOUD_FIELD_IMAC |\ - IAVF_CLOUD_FIELD_TEN_ID) -#define IAVF_CF_FLAGS_OMAC_TEN_ID_IMAC (IAVF_CLOUD_FIELD_OMAC |\ - IAVF_CLOUD_FIELD_IMAC |\ - IAVF_CLOUD_FIELD_TEN_ID) -#define IAVF_CF_FLAGS_IMAC_IVLAN_TEN_ID (IAVF_CLOUD_FIELD_IMAC |\ - IAVF_CLOUD_FIELD_IVLAN |\ - IAVF_CLOUD_FIELD_TEN_ID) -#define IAVF_CF_FLAGS_IIP I40E_CLOUD_FIELD_IIP - -/* bookkeeping of cloud filters */ -struct iavf_cloud_filter { - enum iavf_cloud_filter_state_t state; - struct list_head list; - struct virtchnl_filter f; - unsigned long cookie; - bool del; /* filter needs to be deleted */ - bool add; /* filter needs to be added */ -}; - -/* board specific private data structure */ -struct iavf_adapter { - struct timer_list watchdog_timer; - struct work_struct reset_task; - struct 
work_struct adminq_task; - struct delayed_work client_task; - struct delayed_work init_task; - wait_queue_head_t down_waitqueue; - struct i40e_q_vector *q_vectors; - struct list_head vlan_filter_list; - struct list_head mac_filter_list; - /* Lock to protect accesses to MAC and VLAN lists */ - spinlock_t mac_vlan_list_lock; - char misc_vector_name[IFNAMSIZ + 9]; - int num_active_queues; - int num_req_queues; - - /* TX */ - struct i40e_ring *tx_rings; - u32 tx_timeout_count; - u32 tx_desc_count; - - /* RX */ - struct i40e_ring *rx_rings; - u64 hw_csum_rx_error; - u32 rx_desc_count; - int num_msix_vectors; - int num_iwarp_msix; - int iwarp_base_vector; - u32 client_pending; - struct i40e_client_instance *cinst; - struct msix_entry *msix_entries; - - u32 flags; -#define IAVF_FLAG_RX_CSUM_ENABLED BIT(0) -#define IAVF_FLAG_PF_COMMS_FAILED BIT(3) -#define IAVF_FLAG_RESET_PENDING BIT(4) -#define IAVF_FLAG_RESET_NEEDED BIT(5) -#define IAVF_FLAG_WB_ON_ITR_CAPABLE BIT(6) -#define IAVF_FLAG_ADDR_SET_BY_PF BIT(8) -#define IAVF_FLAG_SERVICE_CLIENT_REQUESTED BIT(9) -#define IAVF_FLAG_CLIENT_NEEDS_OPEN BIT(10) -#define IAVF_FLAG_CLIENT_NEEDS_CLOSE BIT(11) -#define IAVF_FLAG_CLIENT_NEEDS_L2_PARAMS BIT(12) -#define IAVF_FLAG_PROMISC_ON BIT(13) -#define IAVF_FLAG_ALLMULTI_ON BIT(14) -#define IAVF_FLAG_LEGACY_RX BIT(15) -#define IAVF_FLAG_REINIT_ITR_NEEDED BIT(16) -#define IAVF_FLAG_QUEUES_DISABLED BIT(17) -/* duplicates for common code */ -#define I40E_FLAG_DCB_ENABLED 0 -#define I40E_FLAG_RX_CSUM_ENABLED IAVF_FLAG_RX_CSUM_ENABLED -#define I40E_FLAG_LEGACY_RX IAVF_FLAG_LEGACY_RX - /* flags for admin queue service task */ - u32 aq_required; -#define IAVF_FLAG_AQ_ENABLE_QUEUES BIT(0) -#define IAVF_FLAG_AQ_DISABLE_QUEUES BIT(1) -#define IAVF_FLAG_AQ_ADD_MAC_FILTER BIT(2) -#define IAVF_FLAG_AQ_ADD_VLAN_FILTER BIT(3) -#define IAVF_FLAG_AQ_DEL_MAC_FILTER BIT(4) -#define IAVF_FLAG_AQ_DEL_VLAN_FILTER BIT(5) -#define IAVF_FLAG_AQ_CONFIGURE_QUEUES BIT(6) -#define IAVF_FLAG_AQ_MAP_VECTORS BIT(7) -#define IAVF_FLAG_AQ_HANDLE_RESET BIT(8) -#define IAVF_FLAG_AQ_CONFIGURE_RSS BIT(9) /* direct AQ config */ -#define IAVF_FLAG_AQ_GET_CONFIG BIT(10) -/* Newer style, RSS done by the PF so we can ignore hardware vagaries. */ -#define IAVF_FLAG_AQ_GET_HENA BIT(11) -#define IAVF_FLAG_AQ_SET_HENA BIT(12) -#define IAVF_FLAG_AQ_SET_RSS_KEY BIT(13) -#define IAVF_FLAG_AQ_SET_RSS_LUT BIT(14) -#define IAVF_FLAG_AQ_REQUEST_PROMISC BIT(15) -#define IAVF_FLAG_AQ_RELEASE_PROMISC BIT(16) -#define IAVF_FLAG_AQ_REQUEST_ALLMULTI BIT(17) -#define IAVF_FLAG_AQ_RELEASE_ALLMULTI BIT(18) -#define IAVF_FLAG_AQ_ENABLE_VLAN_STRIPPING BIT(19) -#define IAVF_FLAG_AQ_DISABLE_VLAN_STRIPPING BIT(20) -#define IAVF_FLAG_AQ_ENABLE_CHANNELS BIT(21) -#define IAVF_FLAG_AQ_DISABLE_CHANNELS BIT(22) -#define IAVF_FLAG_AQ_ADD_CLOUD_FILTER BIT(23) -#define IAVF_FLAG_AQ_DEL_CLOUD_FILTER BIT(24) - - /* OS defined structs */ - struct net_device *netdev; - struct pci_dev *pdev; - - struct i40e_hw hw; /* defined in i40e_type.h */ - - enum iavf_state_t state; - unsigned long crit_section; - - struct work_struct watchdog_task; - bool netdev_registered; - bool link_up; - enum virtchnl_link_speed link_speed; - enum virtchnl_ops current_op; -#define CLIENT_ALLOWED(_a) ((_a)->vf_res ? \ - (_a)->vf_res->vf_cap_flags & \ - VIRTCHNL_VF_OFFLOAD_IWARP : \ - 0) -#define CLIENT_ENABLED(_a) ((_a)->cinst) -/* RSS by the PF should be preferred over RSS via other methods. 
 */
-#define RSS_PF(_a) ((_a)->vf_res->vf_cap_flags & \
-		    VIRTCHNL_VF_OFFLOAD_RSS_PF)
-#define RSS_AQ(_a) ((_a)->vf_res->vf_cap_flags & \
-		   VIRTCHNL_VF_OFFLOAD_RSS_AQ)
-#define RSS_REG(_a) (!((_a)->vf_res->vf_cap_flags & \
-		       (VIRTCHNL_VF_OFFLOAD_RSS_AQ | \
-			VIRTCHNL_VF_OFFLOAD_RSS_PF)))
-#define VLAN_ALLOWED(_a) ((_a)->vf_res->vf_cap_flags & \
-			  VIRTCHNL_VF_OFFLOAD_VLAN)
-	struct virtchnl_vf_resource *vf_res; /* incl. all VSIs */
-	struct virtchnl_vsi_resource *vsi_res; /* our LAN VSI */
-	struct virtchnl_version_info pf_version;
-#define PF_IS_V11(_a) (((_a)->pf_version.major == 1) && \
-		       ((_a)->pf_version.minor == 1))
-	u16 msg_enable;
-	struct i40e_eth_stats current_stats;
-	struct i40e_vsi vsi;
-	u32 aq_wait_count;
-	/* RSS stuff */
-	u64 hena;
-	u16 rss_key_size;
-	u16 rss_lut_size;
-	u8 *rss_key;
-	u8 *rss_lut;
-	/* ADQ related members */
-	struct iavf_channel_config ch_config;
-	u8 num_tc;
-	struct list_head cloud_filter_list;
-	/* lock to protect access to the cloud filter list */
-	spinlock_t cloud_filter_list_lock;
-	u16 num_cloud_filters;
-};
-
-
-/* Ethtool Private Flags */
-
-/* lan device */
-struct i40e_device {
-	struct list_head list;
-	struct iavf_adapter *vf;
-};
-
-/* needed by iavf_ethtool.c */
-extern char iavf_driver_name[];
-extern const char iavf_driver_version[];
-
-int iavf_up(struct iavf_adapter *adapter);
-void iavf_down(struct iavf_adapter *adapter);
-int iavf_process_config(struct iavf_adapter *adapter);
-void iavf_schedule_reset(struct iavf_adapter *adapter);
-void iavf_reset(struct iavf_adapter *adapter);
-void iavf_set_ethtool_ops(struct net_device *netdev);
-void iavf_update_stats(struct iavf_adapter *adapter);
-void iavf_reset_interrupt_capability(struct iavf_adapter *adapter);
-int iavf_init_interrupt_scheme(struct iavf_adapter *adapter);
-void iavf_irq_enable_queues(struct iavf_adapter *adapter, u32 mask);
-void iavf_free_all_tx_resources(struct iavf_adapter *adapter);
-void iavf_free_all_rx_resources(struct iavf_adapter *adapter);
-
-void i40e_napi_add_all(struct iavf_adapter *adapter);
-void i40e_napi_del_all(struct iavf_adapter *adapter);
-
-int iavf_send_api_ver(struct iavf_adapter *adapter);
-int iavf_verify_api_ver(struct iavf_adapter *adapter);
-int iavf_send_vf_config_msg(struct iavf_adapter *adapter);
-int iavf_get_vf_config(struct iavf_adapter *adapter);
-void iavf_irq_enable(struct iavf_adapter *adapter, bool flush);
-void iavf_configure_queues(struct iavf_adapter *adapter);
-void iavf_deconfigure_queues(struct iavf_adapter *adapter);
-void iavf_enable_queues(struct iavf_adapter *adapter);
-void iavf_disable_queues(struct iavf_adapter *adapter);
-void iavf_map_queues(struct iavf_adapter *adapter);
-int iavf_request_queues(struct iavf_adapter *adapter, int num);
-void iavf_add_ether_addrs(struct iavf_adapter *adapter);
-void iavf_del_ether_addrs(struct iavf_adapter *adapter);
-void iavf_add_vlans(struct iavf_adapter *adapter);
-void iavf_del_vlans(struct iavf_adapter *adapter);
-void iavf_set_promiscuous(struct iavf_adapter *adapter, int flags);
-void iavf_request_stats(struct iavf_adapter *adapter);
-void iavf_request_reset(struct iavf_adapter *adapter);
-void iavf_get_hena(struct iavf_adapter *adapter);
-void iavf_set_hena(struct iavf_adapter *adapter);
-void iavf_set_rss_key(struct iavf_adapter *adapter);
-void iavf_set_rss_lut(struct iavf_adapter *adapter);
-void iavf_enable_vlan_stripping(struct iavf_adapter *adapter);
-void iavf_disable_vlan_stripping(struct iavf_adapter *adapter);
-void iavf_virtchnl_completion(struct iavf_adapter
*adapter, - enum virtchnl_ops v_opcode, - iavf_status v_retval, u8 *msg, u16 msglen); -int iavf_config_rss(struct iavf_adapter *adapter); -int iavf_lan_add_device(struct iavf_adapter *adapter); -int iavf_lan_del_device(struct iavf_adapter *adapter); -void iavf_client_subtask(struct iavf_adapter *adapter); -void iavf_notify_client_message(struct i40e_vsi *vsi, u8 *msg, u16 len); -void iavf_notify_client_l2_params(struct i40e_vsi *vsi); -void iavf_notify_client_open(struct i40e_vsi *vsi); -void iavf_notify_client_close(struct i40e_vsi *vsi, bool reset); -void iavf_enable_channels(struct iavf_adapter *adapter); -void iavf_disable_channels(struct iavf_adapter *adapter); -void iavf_add_cloud_filter(struct iavf_adapter *adapter); -void iavf_del_cloud_filter(struct iavf_adapter *adapter); -#endif /* _IAVF_H_ */ diff --git a/drivers/net/ethernet/intel/iavf/i40evf_client.c b/drivers/net/ethernet/intel/iavf/i40evf_client.c deleted file mode 100644 index 4c3e9b5d547b4..0000000000000 --- a/drivers/net/ethernet/intel/iavf/i40evf_client.c +++ /dev/null @@ -1,579 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* Copyright(c) 2013 - 2018 Intel Corporation. */ - -#include -#include - -#include "i40evf.h" -#include "i40e_prototype.h" -#include "i40evf_client.h" - -static -const char iavf_client_interface_version_str[] = IAVF_CLIENT_VERSION_STR; -static struct i40e_client *vf_registered_client; -static LIST_HEAD(iavf_devices); -static DEFINE_MUTEX(iavf_device_mutex); - -static u32 iavf_client_virtchnl_send(struct i40e_info *ldev, - struct i40e_client *client, - u8 *msg, u16 len); - -static int iavf_client_setup_qvlist(struct i40e_info *ldev, - struct i40e_client *client, - struct i40e_qvlist_info *qvlist_info); - -static struct i40e_ops iavf_lan_ops = { - .virtchnl_send = iavf_client_virtchnl_send, - .setup_qvlist = iavf_client_setup_qvlist, -}; - -/** - * iavf_client_get_params - retrieve relevant client parameters - * @vsi: VSI with parameters - * @params: client param struct - **/ -static -void iavf_client_get_params(struct i40e_vsi *vsi, struct i40e_params *params) -{ - int i; - - memset(params, 0, sizeof(struct i40e_params)); - params->mtu = vsi->netdev->mtu; - params->link_up = vsi->back->link_up; - - for (i = 0; i < I40E_MAX_USER_PRIORITY; i++) { - params->qos.prio_qos[i].tc = 0; - params->qos.prio_qos[i].qs_handle = vsi->qs_handle; - } -} - -/** - * iavf_notify_client_message - call the client message receive callback - * @vsi: the VSI associated with this client - * @msg: message buffer - * @len: length of message - * - * If there is a client to this VSI, call the client - **/ -void iavf_notify_client_message(struct i40e_vsi *vsi, u8 *msg, u16 len) -{ - struct i40e_client_instance *cinst; - - if (!vsi) - return; - - cinst = vsi->back->cinst; - if (!cinst || !cinst->client || !cinst->client->ops || - !cinst->client->ops->virtchnl_receive) { - dev_dbg(&vsi->back->pdev->dev, - "Cannot locate client instance virtchnl_receive function\n"); - return; - } - cinst->client->ops->virtchnl_receive(&cinst->lan_info, cinst->client, - msg, len); -} - -/** - * iavf_notify_client_l2_params - call the client notify callback - * @vsi: the VSI with l2 param changes - * - * If there is a client to this VSI, call the client - **/ -void iavf_notify_client_l2_params(struct i40e_vsi *vsi) -{ - struct i40e_client_instance *cinst; - struct i40e_params params; - - if (!vsi) - return; - - cinst = vsi->back->cinst; - - if (!cinst || !cinst->client || !cinst->client->ops || - !cinst->client->ops->l2_param_change) { - 
dev_dbg(&vsi->back->pdev->dev, - "Cannot locate client instance l2_param_change function\n"); - return; - } - iavf_client_get_params(vsi, ¶ms); - cinst->lan_info.params = params; - cinst->client->ops->l2_param_change(&cinst->lan_info, cinst->client, - ¶ms); -} - -/** - * iavf_notify_client_open - call the client open callback - * @vsi: the VSI with netdev opened - * - * If there is a client to this netdev, call the client with open - **/ -void iavf_notify_client_open(struct i40e_vsi *vsi) -{ - struct iavf_adapter *adapter = vsi->back; - struct i40e_client_instance *cinst = adapter->cinst; - int ret; - - if (!cinst || !cinst->client || !cinst->client->ops || - !cinst->client->ops->open) { - dev_dbg(&vsi->back->pdev->dev, - "Cannot locate client instance open function\n"); - return; - } - if (!(test_bit(__I40E_CLIENT_INSTANCE_OPENED, &cinst->state))) { - ret = cinst->client->ops->open(&cinst->lan_info, cinst->client); - if (!ret) - set_bit(__I40E_CLIENT_INSTANCE_OPENED, &cinst->state); - } -} - -/** - * iavf_client_release_qvlist - send a message to the PF to release iwarp qv map - * @ldev: pointer to L2 context. - * - * Return 0 on success or < 0 on error - **/ -static int iavf_client_release_qvlist(struct i40e_info *ldev) -{ - struct iavf_adapter *adapter = ldev->vf; - iavf_status err; - - if (adapter->aq_required) - return -EAGAIN; - - err = iavf_aq_send_msg_to_pf(&adapter->hw, - VIRTCHNL_OP_RELEASE_IWARP_IRQ_MAP, - I40E_SUCCESS, NULL, 0, NULL); - - if (err) - dev_err(&adapter->pdev->dev, - "Unable to send iWarp vector release message to PF, error %d, aq status %d\n", - err, adapter->hw.aq.asq_last_status); - - return err; -} - -/** - * iavf_notify_client_close - call the client close callback - * @vsi: the VSI with netdev closed - * @reset: true when close called due to reset pending - * - * If there is a client to this netdev, call the client with close - **/ -void iavf_notify_client_close(struct i40e_vsi *vsi, bool reset) -{ - struct iavf_adapter *adapter = vsi->back; - struct i40e_client_instance *cinst = adapter->cinst; - - if (!cinst || !cinst->client || !cinst->client->ops || - !cinst->client->ops->close) { - dev_dbg(&vsi->back->pdev->dev, - "Cannot locate client instance close function\n"); - return; - } - cinst->client->ops->close(&cinst->lan_info, cinst->client, reset); - iavf_client_release_qvlist(&cinst->lan_info); - clear_bit(__I40E_CLIENT_INSTANCE_OPENED, &cinst->state); -} - -/** - * iavf_client_add_instance - add a client instance to the instance list - * @adapter: pointer to the board struct - * - * Returns cinst ptr on success, NULL on failure - **/ -static struct i40e_client_instance * -iavf_client_add_instance(struct iavf_adapter *adapter) -{ - struct i40e_client_instance *cinst = NULL; - struct i40e_vsi *vsi = &adapter->vsi; - struct netdev_hw_addr *mac = NULL; - struct i40e_params params; - - if (!vf_registered_client) - goto out; - - if (adapter->cinst) { - cinst = adapter->cinst; - goto out; - } - - cinst = kzalloc(sizeof(*cinst), GFP_KERNEL); - if (!cinst) - goto out; - - cinst->lan_info.vf = (void *)adapter; - cinst->lan_info.netdev = vsi->netdev; - cinst->lan_info.pcidev = adapter->pdev; - cinst->lan_info.fid = 0; - cinst->lan_info.ftype = I40E_CLIENT_FTYPE_VF; - cinst->lan_info.hw_addr = adapter->hw.hw_addr; - cinst->lan_info.ops = &iavf_lan_ops; - cinst->lan_info.version.major = IAVF_CLIENT_VERSION_MAJOR; - cinst->lan_info.version.minor = IAVF_CLIENT_VERSION_MINOR; - cinst->lan_info.version.build = IAVF_CLIENT_VERSION_BUILD; - iavf_client_get_params(vsi, ¶ms); 
-	cinst->lan_info.params = params;
-	set_bit(__I40E_CLIENT_INSTANCE_NONE, &cinst->state);
-
-	cinst->lan_info.msix_count = adapter->num_iwarp_msix;
-	cinst->lan_info.msix_entries =
-			&adapter->msix_entries[adapter->iwarp_base_vector];
-
-	mac = list_first_entry(&cinst->lan_info.netdev->dev_addrs.list,
-			       struct netdev_hw_addr, list);
-	if (mac)
-		ether_addr_copy(cinst->lan_info.lanmac, mac->addr);
-	else
-		dev_err(&adapter->pdev->dev, "MAC address list is empty!\n");
-
-	cinst->client = vf_registered_client;
-	adapter->cinst = cinst;
-out:
-	return cinst;
-}
-
-/**
- * iavf_client_del_instance - removes a client instance from the list
- * @adapter: pointer to the board struct
- *
- **/
-static
-void iavf_client_del_instance(struct iavf_adapter *adapter)
-{
-	kfree(adapter->cinst);
-	adapter->cinst = NULL;
-}
-
-/**
- * iavf_client_subtask - client maintenance work
- * @adapter: board private structure
- **/
-void iavf_client_subtask(struct iavf_adapter *adapter)
-{
-	struct i40e_client *client = vf_registered_client;
-	struct i40e_client_instance *cinst;
-	int ret = 0;
-
-	if (adapter->state < __IAVF_DOWN)
-		return;
-
-	/* first check client is registered */
-	if (!client)
-		return;
-
-	/* Add the client instance to the instance list */
-	cinst = iavf_client_add_instance(adapter);
-	if (!cinst)
-		return;
-
-	dev_info(&adapter->pdev->dev, "Added instance of Client %s\n",
-		 client->name);
-
-	if (!test_bit(__I40E_CLIENT_INSTANCE_OPENED, &cinst->state)) {
-		/* Send an Open request to the client */
-
-		if (client->ops && client->ops->open)
-			ret = client->ops->open(&cinst->lan_info, client);
-		if (!ret)
-			set_bit(__I40E_CLIENT_INSTANCE_OPENED,
-				&cinst->state);
-		else
-			/* remove client instance */
-			iavf_client_del_instance(adapter);
-	}
-}
-
-/**
- * iavf_lan_add_device - add a lan device struct to the list of lan devices
- * @adapter: pointer to the board struct
- *
- * Returns 0 on success or non-zero on error
- **/
-int iavf_lan_add_device(struct iavf_adapter *adapter)
-{
-	struct i40e_device *ldev;
-	int ret = 0;
-
-	mutex_lock(&iavf_device_mutex);
-	list_for_each_entry(ldev, &iavf_devices, list) {
-		if (ldev->vf == adapter) {
-			ret = -EEXIST;
-			goto out;
-		}
-	}
-	ldev = kzalloc(sizeof(*ldev), GFP_KERNEL);
-	if (!ldev) {
-		ret = -ENOMEM;
-		goto out;
-	}
-	ldev->vf = adapter;
-	INIT_LIST_HEAD(&ldev->list);
-	list_add(&ldev->list, &iavf_devices);
-	dev_info(&adapter->pdev->dev, "Added LAN device bus=0x%02x dev=0x%02x func=0x%02x\n",
-		 adapter->hw.bus.bus_id, adapter->hw.bus.device,
-		 adapter->hw.bus.func);
-
-	/* Since in some cases register may have happened before a device gets
-	 * added, we can schedule a subtask to go initiate the clients.
-	 */
-	adapter->flags |= IAVF_FLAG_SERVICE_CLIENT_REQUESTED;
-
-out:
-	mutex_unlock(&iavf_device_mutex);
-	return ret;
-}
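[Editor's aside: iavf_lan_add_device() above and iavf_lan_del_device() below implement a small mutex-guarded device registry. This is a userspace analogue of that pattern under stated assumptions: hypothetical names, a singly linked list in place of list_head, and a pthread mutex standing in for the kernel mutex.]

#include <errno.h>
#include <pthread.h>
#include <stdlib.h>

struct vf_dev {
	struct vf_dev *next;
	void *adapter;
};

static struct vf_dev *vf_devices;
static pthread_mutex_t vf_device_mutex = PTHREAD_MUTEX_INITIALIZER;

/* add once; -EEXIST on duplicates, mirroring iavf_lan_add_device() */
static int vf_dev_add(void *adapter)
{
	struct vf_dev *dev;
	int ret = 0;

	pthread_mutex_lock(&vf_device_mutex);
	for (dev = vf_devices; dev; dev = dev->next)
		if (dev->adapter == adapter) {
			ret = -EEXIST;
			goto out;
		}
	dev = calloc(1, sizeof(*dev));
	if (!dev) {
		ret = -ENOMEM;
		goto out;
	}
	dev->adapter = adapter;
	dev->next = vf_devices;
	vf_devices = dev;
out:
	pthread_mutex_unlock(&vf_device_mutex);
	return ret;
}

/* remove if present; -ENODEV otherwise, mirroring iavf_lan_del_device() */
static int vf_dev_del(void *adapter)
{
	struct vf_dev **p, *dev;
	int ret = -ENODEV;

	pthread_mutex_lock(&vf_device_mutex);
	for (p = &vf_devices; (dev = *p); p = &dev->next)
		if (dev->adapter == adapter) {
			*p = dev->next;
			free(dev);
			ret = 0;
			break;
		}
	pthread_mutex_unlock(&vf_device_mutex);
	return ret;
}

int main(void)
{
	int adapter;

	return vf_dev_add(&adapter) || vf_dev_del(&adapter);
}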
- */ - adapter->flags |= IAVF_FLAG_SERVICE_CLIENT_REQUESTED; - -out: - mutex_unlock(&iavf_device_mutex); - return ret; -} - -/** - * iavf_lan_del_device - removes a lan device from the device list - * @adapter: pointer to the board struct - * - * Returns 0 on success or non-0 on error - **/ -int iavf_lan_del_device(struct iavf_adapter *adapter) -{ - struct i40e_device *ldev, *tmp; - int ret = -ENODEV; - - mutex_lock(&iavf_device_mutex); - list_for_each_entry_safe(ldev, tmp, &iavf_devices, list) { - if (ldev->vf == adapter) { - dev_info(&adapter->pdev->dev, - "Deleted LAN device bus=0x%02x dev=0x%02x func=0x%02x\n", - adapter->hw.bus.bus_id, adapter->hw.bus.device, - adapter->hw.bus.func); - list_del(&ldev->list); - kfree(ldev); - ret = 0; - break; - } - } - - mutex_unlock(&iavf_device_mutex); - return ret; -} - -/** - * iavf_client_release - release client specific resources - * @client: pointer to the registered client - * - **/ -static void iavf_client_release(struct i40e_client *client) -{ - struct i40e_client_instance *cinst; - struct i40e_device *ldev; - struct iavf_adapter *adapter; - - mutex_lock(&iavf_device_mutex); - list_for_each_entry(ldev, &iavf_devices, list) { - adapter = ldev->vf; - cinst = adapter->cinst; - if (!cinst) - continue; - if (test_bit(__I40E_CLIENT_INSTANCE_OPENED, &cinst->state)) { - if (client->ops && client->ops->close) - client->ops->close(&cinst->lan_info, client, - false); - iavf_client_release_qvlist(&cinst->lan_info); - clear_bit(__I40E_CLIENT_INSTANCE_OPENED, &cinst->state); - - dev_warn(&adapter->pdev->dev, - "Client %s instance closed\n", client->name); - } - /* delete the client instance */ - iavf_client_del_instance(adapter); - dev_info(&adapter->pdev->dev, "Deleted client instance of Client %s\n", - client->name); - } - mutex_unlock(&iavf_device_mutex); -} - -/** - * iavf_client_prepare - prepare client specific resources - * @client: pointer to the registered client - * - **/ -static void iavf_client_prepare(struct i40e_client *client) -{ - struct i40e_device *ldev; - struct iavf_adapter *adapter; - - mutex_lock(&iavf_device_mutex); - list_for_each_entry(ldev, &iavf_devices, list) { - adapter = ldev->vf; - /* Signal the watchdog to service the client */ - adapter->flags |= IAVF_FLAG_SERVICE_CLIENT_REQUESTED; - } - mutex_unlock(&iavf_device_mutex); -} - -/** - * iavf_client_virtchnl_send - send a message to the PF instance - * @ldev: pointer to L2 context. - * @client: Client pointer. - * @msg: pointer to message buffer - * @len: message length - * - * Return 0 on success or < 0 on error - **/ -static u32 iavf_client_virtchnl_send(struct i40e_info *ldev, - struct i40e_client *client, - u8 *msg, u16 len) -{ - struct iavf_adapter *adapter = ldev->vf; - iavf_status err; - - if (adapter->aq_required) - return -EAGAIN; - - err = iavf_aq_send_msg_to_pf(&adapter->hw, VIRTCHNL_OP_IWARP, - I40E_SUCCESS, msg, len, NULL); - if (err) - dev_err(&adapter->pdev->dev, "Unable to send iWarp message to PF, error %d, aq status %d\n", - err, adapter->hw.aq.asq_last_status); - - return err; -} - -/** - * iavf_client_setup_qvlist - send a message to the PF to setup iwarp qv map - * @ldev: pointer to L2 context. - * @client: Client pointer. 
- * @qvlist_info: queue and vector list - * - * Return 0 on success or < 0 on error - **/ -static int iavf_client_setup_qvlist(struct i40e_info *ldev, - struct i40e_client *client, - struct i40e_qvlist_info *qvlist_info) -{ - struct virtchnl_iwarp_qvlist_info *v_qvlist_info; - struct iavf_adapter *adapter = ldev->vf; - struct i40e_qv_info *qv_info; - iavf_status err; - u32 v_idx, i; - u32 msg_size; - - if (adapter->aq_required) - return -EAGAIN; - - /* A quick check on whether the vectors belong to the client */ - for (i = 0; i < qvlist_info->num_vectors; i++) { - qv_info = &qvlist_info->qv_info[i]; - if (!qv_info) - continue; - v_idx = qv_info->v_idx; - if ((v_idx >= - (adapter->iwarp_base_vector + adapter->num_iwarp_msix)) || - (v_idx < adapter->iwarp_base_vector)) - return -EINVAL; - } - - v_qvlist_info = (struct virtchnl_iwarp_qvlist_info *)qvlist_info; - msg_size = sizeof(struct virtchnl_iwarp_qvlist_info) + - (sizeof(struct virtchnl_iwarp_qv_info) * - (v_qvlist_info->num_vectors - 1)); - - adapter->client_pending |= BIT(VIRTCHNL_OP_CONFIG_IWARP_IRQ_MAP); - err = iavf_aq_send_msg_to_pf(&adapter->hw, - VIRTCHNL_OP_CONFIG_IWARP_IRQ_MAP, I40E_SUCCESS, - (u8 *)v_qvlist_info, msg_size, NULL); - - if (err) { - dev_err(&adapter->pdev->dev, - "Unable to send iWarp vector config message to PF, error %d, aq status %d\n", - err, adapter->hw.aq.asq_last_status); - goto out; - } - - err = -EBUSY; - for (i = 0; i < 5; i++) { - msleep(100); - if (!(adapter->client_pending & - BIT(VIRTCHNL_OP_CONFIG_IWARP_IRQ_MAP))) { - err = 0; - break; - } - } -out: - return err; -} - -/** - * iavf_register_client - Register an i40e client driver with the L2 driver - * @client: pointer to the i40e_client struct - * - * Returns 0 on success or non-0 on error - **/ -int iavf_register_client(struct i40e_client *client) -{ - int ret = 0; - - if (!client) { - ret = -EIO; - goto out; - } - - if (strlen(client->name) == 0) { - pr_info("iavf: Failed to register client with no name\n"); - ret = -EIO; - goto out; - } - - if (vf_registered_client) { - pr_info("iavf: Client %s has already been registered!\n", - client->name); - ret = -EEXIST; - goto out; - } - - if ((client->version.major != IAVF_CLIENT_VERSION_MAJOR) || - (client->version.minor != IAVF_CLIENT_VERSION_MINOR)) { - pr_info("iavf: Failed to register client %s due to mismatched client interface version\n", - client->name); - pr_info("Client is using version: %02d.%02d.%02d while LAN driver supports %s\n", - client->version.major, client->version.minor, - client->version.build, - iavf_client_interface_version_str); - ret = -EIO; - goto out; - } - - vf_registered_client = client; - - iavf_client_prepare(client); - - pr_info("iavf: Registered client %s with return code %d\n", - client->name, ret); -out: - return ret; -} -EXPORT_SYMBOL(iavf_register_client); - -/** - * iavf_unregister_client - Unregister an i40e client driver with the L2 driver - * @client: pointer to the i40e_client struct - * - * Returns 0 on success or non-0 on error - **/ -int iavf_unregister_client(struct i40e_client *client) -{ - int ret = 0; - - /* When an unregister request comes through we would have to send - * a close for each of the client instances that were opened. - * client_release function is called to handle this.
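A note on the message sizing in iavf_client_setup_qvlist() above: struct virtchnl_iwarp_qvlist_info declares a one-element trailing array, so sizeof() already accounts for one qv_info entry and only num_vectors - 1 extra entries are added. The same arithmetic as a helper, names hypothetical:

	static u32 iavf_qvlist_msg_size(u32 num_vectors)
	{
		/* one entry is already counted inside the struct size */
		return sizeof(struct virtchnl_iwarp_qvlist_info) +
		       (num_vectors - 1) * sizeof(struct virtchnl_iwarp_qv_info);
	}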
- */ - iavf_client_release(client); - - if (vf_registered_client != client) { - pr_info("iavf: Client %s has not been registered\n", - client->name); - ret = -ENODEV; - goto out; - } - vf_registered_client = NULL; - pr_info("iavf: Unregistered client %s\n", client->name); -out: - return ret; -} -EXPORT_SYMBOL(iavf_unregister_client); diff --git a/drivers/net/ethernet/intel/iavf/i40evf_client.h b/drivers/net/ethernet/intel/iavf/i40evf_client.h deleted file mode 100644 index e216fc9dfd81a..0000000000000 --- a/drivers/net/ethernet/intel/iavf/i40evf_client.h +++ /dev/null @@ -1,169 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright(c) 2013 - 2018 Intel Corporation. */ - -#ifndef _IAVF_CLIENT_H_ -#define _IAVF_CLIENT_H_ - -#define IAVF_CLIENT_STR_LENGTH 10 - -/* Client interface version should be updated anytime there is a change in the - * existing APIs or data structures. - */ -#define IAVF_CLIENT_VERSION_MAJOR 0 -#define IAVF_CLIENT_VERSION_MINOR 01 -#define IAVF_CLIENT_VERSION_BUILD 00 -#define IAVF_CLIENT_VERSION_STR \ - __stringify(IAVF_CLIENT_VERSION_MAJOR) "." \ - __stringify(IAVF_CLIENT_VERSION_MINOR) "." \ - __stringify(IAVF_CLIENT_VERSION_BUILD) - -struct i40e_client_version { - u8 major; - u8 minor; - u8 build; - u8 rsvd; -}; - -enum i40e_client_state { - __I40E_CLIENT_NULL, - __I40E_CLIENT_REGISTERED -}; - -enum i40e_client_instance_state { - __I40E_CLIENT_INSTANCE_NONE, - __I40E_CLIENT_INSTANCE_OPENED, -}; - -struct i40e_ops; -struct i40e_client; - -/* HW does not define a type value for AEQ; only for RX/TX and CEQ. - * In order for us to keep the interface simple, SW will define a - * unique type value for AEQ. - */ -#define I40E_QUEUE_TYPE_PE_AEQ 0x80 -#define I40E_QUEUE_INVALID_IDX 0xFFFF - -struct i40e_qv_info { - u32 v_idx; /* msix_vector */ - u16 ceq_idx; - u16 aeq_idx; - u8 itr_idx; -}; - -struct i40e_qvlist_info { - u32 num_vectors; - struct i40e_qv_info qv_info[1]; -}; - -#define I40E_CLIENT_MSIX_ALL 0xFFFFFFFF - -/* set of LAN parameters useful for clients managed by LAN */ - -/* Struct to hold per priority info */ -struct i40e_prio_qos_params { - u16 qs_handle; /* qs handle for prio */ - u8 tc; /* TC mapped to prio */ - u8 reserved; -}; - -#define I40E_CLIENT_MAX_USER_PRIORITY 8 -/* Struct to hold Client QoS */ -struct i40e_qos_params { - struct i40e_prio_qos_params prio_qos[I40E_CLIENT_MAX_USER_PRIORITY]; -}; - -struct i40e_params { - struct i40e_qos_params qos; - u16 mtu; - u16 link_up; /* boolean */ -}; - -/* Structure to hold LAN device info for a client device */ -struct i40e_info { - struct i40e_client_version version; - u8 lanmac[6]; - struct net_device *netdev; - struct pci_dev *pcidev; - u8 __iomem *hw_addr; - u8 fid; /* function id, PF id or VF id */ -#define I40E_CLIENT_FTYPE_PF 0 -#define I40E_CLIENT_FTYPE_VF 1 - u8 ftype; /* function type, PF or VF */ - void *vf; /* cast to iavf_adapter */ - - /* All L2 params that could change during the life span of the device - * and need to be communicated to the client when they change - */ - struct i40e_params params; - struct i40e_ops *ops; - - u16 msix_count; /* number of msix vectors*/ - /* Array down below will be dynamically allocated based on msix_count */ - struct msix_entry *msix_entries; - u16 itr_index; /* Which ITR index the PE driver is supposed to use */ -}; - -struct i40e_ops { - /* setup_q_vector_list enables queues with a particular vector */ - int (*setup_qvlist)(struct i40e_info *ldev, struct i40e_client *client, - struct i40e_qvlist_info *qv_info); - - u32 (*virtchnl_send)(struct 
i40e_info *ldev, struct i40e_client *client, - u8 *msg, u16 len); - - /* If the PE Engine is unresponsive, RDMA driver can request a reset.*/ - void (*request_reset)(struct i40e_info *ldev, - struct i40e_client *client); -}; - -struct i40e_client_ops { - /* Should be called from register_client() or whenever the driver is - * ready to create a specific client instance. - */ - int (*open)(struct i40e_info *ldev, struct i40e_client *client); - - /* Should be closed when netdev is unavailable or when unregister - * call comes in. If the close happens due to a reset, set the reset - * bit to true. - */ - void (*close)(struct i40e_info *ldev, struct i40e_client *client, - bool reset); - - /* called when some l2 managed parameters change - mss */ - void (*l2_param_change)(struct i40e_info *ldev, - struct i40e_client *client, - struct i40e_params *params); - - /* called when a message is received from the PF */ - int (*virtchnl_receive)(struct i40e_info *ldev, - struct i40e_client *client, - u8 *msg, u16 len); -}; - -/* Client device */ -struct i40e_client_instance { - struct list_head list; - struct i40e_info lan_info; - struct i40e_client *client; - unsigned long state; -}; - -struct i40e_client { - struct list_head list; /* list of registered clients */ - char name[IAVF_CLIENT_STR_LENGTH]; - struct i40e_client_version version; - unsigned long state; /* client state */ - atomic_t ref_cnt; /* Count of all the client devices of this kind */ - u32 flags; -#define I40E_CLIENT_FLAGS_LAUNCH_ON_PROBE BIT(0) -#define I40E_TX_FLAGS_NOTIFY_OTHER_EVENTS BIT(2) - u8 type; -#define I40E_CLIENT_IWARP 0 - struct i40e_client_ops *ops; /* client ops provided by the client */ -}; - -/* used by clients */ -int iavf_register_client(struct i40e_client *client); -int iavf_unregister_client(struct i40e_client *client); -#endif /* _IAVF_CLIENT_H_ */ diff --git a/drivers/net/ethernet/intel/iavf/i40evf_ethtool.c b/drivers/net/ethernet/intel/iavf/i40evf_ethtool.c deleted file mode 100644 index b8b2db7ee8b80..0000000000000 --- a/drivers/net/ethernet/intel/iavf/i40evf_ethtool.c +++ /dev/null @@ -1,1042 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* Copyright(c) 2013 - 2018 Intel Corporation. */ - -/* ethtool support for iavf */ -#include "i40evf.h" - -#include <linux/uaccess.h> - -/* ethtool statistics helpers */ - -/** - * struct i40e_stats - definition for an ethtool statistic - * @stat_string: statistic name to display in ethtool -S output - * @sizeof_stat: the sizeof() the stat, must be no greater than sizeof(u64) - * @stat_offset: offsetof() the stat from a base pointer - * - * This structure defines a statistic to be added to the ethtool stats buffer. - * It defines a statistic as offset from a common base pointer. Stats should - * be defined in constant arrays using the I40E_STAT macro, with every element - * of the array using the same _type for calculating the sizeof_stat and - * stat_offset. - * - * The @sizeof_stat is expected to be sizeof(u8), sizeof(u16), sizeof(u32) or - * sizeof(u64). Other sizes are not expected and will produce a WARN_ONCE from - * the iavf_add_one_ethtool_stat() helper function. - * - * The @stat_string is interpreted as a format string, allowing formatted - * values to be inserted while looping over multiple structures for a given - * statistics array. Thus, every statistic string in an array should have the - * same type and number of format specifiers, to be formatted by variadic - * arguments to the i40e_add_stat_strings() helper function. 
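Stepping back to the client interface deleted above: a consumer such as an RDMA driver would register roughly as sketched below. All demo_* names are hypothetical; the version fields must match the LAN driver exactly, as enforced by iavf_register_client().

	static int demo_open(struct i40e_info *ldev, struct i40e_client *client)
	{
		return 0;	/* bring up the PE engine, claim vectors, ... */
	}

	static void demo_close(struct i40e_info *ldev, struct i40e_client *client,
			       bool reset)
	{
	}

	static struct i40e_client_ops demo_client_ops = {
		.open	= demo_open,
		.close	= demo_close,
	};

	static struct i40e_client demo_client = {
		.name	 = "demo",	/* must fit in IAVF_CLIENT_STR_LENGTH */
		.version = {
			.major = IAVF_CLIENT_VERSION_MAJOR,
			.minor = IAVF_CLIENT_VERSION_MINOR,
			.build = IAVF_CLIENT_VERSION_BUILD,
		},
		.type	 = I40E_CLIENT_IWARP,
		.ops	 = &demo_client_ops,
	};

	static int __init demo_init(void)
	{
		return iavf_register_client(&demo_client);
	}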
- **/ -struct i40e_stats { - char stat_string[ETH_GSTRING_LEN]; - int sizeof_stat; - int stat_offset; -}; - -/* Helper macro to define an i40e_stat structure with proper size and type. - * Use this when defining constant statistics arrays. Note that @_type expects - * only a type name and is used multiple times. - */ -#define I40E_STAT(_type, _name, _stat) { \ - .stat_string = _name, \ - .sizeof_stat = FIELD_SIZEOF(_type, _stat), \ - .stat_offset = offsetof(_type, _stat) \ -} - -/* Helper macro for defining some statistics directly copied from the netdev - * stats structure. - */ -#define I40E_NETDEV_STAT(_net_stat) \ - I40E_STAT(struct rtnl_link_stats64, #_net_stat, _net_stat) - -/* Helper macro for defining some statistics related to queues */ -#define I40E_QUEUE_STAT(_name, _stat) \ - I40E_STAT(struct i40e_ring, _name, _stat) - -/* Stats associated with a Tx or Rx ring */ -static const struct i40e_stats i40e_gstrings_queue_stats[] = { - I40E_QUEUE_STAT("%s-%u.packets", stats.packets), - I40E_QUEUE_STAT("%s-%u.bytes", stats.bytes), -}; - -/** - * iavf_add_one_ethtool_stat - copy the stat into the supplied buffer - * @data: location to store the stat value - * @pointer: basis for where to copy from - * @stat: the stat definition - * - * Copies the stat data defined by the pointer and stat structure pair into - * the memory supplied as data. Used to implement i40e_add_ethtool_stats and - * iavf_add_queue_stats. If the pointer is null, data will be zero'd. - */ -static void -iavf_add_one_ethtool_stat(u64 *data, void *pointer, - const struct i40e_stats *stat) -{ - char *p; - - if (!pointer) { - /* ensure that the ethtool data buffer is zero'd for any stats - * which don't have a valid pointer. - */ - *data = 0; - return; - } - - p = (char *)pointer + stat->stat_offset; - switch (stat->sizeof_stat) { - case sizeof(u64): - *data = *((u64 *)p); - break; - case sizeof(u32): - *data = *((u32 *)p); - break; - case sizeof(u16): - *data = *((u16 *)p); - break; - case sizeof(u8): - *data = *((u8 *)p); - break; - default: - WARN_ONCE(1, "unexpected stat size for %s", - stat->stat_string); - *data = 0; - } -} - -/** - * __iavf_add_ethtool_stats - copy stats into the ethtool supplied buffer - * @data: ethtool stats buffer - * @pointer: location to copy stats from - * @stats: array of stats to copy - * @size: the size of the stats definition - * - * Copy the stats defined by the stats array using the pointer as a base into - * the data buffer supplied by ethtool. Updates the data pointer to point to - * the next empty location for successive calls to __iavf_add_ethtool_stats. - * If pointer is null, set the data values to zero and update the pointer to - * skip these stats. - **/ -static void -__iavf_add_ethtool_stats(u64 **data, void *pointer, - const struct i40e_stats stats[], - const unsigned int size) -{ - unsigned int i; - - for (i = 0; i < size; i++) - iavf_add_one_ethtool_stat((*data)++, pointer, &stats[i]); -} - -/** - * i40e_add_ethtool_stats - copy stats into ethtool supplied buffer - * @data: ethtool stats buffer - * @pointer: location where stats are stored - * @stats: static const array of stat definitions - * - * Macro to ease the use of __iavf_add_ethtool_stats by taking a static - * constant stats array and passing the ARRAY_SIZE(). This avoids typos by - * ensuring that we pass the size associated with the given stats array. - * - * The parameter @stats is evaluated twice, so parameters with side effects - * should be avoided. 
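The (name, size, offset) triple is what makes the whole stats machinery data-driven. The standalone sketch below shows the same pattern outside the kernel; it is plain, compilable C, and every demo_* name is invented for illustration only:

	#include <stddef.h>
	#include <stdint.h>
	#include <stdio.h>

	struct demo_stats {
		uint64_t rx_bytes;
		uint32_t tx_errors;
	};

	struct demo_stat {
		const char *name;
		int size;
		int offset;
	};

	#define DEMO_STAT(_type, _name, _field) {		\
		.name = _name,					\
		.size = (int)sizeof(((_type *)0)->_field),	\
		.offset = (int)offsetof(_type, _field),		\
	}

	static const struct demo_stat demo_gstrings[] = {
		DEMO_STAT(struct demo_stats, "rx_bytes", rx_bytes),
		DEMO_STAT(struct demo_stats, "tx_errors", tx_errors),
	};

	/* same idea as iavf_add_one_ethtool_stat(): walk memory generically */
	static uint64_t demo_read_stat(const void *base, const struct demo_stat *s)
	{
		const char *p = (const char *)base + s->offset;

		switch (s->size) {
		case sizeof(uint64_t): return *(const uint64_t *)p;
		case sizeof(uint32_t): return *(const uint32_t *)p;
		default:	       return 0;
		}
	}

	int main(void)
	{
		struct demo_stats st = { .rx_bytes = 123456, .tx_errors = 7 };
		size_t i;

		for (i = 0; i < sizeof(demo_gstrings) / sizeof(demo_gstrings[0]); i++)
			printf("%s: %llu\n", demo_gstrings[i].name,
			       (unsigned long long)demo_read_stat(&st, &demo_gstrings[i]));
		return 0;
	}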
- **/ -#define i40e_add_ethtool_stats(data, pointer, stats) \ - __iavf_add_ethtool_stats(data, pointer, stats, ARRAY_SIZE(stats)) - -/** - * iavf_add_queue_stats - copy queue statistics into supplied buffer - * @data: ethtool stats buffer - * @ring: the ring to copy - * - * Queue statistics must be copied while protected by - * u64_stats_fetch_begin_irq, so we can't directly use i40e_add_ethtool_stats. - * Assumes that queue stats are defined in i40e_gstrings_queue_stats. If the - * ring pointer is null, zero out the queue stat values and update the data - * pointer. Otherwise safely copy the stats from the ring into the supplied - * buffer and update the data pointer when finished. - * - * This function expects to be called while under rcu_read_lock(). - **/ -static void -iavf_add_queue_stats(u64 **data, struct i40e_ring *ring) -{ - const unsigned int size = ARRAY_SIZE(i40e_gstrings_queue_stats); - const struct i40e_stats *stats = i40e_gstrings_queue_stats; - unsigned int start; - unsigned int i; - - /* To avoid invalid statistics values, ensure that we keep retrying - * the copy until we get a consistent value according to - * u64_stats_fetch_retry_irq. But first, make sure our ring is - * non-null before attempting to access its syncp. - */ - do { - start = !ring ? 0 : u64_stats_fetch_begin_irq(&ring->syncp); - for (i = 0; i < size; i++) - iavf_add_one_ethtool_stat(&(*data)[i], ring, &stats[i]); - } while (ring && u64_stats_fetch_retry_irq(&ring->syncp, start)); - - /* Once we successfully copy the stats in, update the data pointer */ - *data += size; -} - -/** - * __i40e_add_stat_strings - copy stat strings into ethtool buffer - * @p: ethtool supplied buffer - * @stats: stat definitions array - * @size: size of the stats array - * - * Format and copy the strings described by stats into the buffer pointed at - * by p. - **/ -static void __i40e_add_stat_strings(u8 **p, const struct i40e_stats stats[], - const unsigned int size, ...) -{ - unsigned int i; - - for (i = 0; i < size; i++) { - va_list args; - - va_start(args, size); - vsnprintf(*p, ETH_GSTRING_LEN, stats[i].stat_string, args); - *p += ETH_GSTRING_LEN; - va_end(args); - } -} - -/** - * i40e_add_stat_strings - copy stat strings into ethtool buffer - * @p: ethtool supplied buffer - * @stats: stat definitions array - * - * Format and copy the strings described by the const static stats value into - * the buffer pointed at by p. - * - * The parameter @stats is evaluated twice, so parameters with side effects - * should be avoided. Additionally, stats must be an array such that - * ARRAY_SIZE can be called on it. - **/ -#define i40e_add_stat_strings(p, stats, ...) 
\ - __i40e_add_stat_strings(p, stats, ARRAY_SIZE(stats), ## __VA_ARGS__) - -#define IAVF_STAT(_name, _stat) \ - I40E_STAT(struct iavf_adapter, _name, _stat) - -static const struct i40e_stats iavf_gstrings_stats[] = { - IAVF_STAT("rx_bytes", current_stats.rx_bytes), - IAVF_STAT("rx_unicast", current_stats.rx_unicast), - IAVF_STAT("rx_multicast", current_stats.rx_multicast), - IAVF_STAT("rx_broadcast", current_stats.rx_broadcast), - IAVF_STAT("rx_discards", current_stats.rx_discards), - IAVF_STAT("rx_unknown_protocol", current_stats.rx_unknown_protocol), - IAVF_STAT("tx_bytes", current_stats.tx_bytes), - IAVF_STAT("tx_unicast", current_stats.tx_unicast), - IAVF_STAT("tx_multicast", current_stats.tx_multicast), - IAVF_STAT("tx_broadcast", current_stats.tx_broadcast), - IAVF_STAT("tx_discards", current_stats.tx_discards), - IAVF_STAT("tx_errors", current_stats.tx_errors), -}; - -#define IAVF_STATS_LEN ARRAY_SIZE(iavf_gstrings_stats) - -#define IAVF_QUEUE_STATS_LEN ARRAY_SIZE(i40e_gstrings_queue_stats) - -/* For now we have one and only one private flag and it is only defined - * when we have support for the SKIP_CPU_SYNC DMA attribute. Instead - * of leaving all this code sitting around empty we will strip it unless - * our one private flag is actually available. - */ -struct iavf_priv_flags { - char flag_string[ETH_GSTRING_LEN]; - u32 flag; - bool read_only; -}; - -#define IAVF_PRIV_FLAG(_name, _flag, _read_only) { \ - .flag_string = _name, \ - .flag = _flag, \ - .read_only = _read_only, \ -} - -static const struct iavf_priv_flags iavf_gstrings_priv_flags[] = { - IAVF_PRIV_FLAG("legacy-rx", IAVF_FLAG_LEGACY_RX, 0), -}; - -#define IAVF_PRIV_FLAGS_STR_LEN ARRAY_SIZE(iavf_gstrings_priv_flags) - -/** - * iavf_get_link_ksettings - Get Link Speed and Duplex settings - * @netdev: network interface device structure - * @cmd: ethtool command - * - * Reports speed/duplex settings. Because this is a VF, we don't know what - * kind of link we really have, so we fake it. - **/ -static int iavf_get_link_ksettings(struct net_device *netdev, - struct ethtool_link_ksettings *cmd) -{ - struct iavf_adapter *adapter = netdev_priv(netdev); - - ethtool_link_ksettings_zero_link_mode(cmd, supported); - cmd->base.autoneg = AUTONEG_DISABLE; - cmd->base.port = PORT_NONE; - /* Set speed and duplex */ - switch (adapter->link_speed) { - case I40E_LINK_SPEED_40GB: - cmd->base.speed = SPEED_40000; - break; - case I40E_LINK_SPEED_25GB: -#ifdef SPEED_25000 - cmd->base.speed = SPEED_25000; -#else - netdev_info(netdev, - "Speed is 25G, display not supported by this version of ethtool.\n"); -#endif - break; - case I40E_LINK_SPEED_20GB: - cmd->base.speed = SPEED_20000; - break; - case I40E_LINK_SPEED_10GB: - cmd->base.speed = SPEED_10000; - break; - case I40E_LINK_SPEED_1GB: - cmd->base.speed = SPEED_1000; - break; - case I40E_LINK_SPEED_100MB: - cmd->base.speed = SPEED_100; - break; - default: - break; - } - cmd->base.duplex = DUPLEX_FULL; - - return 0; -} - -/** - * iavf_get_sset_count - Get length of string set - * @netdev: network interface device structure - * @sset: id of string set - * - * Reports size of various string tables. 
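Relatedly, the queue-stat copy in iavf_add_queue_stats() above leans on the kernel's u64_stats seqcount API; condensed to its core retry shape (ring assumed non-NULL here for brevity, unlike the NULL-tolerant loop in the driver):

	unsigned int start;
	u64 packets, bytes;

	do {
		start = u64_stats_fetch_begin_irq(&ring->syncp);
		packets = ring->stats.packets;
		bytes   = ring->stats.bytes;
	} while (u64_stats_fetch_retry_irq(&ring->syncp, start));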
- **/ -static int iavf_get_sset_count(struct net_device *netdev, int sset) -{ - if (sset == ETH_SS_STATS) - return IAVF_STATS_LEN + - (IAVF_QUEUE_STATS_LEN * 2 * IAVF_MAX_REQ_QUEUES); - else if (sset == ETH_SS_PRIV_FLAGS) - return IAVF_PRIV_FLAGS_STR_LEN; - else - return -EINVAL; -} - -/** - * iavf_get_ethtool_stats - report device statistics - * @netdev: network interface device structure - * @stats: ethtool statistics structure - * @data: pointer to data buffer - * - * All statistics are added to the data buffer as an array of u64. - **/ -static void iavf_get_ethtool_stats(struct net_device *netdev, - struct ethtool_stats *stats, u64 *data) -{ - struct iavf_adapter *adapter = netdev_priv(netdev); - unsigned int i; - - i40e_add_ethtool_stats(&data, adapter, iavf_gstrings_stats); - - rcu_read_lock(); - for (i = 0; i < IAVF_MAX_REQ_QUEUES; i++) { - struct i40e_ring *ring; - - /* Avoid accessing un-allocated queues */ - ring = (i < adapter->num_active_queues ? - &adapter->tx_rings[i] : NULL); - iavf_add_queue_stats(&data, ring); - - /* Avoid accessing un-allocated queues */ - ring = (i < adapter->num_active_queues ? - &adapter->rx_rings[i] : NULL); - iavf_add_queue_stats(&data, ring); - } - rcu_read_unlock(); -} - -/** - * iavf_get_priv_flag_strings - Get private flag strings - * @netdev: network interface device structure - * @data: buffer for string data - * - * Builds the private flags string table - **/ -static void iavf_get_priv_flag_strings(struct net_device *netdev, u8 *data) -{ - unsigned int i; - - for (i = 0; i < IAVF_PRIV_FLAGS_STR_LEN; i++) { - snprintf(data, ETH_GSTRING_LEN, "%s", - iavf_gstrings_priv_flags[i].flag_string); - data += ETH_GSTRING_LEN; - } -} - -/** - * iavf_get_stat_strings - Get stat strings - * @netdev: network interface device structure - * @data: buffer for string data - * - * Builds the statistics string table - **/ -static void iavf_get_stat_strings(struct net_device *netdev, u8 *data) -{ - unsigned int i; - - i40e_add_stat_strings(&data, iavf_gstrings_stats); - - /* Queues are always allocated in pairs, so we just use num_tx_queues - * for both Tx and Rx queues. - */ - for (i = 0; i < netdev->num_tx_queues; i++) { - i40e_add_stat_strings(&data, i40e_gstrings_queue_stats, - "tx", i); - i40e_add_stat_strings(&data, i40e_gstrings_queue_stats, - "rx", i); - } -} - -/** - * iavf_get_strings - Get string set - * @netdev: network interface device structure - * @sset: id of string set - * @data: buffer for string data - * - * Builds string tables for various string sets - **/ -static void iavf_get_strings(struct net_device *netdev, u32 sset, u8 *data) -{ - switch (sset) { - case ETH_SS_STATS: - iavf_get_stat_strings(netdev, data); - break; - case ETH_SS_PRIV_FLAGS: - iavf_get_priv_flag_strings(netdev, data); - break; - default: - break; - } -} - -/** - * iavf_get_priv_flags - report device private flags - * @netdev: network interface device structure - * - * The string set count and the string set must be kept in sync for each - * flag returned. Add new strings for each flag to the iavf_gstrings_priv_flags - * array. - * - * Returns a u32 bitmap of flags. 
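Since bit i of the returned bitmap corresponds to entry i of the priv-flags table, a decoder is a one-liner; a small sketch (hypothetical caller context, since the getter is static to this file):

	u32 flags = iavf_get_priv_flags(netdev);
	unsigned int i;

	for (i = 0; i < IAVF_PRIV_FLAGS_STR_LEN; i++)
		if (flags & BIT(i))
			netdev_info(netdev, "priv flag %s is set\n",
				    iavf_gstrings_priv_flags[i].flag_string);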
- **/ -static u32 iavf_get_priv_flags(struct net_device *netdev) -{ - struct iavf_adapter *adapter = netdev_priv(netdev); - u32 i, ret_flags = 0; - - for (i = 0; i < IAVF_PRIV_FLAGS_STR_LEN; i++) { - const struct iavf_priv_flags *priv_flags; - - priv_flags = &iavf_gstrings_priv_flags[i]; - - if (priv_flags->flag & adapter->flags) - ret_flags |= BIT(i); - } - - return ret_flags; -} - -/** - * iavf_set_priv_flags - set private flags - * @netdev: network interface device structure - * @flags: bit flags to be set - **/ -static int iavf_set_priv_flags(struct net_device *netdev, u32 flags) -{ - struct iavf_adapter *adapter = netdev_priv(netdev); - u32 orig_flags, new_flags, changed_flags; - u32 i; - - orig_flags = READ_ONCE(adapter->flags); - new_flags = orig_flags; - - for (i = 0; i < IAVF_PRIV_FLAGS_STR_LEN; i++) { - const struct iavf_priv_flags *priv_flags; - - priv_flags = &iavf_gstrings_priv_flags[i]; - - if (flags & BIT(i)) - new_flags |= priv_flags->flag; - else - new_flags &= ~(priv_flags->flag); - - if (priv_flags->read_only && - ((orig_flags ^ new_flags) & ~BIT(i))) - return -EOPNOTSUPP; - } - - /* Before we finalize any flag changes, any checks which we need to - * perform to determine if the new flags will be supported should go - * here... - */ - - /* Compare and exchange the new flags into place. If we failed, that - * is, if cmpxchg returns anything but the old value, this means - * something else must have modified the flags variable since we - * copied it. We'll just punt with an error and log something in the - * message buffer. - */ - if (cmpxchg(&adapter->flags, orig_flags, new_flags) != orig_flags) { - dev_warn(&adapter->pdev->dev, - "Unable to update adapter->flags as it was modified by another thread...\n"); - return -EAGAIN; - } - - changed_flags = orig_flags ^ new_flags; - - /* Process any additional changes needed as a result of flag changes. - * The changed_flags value reflects the list of bits that were changed - * in the code above. - */ - - /* issue a reset to force legacy-rx change to take effect */ - if (changed_flags & IAVF_FLAG_LEGACY_RX) { - if (netif_running(netdev)) { - adapter->flags |= IAVF_FLAG_RESET_NEEDED; - schedule_work(&adapter->reset_task); - } - } - - return 0; -} - -/** - * iavf_get_msglevel - Get debug message level - * @netdev: network interface device structure - * - * Returns current debug message level. - **/ -static u32 iavf_get_msglevel(struct net_device *netdev) -{ - struct iavf_adapter *adapter = netdev_priv(netdev); - - return adapter->msg_enable; -} - -/** - * iavf_set_msglevel - Set debug message level - * @netdev: network interface device structure - * @data: message level - * - * Set current debug message level. Higher values cause the driver to - * be noisier. - **/ -static void iavf_set_msglevel(struct net_device *netdev, u32 data) -{ - struct iavf_adapter *adapter = netdev_priv(netdev); - - if (I40E_DEBUG_USER & data) - adapter->hw.debug_mask = data; - adapter->msg_enable = data; -} - -/** - * iavf_get_drvinfo - Get driver info - * @netdev: network interface device structure - * @drvinfo: ethtool driver info structure - * - * Returns information about the driver and device for display to the user. 
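The cmpxchg() in iavf_set_priv_flags() above is a single lock-free compare-and-swap attempt rather than a retry loop; condensed to its essentials (requested_bits is a stand-in for the bits computed from the ethtool request):

	u32 orig_flags = READ_ONCE(adapter->flags);
	u32 new_flags = (orig_flags & ~IAVF_FLAG_LEGACY_RX) | requested_bits;

	if (cmpxchg(&adapter->flags, orig_flags, new_flags) != orig_flags)
		return -EAGAIN;	/* another thread won the race; punt */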
- **/ -static void iavf_get_drvinfo(struct net_device *netdev, - struct ethtool_drvinfo *drvinfo) -{ - struct iavf_adapter *adapter = netdev_priv(netdev); - - strlcpy(drvinfo->driver, iavf_driver_name, 32); - strlcpy(drvinfo->version, iavf_driver_version, 32); - strlcpy(drvinfo->fw_version, "N/A", 4); - strlcpy(drvinfo->bus_info, pci_name(adapter->pdev), 32); - drvinfo->n_priv_flags = IAVF_PRIV_FLAGS_STR_LEN; -} - -/** - * iavf_get_ringparam - Get ring parameters - * @netdev: network interface device structure - * @ring: ethtool ringparam structure - * - * Returns current ring parameters. TX and RX rings are reported separately, - * but the number of rings is not reported. - **/ -static void iavf_get_ringparam(struct net_device *netdev, - struct ethtool_ringparam *ring) -{ - struct iavf_adapter *adapter = netdev_priv(netdev); - - ring->rx_max_pending = IAVF_MAX_RXD; - ring->tx_max_pending = IAVF_MAX_TXD; - ring->rx_pending = adapter->rx_desc_count; - ring->tx_pending = adapter->tx_desc_count; -} - -/** - * iavf_set_ringparam - Set ring parameters - * @netdev: network interface device structure - * @ring: ethtool ringparam structure - * - * Sets ring parameters. TX and RX rings are controlled separately, but the - * number of rings is not specified, so all rings get the same settings. - **/ -static int iavf_set_ringparam(struct net_device *netdev, - struct ethtool_ringparam *ring) -{ - struct iavf_adapter *adapter = netdev_priv(netdev); - u32 new_rx_count, new_tx_count; - - if ((ring->rx_mini_pending) || (ring->rx_jumbo_pending)) - return -EINVAL; - - new_tx_count = clamp_t(u32, ring->tx_pending, - IAVF_MIN_TXD, - IAVF_MAX_TXD); - new_tx_count = ALIGN(new_tx_count, IAVF_REQ_DESCRIPTOR_MULTIPLE); - - new_rx_count = clamp_t(u32, ring->rx_pending, - IAVF_MIN_RXD, - IAVF_MAX_RXD); - new_rx_count = ALIGN(new_rx_count, IAVF_REQ_DESCRIPTOR_MULTIPLE); - - /* if nothing to do return success */ - if ((new_tx_count == adapter->tx_desc_count) && - (new_rx_count == adapter->rx_desc_count)) - return 0; - - adapter->tx_desc_count = new_tx_count; - adapter->rx_desc_count = new_rx_count; - - if (netif_running(netdev)) { - adapter->flags |= IAVF_FLAG_RESET_NEEDED; - schedule_work(&adapter->reset_task); - } - - return 0; -} - -/** - * __iavf_get_coalesce - get per-queue coalesce settings - * @netdev: the netdev to check - * @ec: ethtool coalesce data structure - * @queue: which queue to pick - * - * Gets the per-queue settings for coalescence. Specifically Rx and Tx usecs - * are per queue. If queue is <0 then we default to queue 0 as the - * representative value. - **/ -static int __iavf_get_coalesce(struct net_device *netdev, - struct ethtool_coalesce *ec, int queue) -{ - struct iavf_adapter *adapter = netdev_priv(netdev); - struct i40e_vsi *vsi = &adapter->vsi; - struct i40e_ring *rx_ring, *tx_ring; - - ec->tx_max_coalesced_frames = vsi->work_limit; - ec->rx_max_coalesced_frames = vsi->work_limit; - - /* Rx and Tx usecs have a per-queue value. If the user doesn't specify - * a queue, return queue 0's value as representative. 
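A worked example of the clamp-and-align policy in iavf_set_ringparam() above, assuming the driver's usual descriptor multiple of 32 for IAVF_REQ_DESCRIPTOR_MULTIPLE:

	u32 new_tx_count = clamp_t(u32, 1000, IAVF_MIN_TXD, IAVF_MAX_TXD);

	new_tx_count = ALIGN(new_tx_count, IAVF_REQ_DESCRIPTOR_MULTIPLE);
	/* a request for 1000 descriptors ends up as ALIGN(1000, 32) == 1024 */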
- */ - if (queue < 0) - queue = 0; - else if (queue >= adapter->num_active_queues) - return -EINVAL; - - rx_ring = &adapter->rx_rings[queue]; - tx_ring = &adapter->tx_rings[queue]; - - if (ITR_IS_DYNAMIC(rx_ring->itr_setting)) - ec->use_adaptive_rx_coalesce = 1; - - if (ITR_IS_DYNAMIC(tx_ring->itr_setting)) - ec->use_adaptive_tx_coalesce = 1; - - ec->rx_coalesce_usecs = rx_ring->itr_setting & ~I40E_ITR_DYNAMIC; - ec->tx_coalesce_usecs = tx_ring->itr_setting & ~I40E_ITR_DYNAMIC; - - return 0; -} - -/** - * iavf_get_coalesce - Get interrupt coalescing settings - * @netdev: network interface device structure - * @ec: ethtool coalesce structure - * - * Returns current coalescing settings. This is referred to elsewhere in the - * driver as Interrupt Throttle Rate, as this is how the hardware describes - * this functionality. Note that if per-queue settings have been modified this - * only represents the settings of queue 0. - **/ -static int iavf_get_coalesce(struct net_device *netdev, - struct ethtool_coalesce *ec) -{ - return __iavf_get_coalesce(netdev, ec, -1); -} - -/** - * iavf_get_per_queue_coalesce - get coalesce values for specific queue - * @netdev: netdev to read - * @ec: coalesce settings from ethtool - * @queue: the queue to read - * - * Read specific queue's coalesce settings. - **/ -static int iavf_get_per_queue_coalesce(struct net_device *netdev, u32 queue, - struct ethtool_coalesce *ec) -{ - return __iavf_get_coalesce(netdev, ec, queue); -} - -/** - * iavf_set_itr_per_queue - set ITR values for specific queue - * @adapter: the VF adapter struct to set values for - * @ec: coalesce settings from ethtool - * @queue: the queue to modify - * - * Change the ITR settings for a specific queue. - **/ -static void iavf_set_itr_per_queue(struct iavf_adapter *adapter, - struct ethtool_coalesce *ec, int queue) -{ - struct i40e_ring *rx_ring = &adapter->rx_rings[queue]; - struct i40e_ring *tx_ring = &adapter->tx_rings[queue]; - struct i40e_q_vector *q_vector; - - rx_ring->itr_setting = ITR_REG_ALIGN(ec->rx_coalesce_usecs); - tx_ring->itr_setting = ITR_REG_ALIGN(ec->tx_coalesce_usecs); - - rx_ring->itr_setting |= I40E_ITR_DYNAMIC; - if (!ec->use_adaptive_rx_coalesce) - rx_ring->itr_setting ^= I40E_ITR_DYNAMIC; - - tx_ring->itr_setting |= I40E_ITR_DYNAMIC; - if (!ec->use_adaptive_tx_coalesce) - tx_ring->itr_setting ^= I40E_ITR_DYNAMIC; - - q_vector = rx_ring->q_vector; - q_vector->rx.target_itr = ITR_TO_REG(rx_ring->itr_setting); - - q_vector = tx_ring->q_vector; - q_vector->tx.target_itr = ITR_TO_REG(tx_ring->itr_setting); - - /* The interrupt handler itself will take care of programming - * the Tx and Rx ITR values based on the values we have entered - * into the q_vector, no need to write the values now. - */ -} - -/** - * __iavf_set_coalesce - set coalesce settings for particular queue - * @netdev: the netdev to change - * @ec: ethtool coalesce settings - * @queue: the queue to change - * - * Sets the coalesce settings for a particular queue. 
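The |= followed by the conditional ^= in iavf_set_itr_per_queue() above is just a compact conditional set/clear of the dynamic-ITR bit; written out longhand it is equivalent to:

	if (ec->use_adaptive_rx_coalesce)
		rx_ring->itr_setting |= I40E_ITR_DYNAMIC;
	else
		rx_ring->itr_setting &= ~I40E_ITR_DYNAMIC;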
- **/ -static int __iavf_set_coalesce(struct net_device *netdev, - struct ethtool_coalesce *ec, int queue) -{ - struct iavf_adapter *adapter = netdev_priv(netdev); - struct i40e_vsi *vsi = &adapter->vsi; - int i; - - if (ec->tx_max_coalesced_frames_irq || ec->rx_max_coalesced_frames_irq) - vsi->work_limit = ec->tx_max_coalesced_frames_irq; - - if (ec->rx_coalesce_usecs == 0) { - if (ec->use_adaptive_rx_coalesce) - netif_info(adapter, drv, netdev, "rx-usecs=0, need to disable adaptive-rx for a complete disable\n"); - } else if ((ec->rx_coalesce_usecs < I40E_MIN_ITR) || - (ec->rx_coalesce_usecs > I40E_MAX_ITR)) { - netif_info(adapter, drv, netdev, "Invalid value, rx-usecs range is 0-8160\n"); - return -EINVAL; - } else if (ec->tx_coalesce_usecs == 0) { - if (ec->use_adaptive_tx_coalesce) - netif_info(adapter, drv, netdev, "tx-usecs=0, need to disable adaptive-tx for a complete disable\n"); - } else if ((ec->tx_coalesce_usecs < I40E_MIN_ITR) || - (ec->tx_coalesce_usecs > I40E_MAX_ITR)) { - netif_info(adapter, drv, netdev, "Invalid value, tx-usecs range is 0-8160\n"); - return -EINVAL; - } - - /* Rx and Tx usecs have a per-queue value. If the user doesn't specify - * a queue, apply to all queues. - */ - if (queue < 0) { - for (i = 0; i < adapter->num_active_queues; i++) - iavf_set_itr_per_queue(adapter, ec, i); - } else if (queue < adapter->num_active_queues) { - iavf_set_itr_per_queue(adapter, ec, queue); - } else { - netif_info(adapter, drv, netdev, "Invalid queue value, queue range is 0 - %d\n", - adapter->num_active_queues - 1); - return -EINVAL; - } - - return 0; -} - -/** - * iavf_set_coalesce - Set interrupt coalescing settings - * @netdev: network interface device structure - * @ec: ethtool coalesce structure - * - * Change current coalescing settings for every queue. - **/ -static int iavf_set_coalesce(struct net_device *netdev, - struct ethtool_coalesce *ec) -{ - return __iavf_set_coalesce(netdev, ec, -1); -} - -/** - * iavf_set_per_queue_coalesce - set specific queue's coalesce settings - * @netdev: the netdev to change - * @ec: ethtool's coalesce settings - * @queue: the queue to modify - * - * Modifies a specific queue's coalesce settings. - */ -static int iavf_set_per_queue_coalesce(struct net_device *netdev, u32 queue, - struct ethtool_coalesce *ec) -{ - return __iavf_set_coalesce(netdev, ec, queue); -} - -/** - * iavf_get_rxnfc - command to get RX flow classification rules - * @netdev: network interface device structure - * @cmd: ethtool rxnfc command - * @rule_locs: pointer to store rule locations - * - * Returns success if the command is supported. - **/ -static int iavf_get_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd, - u32 *rule_locs) -{ - struct iavf_adapter *adapter = netdev_priv(netdev); - int ret = -EOPNOTSUPP; - - switch (cmd->cmd) { - case ETHTOOL_GRXRINGS: - cmd->data = adapter->num_active_queues; - ret = 0; - break; - case ETHTOOL_GRXFH: - netdev_info(netdev, - "RSS hash info is not available to vf, use pf.\n"); - break; - default: - break; - } - - return ret; -} -/** - * iavf_get_channels: get the number of channels supported by the device - * @netdev: network interface device structure - * @ch: channel information structure - * - * For the purposes of our device, we only use combined channels, i.e. a tx/rx - * queue pair. Report one extra channel to match our "other" MSI-X vector. 
- **/ -static void iavf_get_channels(struct net_device *netdev, - struct ethtool_channels *ch) -{ - struct iavf_adapter *adapter = netdev_priv(netdev); - - /* Report maximum channels */ - ch->max_combined = IAVF_MAX_REQ_QUEUES; - - ch->max_other = NONQ_VECS; - ch->other_count = NONQ_VECS; - - ch->combined_count = adapter->num_active_queues; -} - -/** - * iavf_set_channels: set the new channel count - * @netdev: network interface device structure - * @ch: channel information structure - * - * Negotiate a new number of channels with the PF then do a reset. During - * reset we'll realloc queues and fix the RSS table. Returns 0 on success, - * negative on failure. - **/ -static int iavf_set_channels(struct net_device *netdev, - struct ethtool_channels *ch) -{ - struct iavf_adapter *adapter = netdev_priv(netdev); - int num_req = ch->combined_count; - - if (num_req != adapter->num_active_queues && - !(adapter->vf_res->vf_cap_flags & - VIRTCHNL_VF_OFFLOAD_REQ_QUEUES)) { - dev_info(&adapter->pdev->dev, "PF is not capable of queue negotiation.\n"); - return -EINVAL; - } - - if ((adapter->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_ADQ) && - adapter->num_tc) { - dev_info(&adapter->pdev->dev, "Cannot set channels since ADq is enabled.\n"); - return -EINVAL; - } - - /* All of these should have already been checked by ethtool before this - * even gets to us, but just to be sure. - */ - if (num_req <= 0 || num_req > IAVF_MAX_REQ_QUEUES) - return -EINVAL; - - if (ch->rx_count || ch->tx_count || ch->other_count != NONQ_VECS) - return -EINVAL; - - adapter->num_req_queues = num_req; - return iavf_request_queues(adapter, num_req); -} - -/** - * iavf_get_rxfh_key_size - get the RSS hash key size - * @netdev: network interface device structure - * - * Returns the key size. - **/ -static u32 iavf_get_rxfh_key_size(struct net_device *netdev) -{ - struct iavf_adapter *adapter = netdev_priv(netdev); - - return adapter->rss_key_size; -} - -/** - * iavf_get_rxfh_indir_size - get the rx flow hash indirection table size - * @netdev: network interface device structure - * - * Returns the table size. - **/ -static u32 iavf_get_rxfh_indir_size(struct net_device *netdev) -{ - struct iavf_adapter *adapter = netdev_priv(netdev); - - return adapter->rss_lut_size; -} - -/** - * iavf_get_rxfh - get the rx flow hash indirection table - * @netdev: network interface device structure - * @indir: indirection table - * @key: hash key - * @hfunc: hash function in use - * - * Reads the indirection table directly from the hardware. Always returns 0. - **/ -static int iavf_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key, - u8 *hfunc) -{ - struct iavf_adapter *adapter = netdev_priv(netdev); - u16 i; - - if (hfunc) - *hfunc = ETH_RSS_HASH_TOP; - if (!indir) - return 0; - - memcpy(key, adapter->rss_key, adapter->rss_key_size); - - /* Each 32 bits pointed by 'indir' is stored with a lut entry */ - for (i = 0; i < adapter->rss_lut_size; i++) - indir[i] = (u32)adapter->rss_lut[i]; - - return 0; -} - -/** - * iavf_set_rxfh - set the rx flow hash indirection table - * @netdev: network interface device structure - * @indir: indirection table - * @key: hash key - * @hfunc: hash function to use - * - * Returns -EINVAL if the table specifies an invalid queue id, otherwise - * returns 0 after programming the table. 
- **/ -static int iavf_set_rxfh(struct net_device *netdev, const u32 *indir, - const u8 *key, const u8 hfunc) -{ - struct iavf_adapter *adapter = netdev_priv(netdev); - u16 i; - - /* We do not allow change in unsupported parameters */ - if (key || - (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP)) - return -EOPNOTSUPP; - if (!indir) - return 0; - - if (key) - memcpy(adapter->rss_key, key, adapter->rss_key_size); - - /* Each 32 bits pointed by 'indir' is stored with a lut entry */ - for (i = 0; i < adapter->rss_lut_size; i++) - adapter->rss_lut[i] = (u8)(indir[i]); - - return iavf_config_rss(adapter); -} - -static const struct ethtool_ops iavf_ethtool_ops = { - .get_drvinfo = iavf_get_drvinfo, - .get_link = ethtool_op_get_link, - .get_ringparam = iavf_get_ringparam, - .set_ringparam = iavf_set_ringparam, - .get_strings = iavf_get_strings, - .get_ethtool_stats = iavf_get_ethtool_stats, - .get_sset_count = iavf_get_sset_count, - .get_priv_flags = iavf_get_priv_flags, - .set_priv_flags = iavf_set_priv_flags, - .get_msglevel = iavf_get_msglevel, - .set_msglevel = iavf_set_msglevel, - .get_coalesce = iavf_get_coalesce, - .set_coalesce = iavf_set_coalesce, - .get_per_queue_coalesce = iavf_get_per_queue_coalesce, - .set_per_queue_coalesce = iavf_set_per_queue_coalesce, - .get_rxnfc = iavf_get_rxnfc, - .get_rxfh_indir_size = iavf_get_rxfh_indir_size, - .get_rxfh = iavf_get_rxfh, - .set_rxfh = iavf_set_rxfh, - .get_channels = iavf_get_channels, - .set_channels = iavf_set_channels, - .get_rxfh_key_size = iavf_get_rxfh_key_size, - .get_link_ksettings = iavf_get_link_ksettings, -}; - -/** - * iavf_set_ethtool_ops - Initialize ethtool ops struct - * @netdev: network interface device structure - * - * Sets ethtool ops struct in our netdev so that ethtool can call - * our functions. - **/ -void iavf_set_ethtool_ops(struct net_device *netdev) -{ - netdev->ethtool_ops = &iavf_ethtool_ops; -} diff --git a/drivers/net/ethernet/intel/iavf/i40evf_main.c b/drivers/net/ethernet/intel/iavf/i40evf_main.c deleted file mode 100644 index 46bdc59b9bed3..0000000000000 --- a/drivers/net/ethernet/intel/iavf/i40evf_main.c +++ /dev/null @@ -1,4015 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* Copyright(c) 2013 - 2018 Intel Corporation. */ - -#include "i40evf.h" -#include "i40e_prototype.h" -#include "i40evf_client.h" -/* All iavf tracepoints are defined by the include below, which must - * be included exactly once across the whole kernel with - * CREATE_TRACE_POINTS defined - */ -#define CREATE_TRACE_POINTS -#include "i40e_trace.h" - -static int iavf_setup_all_tx_resources(struct iavf_adapter *adapter); -static int iavf_setup_all_rx_resources(struct iavf_adapter *adapter); -static int iavf_close(struct net_device *netdev); - -char iavf_driver_name[] = "iavf"; -static const char iavf_driver_string[] = - "Intel(R) Ethernet Adaptive Virtual Function Network Driver"; - -#define DRV_KERN "-k" - -#define DRV_VERSION_MAJOR 3 -#define DRV_VERSION_MINOR 2 -#define DRV_VERSION_BUILD 3 -#define DRV_VERSION __stringify(DRV_VERSION_MAJOR) "." \ - __stringify(DRV_VERSION_MINOR) "." 
\ - __stringify(DRV_VERSION_BUILD) \ - DRV_KERN -const char iavf_driver_version[] = DRV_VERSION; -static const char iavf_copyright[] = - "Copyright (c) 2013 - 2018 Intel Corporation."; - -/* iavf_pci_tbl - PCI Device ID Table - * - * Wildcard entries (PCI_ANY_ID) should come last - * Last entry must be all 0s - * - * { Vendor ID, Device ID, SubVendor ID, SubDevice ID, - * Class, Class Mask, private data (not used) } - */ -static const struct pci_device_id iavf_pci_tbl[] = { - {PCI_VDEVICE(INTEL, I40E_DEV_ID_VF), 0}, - {PCI_VDEVICE(INTEL, I40E_DEV_ID_VF_HV), 0}, - {PCI_VDEVICE(INTEL, I40E_DEV_ID_X722_VF), 0}, - {PCI_VDEVICE(INTEL, I40E_DEV_ID_ADAPTIVE_VF), 0}, - /* required last entry */ - {0, } -}; - -MODULE_DEVICE_TABLE(pci, iavf_pci_tbl); - -MODULE_ALIAS("i40evf"); -MODULE_AUTHOR("Intel Corporation, "); -MODULE_DESCRIPTION("Intel(R) XL710 X710 Virtual Function Network Driver"); -MODULE_LICENSE("GPL"); -MODULE_VERSION(DRV_VERSION); - -static struct workqueue_struct *iavf_wq; - -/** - * iavf_allocate_dma_mem_d - OS specific memory alloc for shared code - * @hw: pointer to the HW structure - * @mem: ptr to mem struct to fill out - * @size: size of memory requested - * @alignment: what to align the allocation to - **/ -iavf_status iavf_allocate_dma_mem_d(struct i40e_hw *hw, - struct i40e_dma_mem *mem, - u64 size, u32 alignment) -{ - struct iavf_adapter *adapter = (struct iavf_adapter *)hw->back; - - if (!mem) - return I40E_ERR_PARAM; - - mem->size = ALIGN(size, alignment); - mem->va = dma_alloc_coherent(&adapter->pdev->dev, mem->size, - (dma_addr_t *)&mem->pa, GFP_KERNEL); - if (mem->va) - return 0; - else - return I40E_ERR_NO_MEMORY; -} - -/** - * iavf_free_dma_mem_d - OS specific memory free for shared code - * @hw: pointer to the HW structure - * @mem: ptr to mem struct to free - **/ -iavf_status iavf_free_dma_mem_d(struct i40e_hw *hw, struct i40e_dma_mem *mem) -{ - struct iavf_adapter *adapter = (struct iavf_adapter *)hw->back; - - if (!mem || !mem->va) - return I40E_ERR_PARAM; - dma_free_coherent(&adapter->pdev->dev, mem->size, - mem->va, (dma_addr_t)mem->pa); - return 0; -} - -/** - * iavf_allocate_virt_mem_d - OS specific memory alloc for shared code - * @hw: pointer to the HW structure - * @mem: ptr to mem struct to fill out - * @size: size of memory requested - **/ -iavf_status iavf_allocate_virt_mem_d(struct i40e_hw *hw, - struct i40e_virt_mem *mem, u32 size) -{ - if (!mem) - return I40E_ERR_PARAM; - - mem->size = size; - mem->va = kzalloc(size, GFP_KERNEL); - - if (mem->va) - return 0; - else - return I40E_ERR_NO_MEMORY; -} - -/** - * iavf_free_virt_mem_d - OS specific memory free for shared code - * @hw: pointer to the HW structure - * @mem: ptr to mem struct to free - **/ -iavf_status iavf_free_virt_mem_d(struct i40e_hw *hw, - struct i40e_virt_mem *mem) -{ - if (!mem) - return I40E_ERR_PARAM; - - /* it's ok to kfree a NULL pointer */ - kfree(mem->va); - - return 0; -} - -/** - * iavf_debug_d - OS dependent version of debug printing - * @hw: pointer to the HW structure - * @mask: debug level mask - * @fmt_str: printf-type format description - **/ -void iavf_debug_d(void *hw, u32 mask, char *fmt_str, ...) 
-{ - char buf[512]; - va_list argptr; - - if (!(mask & ((struct i40e_hw *)hw)->debug_mask)) - return; - - va_start(argptr, fmt_str); - vsnprintf(buf, sizeof(buf), fmt_str, argptr); - va_end(argptr); - - /* the debug string is already formatted with a newline */ - pr_info("%s", buf); -} - -/** - * iavf_schedule_reset - Set the flags and schedule a reset event - * @adapter: board private structure - **/ -void iavf_schedule_reset(struct iavf_adapter *adapter) -{ - if (!(adapter->flags & - (IAVF_FLAG_RESET_PENDING | IAVF_FLAG_RESET_NEEDED))) { - adapter->flags |= IAVF_FLAG_RESET_NEEDED; - schedule_work(&adapter->reset_task); - } -} - -/** - * iavf_tx_timeout - Respond to a Tx Hang - * @netdev: network interface device structure - **/ -static void iavf_tx_timeout(struct net_device *netdev) -{ - struct iavf_adapter *adapter = netdev_priv(netdev); - - adapter->tx_timeout_count++; - iavf_schedule_reset(adapter); -} - -/** - * iavf_misc_irq_disable - Mask off interrupt generation on the NIC - * @adapter: board private structure - **/ -static void iavf_misc_irq_disable(struct iavf_adapter *adapter) -{ - struct i40e_hw *hw = &adapter->hw; - - if (!adapter->msix_entries) - return; - - wr32(hw, I40E_VFINT_DYN_CTL01, 0); - - /* read flush */ - rd32(hw, I40E_VFGEN_RSTAT); - - synchronize_irq(adapter->msix_entries[0].vector); -} - -/** - * iavf_misc_irq_enable - Enable default interrupt generation settings - * @adapter: board private structure - **/ -static void iavf_misc_irq_enable(struct iavf_adapter *adapter) -{ - struct i40e_hw *hw = &adapter->hw; - - wr32(hw, I40E_VFINT_DYN_CTL01, I40E_VFINT_DYN_CTL01_INTENA_MASK | - I40E_VFINT_DYN_CTL01_ITR_INDX_MASK); - wr32(hw, I40E_VFINT_ICR0_ENA1, I40E_VFINT_ICR0_ENA1_ADMINQ_MASK); - - /* read flush */ - rd32(hw, I40E_VFGEN_RSTAT); -} - -/** - * iavf_irq_disable - Mask off interrupt generation on the NIC - * @adapter: board private structure - **/ -static void iavf_irq_disable(struct iavf_adapter *adapter) -{ - int i; - struct i40e_hw *hw = &adapter->hw; - - if (!adapter->msix_entries) - return; - - for (i = 1; i < adapter->num_msix_vectors; i++) { - wr32(hw, I40E_VFINT_DYN_CTLN1(i - 1), 0); - synchronize_irq(adapter->msix_entries[i].vector); - } - /* read flush */ - rd32(hw, I40E_VFGEN_RSTAT); -} - -/** - * iavf_irq_enable_queues - Enable interrupt for specified queues - * @adapter: board private structure - * @mask: bitmap of queues to enable - **/ -void iavf_irq_enable_queues(struct iavf_adapter *adapter, u32 mask) -{ - struct i40e_hw *hw = &adapter->hw; - int i; - - for (i = 1; i < adapter->num_msix_vectors; i++) { - if (mask & BIT(i - 1)) { - wr32(hw, I40E_VFINT_DYN_CTLN1(i - 1), - I40E_VFINT_DYN_CTLN1_INTENA_MASK | - I40E_VFINT_DYN_CTLN1_ITR_INDX_MASK); - } - } -} - -/** - * iavf_irq_enable - Enable default interrupt generation settings - * @adapter: board private structure - * @flush: boolean value whether to run rd32() - **/ -void iavf_irq_enable(struct iavf_adapter *adapter, bool flush) -{ - struct i40e_hw *hw = &adapter->hw; - - iavf_misc_irq_enable(adapter); - iavf_irq_enable_queues(adapter, ~0); - - if (flush) - rd32(hw, I40E_VFGEN_RSTAT); -} - -/** - * iavf_msix_aq - Interrupt handler for vector 0 - * @irq: interrupt number - * @data: pointer to netdev - **/ -static irqreturn_t iavf_msix_aq(int irq, void *data) -{ - struct net_device *netdev = data; - struct iavf_adapter *adapter = netdev_priv(netdev); - struct i40e_hw *hw = &adapter->hw; - - /* handle non-queue interrupts, these reads clear the registers */ - rd32(hw, I40E_VFINT_ICR01); - rd32(hw, 
I40E_VFINT_ICR0_ENA1); - - /* schedule work on the private workqueue */ - schedule_work(&adapter->adminq_task); - - return IRQ_HANDLED; -} - -/** - * iavf_msix_clean_rings - MSIX mode Interrupt Handler - * @irq: interrupt number - * @data: pointer to a q_vector - **/ -static irqreturn_t iavf_msix_clean_rings(int irq, void *data) -{ - struct i40e_q_vector *q_vector = data; - - if (!q_vector->tx.ring && !q_vector->rx.ring) - return IRQ_HANDLED; - - napi_schedule_irqoff(&q_vector->napi); - - return IRQ_HANDLED; -} - -/** - * iavf_map_vector_to_rxq - associate irqs with rx queues - * @adapter: board private structure - * @v_idx: interrupt number - * @r_idx: queue number - **/ -static void -iavf_map_vector_to_rxq(struct iavf_adapter *adapter, int v_idx, int r_idx) -{ - struct i40e_q_vector *q_vector = &adapter->q_vectors[v_idx]; - struct i40e_ring *rx_ring = &adapter->rx_rings[r_idx]; - struct i40e_hw *hw = &adapter->hw; - - rx_ring->q_vector = q_vector; - rx_ring->next = q_vector->rx.ring; - rx_ring->vsi = &adapter->vsi; - q_vector->rx.ring = rx_ring; - q_vector->rx.count++; - q_vector->rx.next_update = jiffies + 1; - q_vector->rx.target_itr = ITR_TO_REG(rx_ring->itr_setting); - q_vector->ring_mask |= BIT(r_idx); - wr32(hw, I40E_VFINT_ITRN1(I40E_RX_ITR, q_vector->reg_idx), - q_vector->rx.current_itr); - q_vector->rx.current_itr = q_vector->rx.target_itr; -} - -/** - * iavf_map_vector_to_txq - associate irqs with tx queues - * @adapter: board private structure - * @v_idx: interrupt number - * @t_idx: queue number - **/ -static void -iavf_map_vector_to_txq(struct iavf_adapter *adapter, int v_idx, int t_idx) -{ - struct i40e_q_vector *q_vector = &adapter->q_vectors[v_idx]; - struct i40e_ring *tx_ring = &adapter->tx_rings[t_idx]; - struct i40e_hw *hw = &adapter->hw; - - tx_ring->q_vector = q_vector; - tx_ring->next = q_vector->tx.ring; - tx_ring->vsi = &adapter->vsi; - q_vector->tx.ring = tx_ring; - q_vector->tx.count++; - q_vector->tx.next_update = jiffies + 1; - q_vector->tx.target_itr = ITR_TO_REG(tx_ring->itr_setting); - q_vector->num_ringpairs++; - wr32(hw, I40E_VFINT_ITRN1(I40E_TX_ITR, q_vector->reg_idx), - q_vector->tx.target_itr); - q_vector->tx.current_itr = q_vector->tx.target_itr; -} - -/** - * iavf_map_rings_to_vectors - Maps descriptor rings to vectors - * @adapter: board private structure to initialize - * - * This function maps descriptor rings to the queue-specific vectors - * we were allotted through the MSI-X enabling code. Ideally, we'd have - * one vector per ring/queue, but on a constrained vector budget, we - * group the rings as "efficiently" as possible. You would add new - * mapping configurations in here. - **/ -static void iavf_map_rings_to_vectors(struct iavf_adapter *adapter) -{ - int rings_remaining = adapter->num_active_queues; - int ridx = 0, vidx = 0; - int q_vectors; - - q_vectors = adapter->num_msix_vectors - NONQ_VECS; - - for (; ridx < rings_remaining; ridx++) { - iavf_map_vector_to_rxq(adapter, vidx, ridx); - iavf_map_vector_to_txq(adapter, vidx, ridx); - - /* In the case where we have more queues than vectors, continue - * round-robin on vectors until all queues are mapped. - */ - if (++vidx >= q_vectors) - vidx = 0; - } - - adapter->aq_required |= IAVF_FLAG_AQ_MAP_VECTORS; -} - -#ifdef CONFIG_NET_POLL_CONTROLLER -/** - * iavf_netpoll - A Polling 'interrupt' handler - * @netdev: network interface device structure - * - * This is used by netconsole to send skbs without having to re-enable - * interrupts. 
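A worked example of the round-robin in iavf_map_rings_to_vectors() above: with more queue pairs than vectors, ring r simply lands on vector r modulo the vector count. As standalone C:

	#include <stdio.h>

	int main(void)
	{
		int rings = 8, vectors = 4, r;

		/* e.g. vector 0 serves rings 0 and 4, vector 1 serves 1 and 5, ... */
		for (r = 0; r < rings; r++)
			printf("ring %d -> vector %d\n", r, r % vectors);
		return 0;
	}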
It's not called while the normal interrupt routine is executing. - **/ -static void iavf_netpoll(struct net_device *netdev) -{ - struct iavf_adapter *adapter = netdev_priv(netdev); - int q_vectors = adapter->num_msix_vectors - NONQ_VECS; - int i; - - /* if interface is down do nothing */ - if (test_bit(__I40E_VSI_DOWN, adapter->vsi.state)) - return; - - for (i = 0; i < q_vectors; i++) - iavf_msix_clean_rings(0, &adapter->q_vectors[i]); -} - -#endif -/** - * iavf_irq_affinity_notify - Callback for affinity changes - * @notify: context as to what irq was changed - * @mask: the new affinity mask - * - * This is a callback function used by the irq_set_affinity_notifier function - * so that we may register to receive changes to the irq affinity masks. - **/ -static void iavf_irq_affinity_notify(struct irq_affinity_notify *notify, - const cpumask_t *mask) -{ - struct i40e_q_vector *q_vector = - container_of(notify, struct i40e_q_vector, affinity_notify); - - cpumask_copy(&q_vector->affinity_mask, mask); -} - -/** - * iavf_irq_affinity_release - Callback for affinity notifier release - * @ref: internal core kernel usage - * - * This is a callback function used by the irq_set_affinity_notifier function - * to inform the current notification subscriber that they will no longer - * receive notifications. - **/ -static void iavf_irq_affinity_release(struct kref *ref) {} - -/** - * iavf_request_traffic_irqs - Initialize MSI-X interrupts - * @adapter: board private structure - * @basename: device basename - * - * Allocates MSI-X vectors for tx and rx handling, and requests - * interrupts from the kernel. - **/ -static int -iavf_request_traffic_irqs(struct iavf_adapter *adapter, char *basename) -{ - unsigned int vector, q_vectors; - unsigned int rx_int_idx = 0, tx_int_idx = 0; - int irq_num, err; - int cpu; - - iavf_irq_disable(adapter); - /* Decrement for Other and TCP Timer vectors */ - q_vectors = adapter->num_msix_vectors - NONQ_VECS; - - for (vector = 0; vector < q_vectors; vector++) { - struct i40e_q_vector *q_vector = &adapter->q_vectors[vector]; - - irq_num = adapter->msix_entries[vector + NONQ_VECS].vector; - - if (q_vector->tx.ring && q_vector->rx.ring) { - snprintf(q_vector->name, sizeof(q_vector->name), - "iavf-%s-TxRx-%d", basename, rx_int_idx++); - tx_int_idx++; - } else if (q_vector->rx.ring) { - snprintf(q_vector->name, sizeof(q_vector->name), - "iavf-%s-rx-%d", basename, rx_int_idx++); - } else if (q_vector->tx.ring) { - snprintf(q_vector->name, sizeof(q_vector->name), - "iavf-%s-tx-%d", basename, tx_int_idx++); - } else { - /* skip this unused q_vector */ - continue; - } - err = request_irq(irq_num, - iavf_msix_clean_rings, - 0, - q_vector->name, - q_vector); - if (err) { - dev_info(&adapter->pdev->dev, - "Request_irq failed, error: %d\n", err); - goto free_queue_irqs; - } - /* register for affinity change notifications */ - q_vector->affinity_notify.notify = iavf_irq_affinity_notify; - q_vector->affinity_notify.release = - iavf_irq_affinity_release; - irq_set_affinity_notifier(irq_num, &q_vector->affinity_notify); - /* Spread the IRQ affinity hints across online CPUs. Note that - * get_cpu_mask returns a mask with a permanent lifetime so - * it's safe to use as a hint for irq_set_affinity_hint. 
- */ - cpu = cpumask_local_spread(q_vector->v_idx, -1); - irq_set_affinity_hint(irq_num, get_cpu_mask(cpu)); - } - - return 0; - -free_queue_irqs: - while (vector) { - vector--; - irq_num = adapter->msix_entries[vector + NONQ_VECS].vector; - irq_set_affinity_notifier(irq_num, NULL); - irq_set_affinity_hint(irq_num, NULL); - free_irq(irq_num, &adapter->q_vectors[vector]); - } - return err; -} - -/** - * iavf_request_misc_irq - Initialize MSI-X interrupts - * @adapter: board private structure - * - * Allocates MSI-X vector 0 and requests interrupts from the kernel. This - * vector is only for the admin queue, and stays active even when the netdev - * is closed. - **/ -static int iavf_request_misc_irq(struct iavf_adapter *adapter) -{ - struct net_device *netdev = adapter->netdev; - int err; - - snprintf(adapter->misc_vector_name, - sizeof(adapter->misc_vector_name) - 1, "iavf-%s:mbx", - dev_name(&adapter->pdev->dev)); - err = request_irq(adapter->msix_entries[0].vector, - &iavf_msix_aq, 0, - adapter->misc_vector_name, netdev); - if (err) { - dev_err(&adapter->pdev->dev, - "request_irq for %s failed: %d\n", - adapter->misc_vector_name, err); - free_irq(adapter->msix_entries[0].vector, netdev); - } - return err; -} - -/** - * iavf_free_traffic_irqs - Free MSI-X interrupts - * @adapter: board private structure - * - * Frees all MSI-X vectors other than 0. - **/ -static void iavf_free_traffic_irqs(struct iavf_adapter *adapter) -{ - int vector, irq_num, q_vectors; - - if (!adapter->msix_entries) - return; - - q_vectors = adapter->num_msix_vectors - NONQ_VECS; - - for (vector = 0; vector < q_vectors; vector++) { - irq_num = adapter->msix_entries[vector + NONQ_VECS].vector; - irq_set_affinity_notifier(irq_num, NULL); - irq_set_affinity_hint(irq_num, NULL); - free_irq(irq_num, &adapter->q_vectors[vector]); - } -} - -/** - * iavf_free_misc_irq - Free MSI-X miscellaneous vector - * @adapter: board private structure - * - * Frees MSI-X vector 0. - **/ -static void iavf_free_misc_irq(struct iavf_adapter *adapter) -{ - struct net_device *netdev = adapter->netdev; - - if (!adapter->msix_entries) - return; - - free_irq(adapter->msix_entries[0].vector, netdev); -} - -/** - * iavf_configure_tx - Configure Transmit Unit after Reset - * @adapter: board private structure - * - * Configure the Tx unit of the MAC after a reset. - **/ -static void iavf_configure_tx(struct iavf_adapter *adapter) -{ - struct i40e_hw *hw = &adapter->hw; - int i; - - for (i = 0; i < adapter->num_active_queues; i++) - adapter->tx_rings[i].tail = hw->hw_addr + I40E_QTX_TAIL1(i); -} - -/** - * iavf_configure_rx - Configure Receive Unit after Reset - * @adapter: board private structure - * - * Configure the Rx unit of the MAC after a reset. - **/ -static void iavf_configure_rx(struct iavf_adapter *adapter) -{ - unsigned int rx_buf_len = I40E_RXBUFFER_2048; - struct i40e_hw *hw = &adapter->hw; - int i; - - /* Legacy Rx will always default to a 2048 buffer size. */ -#if (PAGE_SIZE < 8192) - if (!(adapter->flags & IAVF_FLAG_LEGACY_RX)) { - struct net_device *netdev = adapter->netdev; - - /* For jumbo frames on systems with 4K pages we have to use - * an order 1 page, so we might as well increase the size - * of our Rx buffer to make better use of the available space - */ - rx_buf_len = I40E_RXBUFFER_3072; - - /* We use a 1536 buffer size for configurations with - * standard Ethernet mtu. On x86 this gives us enough room - * for shared info and 192 bytes of padding. 
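The buffer-size policy above reduces to a small decision tree; a sketch under the same flags and constants as this file (rx_buf_len_for() is a hypothetical helper, not a driver function):

	/* Legacy Rx always gets 2048; on small-page systems build_skb
	 * mode prefers 3072 for jumbo frames, or a padded 1536 when the
	 * MTU fits standard Ethernet and 2K leaves room for overhead.
	 */
	static unsigned int rx_buf_len_for(bool legacy_rx, unsigned int mtu)
	{
	#if (PAGE_SIZE < 8192)
		if (!legacy_rx) {
			if (!I40E_2K_TOO_SMALL_WITH_PADDING &&
			    mtu <= ETH_DATA_LEN)
				return I40E_RXBUFFER_1536 - NET_IP_ALIGN;
			return I40E_RXBUFFER_3072;
		}
	#endif
		return I40E_RXBUFFER_2048;
	}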
- */ - if (!I40E_2K_TOO_SMALL_WITH_PADDING && - (netdev->mtu <= ETH_DATA_LEN)) - rx_buf_len = I40E_RXBUFFER_1536 - NET_IP_ALIGN; - } -#endif - - for (i = 0; i < adapter->num_active_queues; i++) { - adapter->rx_rings[i].tail = hw->hw_addr + I40E_QRX_TAIL1(i); - adapter->rx_rings[i].rx_buf_len = rx_buf_len; - - if (adapter->flags & IAVF_FLAG_LEGACY_RX) - clear_ring_build_skb_enabled(&adapter->rx_rings[i]); - else - set_ring_build_skb_enabled(&adapter->rx_rings[i]); - } -} - -/** - * iavf_find_vlan - Search filter list for specific vlan filter - * @adapter: board private structure - * @vlan: vlan tag - * - * Returns ptr to the filter object or NULL. Must be called while holding the - * mac_vlan_list_lock. - **/ -static struct -iavf_vlan_filter *iavf_find_vlan(struct iavf_adapter *adapter, u16 vlan) -{ - struct iavf_vlan_filter *f; - - list_for_each_entry(f, &adapter->vlan_filter_list, list) { - if (vlan == f->vlan) - return f; - } - return NULL; -} - -/** - * iavf_add_vlan - Add a vlan filter to the list - * @adapter: board private structure - * @vlan: VLAN tag - * - * Returns ptr to the filter object or NULL when no memory available. - **/ -static struct -iavf_vlan_filter *iavf_add_vlan(struct iavf_adapter *adapter, u16 vlan) -{ - struct iavf_vlan_filter *f = NULL; - - spin_lock_bh(&adapter->mac_vlan_list_lock); - - f = iavf_find_vlan(adapter, vlan); - if (!f) { - f = kzalloc(sizeof(*f), GFP_KERNEL); - if (!f) - goto clearout; - - f->vlan = vlan; - - INIT_LIST_HEAD(&f->list); - list_add(&f->list, &adapter->vlan_filter_list); - f->add = true; - adapter->aq_required |= IAVF_FLAG_AQ_ADD_VLAN_FILTER; - } - -clearout: - spin_unlock_bh(&adapter->mac_vlan_list_lock); - return f; -} - -/** - * iavf_del_vlan - Remove a vlan filter from the list - * @adapter: board private structure - * @vlan: VLAN tag - **/ -static void iavf_del_vlan(struct iavf_adapter *adapter, u16 vlan) -{ - struct iavf_vlan_filter *f; - - spin_lock_bh(&adapter->mac_vlan_list_lock); - - f = iavf_find_vlan(adapter, vlan); - if (f) { - f->remove = true; - adapter->aq_required |= IAVF_FLAG_AQ_DEL_VLAN_FILTER; - } - - spin_unlock_bh(&adapter->mac_vlan_list_lock); -} - -/** - * iavf_vlan_rx_add_vid - Add a VLAN filter to a device - * @netdev: network device struct - * @proto: unused protocol data - * @vid: VLAN tag - **/ -static int iavf_vlan_rx_add_vid(struct net_device *netdev, - __always_unused __be16 proto, u16 vid) -{ - struct iavf_adapter *adapter = netdev_priv(netdev); - - if (!VLAN_ALLOWED(adapter)) - return -EIO; - if (iavf_add_vlan(adapter, vid) == NULL) - return -ENOMEM; - return 0; -} - -/** - * iavf_vlan_rx_kill_vid - Remove a VLAN filter from a device - * @netdev: network device struct - * @proto: unused protocol data - * @vid: VLAN tag - **/ -static int iavf_vlan_rx_kill_vid(struct net_device *netdev, - __always_unused __be16 proto, u16 vid) -{ - struct iavf_adapter *adapter = netdev_priv(netdev); - - if (VLAN_ALLOWED(adapter)) { - iavf_del_vlan(adapter, vid); - return 0; - } - return -EIO; -} - -/** - * iavf_find_filter - Search filter list for specific mac filter - * @adapter: board private structure - * @macaddr: the MAC address - * - * Returns ptr to the filter object or NULL. Must be called while holding the - * mac_vlan_list_lock. 
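For reference, the caller pattern this locking contract implies, as iavf_set_mac() and iavf_addr_unsync() below follow it; adapter and macaddr stand in for the caller's context:

	struct iavf_mac_filter *f;

	spin_lock_bh(&adapter->mac_vlan_list_lock);
	f = iavf_find_filter(adapter, macaddr);
	if (f) {
		/* only mark it; the watchdog task sends the AQ request */
		f->remove = true;
		adapter->aq_required |= IAVF_FLAG_AQ_DEL_MAC_FILTER;
	}
	spin_unlock_bh(&adapter->mac_vlan_list_lock);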
- **/ -static struct -iavf_mac_filter *iavf_find_filter(struct iavf_adapter *adapter, - const u8 *macaddr) -{ - struct iavf_mac_filter *f; - - if (!macaddr) - return NULL; - - list_for_each_entry(f, &adapter->mac_filter_list, list) { - if (ether_addr_equal(macaddr, f->macaddr)) - return f; - } - return NULL; -} - -/** - * i40e_add_filter - Add a mac filter to the filter list - * @adapter: board private structure - * @macaddr: the MAC address - * - * Returns ptr to the filter object or NULL when no memory available. - **/ -static struct -iavf_mac_filter *iavf_add_filter(struct iavf_adapter *adapter, - const u8 *macaddr) -{ - struct iavf_mac_filter *f; - - if (!macaddr) - return NULL; - - f = iavf_find_filter(adapter, macaddr); - if (!f) { - f = kzalloc(sizeof(*f), GFP_ATOMIC); - if (!f) - return f; - - ether_addr_copy(f->macaddr, macaddr); - - list_add_tail(&f->list, &adapter->mac_filter_list); - f->add = true; - adapter->aq_required |= IAVF_FLAG_AQ_ADD_MAC_FILTER; - } else { - f->remove = false; - } - - return f; -} - -/** - * iavf_set_mac - NDO callback to set port mac address - * @netdev: network interface device structure - * @p: pointer to an address structure - * - * Returns 0 on success, negative on failure - **/ -static int iavf_set_mac(struct net_device *netdev, void *p) -{ - struct iavf_adapter *adapter = netdev_priv(netdev); - struct i40e_hw *hw = &adapter->hw; - struct iavf_mac_filter *f; - struct sockaddr *addr = p; - - if (!is_valid_ether_addr(addr->sa_data)) - return -EADDRNOTAVAIL; - - if (ether_addr_equal(netdev->dev_addr, addr->sa_data)) - return 0; - - if (adapter->flags & IAVF_FLAG_ADDR_SET_BY_PF) - return -EPERM; - - spin_lock_bh(&adapter->mac_vlan_list_lock); - - f = iavf_find_filter(adapter, hw->mac.addr); - if (f) { - f->remove = true; - adapter->aq_required |= IAVF_FLAG_AQ_DEL_MAC_FILTER; - } - - f = iavf_add_filter(adapter, addr->sa_data); - - spin_unlock_bh(&adapter->mac_vlan_list_lock); - - if (f) { - ether_addr_copy(hw->mac.addr, addr->sa_data); - ether_addr_copy(netdev->dev_addr, adapter->hw.mac.addr); - } - - return (f == NULL) ? -ENOMEM : 0; -} - -/** - * iavf_addr_sync - Callback for dev_(mc|uc)_sync to add address - * @netdev: the netdevice - * @addr: address to add - * - * Called by __dev_(mc|uc)_sync when an address needs to be added. We call - * __dev_(uc|mc)_sync from .set_rx_mode and guarantee to hold the hash lock. - */ -static int iavf_addr_sync(struct net_device *netdev, const u8 *addr) -{ - struct iavf_adapter *adapter = netdev_priv(netdev); - - if (iavf_add_filter(adapter, addr)) - return 0; - else - return -ENOMEM; -} - -/** - * iavf_addr_unsync - Callback for dev_(mc|uc)_sync to remove address - * @netdev: the netdevice - * @addr: address to add - * - * Called by __dev_(mc|uc)_sync when an address needs to be removed. We call - * __dev_(uc|mc)_sync from .set_rx_mode and guarantee to hold the hash lock. - */ -static int iavf_addr_unsync(struct net_device *netdev, const u8 *addr) -{ - struct iavf_adapter *adapter = netdev_priv(netdev); - struct iavf_mac_filter *f; - - /* Under some circumstances, we might receive a request to delete - * our own device address from our uc list. Because we store the - * device address in the VSI's MAC/VLAN filter list, we need to ignore - * such requests and not delete our device address from this list. 
- */ - if (ether_addr_equal(addr, netdev->dev_addr)) - return 0; - - f = iavf_find_filter(adapter, addr); - if (f) { - f->remove = true; - adapter->aq_required |= IAVF_FLAG_AQ_DEL_MAC_FILTER; - } - return 0; -} - -/** - * iavf_set_rx_mode - NDO callback to set the netdev filters - * @netdev: network interface device structure - **/ -static void iavf_set_rx_mode(struct net_device *netdev) -{ - struct iavf_adapter *adapter = netdev_priv(netdev); - - spin_lock_bh(&adapter->mac_vlan_list_lock); - __dev_uc_sync(netdev, iavf_addr_sync, iavf_addr_unsync); - __dev_mc_sync(netdev, iavf_addr_sync, iavf_addr_unsync); - spin_unlock_bh(&adapter->mac_vlan_list_lock); - - if (netdev->flags & IFF_PROMISC && - !(adapter->flags & IAVF_FLAG_PROMISC_ON)) - adapter->aq_required |= IAVF_FLAG_AQ_REQUEST_PROMISC; - else if (!(netdev->flags & IFF_PROMISC) && - adapter->flags & IAVF_FLAG_PROMISC_ON) - adapter->aq_required |= IAVF_FLAG_AQ_RELEASE_PROMISC; - - if (netdev->flags & IFF_ALLMULTI && - !(adapter->flags & IAVF_FLAG_ALLMULTI_ON)) - adapter->aq_required |= IAVF_FLAG_AQ_REQUEST_ALLMULTI; - else if (!(netdev->flags & IFF_ALLMULTI) && - adapter->flags & IAVF_FLAG_ALLMULTI_ON) - adapter->aq_required |= IAVF_FLAG_AQ_RELEASE_ALLMULTI; -} - -/** - * iavf_napi_enable_all - enable NAPI on all queue vectors - * @adapter: board private structure - **/ -static void iavf_napi_enable_all(struct iavf_adapter *adapter) -{ - int q_idx; - struct i40e_q_vector *q_vector; - int q_vectors = adapter->num_msix_vectors - NONQ_VECS; - - for (q_idx = 0; q_idx < q_vectors; q_idx++) { - struct napi_struct *napi; - - q_vector = &adapter->q_vectors[q_idx]; - napi = &q_vector->napi; - napi_enable(napi); - } -} - -/** - * iavf_napi_disable_all - disable NAPI on all queue vectors - * @adapter: board private structure - **/ -static void iavf_napi_disable_all(struct iavf_adapter *adapter) -{ - int q_idx; - struct i40e_q_vector *q_vector; - int q_vectors = adapter->num_msix_vectors - NONQ_VECS; - - for (q_idx = 0; q_idx < q_vectors; q_idx++) { - q_vector = &adapter->q_vectors[q_idx]; - napi_disable(&q_vector->napi); - } -} - -/** - * iavf_configure - set up transmit and receive data structures - * @adapter: board private structure - **/ -static void iavf_configure(struct iavf_adapter *adapter) -{ - struct net_device *netdev = adapter->netdev; - int i; - - iavf_set_rx_mode(netdev); - - iavf_configure_tx(adapter); - iavf_configure_rx(adapter); - adapter->aq_required |= IAVF_FLAG_AQ_CONFIGURE_QUEUES; - - for (i = 0; i < adapter->num_active_queues; i++) { - struct i40e_ring *ring = &adapter->rx_rings[i]; - - iavf_alloc_rx_buffers(ring, I40E_DESC_UNUSED(ring)); - } -} - -/** - * iavf_up_complete - Finish the last steps of bringing up a connection - * @adapter: board private structure - * - * Expects to be called while holding the __IAVF_IN_CRITICAL_TASK bit lock. - **/ -static void iavf_up_complete(struct iavf_adapter *adapter) -{ - adapter->state = __IAVF_RUNNING; - clear_bit(__I40E_VSI_DOWN, adapter->vsi.state); - - iavf_napi_enable_all(adapter); - - adapter->aq_required |= IAVF_FLAG_AQ_ENABLE_QUEUES; - if (CLIENT_ENABLED(adapter)) - adapter->flags |= IAVF_FLAG_CLIENT_NEEDS_OPEN; - mod_timer_pending(&adapter->watchdog_timer, jiffies + 1); -} - -/** - * i40e_down - Shutdown the connection processing - * @adapter: board private structure - * - * Expects to be called while holding the __IAVF_IN_CRITICAL_TASK bit lock. 
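A sketch of that bit-lock convention; the reset task later in this file uses the same shape on the client bit:

	while (test_and_set_bit(__IAVF_IN_CRITICAL_TASK,
				&adapter->crit_section))
		usleep_range(500, 1000);	/* spin politely */

	iavf_down(adapter);			/* bit is held, safe */

	clear_bit(__IAVF_IN_CRITICAL_TASK, &adapter->crit_section);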
- **/ -void iavf_down(struct iavf_adapter *adapter) -{ - struct net_device *netdev = adapter->netdev; - struct iavf_vlan_filter *vlf; - struct iavf_mac_filter *f; - struct iavf_cloud_filter *cf; - - if (adapter->state <= __IAVF_DOWN_PENDING) - return; - - netif_carrier_off(netdev); - netif_tx_disable(netdev); - adapter->link_up = false; - iavf_napi_disable_all(adapter); - iavf_irq_disable(adapter); - - spin_lock_bh(&adapter->mac_vlan_list_lock); - - /* clear the sync flag on all filters */ - __dev_uc_unsync(adapter->netdev, NULL); - __dev_mc_unsync(adapter->netdev, NULL); - - /* remove all MAC filters */ - list_for_each_entry(f, &adapter->mac_filter_list, list) { - f->remove = true; - } - - /* remove all VLAN filters */ - list_for_each_entry(vlf, &adapter->vlan_filter_list, list) { - vlf->remove = true; - } - - spin_unlock_bh(&adapter->mac_vlan_list_lock); - - /* remove all cloud filters */ - spin_lock_bh(&adapter->cloud_filter_list_lock); - list_for_each_entry(cf, &adapter->cloud_filter_list, list) { - cf->del = true; - } - spin_unlock_bh(&adapter->cloud_filter_list_lock); - - if (!(adapter->flags & IAVF_FLAG_PF_COMMS_FAILED) && - adapter->state != __IAVF_RESETTING) { - /* cancel any current operation */ - adapter->current_op = VIRTCHNL_OP_UNKNOWN; - /* Schedule operations to close down the HW. Don't wait - * here for this to complete. The watchdog is still running - * and it will take care of this. - */ - adapter->aq_required = IAVF_FLAG_AQ_DEL_MAC_FILTER; - adapter->aq_required |= IAVF_FLAG_AQ_DEL_VLAN_FILTER; - adapter->aq_required |= IAVF_FLAG_AQ_DEL_CLOUD_FILTER; - adapter->aq_required |= IAVF_FLAG_AQ_DISABLE_QUEUES; - } - - mod_timer_pending(&adapter->watchdog_timer, jiffies + 1); -} - -/** - * iavf_acquire_msix_vectors - Setup the MSIX capability - * @adapter: board private structure - * @vectors: number of vectors to request - * - * Work with the OS to set up the MSIX vectors needed. - * - * Returns 0 on success, negative on failure - **/ -static int -iavf_acquire_msix_vectors(struct iavf_adapter *adapter, int vectors) -{ - int err, vector_threshold; - - /* We'll want at least 3 (vector_threshold): - * 0) Other (Admin Queue and link, mostly) - * 1) TxQ[0] Cleanup - * 2) RxQ[0] Cleanup - */ - vector_threshold = MIN_MSIX_COUNT; - - /* The more we get, the more we will assign to Tx/Rx Cleanup - * for the separate queues...where Rx Cleanup >= Tx Cleanup. - * Right now, we simply care about how many we'll get; we'll - * set them up later while requesting irq's. - */ - err = pci_enable_msix_range(adapter->pdev, adapter->msix_entries, - vector_threshold, vectors); - if (err < 0) { - dev_err(&adapter->pdev->dev, "Unable to allocate MSI-X interrupts\n"); - kfree(adapter->msix_entries); - adapter->msix_entries = NULL; - return err; - } - - /* Adjust for only the vectors we'll use, which is minimum - * of max_msix_q_vectors + NONQ_VECS, or the number of - * vectors we were allocated. - */ - adapter->num_msix_vectors = err; - return 0; -} - -/** - * iavf_free_queues - Free memory for all rings - * @adapter: board private structure to initialize - * - * Free all of the memory associated with queue pairs. 
- **/ -static void iavf_free_queues(struct iavf_adapter *adapter) -{ - if (!adapter->vsi_res) - return; - adapter->num_active_queues = 0; - kfree(adapter->tx_rings); - adapter->tx_rings = NULL; - kfree(adapter->rx_rings); - adapter->rx_rings = NULL; -} - -/** - * iavf_alloc_queues - Allocate memory for all rings - * @adapter: board private structure to initialize - * - * We allocate one ring per queue at run-time since we don't know the - * number of queues at compile-time. The polling_netdev array is - * intended for Multiqueue, but should work fine with a single queue. - **/ -static int iavf_alloc_queues(struct iavf_adapter *adapter) -{ - int i, num_active_queues; - - /* If we're in reset reallocating queues we don't actually know yet for - * certain the PF gave us the number of queues we asked for but we'll - * assume it did. Once basic reset is finished we'll confirm once we - * start negotiating config with PF. - */ - if (adapter->num_req_queues) - num_active_queues = adapter->num_req_queues; - else if ((adapter->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_ADQ) && - adapter->num_tc) - num_active_queues = adapter->ch_config.total_qps; - else - num_active_queues = min_t(int, - adapter->vsi_res->num_queue_pairs, - (int)(num_online_cpus())); - - - adapter->tx_rings = kcalloc(num_active_queues, - sizeof(struct i40e_ring), GFP_KERNEL); - if (!adapter->tx_rings) - goto err_out; - adapter->rx_rings = kcalloc(num_active_queues, - sizeof(struct i40e_ring), GFP_KERNEL); - if (!adapter->rx_rings) - goto err_out; - - for (i = 0; i < num_active_queues; i++) { - struct i40e_ring *tx_ring; - struct i40e_ring *rx_ring; - - tx_ring = &adapter->tx_rings[i]; - - tx_ring->queue_index = i; - tx_ring->netdev = adapter->netdev; - tx_ring->dev = &adapter->pdev->dev; - tx_ring->count = adapter->tx_desc_count; - tx_ring->itr_setting = I40E_ITR_TX_DEF; - if (adapter->flags & IAVF_FLAG_WB_ON_ITR_CAPABLE) - tx_ring->flags |= I40E_TXR_FLAGS_WB_ON_ITR; - - rx_ring = &adapter->rx_rings[i]; - rx_ring->queue_index = i; - rx_ring->netdev = adapter->netdev; - rx_ring->dev = &adapter->pdev->dev; - rx_ring->count = adapter->rx_desc_count; - rx_ring->itr_setting = I40E_ITR_RX_DEF; - } - - adapter->num_active_queues = num_active_queues; - - return 0; - -err_out: - iavf_free_queues(adapter); - return -ENOMEM; -} - -/** - * iavf_set_interrupt_capability - set MSI-X or FAIL if not supported - * @adapter: board private structure to initialize - * - * Attempt to configure the interrupts using the best available - * capabilities of the hardware and the kernel. - **/ -static int iavf_set_interrupt_capability(struct iavf_adapter *adapter) -{ - int vector, v_budget; - int pairs = 0; - int err = 0; - - if (!adapter->vsi_res) { - err = -EIO; - goto out; - } - pairs = adapter->num_active_queues; - - /* It's easy to be greedy for MSI-X vectors, but it really doesn't do - * us much good if we have more vectors than CPUs. However, we already - * limit the total number of queues by the number of CPUs so we do not - * need any further limiting here. 
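Condensed, the budget negotiation across this function and iavf_acquire_msix_vectors() looks like the sketch below; pdev, entries, pairs and num_msix stand in for the adapter fields, and the real code additionally clamps the request to vf_res->max_vectors:

	int want = pairs + NONQ_VECS;	/* one per queue pair, plus misc */
	int got;

	/* accept anything between MIN_MSIX_COUNT (3) and 'want' */
	got = pci_enable_msix_range(pdev, entries, MIN_MSIX_COUNT, want);
	if (got < 0)
		return got;	/* not even three vectors available */
	num_msix = got;		/* possibly fewer than requested */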
- */ - v_budget = min_t(int, pairs + NONQ_VECS, - (int)adapter->vf_res->max_vectors); - - adapter->msix_entries = kcalloc(v_budget, - sizeof(struct msix_entry), GFP_KERNEL); - if (!adapter->msix_entries) { - err = -ENOMEM; - goto out; - } - - for (vector = 0; vector < v_budget; vector++) - adapter->msix_entries[vector].entry = vector; - - err = iavf_acquire_msix_vectors(adapter, v_budget); - -out: - netif_set_real_num_rx_queues(adapter->netdev, pairs); - netif_set_real_num_tx_queues(adapter->netdev, pairs); - return err; -} - -/** - * i40e_config_rss_aq - Configure RSS keys and lut by using AQ commands - * @adapter: board private structure - * - * Return 0 on success, negative on failure - **/ -static int iavf_config_rss_aq(struct iavf_adapter *adapter) -{ - struct i40e_aqc_get_set_rss_key_data *rss_key = - (struct i40e_aqc_get_set_rss_key_data *)adapter->rss_key; - struct i40e_hw *hw = &adapter->hw; - int ret = 0; - - if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) { - /* bail because we already have a command pending */ - dev_err(&adapter->pdev->dev, "Cannot configure RSS, command %d pending\n", - adapter->current_op); - return -EBUSY; - } - - ret = iavf_aq_set_rss_key(hw, adapter->vsi.id, rss_key); - if (ret) { - dev_err(&adapter->pdev->dev, "Cannot set RSS key, err %s aq_err %s\n", - iavf_stat_str(hw, ret), - iavf_aq_str(hw, hw->aq.asq_last_status)); - return ret; - - } - - ret = iavf_aq_set_rss_lut(hw, adapter->vsi.id, false, - adapter->rss_lut, adapter->rss_lut_size); - if (ret) { - dev_err(&adapter->pdev->dev, "Cannot set RSS lut, err %s aq_err %s\n", - iavf_stat_str(hw, ret), - iavf_aq_str(hw, hw->aq.asq_last_status)); - } - - return ret; - -} - -/** - * iavf_config_rss_reg - Configure RSS keys and lut by writing registers - * @adapter: board private structure - * - * Returns 0 on success, negative on failure - **/ -static int iavf_config_rss_reg(struct iavf_adapter *adapter) -{ - struct i40e_hw *hw = &adapter->hw; - u32 *dw; - u16 i; - - dw = (u32 *)adapter->rss_key; - for (i = 0; i <= adapter->rss_key_size / 4; i++) - wr32(hw, I40E_VFQF_HKEY(i), dw[i]); - - dw = (u32 *)adapter->rss_lut; - for (i = 0; i <= adapter->rss_lut_size / 4; i++) - wr32(hw, I40E_VFQF_HLUT(i), dw[i]); - - i40e_flush(hw); - - return 0; -} - -/** - * iavf_config_rss - Configure RSS keys and lut - * @adapter: board private structure - * - * Returns 0 on success, negative on failure - **/ -int iavf_config_rss(struct iavf_adapter *adapter) -{ - - if (RSS_PF(adapter)) { - adapter->aq_required |= IAVF_FLAG_AQ_SET_RSS_LUT | - IAVF_FLAG_AQ_SET_RSS_KEY; - return 0; - } else if (RSS_AQ(adapter)) { - return iavf_config_rss_aq(adapter); - } else { - return iavf_config_rss_reg(adapter); - } -} - -/** - * iavf_fill_rss_lut - Fill the lut with default values - * @adapter: board private structure - **/ -static void iavf_fill_rss_lut(struct iavf_adapter *adapter) -{ - u16 i; - - for (i = 0; i < adapter->rss_lut_size; i++) - adapter->rss_lut[i] = i % adapter->num_active_queues; -} - -/** - * iavf_init_rss - Prepare for RSS - * @adapter: board private structure - * - * Return 0 on success, negative on failure - **/ -static int iavf_init_rss(struct iavf_adapter *adapter) -{ - struct i40e_hw *hw = &adapter->hw; - int ret; - - if (!RSS_PF(adapter)) { - /* Enable PCTYPES for RSS, TCP/UDP with IPv4/IPv6 */ - if (adapter->vf_res->vf_cap_flags & - VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2) - adapter->hena = I40E_DEFAULT_RSS_HENA_EXPANDED; - else - adapter->hena = I40E_DEFAULT_RSS_HENA; - - wr32(hw, I40E_VFQF_HENA(0), (u32)adapter->hena); - 
wr32(hw, I40E_VFQF_HENA(1), (u32)(adapter->hena >> 32)); - } - - iavf_fill_rss_lut(adapter); - - netdev_rss_key_fill((void *)adapter->rss_key, adapter->rss_key_size); - ret = iavf_config_rss(adapter); - - return ret; -} - -/** - * iavf_alloc_q_vectors - Allocate memory for interrupt vectors - * @adapter: board private structure to initialize - * - * We allocate one q_vector per queue interrupt. If allocation fails we - * return -ENOMEM. - **/ -static int iavf_alloc_q_vectors(struct iavf_adapter *adapter) -{ - int q_idx = 0, num_q_vectors; - struct i40e_q_vector *q_vector; - - num_q_vectors = adapter->num_msix_vectors - NONQ_VECS; - adapter->q_vectors = kcalloc(num_q_vectors, sizeof(*q_vector), - GFP_KERNEL); - if (!adapter->q_vectors) - return -ENOMEM; - - for (q_idx = 0; q_idx < num_q_vectors; q_idx++) { - q_vector = &adapter->q_vectors[q_idx]; - q_vector->adapter = adapter; - q_vector->vsi = &adapter->vsi; - q_vector->v_idx = q_idx; - q_vector->reg_idx = q_idx; - cpumask_copy(&q_vector->affinity_mask, cpu_possible_mask); - netif_napi_add(adapter->netdev, &q_vector->napi, - iavf_napi_poll, NAPI_POLL_WEIGHT); - } - - return 0; -} - -/** - * iavf_free_q_vectors - Free memory allocated for interrupt vectors - * @adapter: board private structure to initialize - * - * This function frees the memory allocated to the q_vectors. In addition if - * NAPI is enabled it will delete any references to the NAPI struct prior - * to freeing the q_vector. - **/ -static void iavf_free_q_vectors(struct iavf_adapter *adapter) -{ - int q_idx, num_q_vectors; - int napi_vectors; - - if (!adapter->q_vectors) - return; - - num_q_vectors = adapter->num_msix_vectors - NONQ_VECS; - napi_vectors = adapter->num_active_queues; - - for (q_idx = 0; q_idx < num_q_vectors; q_idx++) { - struct i40e_q_vector *q_vector = &adapter->q_vectors[q_idx]; - - if (q_idx < napi_vectors) - netif_napi_del(&q_vector->napi); - } - kfree(adapter->q_vectors); - adapter->q_vectors = NULL; -} - -/** - * iavf_reset_interrupt_capability - Reset MSIX setup - * @adapter: board private structure - * - **/ -void iavf_reset_interrupt_capability(struct iavf_adapter *adapter) -{ - if (!adapter->msix_entries) - return; - - pci_disable_msix(adapter->pdev); - kfree(adapter->msix_entries); - adapter->msix_entries = NULL; -} - -/** - * iavf_init_interrupt_scheme - Determine if MSIX is supported and init - * @adapter: board private structure to initialize - * - **/ -int iavf_init_interrupt_scheme(struct iavf_adapter *adapter) -{ - int err; - - err = iavf_alloc_queues(adapter); - if (err) { - dev_err(&adapter->pdev->dev, - "Unable to allocate memory for queues\n"); - goto err_alloc_queues; - } - - rtnl_lock(); - err = iavf_set_interrupt_capability(adapter); - rtnl_unlock(); - if (err) { - dev_err(&adapter->pdev->dev, - "Unable to setup interrupt capabilities\n"); - goto err_set_interrupt; - } - - err = iavf_alloc_q_vectors(adapter); - if (err) { - dev_err(&adapter->pdev->dev, - "Unable to allocate memory for queue vectors\n"); - goto err_alloc_q_vectors; - } - - /* If we've made it so far while ADq flag being ON, then we haven't - * bailed out anywhere in middle. And ADq isn't just enabled but actual - * resources have been allocated in the reset path. - * Now we can truly claim that ADq is enabled. 
- */ - if ((adapter->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_ADQ) && - adapter->num_tc) - dev_info(&adapter->pdev->dev, "ADq Enabled, %u TCs created", - adapter->num_tc); - - dev_info(&adapter->pdev->dev, "Multiqueue %s: Queue pair count = %u", - (adapter->num_active_queues > 1) ? "Enabled" : "Disabled", - adapter->num_active_queues); - - return 0; -err_alloc_q_vectors: - iavf_reset_interrupt_capability(adapter); -err_set_interrupt: - iavf_free_queues(adapter); -err_alloc_queues: - return err; -} - -/** - * iavf_free_rss - Free memory used by RSS structs - * @adapter: board private structure - **/ -static void iavf_free_rss(struct iavf_adapter *adapter) -{ - kfree(adapter->rss_key); - adapter->rss_key = NULL; - - kfree(adapter->rss_lut); - adapter->rss_lut = NULL; -} - -/** - * iavf_reinit_interrupt_scheme - Reallocate queues and vectors - * @adapter: board private structure - * - * Returns 0 on success, negative on failure - **/ -static int iavf_reinit_interrupt_scheme(struct iavf_adapter *adapter) -{ - struct net_device *netdev = adapter->netdev; - int err; - - if (netif_running(netdev)) - iavf_free_traffic_irqs(adapter); - iavf_free_misc_irq(adapter); - iavf_reset_interrupt_capability(adapter); - iavf_free_q_vectors(adapter); - iavf_free_queues(adapter); - - err = iavf_init_interrupt_scheme(adapter); - if (err) - goto err; - - netif_tx_stop_all_queues(netdev); - - err = iavf_request_misc_irq(adapter); - if (err) - goto err; - - set_bit(__I40E_VSI_DOWN, adapter->vsi.state); - - iavf_map_rings_to_vectors(adapter); - - if (RSS_AQ(adapter)) - adapter->aq_required |= IAVF_FLAG_AQ_CONFIGURE_RSS; - else - err = iavf_init_rss(adapter); -err: - return err; -} - -/** - * iavf_watchdog_timer - Periodic call-back timer - * @data: pointer to adapter disguised as unsigned long - **/ -static void iavf_watchdog_timer(struct timer_list *t) -{ - struct iavf_adapter *adapter = from_timer(adapter, t, - watchdog_timer); - - schedule_work(&adapter->watchdog_task); - /* timer will be rescheduled in watchdog task */ -} - -/** - * iavf_watchdog_task - Periodic call-back task - * @work: pointer to work_struct - **/ -static void iavf_watchdog_task(struct work_struct *work) -{ - struct iavf_adapter *adapter = container_of(work, - struct iavf_adapter, - watchdog_task); - struct i40e_hw *hw = &adapter->hw; - u32 reg_val; - - if (test_and_set_bit(__IAVF_IN_CRITICAL_TASK, &adapter->crit_section)) - goto restart_watchdog; - - if (adapter->flags & IAVF_FLAG_PF_COMMS_FAILED) { - reg_val = rd32(hw, I40E_VFGEN_RSTAT) & - I40E_VFGEN_RSTAT_VFR_STATE_MASK; - if ((reg_val == VIRTCHNL_VFR_VFACTIVE) || - (reg_val == VIRTCHNL_VFR_COMPLETED)) { - /* A chance for redemption! */ - dev_err(&adapter->pdev->dev, "Hardware came out of reset. Attempting reinit.\n"); - adapter->state = __IAVF_STARTUP; - adapter->flags &= ~IAVF_FLAG_PF_COMMS_FAILED; - schedule_delayed_work(&adapter->init_task, 10); - clear_bit(__IAVF_IN_CRITICAL_TASK, - &adapter->crit_section); - /* Don't reschedule the watchdog, since we've restarted - * the init task. When init_task contacts the PF and - * gets everything set up again, it'll restart the - * watchdog for us. Down, boy. Sit. Stay. Woof. 
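The two register values accepted here are the only "PF is done with us" states; condensed, with reinit() as a placeholder for rescheduling init_task:

	u32 vfr = rd32(hw, I40E_VFGEN_RSTAT) &
		  I40E_VFGEN_RSTAT_VFR_STATE_MASK;

	if (vfr == VIRTCHNL_VFR_VFACTIVE || vfr == VIRTCHNL_VFR_COMPLETED)
		reinit();	/* hypothetical: PF answered, start over */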
- */ - return; - } - adapter->aq_required = 0; - adapter->current_op = VIRTCHNL_OP_UNKNOWN; - goto watchdog_done; - } - - if ((adapter->state < __IAVF_DOWN) || - (adapter->flags & IAVF_FLAG_RESET_PENDING)) - goto watchdog_done; - - /* check for reset */ - reg_val = rd32(hw, I40E_VF_ARQLEN1) & I40E_VF_ARQLEN1_ARQENABLE_MASK; - if (!(adapter->flags & IAVF_FLAG_RESET_PENDING) && !reg_val) { - adapter->state = __IAVF_RESETTING; - adapter->flags |= IAVF_FLAG_RESET_PENDING; - dev_err(&adapter->pdev->dev, "Hardware reset detected\n"); - schedule_work(&adapter->reset_task); - adapter->aq_required = 0; - adapter->current_op = VIRTCHNL_OP_UNKNOWN; - goto watchdog_done; - } - - /* Process admin queue tasks. After init, everything gets done - * here so we don't race on the admin queue. - */ - if (adapter->current_op) { - if (!iavf_asq_done(hw)) { - dev_dbg(&adapter->pdev->dev, "Admin queue timeout\n"); - iavf_send_api_ver(adapter); - } - goto watchdog_done; - } - if (adapter->aq_required & IAVF_FLAG_AQ_GET_CONFIG) { - iavf_send_vf_config_msg(adapter); - goto watchdog_done; - } - - if (adapter->aq_required & IAVF_FLAG_AQ_DISABLE_QUEUES) { - iavf_disable_queues(adapter); - goto watchdog_done; - } - - if (adapter->aq_required & IAVF_FLAG_AQ_MAP_VECTORS) { - iavf_map_queues(adapter); - goto watchdog_done; - } - - if (adapter->aq_required & IAVF_FLAG_AQ_ADD_MAC_FILTER) { - iavf_add_ether_addrs(adapter); - goto watchdog_done; - } - - if (adapter->aq_required & IAVF_FLAG_AQ_ADD_VLAN_FILTER) { - iavf_add_vlans(adapter); - goto watchdog_done; - } - - if (adapter->aq_required & IAVF_FLAG_AQ_DEL_MAC_FILTER) { - iavf_del_ether_addrs(adapter); - goto watchdog_done; - } - - if (adapter->aq_required & IAVF_FLAG_AQ_DEL_VLAN_FILTER) { - iavf_del_vlans(adapter); - goto watchdog_done; - } - - if (adapter->aq_required & IAVF_FLAG_AQ_ENABLE_VLAN_STRIPPING) { - iavf_enable_vlan_stripping(adapter); - goto watchdog_done; - } - - if (adapter->aq_required & IAVF_FLAG_AQ_DISABLE_VLAN_STRIPPING) { - iavf_disable_vlan_stripping(adapter); - goto watchdog_done; - } - - if (adapter->aq_required & IAVF_FLAG_AQ_CONFIGURE_QUEUES) { - iavf_configure_queues(adapter); - goto watchdog_done; - } - - if (adapter->aq_required & IAVF_FLAG_AQ_ENABLE_QUEUES) { - iavf_enable_queues(adapter); - goto watchdog_done; - } - - if (adapter->aq_required & IAVF_FLAG_AQ_CONFIGURE_RSS) { - /* This message goes straight to the firmware, not the - * PF, so we don't have to set current_op as we will - * not get a response through the ARQ. 
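Every branch in this chain follows one convention, sketched here with NEXT_FLAG and issue_op() as placeholders:

	if (adapter->aq_required & NEXT_FLAG) {
		issue_op(adapter);	/* most helpers send one virtchnl
					 * message and set current_op; the
					 * RSS path here writes registers
					 * directly instead
					 */
		goto watchdog_done;	/* strictly one operation per pass */
	}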
- */ - iavf_init_rss(adapter); - adapter->aq_required &= ~IAVF_FLAG_AQ_CONFIGURE_RSS; - goto watchdog_done; - } - if (adapter->aq_required & IAVF_FLAG_AQ_GET_HENA) { - iavf_get_hena(adapter); - goto watchdog_done; - } - if (adapter->aq_required & IAVF_FLAG_AQ_SET_HENA) { - iavf_set_hena(adapter); - goto watchdog_done; - } - if (adapter->aq_required & IAVF_FLAG_AQ_SET_RSS_KEY) { - iavf_set_rss_key(adapter); - goto watchdog_done; - } - if (adapter->aq_required & IAVF_FLAG_AQ_SET_RSS_LUT) { - iavf_set_rss_lut(adapter); - goto watchdog_done; - } - - if (adapter->aq_required & IAVF_FLAG_AQ_REQUEST_PROMISC) { - iavf_set_promiscuous(adapter, FLAG_VF_UNICAST_PROMISC | - FLAG_VF_MULTICAST_PROMISC); - goto watchdog_done; - } - - if (adapter->aq_required & IAVF_FLAG_AQ_REQUEST_ALLMULTI) { - iavf_set_promiscuous(adapter, FLAG_VF_MULTICAST_PROMISC); - goto watchdog_done; - } - - if ((adapter->aq_required & IAVF_FLAG_AQ_RELEASE_PROMISC) && - (adapter->aq_required & IAVF_FLAG_AQ_RELEASE_ALLMULTI)) { - iavf_set_promiscuous(adapter, 0); - goto watchdog_done; - } - - if (adapter->aq_required & IAVF_FLAG_AQ_ENABLE_CHANNELS) { - iavf_enable_channels(adapter); - goto watchdog_done; - } - - if (adapter->aq_required & IAVF_FLAG_AQ_DISABLE_CHANNELS) { - iavf_disable_channels(adapter); - goto watchdog_done; - } - - if (adapter->aq_required & IAVF_FLAG_AQ_ADD_CLOUD_FILTER) { - iavf_add_cloud_filter(adapter); - goto watchdog_done; - } - - if (adapter->aq_required & IAVF_FLAG_AQ_DEL_CLOUD_FILTER) { - iavf_del_cloud_filter(adapter); - goto watchdog_done; - } - - schedule_delayed_work(&adapter->client_task, msecs_to_jiffies(5)); - - if (adapter->state == __IAVF_RUNNING) - iavf_request_stats(adapter); -watchdog_done: - if (adapter->state == __IAVF_RUNNING) - iavf_detect_recover_hung(&adapter->vsi); - clear_bit(__IAVF_IN_CRITICAL_TASK, &adapter->crit_section); -restart_watchdog: - if (adapter->state == __IAVF_REMOVE) - return; - if (adapter->aq_required) - mod_timer(&adapter->watchdog_timer, - jiffies + msecs_to_jiffies(20)); - else - mod_timer(&adapter->watchdog_timer, jiffies + (HZ * 2)); - schedule_work(&adapter->adminq_task); -} - -static void iavf_disable_vf(struct iavf_adapter *adapter) -{ - struct iavf_mac_filter *f, *ftmp; - struct iavf_vlan_filter *fv, *fvtmp; - struct iavf_cloud_filter *cf, *cftmp; - - adapter->flags |= IAVF_FLAG_PF_COMMS_FAILED; - - /* We don't use netif_running() because it may be true prior to - * ndo_open() returning, so we can't assume it means all our open - * tasks have finished, since we're not holding the rtnl_lock here. 
- */ - if (adapter->state == __IAVF_RUNNING) { - set_bit(__I40E_VSI_DOWN, adapter->vsi.state); - netif_carrier_off(adapter->netdev); - netif_tx_disable(adapter->netdev); - adapter->link_up = false; - iavf_napi_disable_all(adapter); - iavf_irq_disable(adapter); - iavf_free_traffic_irqs(adapter); - iavf_free_all_tx_resources(adapter); - iavf_free_all_rx_resources(adapter); - } - - spin_lock_bh(&adapter->mac_vlan_list_lock); - - /* Delete all of the filters */ - list_for_each_entry_safe(f, ftmp, &adapter->mac_filter_list, list) { - list_del(&f->list); - kfree(f); - } - - list_for_each_entry_safe(fv, fvtmp, &adapter->vlan_filter_list, list) { - list_del(&fv->list); - kfree(fv); - } - - spin_unlock_bh(&adapter->mac_vlan_list_lock); - - spin_lock_bh(&adapter->cloud_filter_list_lock); - list_for_each_entry_safe(cf, cftmp, &adapter->cloud_filter_list, list) { - list_del(&cf->list); - kfree(cf); - adapter->num_cloud_filters--; - } - spin_unlock_bh(&adapter->cloud_filter_list_lock); - - iavf_free_misc_irq(adapter); - iavf_reset_interrupt_capability(adapter); - iavf_free_queues(adapter); - iavf_free_q_vectors(adapter); - kfree(adapter->vf_res); - iavf_shutdown_adminq(&adapter->hw); - adapter->netdev->flags &= ~IFF_UP; - clear_bit(__IAVF_IN_CRITICAL_TASK, &adapter->crit_section); - adapter->flags &= ~IAVF_FLAG_RESET_PENDING; - adapter->state = __IAVF_DOWN; - wake_up(&adapter->down_waitqueue); - dev_info(&adapter->pdev->dev, "Reset task did not complete, VF disabled\n"); -} - -#define IAVF_RESET_WAIT_MS 10 -#define IAVF_RESET_WAIT_COUNT 500 -/** - * iavf_reset_task - Call-back task to handle hardware reset - * @work: pointer to work_struct - * - * During reset we need to shut down and reinitialize the admin queue - * before we can use it to communicate with the PF again. We also clear - * and reinit the rings because that context is lost as well. - **/ -static void iavf_reset_task(struct work_struct *work) -{ - struct iavf_adapter *adapter = container_of(work, - struct iavf_adapter, - reset_task); - struct virtchnl_vf_resource *vfres = adapter->vf_res; - struct net_device *netdev = adapter->netdev; - struct i40e_hw *hw = &adapter->hw; - struct iavf_vlan_filter *vlf; - struct iavf_cloud_filter *cf; - struct iavf_mac_filter *f; - u32 reg_val; - int i = 0, err; - bool running; - - /* When device is being removed it doesn't make sense to run the reset - * task, just return in such a case. - */ - if (test_bit(__IAVF_IN_REMOVE_TASK, &adapter->crit_section)) - return; - - while (test_and_set_bit(__IAVF_IN_CLIENT_TASK, - &adapter->crit_section)) - usleep_range(500, 1000); - if (CLIENT_ENABLED(adapter)) { - adapter->flags &= ~(IAVF_FLAG_CLIENT_NEEDS_OPEN | - IAVF_FLAG_CLIENT_NEEDS_CLOSE | - IAVF_FLAG_CLIENT_NEEDS_L2_PARAMS | - IAVF_FLAG_SERVICE_CLIENT_REQUESTED); - cancel_delayed_work_sync(&adapter->client_task); - iavf_notify_client_close(&adapter->vsi, true); - } - iavf_misc_irq_disable(adapter); - if (adapter->flags & IAVF_FLAG_RESET_NEEDED) { - adapter->flags &= ~IAVF_FLAG_RESET_NEEDED; - /* Restart the AQ here. If we have been reset but didn't - * detect it, or if the PF had to reinit, our AQ will be hosed. 
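The handshake that follows is a pair of bounded polls; a sketch of the second one, which allows about five seconds total (500 tries of 10 ms):

	int i;

	for (i = 0; i < IAVF_RESET_WAIT_COUNT; i++) {
		msleep(IAVF_RESET_WAIT_MS);	/* minimum wait per try */
		if ((rd32(hw, I40E_VFGEN_RSTAT) &
		     I40E_VFGEN_RSTAT_VFR_STATE_MASK) ==
		    VIRTCHNL_VFR_VFACTIVE)
			break;			/* PF is responding again */
	}
	/* falling out with i == IAVF_RESET_WAIT_COUNT means the PF never
	 * answered; iavf_disable_vf() is the only option left then
	 */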
- */ - iavf_shutdown_adminq(hw); - iavf_init_adminq(hw); - iavf_request_reset(adapter); - } - adapter->flags |= IAVF_FLAG_RESET_PENDING; - - /* poll until we see the reset actually happen */ - for (i = 0; i < IAVF_RESET_WAIT_COUNT; i++) { - reg_val = rd32(hw, I40E_VF_ARQLEN1) & - I40E_VF_ARQLEN1_ARQENABLE_MASK; - if (!reg_val) - break; - usleep_range(5000, 10000); - } - if (i == IAVF_RESET_WAIT_COUNT) { - dev_info(&adapter->pdev->dev, "Never saw reset\n"); - goto continue_reset; /* act like the reset happened */ - } - - /* wait until the reset is complete and the PF is responding to us */ - for (i = 0; i < IAVF_RESET_WAIT_COUNT; i++) { - /* sleep first to make sure a minimum wait time is met */ - msleep(IAVF_RESET_WAIT_MS); - - reg_val = rd32(hw, I40E_VFGEN_RSTAT) & - I40E_VFGEN_RSTAT_VFR_STATE_MASK; - if (reg_val == VIRTCHNL_VFR_VFACTIVE) - break; - } - - pci_set_master(adapter->pdev); - - if (i == IAVF_RESET_WAIT_COUNT) { - dev_err(&adapter->pdev->dev, "Reset never finished (%x)\n", - reg_val); - iavf_disable_vf(adapter); - clear_bit(__IAVF_IN_CLIENT_TASK, &adapter->crit_section); - return; /* Do not attempt to reinit. It's dead, Jim. */ - } - -continue_reset: - /* We don't use netif_running() because it may be true prior to - * ndo_open() returning, so we can't assume it means all our open - * tasks have finished, since we're not holding the rtnl_lock here. - */ - running = ((adapter->state == __IAVF_RUNNING) || - (adapter->state == __IAVF_RESETTING)); - - if (running) { - netif_carrier_off(netdev); - netif_tx_stop_all_queues(netdev); - adapter->link_up = false; - iavf_napi_disable_all(adapter); - } - iavf_irq_disable(adapter); - - adapter->state = __IAVF_RESETTING; - adapter->flags &= ~IAVF_FLAG_RESET_PENDING; - - /* free the Tx/Rx rings and descriptors, might be better to just - * re-use them sometime in the future - */ - iavf_free_all_rx_resources(adapter); - iavf_free_all_tx_resources(adapter); - - adapter->flags |= IAVF_FLAG_QUEUES_DISABLED; - /* kill and reinit the admin queue */ - iavf_shutdown_adminq(hw); - adapter->current_op = VIRTCHNL_OP_UNKNOWN; - err = iavf_init_adminq(hw); - if (err) - dev_info(&adapter->pdev->dev, "Failed to init adminq: %d\n", - err); - adapter->aq_required = 0; - - if (adapter->flags & IAVF_FLAG_REINIT_ITR_NEEDED) { - err = iavf_reinit_interrupt_scheme(adapter); - if (err) - goto reset_err; - } - - adapter->aq_required |= IAVF_FLAG_AQ_GET_CONFIG; - adapter->aq_required |= IAVF_FLAG_AQ_MAP_VECTORS; - - spin_lock_bh(&adapter->mac_vlan_list_lock); - - /* re-add all MAC filters */ - list_for_each_entry(f, &adapter->mac_filter_list, list) { - f->add = true; - } - /* re-add all VLAN filters */ - list_for_each_entry(vlf, &adapter->vlan_filter_list, list) { - vlf->add = true; - } - - spin_unlock_bh(&adapter->mac_vlan_list_lock); - - /* check if TCs are running and re-add all cloud filters */ - spin_lock_bh(&adapter->cloud_filter_list_lock); - if ((vfres->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_ADQ) && - adapter->num_tc) { - list_for_each_entry(cf, &adapter->cloud_filter_list, list) { - cf->add = true; - } - } - spin_unlock_bh(&adapter->cloud_filter_list_lock); - - adapter->aq_required |= IAVF_FLAG_AQ_ADD_MAC_FILTER; - adapter->aq_required |= IAVF_FLAG_AQ_ADD_VLAN_FILTER; - adapter->aq_required |= IAVF_FLAG_AQ_ADD_CLOUD_FILTER; - iavf_misc_irq_enable(adapter); - - mod_timer(&adapter->watchdog_timer, jiffies + 2); - - /* We were running when the reset started, so we need to restore some - * state here. 
- */ - if (running) { - /* allocate transmit descriptors */ - err = iavf_setup_all_tx_resources(adapter); - if (err) - goto reset_err; - - /* allocate receive descriptors */ - err = iavf_setup_all_rx_resources(adapter); - if (err) - goto reset_err; - - if (adapter->flags & IAVF_FLAG_REINIT_ITR_NEEDED) { - err = iavf_request_traffic_irqs(adapter, netdev->name); - if (err) - goto reset_err; - - adapter->flags &= ~IAVF_FLAG_REINIT_ITR_NEEDED; - } - - iavf_configure(adapter); - - iavf_up_complete(adapter); - - iavf_irq_enable(adapter, true); - } else { - adapter->state = __IAVF_DOWN; - wake_up(&adapter->down_waitqueue); - } - clear_bit(__IAVF_IN_CLIENT_TASK, &adapter->crit_section); - clear_bit(__IAVF_IN_CRITICAL_TASK, &adapter->crit_section); - - return; -reset_err: - clear_bit(__IAVF_IN_CLIENT_TASK, &adapter->crit_section); - clear_bit(__IAVF_IN_CRITICAL_TASK, &adapter->crit_section); - dev_err(&adapter->pdev->dev, "failed to allocate resources during reinit\n"); - iavf_close(netdev); -} - -/** - * iavf_adminq_task - worker thread to clean the admin queue - * @work: pointer to work_struct containing our data - **/ -static void iavf_adminq_task(struct work_struct *work) -{ - struct iavf_adapter *adapter = - container_of(work, struct iavf_adapter, adminq_task); - struct i40e_hw *hw = &adapter->hw; - struct i40e_arq_event_info event; - enum virtchnl_ops v_op; - iavf_status ret, v_ret; - u32 val, oldval; - u16 pending; - - if (adapter->flags & IAVF_FLAG_PF_COMMS_FAILED) - goto out; - - event.buf_len = IAVF_MAX_AQ_BUF_SIZE; - event.msg_buf = kzalloc(event.buf_len, GFP_KERNEL); - if (!event.msg_buf) - goto out; - - do { - ret = iavf_clean_arq_element(hw, &event, &pending); - v_op = (enum virtchnl_ops)le32_to_cpu(event.desc.cookie_high); - v_ret = (iavf_status)le32_to_cpu(event.desc.cookie_low); - - if (ret || !v_op) - break; /* No event to process or error cleaning ARQ */ - - iavf_virtchnl_completion(adapter, v_op, v_ret, event.msg_buf, - event.msg_len); - if (pending != 0) - memset(event.msg_buf, 0, IAVF_MAX_AQ_BUF_SIZE); - } while (pending); - - if ((adapter->flags & - (IAVF_FLAG_RESET_PENDING | IAVF_FLAG_RESET_NEEDED)) || - adapter->state == __IAVF_RESETTING) - goto freedom; - - /* check for error indications */ - val = rd32(hw, hw->aq.arq.len); - if (val == 0xdeadbeef) /* indicates device in reset */ - goto freedom; - oldval = val; - if (val & I40E_VF_ARQLEN1_ARQVFE_MASK) { - dev_info(&adapter->pdev->dev, "ARQ VF Error detected\n"); - val &= ~I40E_VF_ARQLEN1_ARQVFE_MASK; - } - if (val & I40E_VF_ARQLEN1_ARQOVFL_MASK) { - dev_info(&adapter->pdev->dev, "ARQ Overflow Error detected\n"); - val &= ~I40E_VF_ARQLEN1_ARQOVFL_MASK; - } - if (val & I40E_VF_ARQLEN1_ARQCRIT_MASK) { - dev_info(&adapter->pdev->dev, "ARQ Critical Error detected\n"); - val &= ~I40E_VF_ARQLEN1_ARQCRIT_MASK; - } - if (oldval != val) - wr32(hw, hw->aq.arq.len, val); - - val = rd32(hw, hw->aq.asq.len); - oldval = val; - if (val & I40E_VF_ATQLEN1_ATQVFE_MASK) { - dev_info(&adapter->pdev->dev, "ASQ VF Error detected\n"); - val &= ~I40E_VF_ATQLEN1_ATQVFE_MASK; - } - if (val & I40E_VF_ATQLEN1_ATQOVFL_MASK) { - dev_info(&adapter->pdev->dev, "ASQ Overflow Error detected\n"); - val &= ~I40E_VF_ATQLEN1_ATQOVFL_MASK; - } - if (val & I40E_VF_ATQLEN1_ATQCRIT_MASK) { - dev_info(&adapter->pdev->dev, "ASQ Critical Error detected\n"); - val &= ~I40E_VF_ATQLEN1_ATQCRIT_MASK; - } - if (oldval != val) - wr32(hw, hw->aq.asq.len, val); - -freedom: - kfree(event.msg_buf); -out: - /* re-enable Admin queue interrupt cause */ - 
iavf_misc_irq_enable(adapter); -} - -/** - * iavf_client_task - worker thread to perform client work - * @work: pointer to work_struct containing our data - * - * This task handles client interactions. Because client calls can be - * reentrant, we can't handle them in the watchdog. - **/ -static void iavf_client_task(struct work_struct *work) -{ - struct iavf_adapter *adapter = - container_of(work, struct iavf_adapter, client_task.work); - - /* If we can't get the client bit, just give up. We'll be rescheduled - * later. - */ - - if (test_and_set_bit(__IAVF_IN_CLIENT_TASK, &adapter->crit_section)) - return; - - if (adapter->flags & IAVF_FLAG_SERVICE_CLIENT_REQUESTED) { - iavf_client_subtask(adapter); - adapter->flags &= ~IAVF_FLAG_SERVICE_CLIENT_REQUESTED; - goto out; - } - if (adapter->flags & IAVF_FLAG_CLIENT_NEEDS_L2_PARAMS) { - iavf_notify_client_l2_params(&adapter->vsi); - adapter->flags &= ~IAVF_FLAG_CLIENT_NEEDS_L2_PARAMS; - goto out; - } - if (adapter->flags & IAVF_FLAG_CLIENT_NEEDS_CLOSE) { - iavf_notify_client_close(&adapter->vsi, false); - adapter->flags &= ~IAVF_FLAG_CLIENT_NEEDS_CLOSE; - goto out; - } - if (adapter->flags & IAVF_FLAG_CLIENT_NEEDS_OPEN) { - iavf_notify_client_open(&adapter->vsi); - adapter->flags &= ~IAVF_FLAG_CLIENT_NEEDS_OPEN; - } -out: - clear_bit(__IAVF_IN_CLIENT_TASK, &adapter->crit_section); -} - -/** - * iavf_free_all_tx_resources - Free Tx Resources for All Queues - * @adapter: board private structure - * - * Free all transmit software resources - **/ -void iavf_free_all_tx_resources(struct iavf_adapter *adapter) -{ - int i; - - if (!adapter->tx_rings) - return; - - for (i = 0; i < adapter->num_active_queues; i++) - if (adapter->tx_rings[i].desc) - iavf_free_tx_resources(&adapter->tx_rings[i]); -} - -/** - * iavf_setup_all_tx_resources - allocate all queues Tx resources - * @adapter: board private structure - * - * If this function returns with an error, then it's possible one or - * more of the rings is populated (while the rest are not). It is the - * callers duty to clean those orphaned rings. - * - * Return 0 on success, negative on failure - **/ -static int iavf_setup_all_tx_resources(struct iavf_adapter *adapter) -{ - int i, err = 0; - - for (i = 0; i < adapter->num_active_queues; i++) { - adapter->tx_rings[i].count = adapter->tx_desc_count; - err = iavf_setup_tx_descriptors(&adapter->tx_rings[i]); - if (!err) - continue; - dev_err(&adapter->pdev->dev, - "Allocation for Tx Queue %u failed\n", i); - break; - } - - return err; -} - -/** - * iavf_setup_all_rx_resources - allocate all queues Rx resources - * @adapter: board private structure - * - * If this function returns with an error, then it's possible one or - * more of the rings is populated (while the rest are not). It is the - * callers duty to clean those orphaned rings. 
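In practice the caller's cleanup obligation is simple, because the free routines skip rings whose descriptors were never allocated; a sketch of a caller, with error handling beyond this omitted:

	int err;

	err = iavf_setup_all_rx_resources(adapter);
	if (err)
		/* reaps only the rings whose ->desc was populated */
		iavf_free_all_rx_resources(adapter);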
- * - * Return 0 on success, negative on failure - **/ -static int iavf_setup_all_rx_resources(struct iavf_adapter *adapter) -{ - int i, err = 0; - - for (i = 0; i < adapter->num_active_queues; i++) { - adapter->rx_rings[i].count = adapter->rx_desc_count; - err = iavf_setup_rx_descriptors(&adapter->rx_rings[i]); - if (!err) - continue; - dev_err(&adapter->pdev->dev, - "Allocation for Rx Queue %u failed\n", i); - break; - } - return err; -} - -/** - * iavf_free_all_rx_resources - Free Rx Resources for All Queues - * @adapter: board private structure - * - * Free all receive software resources - **/ -void iavf_free_all_rx_resources(struct iavf_adapter *adapter) -{ - int i; - - if (!adapter->rx_rings) - return; - - for (i = 0; i < adapter->num_active_queues; i++) - if (adapter->rx_rings[i].desc) - iavf_free_rx_resources(&adapter->rx_rings[i]); -} - -/** - * iavf_validate_tx_bandwidth - validate the max Tx bandwidth - * @adapter: board private structure - * @max_tx_rate: max Tx bw for a tc - **/ -static int iavf_validate_tx_bandwidth(struct iavf_adapter *adapter, - u64 max_tx_rate) -{ - int speed = 0, ret = 0; - - switch (adapter->link_speed) { - case I40E_LINK_SPEED_40GB: - speed = 40000; - break; - case I40E_LINK_SPEED_25GB: - speed = 25000; - break; - case I40E_LINK_SPEED_20GB: - speed = 20000; - break; - case I40E_LINK_SPEED_10GB: - speed = 10000; - break; - case I40E_LINK_SPEED_1GB: - speed = 1000; - break; - case I40E_LINK_SPEED_100MB: - speed = 100; - break; - default: - break; - } - - if (max_tx_rate > speed) { - dev_err(&adapter->pdev->dev, - "Invalid tx rate specified\n"); - ret = -EINVAL; - } - - return ret; -} - -/** - * iavf_validate_channel_config - validate queue mapping info - * @adapter: board private structure - * @mqprio_qopt: queue parameters - * - * This function validates if the config provided by the user to - * configure queue channels is valid or not. Returns 0 on a valid - * config. 
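Concretely, the offset check below enforces that the TCs tile the queue range; a hypothetical two-TC example:

	u16 count[]  = { 4, 4 };	/* queues per TC */
	u16 offset[] = { 0, 4 };	/* must equal the running total;
					 * { 0, 5 } would fail the
					 * offset[i] != num_qps test
					 */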
- **/ -static int iavf_validate_ch_config(struct iavf_adapter *adapter, - struct tc_mqprio_qopt_offload *mqprio_qopt) -{ - u64 total_max_rate = 0; - int i, num_qps = 0; - u64 tx_rate = 0; - int ret = 0; - - if (mqprio_qopt->qopt.num_tc > IAVF_MAX_TRAFFIC_CLASS || - mqprio_qopt->qopt.num_tc < 1) - return -EINVAL; - - for (i = 0; i <= mqprio_qopt->qopt.num_tc - 1; i++) { - if (!mqprio_qopt->qopt.count[i] || - mqprio_qopt->qopt.offset[i] != num_qps) - return -EINVAL; - if (mqprio_qopt->min_rate[i]) { - dev_err(&adapter->pdev->dev, - "Invalid min tx rate (greater than 0) specified\n"); - return -EINVAL; - } - /*convert to Mbps */ - tx_rate = div_u64(mqprio_qopt->max_rate[i], - IAVF_MBPS_DIVISOR); - total_max_rate += tx_rate; - num_qps += mqprio_qopt->qopt.count[i]; - } - if (num_qps > IAVF_MAX_REQ_QUEUES) - return -EINVAL; - - ret = iavf_validate_tx_bandwidth(adapter, total_max_rate); - return ret; -} - -/** - * iavf_del_all_cloud_filters - delete all cloud filters - * on the traffic classes - **/ -static void iavf_del_all_cloud_filters(struct iavf_adapter *adapter) -{ - struct iavf_cloud_filter *cf, *cftmp; - - spin_lock_bh(&adapter->cloud_filter_list_lock); - list_for_each_entry_safe(cf, cftmp, &adapter->cloud_filter_list, - list) { - list_del(&cf->list); - kfree(cf); - adapter->num_cloud_filters--; - } - spin_unlock_bh(&adapter->cloud_filter_list_lock); -} - -/** - * __iavf_setup_tc - configure multiple traffic classes - * @netdev: network interface device structure - * @type_date: tc offload data - * - * This function processes the config information provided by the - * user to configure traffic classes/queue channels and packages the - * information to request the PF to setup traffic classes. - * - * Returns 0 on success. - **/ -static int __iavf_setup_tc(struct net_device *netdev, void *type_data) -{ - struct tc_mqprio_qopt_offload *mqprio_qopt = type_data; - struct iavf_adapter *adapter = netdev_priv(netdev); - struct virtchnl_vf_resource *vfres = adapter->vf_res; - u8 num_tc = 0, total_qps = 0; - int ret = 0, netdev_tc = 0; - u64 max_tx_rate; - u16 mode; - int i; - - num_tc = mqprio_qopt->qopt.num_tc; - mode = mqprio_qopt->mode; - - /* delete queue_channel */ - if (!mqprio_qopt->qopt.hw) { - if (adapter->ch_config.state == __IAVF_TC_RUNNING) { - /* reset the tc configuration */ - netdev_reset_tc(netdev); - adapter->num_tc = 0; - netif_tx_stop_all_queues(netdev); - netif_tx_disable(netdev); - iavf_del_all_cloud_filters(adapter); - adapter->aq_required = IAVF_FLAG_AQ_DISABLE_CHANNELS; - goto exit; - } else { - return -EINVAL; - } - } - - /* add queue channel */ - if (mode == TC_MQPRIO_MODE_CHANNEL) { - if (!(vfres->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_ADQ)) { - dev_err(&adapter->pdev->dev, "ADq not supported\n"); - return -EOPNOTSUPP; - } - if (adapter->ch_config.state != __IAVF_TC_INVALID) { - dev_err(&adapter->pdev->dev, "TC configuration already exists\n"); - return -EINVAL; - } - - ret = iavf_validate_ch_config(adapter, mqprio_qopt); - if (ret) - return ret; - /* Return if same TC config is requested */ - if (adapter->num_tc == num_tc) - return 0; - adapter->num_tc = num_tc; - - for (i = 0; i < IAVF_MAX_TRAFFIC_CLASS; i++) { - if (i < num_tc) { - adapter->ch_config.ch_info[i].count = - mqprio_qopt->qopt.count[i]; - adapter->ch_config.ch_info[i].offset = - mqprio_qopt->qopt.offset[i]; - total_qps += mqprio_qopt->qopt.count[i]; - max_tx_rate = mqprio_qopt->max_rate[i]; - /* convert to Mbps */ - max_tx_rate = div_u64(max_tx_rate, - IAVF_MBPS_DIVISOR); - 
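	/* Worked example, assuming IAVF_MBPS_DIVISOR is 125000 (bytes/s
	 * per Mbps): mqprio passes max_rate in bytes per second, so a
	 * 1 Gbps cap arrives as 125000000 and becomes
	 * 125000000 / 125000 = 1000 Mbps, the unit that
	 * iavf_validate_tx_bandwidth() compares against the link speed
	 * table.
	 */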
adapter->ch_config.ch_info[i].max_tx_rate = - max_tx_rate; - } else { - adapter->ch_config.ch_info[i].count = 1; - adapter->ch_config.ch_info[i].offset = 0; - } - } - adapter->ch_config.total_qps = total_qps; - netif_tx_stop_all_queues(netdev); - netif_tx_disable(netdev); - adapter->aq_required |= IAVF_FLAG_AQ_ENABLE_CHANNELS; - netdev_reset_tc(netdev); - /* Report the tc mapping up the stack */ - netdev_set_num_tc(adapter->netdev, num_tc); - for (i = 0; i < IAVF_MAX_TRAFFIC_CLASS; i++) { - u16 qcount = mqprio_qopt->qopt.count[i]; - u16 qoffset = mqprio_qopt->qopt.offset[i]; - - if (i < num_tc) - netdev_set_tc_queue(netdev, netdev_tc++, qcount, - qoffset); - } - } -exit: - return ret; -} - -/** - * iavf_parse_cls_flower - Parse tc flower filters provided by kernel - * @adapter: board private structure - * @cls_flower: pointer to struct tc_cls_flower_offload - * @filter: pointer to cloud filter structure - */ -static int iavf_parse_cls_flower(struct iavf_adapter *adapter, - struct tc_cls_flower_offload *f, - struct iavf_cloud_filter *filter) -{ - u16 n_proto_mask = 0; - u16 n_proto_key = 0; - u8 field_flags = 0; - u16 addr_type = 0; - u16 n_proto = 0; - int i = 0; - struct virtchnl_filter *vf = &filter->f; - - if (f->dissector->used_keys & - ~(BIT(FLOW_DISSECTOR_KEY_CONTROL) | - BIT(FLOW_DISSECTOR_KEY_BASIC) | - BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) | - BIT(FLOW_DISSECTOR_KEY_VLAN) | - BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS) | - BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS) | - BIT(FLOW_DISSECTOR_KEY_PORTS) | - BIT(FLOW_DISSECTOR_KEY_ENC_KEYID))) { - dev_err(&adapter->pdev->dev, "Unsupported key used: 0x%x\n", - f->dissector->used_keys); - return -EOPNOTSUPP; - } - - if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_KEYID)) { - struct flow_dissector_key_keyid *mask = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_ENC_KEYID, - f->mask); - - if (mask->keyid != 0) - field_flags |= IAVF_CLOUD_FIELD_TEN_ID; - } - - if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_BASIC)) { - struct flow_dissector_key_basic *key = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_BASIC, - f->key); - - struct flow_dissector_key_basic *mask = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_BASIC, - f->mask); - n_proto_key = ntohs(key->n_proto); - n_proto_mask = ntohs(mask->n_proto); - - if (n_proto_key == ETH_P_ALL) { - n_proto_key = 0; - n_proto_mask = 0; - } - n_proto = n_proto_key & n_proto_mask; - if (n_proto != ETH_P_IP && n_proto != ETH_P_IPV6) - return -EINVAL; - if (n_proto == ETH_P_IPV6) { - /* specify flow type as TCP IPv6 */ - vf->flow_type = VIRTCHNL_TCP_V6_FLOW; - } - - if (key->ip_proto != IPPROTO_TCP) { - dev_info(&adapter->pdev->dev, "Only TCP transport is supported\n"); - return -EINVAL; - } - } - - if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ETH_ADDRS)) { - struct flow_dissector_key_eth_addrs *key = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_ETH_ADDRS, - f->key); - - struct flow_dissector_key_eth_addrs *mask = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_ETH_ADDRS, - f->mask); - /* use is_broadcast and is_zero to check for all 0xf or 0 */ - if (!is_zero_ether_addr(mask->dst)) { - if (is_broadcast_ether_addr(mask->dst)) { - field_flags |= IAVF_CLOUD_FIELD_OMAC; - } else { - dev_err(&adapter->pdev->dev, "Bad ether dest mask %pM\n", - mask->dst); - return I40E_ERR_CONFIG; - } - } - - if (!is_zero_ether_addr(mask->src)) { - if (is_broadcast_ether_addr(mask->src)) { - field_flags |= IAVF_CLOUD_FIELD_IMAC; - } 
else { - dev_err(&adapter->pdev->dev, "Bad ether src mask %pM\n", - mask->src); - return I40E_ERR_CONFIG; - } - } - - if (!is_zero_ether_addr(key->dst)) - if (is_valid_ether_addr(key->dst) || - is_multicast_ether_addr(key->dst)) { - /* set the mask if a valid dst_mac address */ - for (i = 0; i < ETH_ALEN; i++) - vf->mask.tcp_spec.dst_mac[i] |= 0xff; - ether_addr_copy(vf->data.tcp_spec.dst_mac, - key->dst); - } - - if (!is_zero_ether_addr(key->src)) - if (is_valid_ether_addr(key->src) || - is_multicast_ether_addr(key->src)) { - /* set the mask if a valid dst_mac address */ - for (i = 0; i < ETH_ALEN; i++) - vf->mask.tcp_spec.src_mac[i] |= 0xff; - ether_addr_copy(vf->data.tcp_spec.src_mac, - key->src); - } - } - - if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_VLAN)) { - struct flow_dissector_key_vlan *key = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_VLAN, - f->key); - struct flow_dissector_key_vlan *mask = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_VLAN, - f->mask); - - if (mask->vlan_id) { - if (mask->vlan_id == VLAN_VID_MASK) { - field_flags |= IAVF_CLOUD_FIELD_IVLAN; - } else { - dev_err(&adapter->pdev->dev, "Bad vlan mask %u\n", - mask->vlan_id); - return I40E_ERR_CONFIG; - } - } - vf->mask.tcp_spec.vlan_id |= cpu_to_be16(0xffff); - vf->data.tcp_spec.vlan_id = cpu_to_be16(key->vlan_id); - } - - if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_CONTROL)) { - struct flow_dissector_key_control *key = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_CONTROL, - f->key); - - addr_type = key->addr_type; - } - - if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { - struct flow_dissector_key_ipv4_addrs *key = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_IPV4_ADDRS, - f->key); - struct flow_dissector_key_ipv4_addrs *mask = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_IPV4_ADDRS, - f->mask); - - if (mask->dst) { - if (mask->dst == cpu_to_be32(0xffffffff)) { - field_flags |= IAVF_CLOUD_FIELD_IIP; - } else { - dev_err(&adapter->pdev->dev, "Bad ip dst mask 0x%08x\n", - be32_to_cpu(mask->dst)); - return I40E_ERR_CONFIG; - } - } - - if (mask->src) { - if (mask->src == cpu_to_be32(0xffffffff)) { - field_flags |= IAVF_CLOUD_FIELD_IIP; - } else { - dev_err(&adapter->pdev->dev, "Bad ip src mask 0x%08x\n", - be32_to_cpu(mask->dst)); - return I40E_ERR_CONFIG; - } - } - - if (field_flags & IAVF_CLOUD_FIELD_TEN_ID) { - dev_info(&adapter->pdev->dev, "Tenant id not allowed for ip filter\n"); - return I40E_ERR_CONFIG; - } - if (key->dst) { - vf->mask.tcp_spec.dst_ip[0] |= cpu_to_be32(0xffffffff); - vf->data.tcp_spec.dst_ip[0] = key->dst; - } - if (key->src) { - vf->mask.tcp_spec.src_ip[0] |= cpu_to_be32(0xffffffff); - vf->data.tcp_spec.src_ip[0] = key->src; - } - } - - if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) { - struct flow_dissector_key_ipv6_addrs *key = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_IPV6_ADDRS, - f->key); - struct flow_dissector_key_ipv6_addrs *mask = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_IPV6_ADDRS, - f->mask); - - /* validate mask, make sure it is not IPV6_ADDR_ANY */ - if (ipv6_addr_any(&mask->dst)) { - dev_err(&adapter->pdev->dev, "Bad ipv6 dst mask 0x%02x\n", - IPV6_ADDR_ANY); - return I40E_ERR_CONFIG; - } - - /* src and dest IPv6 address should not be LOOPBACK - * (0:0:0:0:0:0:0:1) which can be represented as ::1 - */ - if (ipv6_addr_loopback(&key->dst) || - ipv6_addr_loopback(&key->src)) { - dev_err(&adapter->pdev->dev, - "ipv6 addr 
should not be loopback\n"); - return I40E_ERR_CONFIG; - } - if (!ipv6_addr_any(&mask->dst) || !ipv6_addr_any(&mask->src)) - field_flags |= IAVF_CLOUD_FIELD_IIP; - - for (i = 0; i < 4; i++) - vf->mask.tcp_spec.dst_ip[i] |= cpu_to_be32(0xffffffff); - memcpy(&vf->data.tcp_spec.dst_ip, &key->dst.s6_addr32, - sizeof(vf->data.tcp_spec.dst_ip)); - for (i = 0; i < 4; i++) - vf->mask.tcp_spec.src_ip[i] |= cpu_to_be32(0xffffffff); - memcpy(&vf->data.tcp_spec.src_ip, &key->src.s6_addr32, - sizeof(vf->data.tcp_spec.src_ip)); - } - if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_PORTS)) { - struct flow_dissector_key_ports *key = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_PORTS, - f->key); - struct flow_dissector_key_ports *mask = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_PORTS, - f->mask); - - if (mask->src) { - if (mask->src == cpu_to_be16(0xffff)) { - field_flags |= IAVF_CLOUD_FIELD_IIP; - } else { - dev_err(&adapter->pdev->dev, "Bad src port mask %u\n", - be16_to_cpu(mask->src)); - return I40E_ERR_CONFIG; - } - } - - if (mask->dst) { - if (mask->dst == cpu_to_be16(0xffff)) { - field_flags |= IAVF_CLOUD_FIELD_IIP; - } else { - dev_err(&adapter->pdev->dev, "Bad dst port mask %u\n", - be16_to_cpu(mask->dst)); - return I40E_ERR_CONFIG; - } - } - if (key->dst) { - vf->mask.tcp_spec.dst_port |= cpu_to_be16(0xffff); - vf->data.tcp_spec.dst_port = key->dst; - } - - if (key->src) { - vf->mask.tcp_spec.src_port |= cpu_to_be16(0xffff); - vf->data.tcp_spec.src_port = key->src; - } - } - vf->field_flags = field_flags; - - return 0; -} - -/** - * iavf_handle_tclass - Forward to a traffic class on the device - * @adapter: board private structure - * @tc: traffic class index on the device - * @filter: pointer to cloud filter structure - */ -static int iavf_handle_tclass(struct iavf_adapter *adapter, u32 tc, - struct iavf_cloud_filter *filter) -{ - if (tc == 0) - return 0; - if (tc < adapter->num_tc) { - if (!filter->f.data.tcp_spec.dst_port) { - dev_err(&adapter->pdev->dev, - "Specify destination port to redirect to traffic class other than TC0\n"); - return -EINVAL; - } - } - /* redirect to a traffic class on the same device */ - filter->f.action = VIRTCHNL_ACTION_TC_REDIRECT; - filter->f.action_meta = tc; - return 0; -} - -/** - * iavf_configure_clsflower - Add tc flower filters - * @adapter: board private structure - * @cls_flower: Pointer to struct tc_cls_flower_offload - */ -static int iavf_configure_clsflower(struct iavf_adapter *adapter, - struct tc_cls_flower_offload *cls_flower) -{ - int tc = tc_classid_to_hwtc(adapter->netdev, cls_flower->classid); - struct iavf_cloud_filter *filter = NULL; - int err = -EINVAL, count = 50; - - if (tc < 0) { - dev_err(&adapter->pdev->dev, "Invalid traffic class\n"); - return -EINVAL; - } - - filter = kzalloc(sizeof(*filter), GFP_KERNEL); - if (!filter) - return -ENOMEM; - - while (test_and_set_bit(__IAVF_IN_CRITICAL_TASK, - &adapter->crit_section)) { - if (--count == 0) - goto err; - udelay(1); - } - - filter->cookie = cls_flower->cookie; - - /* set the mask to all zeroes to begin with */ - memset(&filter->f.mask.tcp_spec, 0, sizeof(struct virtchnl_l4_spec)); - /* start out with flow type and eth type IPv4 to begin with */ - filter->f.flow_type = VIRTCHNL_TCP_V4_FLOW; - err = iavf_parse_cls_flower(adapter, cls_flower, filter); - if (err < 0) - goto err; - - err = iavf_handle_tclass(adapter, tc, filter); - if (err < 0) - goto err; - - /* add filter to the list */ - spin_lock_bh(&adapter->cloud_filter_list_lock); - 
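[iavf_handle_tclass() below boils down to one rule: redirecting to any class other than TC0 requires an exact destination port in the match. Restated as a self-contained check, with illustrative names:]

#include <stdint.h>
#include <stdio.h>

static int handle_tclass(uint32_t tc, uint32_t num_tc, uint16_t dst_port_be)
{
	if (tc == 0)
		return 0;	/* TC0 needs no port match */
	if (tc < num_tc && !dst_port_be) {
		fprintf(stderr,
			"Specify destination port to redirect to traffic class other than TC0\n");
		return -1;	/* -EINVAL in the driver */
	}
	return 0;		/* action: redirect to class 'tc' */
}

int main(void)
{
	printf("%d\n", handle_tclass(0, 4, 0));
	printf("%d\n", handle_tclass(2, 4, 0));	/* rejected */
	return 0;
}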
list_add_tail(&filter->list, &adapter->cloud_filter_list); - adapter->num_cloud_filters++; - filter->add = true; - adapter->aq_required |= IAVF_FLAG_AQ_ADD_CLOUD_FILTER; - spin_unlock_bh(&adapter->cloud_filter_list_lock); -err: - if (err) - kfree(filter); - - clear_bit(__IAVF_IN_CRITICAL_TASK, &adapter->crit_section); - return err; -} - -/* iavf_find_cf - Find the cloud filter in the list - * @adapter: Board private structure - * @cookie: filter specific cookie - * - * Returns ptr to the filter object or NULL. Must be called while holding the - * cloud_filter_list_lock. - */ -static struct iavf_cloud_filter *iavf_find_cf(struct iavf_adapter *adapter, - unsigned long *cookie) -{ - struct iavf_cloud_filter *filter = NULL; - - if (!cookie) - return NULL; - - list_for_each_entry(filter, &adapter->cloud_filter_list, list) { - if (!memcmp(cookie, &filter->cookie, sizeof(filter->cookie))) - return filter; - } - return NULL; -} - -/** - * iavf_delete_clsflower - Remove tc flower filters - * @adapter: board private structure - * @cls_flower: Pointer to struct tc_cls_flower_offload - */ -static int iavf_delete_clsflower(struct iavf_adapter *adapter, - struct tc_cls_flower_offload *cls_flower) -{ - struct iavf_cloud_filter *filter = NULL; - int err = 0; - - spin_lock_bh(&adapter->cloud_filter_list_lock); - filter = iavf_find_cf(adapter, &cls_flower->cookie); - if (filter) { - filter->del = true; - adapter->aq_required |= IAVF_FLAG_AQ_DEL_CLOUD_FILTER; - } else { - err = -EINVAL; - } - spin_unlock_bh(&adapter->cloud_filter_list_lock); - - return err; -} - -/** - * iavf_setup_tc_cls_flower - flower classifier offloads - * @netdev: net device to configure - * @type_data: offload data - */ -static int iavf_setup_tc_cls_flower(struct iavf_adapter *adapter, - struct tc_cls_flower_offload *cls_flower) -{ - if (cls_flower->common.chain_index) - return -EOPNOTSUPP; - - switch (cls_flower->command) { - case TC_CLSFLOWER_REPLACE: - return iavf_configure_clsflower(adapter, cls_flower); - case TC_CLSFLOWER_DESTROY: - return iavf_delete_clsflower(adapter, cls_flower); - case TC_CLSFLOWER_STATS: - return -EOPNOTSUPP; - default: - return -EOPNOTSUPP; - } -} - -/** - * iavf_setup_tc_block_cb - block callback for tc - * @type: type of offload - * @type_data: offload data - * @cb_priv: - * - * This function is the block callback for traffic classes - **/ -static int iavf_setup_tc_block_cb(enum tc_setup_type type, void *type_data, - void *cb_priv) -{ - switch (type) { - case TC_SETUP_CLSFLOWER: - return iavf_setup_tc_cls_flower(cb_priv, type_data); - default: - return -EOPNOTSUPP; - } -} - -/** - * iavf_setup_tc_block - register callbacks for tc - * @netdev: network interface device structure - * @f: tc offload data - * - * This function registers block callbacks for tc - * offloads - **/ -static int iavf_setup_tc_block(struct net_device *dev, - struct tc_block_offload *f) -{ - struct iavf_adapter *adapter = netdev_priv(dev); - - if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS) - return -EOPNOTSUPP; - - switch (f->command) { - case TC_BLOCK_BIND: - return tcf_block_cb_register(f->block, iavf_setup_tc_block_cb, - adapter, adapter, f->extack); - case TC_BLOCK_UNBIND: - tcf_block_cb_unregister(f->block, iavf_setup_tc_block_cb, - adapter); - return 0; - default: - return -EOPNOTSUPP; - } -} - -/** - * iavf_setup_tc - configure multiple traffic classes - * @netdev: network interface device structure - * @type: type of offload - * @type_date: tc offload data - * - * This function is the callback to ndo_setup_tc in 
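[iavf_find_cf() is a straight cookie search; in the driver the caller must hold cloud_filter_list_lock while walking the list. A userspace analog on a plain singly linked list:]

#include <stdio.h>
#include <string.h>

struct cloud_filter {
	unsigned long cookie;
	struct cloud_filter *next;
};

static struct cloud_filter *find_cf(struct cloud_filter *head,
				    const unsigned long *cookie)
{
	struct cloud_filter *f;

	if (!cookie)
		return NULL;
	for (f = head; f; f = f->next)
		if (!memcmp(cookie, &f->cookie, sizeof(f->cookie)))
			return f;
	return NULL;
}

int main(void)
{
	struct cloud_filter b = { .cookie = 2, .next = NULL };
	struct cloud_filter a = { .cookie = 1, .next = &b };
	unsigned long want = 2;

	printf("%s\n", find_cf(&a, &want) ? "found" : "missing");
	return 0;
}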
the - * netdev_ops. - * - * Returns 0 on success - **/ -static int iavf_setup_tc(struct net_device *netdev, enum tc_setup_type type, - void *type_data) -{ - switch (type) { - case TC_SETUP_QDISC_MQPRIO: - return __iavf_setup_tc(netdev, type_data); - case TC_SETUP_BLOCK: - return iavf_setup_tc_block(netdev, type_data); - default: - return -EOPNOTSUPP; - } -} - -/** - * iavf_open - Called when a network interface is made active - * @netdev: network interface device structure - * - * Returns 0 on success, negative value on failure - * - * The open entry point is called when a network interface is made - * active by the system (IFF_UP). At this point all resources needed - * for transmit and receive operations are allocated, the interrupt - * handler is registered with the OS, the watchdog timer is started, - * and the stack is notified that the interface is ready. - **/ -static int iavf_open(struct net_device *netdev) -{ - struct iavf_adapter *adapter = netdev_priv(netdev); - int err; - - if (adapter->flags & IAVF_FLAG_PF_COMMS_FAILED) { - dev_err(&adapter->pdev->dev, "Unable to open device due to PF driver failure.\n"); - return -EIO; - } - - while (test_and_set_bit(__IAVF_IN_CRITICAL_TASK, - &adapter->crit_section)) - usleep_range(500, 1000); - - if (adapter->state != __IAVF_DOWN) { - err = -EBUSY; - goto err_unlock; - } - - /* allocate transmit descriptors */ - err = iavf_setup_all_tx_resources(adapter); - if (err) - goto err_setup_tx; - - /* allocate receive descriptors */ - err = iavf_setup_all_rx_resources(adapter); - if (err) - goto err_setup_rx; - - /* clear any pending interrupts, may auto mask */ - err = iavf_request_traffic_irqs(adapter, netdev->name); - if (err) - goto err_req_irq; - - spin_lock_bh(&adapter->mac_vlan_list_lock); - - iavf_add_filter(adapter, adapter->hw.mac.addr); - - spin_unlock_bh(&adapter->mac_vlan_list_lock); - - iavf_configure(adapter); - - iavf_up_complete(adapter); - - iavf_irq_enable(adapter, true); - - clear_bit(__IAVF_IN_CRITICAL_TASK, &adapter->crit_section); - - return 0; - -err_req_irq: - iavf_down(adapter); - iavf_free_traffic_irqs(adapter); -err_setup_rx: - iavf_free_all_rx_resources(adapter); -err_setup_tx: - iavf_free_all_tx_resources(adapter); -err_unlock: - clear_bit(__IAVF_IN_CRITICAL_TASK, &adapter->crit_section); - - return err; -} - -/** - * iavf_close - Disables a network interface - * @netdev: network interface device structure - * - * Returns 0, this is not allowed to fail - * - * The close entry point is called when an interface is de-activated - * by the OS. The hardware is still under the drivers control, but - * needs to be disabled. All IRQs except vector 0 (reserved for admin queue) - * are freed, along with all transmit and receive resources. - **/ -static int iavf_close(struct net_device *netdev) -{ - struct iavf_adapter *adapter = netdev_priv(netdev); - int status; - - if (adapter->state <= __IAVF_DOWN_PENDING) - return 0; - - while (test_and_set_bit(__IAVF_IN_CRITICAL_TASK, - &adapter->crit_section)) - usleep_range(500, 1000); - - set_bit(__I40E_VSI_DOWN, adapter->vsi.state); - if (CLIENT_ENABLED(adapter)) - adapter->flags |= IAVF_FLAG_CLIENT_NEEDS_CLOSE; - - iavf_down(adapter); - adapter->state = __IAVF_DOWN_PENDING; - iavf_free_traffic_irqs(adapter); - - clear_bit(__IAVF_IN_CRITICAL_TASK, &adapter->crit_section); - - /* We explicitly don't free resources here because the hardware is - * still active and can DMA into memory. 
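[iavf_open() below uses the usual kernel goto ladder: each setup step that succeeds gains a matching unwind label, and the labels fall through in reverse order. A compact userspace rendering of the shape, with malloc()/free() standing in for ring and IRQ setup:]

#include <stdio.h>
#include <stdlib.h>

static int request_irqs(void) { return 0; }	/* pretend IRQ setup succeeds */

static int open_path(void)
{
	void *tx, *rx;
	int err;

	tx = malloc(64);		/* iavf_setup_all_tx_resources() */
	if (!tx)
		return -1;

	rx = malloc(64);		/* iavf_setup_all_rx_resources() */
	if (!rx) {
		err = -1;
		goto err_setup_rx;
	}

	err = request_irqs();		/* iavf_request_traffic_irqs() */
	if (err)
		goto err_req_irq;

	free(rx);			/* normally kept until close() */
	free(tx);
	return 0;

err_req_irq:
	free(rx);			/* labels fall through, undoing */
err_setup_rx:
	free(tx);			/* setup in reverse order */
	return err;
}

int main(void)
{
	printf("open_path() = %d\n", open_path());
	return 0;
}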
Resources are cleared in - * iavf_virtchnl_completion() after we get confirmation from the PF - * driver that the rings have been stopped. - * - * Also, we wait for state to transition to __IAVF_DOWN before - * returning. State change occurs in iavf_virtchnl_completion() after - * VF resources are released (which occurs after PF driver processes and - * responds to admin queue commands). - */ - - status = wait_event_timeout(adapter->down_waitqueue, - adapter->state == __IAVF_DOWN, - msecs_to_jiffies(200)); - if (!status) - netdev_warn(netdev, "Device resources not yet released\n"); - return 0; -} - -/** - * iavf_change_mtu - Change the Maximum Transfer Unit - * @netdev: network interface device structure - * @new_mtu: new value for maximum frame size - * - * Returns 0 on success, negative on failure - **/ -static int iavf_change_mtu(struct net_device *netdev, int new_mtu) -{ - struct iavf_adapter *adapter = netdev_priv(netdev); - - netdev->mtu = new_mtu; - if (CLIENT_ENABLED(adapter)) { - iavf_notify_client_l2_params(&adapter->vsi); - adapter->flags |= IAVF_FLAG_SERVICE_CLIENT_REQUESTED; - } - adapter->flags |= IAVF_FLAG_RESET_NEEDED; - schedule_work(&adapter->reset_task); - - return 0; -} - -/** - * i40e_set_features - set the netdev feature flags - * @netdev: ptr to the netdev being adjusted - * @features: the feature set that the stack is suggesting - * Note: expects to be called while under rtnl_lock() - **/ -static int iavf_set_features(struct net_device *netdev, - netdev_features_t features) -{ - struct iavf_adapter *adapter = netdev_priv(netdev); - - /* Don't allow changing VLAN_RX flag when adapter is not capable - * of VLAN offload - */ - if (!VLAN_ALLOWED(adapter)) { - if ((netdev->features ^ features) & NETIF_F_HW_VLAN_CTAG_RX) - return -EINVAL; - } else if ((netdev->features ^ features) & NETIF_F_HW_VLAN_CTAG_RX) { - if (features & NETIF_F_HW_VLAN_CTAG_RX) - adapter->aq_required |= - IAVF_FLAG_AQ_ENABLE_VLAN_STRIPPING; - else - adapter->aq_required |= - IAVF_FLAG_AQ_DISABLE_VLAN_STRIPPING; - } - - return 0; -} - -/** - * iavf_features_check - Validate encapsulated packet conforms to limits - * @skb: skb buff - * @dev: This physical port's netdev - * @features: Offload features that the stack believes apply - **/ -static netdev_features_t iavf_features_check(struct sk_buff *skb, - struct net_device *dev, - netdev_features_t features) -{ - size_t len; - - /* No point in doing any of this if neither checksum nor GSO are - * being requested for this frame. We can rule out both by just - * checking for CHECKSUM_PARTIAL - */ - if (skb->ip_summed != CHECKSUM_PARTIAL) - return features; - - /* We cannot support GSO if the MSS is going to be less than - * 64 bytes. If it is then we need to drop support for GSO. 
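[The VLAN-stripping toggle in iavf_set_features() relies on "(netdev->features ^ features)": XOR leaves exactly the bits that changed, and ANDing with one flag isolates the toggle of interest. The flag value below is illustrative, not the real NETIF_F_HW_VLAN_CTAG_RX:]

#include <stdint.h>
#include <stdio.h>

#define F_VLAN_RX (UINT64_C(1) << 8)	/* stand-in feature bit */

int main(void)
{
	uint64_t cur = F_VLAN_RX;	/* stripping currently on */
	uint64_t req = 0;		/* user asks for it off  */

	if ((cur ^ req) & F_VLAN_RX)
		printf("%s VLAN stripping\n",
		       (req & F_VLAN_RX) ? "enable" : "disable");
	return 0;
}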
- */ - if (skb_is_gso(skb) && (skb_shinfo(skb)->gso_size < 64)) - features &= ~NETIF_F_GSO_MASK; - - /* MACLEN can support at most 63 words */ - len = skb_network_header(skb) - skb->data; - if (len & ~(63 * 2)) - goto out_err; - - /* IPLEN and EIPLEN can support at most 127 dwords */ - len = skb_transport_header(skb) - skb_network_header(skb); - if (len & ~(127 * 4)) - goto out_err; - - if (skb->encapsulation) { - /* L4TUNLEN can support 127 words */ - len = skb_inner_network_header(skb) - skb_transport_header(skb); - if (len & ~(127 * 2)) - goto out_err; - - /* IPLEN can support at most 127 dwords */ - len = skb_inner_transport_header(skb) - - skb_inner_network_header(skb); - if (len & ~(127 * 4)) - goto out_err; - } - - /* No need to validate L4LEN as TCP is the only protocol with a - * a flexible value and we support all possible values supported - * by TCP, which is at most 15 dwords - */ - - return features; -out_err: - return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK); -} - -/** - * iavf_fix_features - fix up the netdev feature bits - * @netdev: our net device - * @features: desired feature bits - * - * Returns fixed-up features bits - **/ -static netdev_features_t iavf_fix_features(struct net_device *netdev, - netdev_features_t features) -{ - struct iavf_adapter *adapter = netdev_priv(netdev); - - if (!(adapter->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_VLAN)) - features &= ~(NETIF_F_HW_VLAN_CTAG_TX | - NETIF_F_HW_VLAN_CTAG_RX | - NETIF_F_HW_VLAN_CTAG_FILTER); - - return features; -} - -static const struct net_device_ops iavf_netdev_ops = { - .ndo_open = iavf_open, - .ndo_stop = iavf_close, - .ndo_start_xmit = iavf_xmit_frame, - .ndo_set_rx_mode = iavf_set_rx_mode, - .ndo_validate_addr = eth_validate_addr, - .ndo_set_mac_address = iavf_set_mac, - .ndo_change_mtu = iavf_change_mtu, - .ndo_tx_timeout = iavf_tx_timeout, - .ndo_vlan_rx_add_vid = iavf_vlan_rx_add_vid, - .ndo_vlan_rx_kill_vid = iavf_vlan_rx_kill_vid, - .ndo_features_check = iavf_features_check, - .ndo_fix_features = iavf_fix_features, - .ndo_set_features = iavf_set_features, -#ifdef CONFIG_NET_POLL_CONTROLLER - .ndo_poll_controller = iavf_netpoll, -#endif - .ndo_setup_tc = iavf_setup_tc, -}; - -/** - * iavf_check_reset_complete - check that VF reset is complete - * @hw: pointer to hw struct - * - * Returns 0 if device is ready to use, or -EBUSY if it's in reset. - **/ -static int iavf_check_reset_complete(struct i40e_hw *hw) -{ - u32 rstat; - int i; - - for (i = 0; i < 100; i++) { - rstat = rd32(hw, I40E_VFGEN_RSTAT) & - I40E_VFGEN_RSTAT_VFR_STATE_MASK; - if ((rstat == VIRTCHNL_VFR_VFACTIVE) || - (rstat == VIRTCHNL_VFR_COMPLETED)) - return 0; - usleep_range(10, 20); - } - return -EBUSY; -} - -/** - * iavf_process_config - Process the config information we got from the PF - * @adapter: board private structure - * - * Verify that we have a valid config struct, and set up our netdev features - * and our VSI struct. 
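[The limits in iavf_features_check() come from the Tx descriptor fields: IPLEN, for instance, is a 7-bit count of 4-byte words, so the only encodable lengths are multiples of 4 up to 127 * 4 = 508, and "len & ~(127 * 4)" is nonzero exactly when that fails:]

#include <stdio.h>

static int iplen_ok(unsigned long len)
{
	return (len & ~(127UL * 4)) == 0;
}

int main(void)
{
	printf("%d %d %d %d\n",
	       iplen_ok(20),	/* 1: typical IPv4 header  */
	       iplen_ok(508),	/* 1: maximum encodable    */
	       iplen_ok(510),	/* 0: not a 4-byte multiple */
	       iplen_ok(512));	/* 0: too large            */
	return 0;
}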
- **/ -int iavf_process_config(struct iavf_adapter *adapter) -{ - struct virtchnl_vf_resource *vfres = adapter->vf_res; - int i, num_req_queues = adapter->num_req_queues; - struct net_device *netdev = adapter->netdev; - struct i40e_vsi *vsi = &adapter->vsi; - netdev_features_t hw_enc_features; - netdev_features_t hw_features; - - /* got VF config message back from PF, now we can parse it */ - for (i = 0; i < vfres->num_vsis; i++) { - if (vfres->vsi_res[i].vsi_type == VIRTCHNL_VSI_SRIOV) - adapter->vsi_res = &vfres->vsi_res[i]; - } - if (!adapter->vsi_res) { - dev_err(&adapter->pdev->dev, "No LAN VSI found\n"); - return -ENODEV; - } - - if (num_req_queues && - num_req_queues != adapter->vsi_res->num_queue_pairs) { - /* Problem. The PF gave us fewer queues than what we had - * negotiated in our request. Need a reset to see if we can't - * get back to a working state. - */ - dev_err(&adapter->pdev->dev, - "Requested %d queues, but PF only gave us %d.\n", - num_req_queues, - adapter->vsi_res->num_queue_pairs); - adapter->flags |= IAVF_FLAG_REINIT_ITR_NEEDED; - adapter->num_req_queues = adapter->vsi_res->num_queue_pairs; - iavf_schedule_reset(adapter); - return -ENODEV; - } - adapter->num_req_queues = 0; - - hw_enc_features = NETIF_F_SG | - NETIF_F_IP_CSUM | - NETIF_F_IPV6_CSUM | - NETIF_F_HIGHDMA | - NETIF_F_SOFT_FEATURES | - NETIF_F_TSO | - NETIF_F_TSO_ECN | - NETIF_F_TSO6 | - NETIF_F_SCTP_CRC | - NETIF_F_RXHASH | - NETIF_F_RXCSUM | - 0; - - /* advertise to stack only if offloads for encapsulated packets is - * supported - */ - if (vfres->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_ENCAP) { - hw_enc_features |= NETIF_F_GSO_UDP_TUNNEL | - NETIF_F_GSO_GRE | - NETIF_F_GSO_GRE_CSUM | - NETIF_F_GSO_IPXIP4 | - NETIF_F_GSO_IPXIP6 | - NETIF_F_GSO_UDP_TUNNEL_CSUM | - NETIF_F_GSO_PARTIAL | - 0; - - if (!(vfres->vf_cap_flags & - VIRTCHNL_VF_OFFLOAD_ENCAP_CSUM)) - netdev->gso_partial_features |= - NETIF_F_GSO_UDP_TUNNEL_CSUM; - - netdev->gso_partial_features |= NETIF_F_GSO_GRE_CSUM; - netdev->hw_enc_features |= NETIF_F_TSO_MANGLEID; - netdev->hw_enc_features |= hw_enc_features; - } - /* record features VLANs can make use of */ - netdev->vlan_features |= hw_enc_features | NETIF_F_TSO_MANGLEID; - - /* Write features and hw_features separately to avoid polluting - * with, or dropping, features that are set when we registered. - */ - hw_features = hw_enc_features; - - /* Enable VLAN features if supported */ - if (vfres->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_VLAN) - hw_features |= (NETIF_F_HW_VLAN_CTAG_TX | - NETIF_F_HW_VLAN_CTAG_RX); - /* Enable cloud filter if ADQ is supported */ - if (vfres->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_ADQ) - hw_features |= NETIF_F_HW_TC; - - netdev->hw_features |= hw_features; - - netdev->features |= hw_features; - - if (vfres->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_VLAN) - netdev->features |= NETIF_F_HW_VLAN_CTAG_FILTER; - - netdev->priv_flags |= IFF_UNICAST_FLT; - - /* Do not turn on offloads when they are requested to be turned off. - * TSO needs minimum 576 bytes to work correctly. 
- */ - if (netdev->wanted_features) { - if (!(netdev->wanted_features & NETIF_F_TSO) || - netdev->mtu < 576) - netdev->features &= ~NETIF_F_TSO; - if (!(netdev->wanted_features & NETIF_F_TSO6) || - netdev->mtu < 576) - netdev->features &= ~NETIF_F_TSO6; - if (!(netdev->wanted_features & NETIF_F_TSO_ECN)) - netdev->features &= ~NETIF_F_TSO_ECN; - if (!(netdev->wanted_features & NETIF_F_GRO)) - netdev->features &= ~NETIF_F_GRO; - if (!(netdev->wanted_features & NETIF_F_GSO)) - netdev->features &= ~NETIF_F_GSO; - } - - adapter->vsi.id = adapter->vsi_res->vsi_id; - - adapter->vsi.back = adapter; - adapter->vsi.base_vector = 1; - adapter->vsi.work_limit = I40E_DEFAULT_IRQ_WORK; - vsi->netdev = adapter->netdev; - vsi->qs_handle = adapter->vsi_res->qset_handle; - if (vfres->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_RSS_PF) { - adapter->rss_key_size = vfres->rss_key_size; - adapter->rss_lut_size = vfres->rss_lut_size; - } else { - adapter->rss_key_size = IAVF_HKEY_ARRAY_SIZE; - adapter->rss_lut_size = IAVF_HLUT_ARRAY_SIZE; - } - - return 0; -} - -/** - * iavf_init_task - worker thread to perform delayed initialization - * @work: pointer to work_struct containing our data - * - * This task completes the work that was begun in probe. Due to the nature - * of VF-PF communications, we may need to wait tens of milliseconds to get - * responses back from the PF. Rather than busy-wait in probe and bog down the - * whole system, we'll do it in a task so we can sleep. - * This task only runs during driver init. Once we've established - * communications with the PF driver and set up our netdev, the watchdog - * takes over. - **/ -static void iavf_init_task(struct work_struct *work) -{ - struct iavf_adapter *adapter = container_of(work, - struct iavf_adapter, - init_task.work); - struct net_device *netdev = adapter->netdev; - struct i40e_hw *hw = &adapter->hw; - struct pci_dev *pdev = adapter->pdev; - int err, bufsz; - - switch (adapter->state) { - case __IAVF_STARTUP: - /* driver loaded, probe complete */ - adapter->flags &= ~IAVF_FLAG_PF_COMMS_FAILED; - adapter->flags &= ~IAVF_FLAG_RESET_PENDING; - err = i40e_set_mac_type(hw); - if (err) { - dev_err(&pdev->dev, "Failed to set MAC type (%d)\n", - err); - goto err; - } - err = iavf_check_reset_complete(hw); - if (err) { - dev_info(&pdev->dev, "Device is still in reset (%d), retrying\n", - err); - goto err; - } - hw->aq.num_arq_entries = IAVF_AQ_LEN; - hw->aq.num_asq_entries = IAVF_AQ_LEN; - hw->aq.arq_buf_size = IAVF_MAX_AQ_BUF_SIZE; - hw->aq.asq_buf_size = IAVF_MAX_AQ_BUF_SIZE; - - err = iavf_init_adminq(hw); - if (err) { - dev_err(&pdev->dev, "Failed to init Admin Queue (%d)\n", - err); - goto err; - } - err = iavf_send_api_ver(adapter); - if (err) { - dev_err(&pdev->dev, "Unable to send to PF (%d)\n", err); - iavf_shutdown_adminq(hw); - goto err; - } - adapter->state = __IAVF_INIT_VERSION_CHECK; - goto restart; - case __IAVF_INIT_VERSION_CHECK: - if (!iavf_asq_done(hw)) { - dev_err(&pdev->dev, "Admin queue command never completed\n"); - iavf_shutdown_adminq(hw); - adapter->state = __IAVF_STARTUP; - goto err; - } - - /* aq msg sent, awaiting reply */ - err = iavf_verify_api_ver(adapter); - if (err) { - if (err == I40E_ERR_ADMIN_QUEUE_NO_WORK) - err = iavf_send_api_ver(adapter); - else - dev_err(&pdev->dev, "Unsupported PF API version %d.%d, expected %d.%d\n", - adapter->pf_version.major, - adapter->pf_version.minor, - VIRTCHNL_VERSION_MAJOR, - VIRTCHNL_VERSION_MINOR); - goto err; - } - err = iavf_send_vf_config_msg(adapter); - if (err) { - dev_err(&pdev->dev, 
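[iavf_init_task() is a small state machine driven by delayed work: each PF round trip advances the state and re-arms the task instead of blocking. A skeleton of that shape with the virtchnl traffic stubbed out:]

#include <stdio.h>

enum state { STARTUP, VERSION_CHECK, GET_RESOURCES, INIT_SW };

static enum state step(enum state s)
{
	switch (s) {
	case STARTUP:		/* bring up admin queue, send version */
		return VERSION_CHECK;
	case VERSION_CHECK:	/* verify PF API version, request config */
		return GET_RESOURCES;
	case GET_RESOURCES:	/* parse VF resources */
		return INIT_SW;
	default:
		return s;
	}
}

int main(void)
{
	enum state s = STARTUP;

	while (s != INIT_SW) {
		s = step(s);	/* driver: schedule_delayed_work(...) */
		printf("state -> %d\n", s);
	}
	return 0;
}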
"Unable to send config request (%d)\n", - err); - goto err; - } - adapter->state = __IAVF_INIT_GET_RESOURCES; - goto restart; - case __IAVF_INIT_GET_RESOURCES: - /* aq msg sent, awaiting reply */ - if (!adapter->vf_res) { - bufsz = sizeof(struct virtchnl_vf_resource) + - (I40E_MAX_VF_VSI * - sizeof(struct virtchnl_vsi_resource)); - adapter->vf_res = kzalloc(bufsz, GFP_KERNEL); - if (!adapter->vf_res) - goto err; - } - err = iavf_get_vf_config(adapter); - if (err == I40E_ERR_ADMIN_QUEUE_NO_WORK) { - err = iavf_send_vf_config_msg(adapter); - goto err; - } else if (err == I40E_ERR_PARAM) { - /* We only get ERR_PARAM if the device is in a very bad - * state or if we've been disabled for previous bad - * behavior. Either way, we're done now. - */ - iavf_shutdown_adminq(hw); - dev_err(&pdev->dev, "Unable to get VF config due to PF error condition, not retrying\n"); - return; - } - if (err) { - dev_err(&pdev->dev, "Unable to get VF config (%d)\n", - err); - goto err_alloc; - } - adapter->state = __IAVF_INIT_SW; - break; - default: - goto err_alloc; - } - - if (iavf_process_config(adapter)) - goto err_alloc; - adapter->current_op = VIRTCHNL_OP_UNKNOWN; - - adapter->flags |= IAVF_FLAG_RX_CSUM_ENABLED; - - netdev->netdev_ops = &iavf_netdev_ops; - iavf_set_ethtool_ops(netdev); - netdev->watchdog_timeo = 5 * HZ; - - /* MTU range: 68 - 9710 */ - netdev->min_mtu = ETH_MIN_MTU; - netdev->max_mtu = I40E_MAX_RXBUFFER - I40E_PACKET_HDR_PAD; - - if (!is_valid_ether_addr(adapter->hw.mac.addr)) { - dev_info(&pdev->dev, "Invalid MAC address %pM, using random\n", - adapter->hw.mac.addr); - eth_hw_addr_random(netdev); - ether_addr_copy(adapter->hw.mac.addr, netdev->dev_addr); - } else { - adapter->flags |= IAVF_FLAG_ADDR_SET_BY_PF; - ether_addr_copy(netdev->dev_addr, adapter->hw.mac.addr); - ether_addr_copy(netdev->perm_addr, adapter->hw.mac.addr); - } - - timer_setup(&adapter->watchdog_timer, iavf_watchdog_timer, 0); - mod_timer(&adapter->watchdog_timer, jiffies + 1); - - adapter->tx_desc_count = IAVF_DEFAULT_TXD; - adapter->rx_desc_count = IAVF_DEFAULT_RXD; - err = iavf_init_interrupt_scheme(adapter); - if (err) - goto err_sw_init; - iavf_map_rings_to_vectors(adapter); - if (adapter->vf_res->vf_cap_flags & - VIRTCHNL_VF_OFFLOAD_WB_ON_ITR) - adapter->flags |= IAVF_FLAG_WB_ON_ITR_CAPABLE; - - err = iavf_request_misc_irq(adapter); - if (err) - goto err_sw_init; - - netif_carrier_off(netdev); - adapter->link_up = false; - - if (!adapter->netdev_registered) { - err = register_netdev(netdev); - if (err) - goto err_register; - } - - adapter->netdev_registered = true; - - netif_tx_stop_all_queues(netdev); - if (CLIENT_ALLOWED(adapter)) { - err = iavf_lan_add_device(adapter); - if (err) - dev_info(&pdev->dev, "Failed to add VF to client API service list: %d\n", - err); - } - - dev_info(&pdev->dev, "MAC address: %pM\n", adapter->hw.mac.addr); - if (netdev->features & NETIF_F_GRO) - dev_info(&pdev->dev, "GRO is enabled\n"); - - adapter->state = __IAVF_DOWN; - set_bit(__I40E_VSI_DOWN, adapter->vsi.state); - iavf_misc_irq_enable(adapter); - wake_up(&adapter->down_waitqueue); - - adapter->rss_key = kzalloc(adapter->rss_key_size, GFP_KERNEL); - adapter->rss_lut = kzalloc(adapter->rss_lut_size, GFP_KERNEL); - if (!adapter->rss_key || !adapter->rss_lut) - goto err_mem; - - if (RSS_AQ(adapter)) { - adapter->aq_required |= IAVF_FLAG_AQ_CONFIGURE_RSS; - mod_timer_pending(&adapter->watchdog_timer, jiffies + 1); - } else { - iavf_init_rss(adapter); - } - return; -restart: - schedule_delayed_work(&adapter->init_task, 
msecs_to_jiffies(30)); - return; -err_mem: - iavf_free_rss(adapter); -err_register: - iavf_free_misc_irq(adapter); -err_sw_init: - iavf_reset_interrupt_capability(adapter); -err_alloc: - kfree(adapter->vf_res); - adapter->vf_res = NULL; -err: - /* Things went into the weeds, so try again later */ - if (++adapter->aq_wait_count > IAVF_AQ_MAX_ERR) { - dev_err(&pdev->dev, "Failed to communicate with PF; waiting before retry\n"); - adapter->flags |= IAVF_FLAG_PF_COMMS_FAILED; - iavf_shutdown_adminq(hw); - adapter->state = __IAVF_STARTUP; - schedule_delayed_work(&adapter->init_task, HZ * 5); - return; - } - schedule_delayed_work(&adapter->init_task, HZ); -} - -/** - * iavf_shutdown - Shutdown the device in preparation for a reboot - * @pdev: pci device structure - **/ -static void iavf_shutdown(struct pci_dev *pdev) -{ - struct net_device *netdev = pci_get_drvdata(pdev); - struct iavf_adapter *adapter = netdev_priv(netdev); - - netif_device_detach(netdev); - - if (netif_running(netdev)) - iavf_close(netdev); - - /* Prevent the watchdog from running. */ - adapter->state = __IAVF_REMOVE; - adapter->aq_required = 0; - -#ifdef CONFIG_PM - pci_save_state(pdev); - -#endif - pci_disable_device(pdev); -} - -/** - * iavf_probe - Device Initialization Routine - * @pdev: PCI device information struct - * @ent: entry in iavf_pci_tbl - * - * Returns 0 on success, negative on failure - * - * iavf_probe initializes an adapter identified by a pci_dev structure. - * The OS initialization, configuring of the adapter private structure, - * and a hardware reset occur. - **/ -static int iavf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) -{ - struct net_device *netdev; - struct iavf_adapter *adapter = NULL; - struct i40e_hw *hw = NULL; - int err; - - err = pci_enable_device(pdev); - if (err) - return err; - - err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); - if (err) { - err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); - if (err) { - dev_err(&pdev->dev, - "DMA configuration failed: 0x%x\n", err); - goto err_dma; - } - } - - err = pci_request_regions(pdev, iavf_driver_name); - if (err) { - dev_err(&pdev->dev, - "pci_request_regions failed 0x%x\n", err); - goto err_pci_reg; - } - - pci_enable_pcie_error_reporting(pdev); - - pci_set_master(pdev); - - netdev = alloc_etherdev_mq(sizeof(struct iavf_adapter), - IAVF_MAX_REQ_QUEUES); - if (!netdev) { - err = -ENOMEM; - goto err_alloc_etherdev; - } - - SET_NETDEV_DEV(netdev, &pdev->dev); - - pci_set_drvdata(pdev, netdev); - adapter = netdev_priv(netdev); - - adapter->netdev = netdev; - adapter->pdev = pdev; - - hw = &adapter->hw; - hw->back = adapter; - - adapter->msg_enable = BIT(DEFAULT_DEBUG_LEVEL_SHIFT) - 1; - adapter->state = __IAVF_STARTUP; - - /* Call save state here because it relies on the adapter struct. 
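[Probe starts by asking for a 64-bit DMA mask and only falls back to 32-bit if the platform refuses, matching the dma_set_mask_and_coherent() pair above. A userspace stand-in for the pattern:]

#include <stdint.h>
#include <stdio.h>

static int set_dma_mask(uint64_t mask)
{
	/* pretend the platform only supports 32-bit addressing */
	return mask > 0xffffffffULL ? -1 : 0;
}

int main(void)
{
	if (set_dma_mask(~0ULL) == 0)
		printf("using 64-bit DMA\n");
	else if (set_dma_mask(0xffffffffULL) == 0)
		printf("falling back to 32-bit DMA\n");
	else
		printf("DMA configuration failed\n");
	return 0;
}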
*/ - pci_save_state(pdev); - - hw->hw_addr = ioremap(pci_resource_start(pdev, 0), - pci_resource_len(pdev, 0)); - if (!hw->hw_addr) { - err = -EIO; - goto err_ioremap; - } - hw->vendor_id = pdev->vendor; - hw->device_id = pdev->device; - pci_read_config_byte(pdev, PCI_REVISION_ID, &hw->revision_id); - hw->subsystem_vendor_id = pdev->subsystem_vendor; - hw->subsystem_device_id = pdev->subsystem_device; - hw->bus.device = PCI_SLOT(pdev->devfn); - hw->bus.func = PCI_FUNC(pdev->devfn); - hw->bus.bus_id = pdev->bus->number; - - /* set up the locks for the AQ, do this only once in probe - * and destroy them only once in remove - */ - mutex_init(&hw->aq.asq_mutex); - mutex_init(&hw->aq.arq_mutex); - - spin_lock_init(&adapter->mac_vlan_list_lock); - spin_lock_init(&adapter->cloud_filter_list_lock); - - INIT_LIST_HEAD(&adapter->mac_filter_list); - INIT_LIST_HEAD(&adapter->vlan_filter_list); - INIT_LIST_HEAD(&adapter->cloud_filter_list); - - INIT_WORK(&adapter->reset_task, iavf_reset_task); - INIT_WORK(&adapter->adminq_task, iavf_adminq_task); - INIT_WORK(&adapter->watchdog_task, iavf_watchdog_task); - INIT_DELAYED_WORK(&adapter->client_task, iavf_client_task); - INIT_DELAYED_WORK(&adapter->init_task, iavf_init_task); - schedule_delayed_work(&adapter->init_task, - msecs_to_jiffies(5 * (pdev->devfn & 0x07))); - - /* Setup the wait queue for indicating transition to down status */ - init_waitqueue_head(&adapter->down_waitqueue); - - return 0; - -err_ioremap: - free_netdev(netdev); -err_alloc_etherdev: - pci_release_regions(pdev); -err_pci_reg: -err_dma: - pci_disable_device(pdev); - return err; -} - -#ifdef CONFIG_PM -/** - * iavf_suspend - Power management suspend routine - * @pdev: PCI device information struct - * @state: unused - * - * Called when the system (VM) is entering sleep/suspend. - **/ -static int iavf_suspend(struct pci_dev *pdev, pm_message_t state) -{ - struct net_device *netdev = pci_get_drvdata(pdev); - struct iavf_adapter *adapter = netdev_priv(netdev); - int retval = 0; - - netif_device_detach(netdev); - - while (test_and_set_bit(__IAVF_IN_CRITICAL_TASK, - &adapter->crit_section)) - usleep_range(500, 1000); - - if (netif_running(netdev)) { - rtnl_lock(); - iavf_down(adapter); - rtnl_unlock(); - } - iavf_free_misc_irq(adapter); - iavf_reset_interrupt_capability(adapter); - - clear_bit(__IAVF_IN_CRITICAL_TASK, &adapter->crit_section); - - retval = pci_save_state(pdev); - if (retval) - return retval; - - pci_disable_device(pdev); - - return 0; -} - -/** - * iavf_resume - Power management resume routine - * @pdev: PCI device information struct - * - * Called when the system (VM) is resumed from sleep/suspend. - **/ -static int iavf_resume(struct pci_dev *pdev) -{ - struct iavf_adapter *adapter = pci_get_drvdata(pdev); - struct net_device *netdev = adapter->netdev; - u32 err; - - pci_set_power_state(pdev, PCI_D0); - pci_restore_state(pdev); - /* pci_restore_state clears dev->state_saved so call - * pci_save_state to restore it. 
- */ - pci_save_state(pdev); - - err = pci_enable_device_mem(pdev); - if (err) { - dev_err(&pdev->dev, "Cannot enable PCI device from suspend.\n"); - return err; - } - pci_set_master(pdev); - - rtnl_lock(); - err = iavf_set_interrupt_capability(adapter); - if (err) { - rtnl_unlock(); - dev_err(&pdev->dev, "Cannot enable MSI-X interrupts.\n"); - return err; - } - err = iavf_request_misc_irq(adapter); - rtnl_unlock(); - if (err) { - dev_err(&pdev->dev, "Cannot get interrupt vector.\n"); - return err; - } - - schedule_work(&adapter->reset_task); - - netif_device_attach(netdev); - - return err; -} - -#endif /* CONFIG_PM */ -/** - * iavf_remove - Device Removal Routine - * @pdev: PCI device information struct - * - * iavf_remove is called by the PCI subsystem to alert the driver - * that it should release a PCI device. The could be caused by a - * Hot-Plug event, or because the driver is going to be removed from - * memory. - **/ -static void iavf_remove(struct pci_dev *pdev) -{ - struct net_device *netdev = pci_get_drvdata(pdev); - struct iavf_adapter *adapter = netdev_priv(netdev); - struct iavf_vlan_filter *vlf, *vlftmp; - struct iavf_mac_filter *f, *ftmp; - struct iavf_cloud_filter *cf, *cftmp; - struct i40e_hw *hw = &adapter->hw; - int err; - /* Indicate we are in remove and not to run reset_task */ - set_bit(__IAVF_IN_REMOVE_TASK, &adapter->crit_section); - cancel_delayed_work_sync(&adapter->init_task); - cancel_work_sync(&adapter->reset_task); - cancel_delayed_work_sync(&adapter->client_task); - if (adapter->netdev_registered) { - unregister_netdev(netdev); - adapter->netdev_registered = false; - } - if (CLIENT_ALLOWED(adapter)) { - err = iavf_lan_del_device(adapter); - if (err) - dev_warn(&pdev->dev, "Failed to delete client device: %d\n", - err); - } - - /* Shut down all the garbage mashers on the detention level */ - adapter->state = __IAVF_REMOVE; - adapter->aq_required = 0; - adapter->flags &= ~IAVF_FLAG_REINIT_ITR_NEEDED; - iavf_request_reset(adapter); - msleep(50); - /* If the FW isn't responding, kick it once, but only once. */ - if (!iavf_asq_done(hw)) { - iavf_request_reset(adapter); - msleep(50); - } - iavf_free_all_tx_resources(adapter); - iavf_free_all_rx_resources(adapter); - iavf_misc_irq_disable(adapter); - iavf_free_misc_irq(adapter); - iavf_reset_interrupt_capability(adapter); - iavf_free_q_vectors(adapter); - - if (adapter->watchdog_timer.function) - del_timer_sync(&adapter->watchdog_timer); - - cancel_work_sync(&adapter->adminq_task); - - iavf_free_rss(adapter); - - if (hw->aq.asq.count) - iavf_shutdown_adminq(hw); - - /* destroy the locks only once, here */ - mutex_destroy(&hw->aq.arq_mutex); - mutex_destroy(&hw->aq.asq_mutex); - - iounmap(hw->hw_addr); - pci_release_regions(pdev); - iavf_free_all_tx_resources(adapter); - iavf_free_all_rx_resources(adapter); - iavf_free_queues(adapter); - kfree(adapter->vf_res); - spin_lock_bh(&adapter->mac_vlan_list_lock); - /* If we got removed before an up/down sequence, we've got a filter - * hanging out there that we need to get rid of. 
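[The filter-list teardown in iavf_remove() below leans on list_for_each_entry_safe(), which caches the next pointer before the current node is freed. The same idea on an ordinary singly linked list:]

#include <stdio.h>
#include <stdlib.h>

struct node {
	int val;
	struct node *next;
};

static void free_all(struct node **head)
{
	struct node *n = *head, *next;

	while (n) {
		next = n->next;	/* cache before free, like the _safe iterators */
		free(n);
		n = next;
	}
	*head = NULL;
}

int main(void)
{
	struct node *head = NULL;
	int i;

	for (i = 0; i < 3; i++) {
		struct node *n = malloc(sizeof(*n));

		if (!n) {
			free_all(&head);
			return 1;
		}
		n->val = i;
		n->next = head;
		head = n;
	}
	free_all(&head);
	return 0;
}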
- */ - list_for_each_entry_safe(f, ftmp, &adapter->mac_filter_list, list) { - list_del(&f->list); - kfree(f); - } - list_for_each_entry_safe(vlf, vlftmp, &adapter->vlan_filter_list, - list) { - list_del(&vlf->list); - kfree(vlf); - } - - spin_unlock_bh(&adapter->mac_vlan_list_lock); - - spin_lock_bh(&adapter->cloud_filter_list_lock); - list_for_each_entry_safe(cf, cftmp, &adapter->cloud_filter_list, list) { - list_del(&cf->list); - kfree(cf); - } - spin_unlock_bh(&adapter->cloud_filter_list_lock); - - free_netdev(netdev); - - pci_disable_pcie_error_reporting(pdev); - - pci_disable_device(pdev); -} - -static struct pci_driver iavf_driver = { - .name = iavf_driver_name, - .id_table = iavf_pci_tbl, - .probe = iavf_probe, - .remove = iavf_remove, -#ifdef CONFIG_PM - .suspend = iavf_suspend, - .resume = iavf_resume, -#endif - .shutdown = iavf_shutdown, -}; - -/** - * i40e_init_module - Driver Registration Routine - * - * i40e_init_module is the first routine called when the driver is - * loaded. All it does is register with the PCI subsystem. - **/ -static int __init iavf_init_module(void) -{ - int ret; - - pr_info("iavf: %s - version %s\n", iavf_driver_string, - iavf_driver_version); - - pr_info("%s\n", iavf_copyright); - - iavf_wq = alloc_workqueue("%s", WQ_UNBOUND | WQ_MEM_RECLAIM, 1, - iavf_driver_name); - if (!iavf_wq) { - pr_err("%s: Failed to create workqueue\n", iavf_driver_name); - return -ENOMEM; - } - ret = pci_register_driver(&iavf_driver); - return ret; -} - -module_init(iavf_init_module); - -/** - * i40e_exit_module - Driver Exit Cleanup Routine - * - * i40e_exit_module is called just before the driver is removed - * from memory. - **/ -static void __exit iavf_exit_module(void) -{ - pci_unregister_driver(&iavf_driver); - destroy_workqueue(iavf_wq); -} - -module_exit(iavf_exit_module); - -/* iavf_main.c */ diff --git a/drivers/net/ethernet/intel/iavf/i40evf_virtchnl.c b/drivers/net/ethernet/intel/iavf/i40evf_virtchnl.c deleted file mode 100644 index a1fa7a22c19a1..0000000000000 --- a/drivers/net/ethernet/intel/iavf/i40evf_virtchnl.c +++ /dev/null @@ -1,1452 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* Copyright(c) 2013 - 2018 Intel Corporation. */ - -#include "i40evf.h" -#include "i40e_prototype.h" -#include "i40evf_client.h" - -/* busy wait delay in msec */ -#define IAVF_BUSY_WAIT_DELAY 10 -#define IAVF_BUSY_WAIT_COUNT 50 - -/** - * iavf_send_pf_msg - * @adapter: adapter structure - * @op: virtual channel opcode - * @msg: pointer to message buffer - * @len: message length - * - * Send message to PF and print status if failure. - **/ -static int iavf_send_pf_msg(struct iavf_adapter *adapter, - enum virtchnl_ops op, u8 *msg, u16 len) -{ - struct i40e_hw *hw = &adapter->hw; - iavf_status err; - - if (adapter->flags & IAVF_FLAG_PF_COMMS_FAILED) - return 0; /* nothing to see here, move along */ - - err = iavf_aq_send_msg_to_pf(hw, op, 0, msg, len, NULL); - if (err) - dev_dbg(&adapter->pdev->dev, "Unable to send opcode %d to PF, err %s, aq_err %s\n", - op, iavf_stat_str(hw, err), - iavf_aq_str(hw, hw->aq.asq_last_status)); - return err; -} - -/** - * iavf_send_api_ver - * @adapter: adapter structure - * - * Send API version admin queue message to the PF. The reply is not checked - * in this function. Returns 0 if the message was successfully - * sent, or one of the I40E_ADMIN_QUEUE_ERROR_ statuses if not. 
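[One thing worth noticing below: iavf_init_module() returns pci_register_driver()'s status directly, so a registration failure leaves the freshly allocated workqueue behind. The conventional unwind looks like this, with userspace stand-ins for both steps:]

#include <stdio.h>
#include <stdlib.h>

static void *alloc_wq(void)      { return malloc(1); }
static void destroy_wq(void *wq) { free(wq); }
static int register_driver(void) { return -1; /* simulate failure */ }

int main(void)
{
	void *wq = alloc_wq();
	int ret;

	if (!wq)
		return 1;		/* -ENOMEM */

	ret = register_driver();
	if (ret) {
		destroy_wq(wq);		/* don't leak the workqueue */
		return 1;
	}
	/* ... unregister and destroy_wq() again on module exit ... */
	destroy_wq(wq);
	return 0;
}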
- **/ -int iavf_send_api_ver(struct iavf_adapter *adapter) -{ - struct virtchnl_version_info vvi; - - vvi.major = VIRTCHNL_VERSION_MAJOR; - vvi.minor = VIRTCHNL_VERSION_MINOR; - - return iavf_send_pf_msg(adapter, VIRTCHNL_OP_VERSION, (u8 *)&vvi, - sizeof(vvi)); -} - -/** - * iavf_verify_api_ver - * @adapter: adapter structure - * - * Compare API versions with the PF. Must be called after admin queue is - * initialized. Returns 0 if API versions match, -EIO if they do not, - * I40E_ERR_ADMIN_QUEUE_NO_WORK if the admin queue is empty, and any errors - * from the firmware are propagated. - **/ -int iavf_verify_api_ver(struct iavf_adapter *adapter) -{ - struct virtchnl_version_info *pf_vvi; - struct i40e_hw *hw = &adapter->hw; - struct i40e_arq_event_info event; - enum virtchnl_ops op; - iavf_status err; - - event.buf_len = IAVF_MAX_AQ_BUF_SIZE; - event.msg_buf = kzalloc(event.buf_len, GFP_KERNEL); - if (!event.msg_buf) { - err = -ENOMEM; - goto out; - } - - while (1) { - err = iavf_clean_arq_element(hw, &event, NULL); - /* When the AQ is empty, iavf_clean_arq_element will return - * nonzero and this loop will terminate. - */ - if (err) - goto out_alloc; - op = - (enum virtchnl_ops)le32_to_cpu(event.desc.cookie_high); - if (op == VIRTCHNL_OP_VERSION) - break; - } - - - err = (iavf_status)le32_to_cpu(event.desc.cookie_low); - if (err) - goto out_alloc; - - if (op != VIRTCHNL_OP_VERSION) { - dev_info(&adapter->pdev->dev, "Invalid reply type %d from PF\n", - op); - err = -EIO; - goto out_alloc; - } - - pf_vvi = (struct virtchnl_version_info *)event.msg_buf; - adapter->pf_version = *pf_vvi; - - if ((pf_vvi->major > VIRTCHNL_VERSION_MAJOR) || - ((pf_vvi->major == VIRTCHNL_VERSION_MAJOR) && - (pf_vvi->minor > VIRTCHNL_VERSION_MINOR))) - err = -EIO; - -out_alloc: - kfree(event.msg_buf); -out: - return err; -} - -/** - * iavf_send_vf_config_msg - * @adapter: adapter structure - * - * Send VF configuration request admin queue message to the PF. The reply - * is not checked in this function. Returns 0 if the message was - * successfully sent, or one of the I40E_ADMIN_QUEUE_ERROR_ statuses if not. - **/ -int iavf_send_vf_config_msg(struct iavf_adapter *adapter) -{ - u32 caps; - - caps = VIRTCHNL_VF_OFFLOAD_L2 | - VIRTCHNL_VF_OFFLOAD_RSS_PF | - VIRTCHNL_VF_OFFLOAD_RSS_AQ | - VIRTCHNL_VF_OFFLOAD_RSS_REG | - VIRTCHNL_VF_OFFLOAD_VLAN | - VIRTCHNL_VF_OFFLOAD_WB_ON_ITR | - VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2 | - VIRTCHNL_VF_OFFLOAD_ENCAP | - VIRTCHNL_VF_OFFLOAD_ENCAP_CSUM | - VIRTCHNL_VF_OFFLOAD_REQ_QUEUES | - VIRTCHNL_VF_OFFLOAD_ADQ; - - adapter->current_op = VIRTCHNL_OP_GET_VF_RESOURCES; - adapter->aq_required &= ~IAVF_FLAG_AQ_GET_CONFIG; - if (PF_IS_V11(adapter)) - return iavf_send_pf_msg(adapter, VIRTCHNL_OP_GET_VF_RESOURCES, - (u8 *)&caps, sizeof(caps)); - else - return iavf_send_pf_msg(adapter, VIRTCHNL_OP_GET_VF_RESOURCES, - NULL, 0); -} - -/** - * iavf_validate_num_queues - * @adapter: adapter structure - * - * Validate that the number of queues the PF has sent in - * VIRTCHNL_OP_GET_VF_RESOURCES is not larger than the VF can handle. 
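[iavf_verify_api_ver() below treats the PF as incompatible when its virtchnl version is strictly newer than the VF's, which reduces to a two-field comparison:]

#include <stdio.h>

static int pf_version_supported(int pf_major, int pf_minor,
				int vf_major, int vf_minor)
{
	if (pf_major > vf_major)
		return 0;
	if (pf_major == vf_major && pf_minor > vf_minor)
		return 0;
	return 1;
}

int main(void)
{
	printf("%d\n", pf_version_supported(1, 1, 1, 1));	/* ok */
	printf("%d\n", pf_version_supported(1, 2, 1, 1));	/* too new */
	return 0;
}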
- **/ -static void iavf_validate_num_queues(struct iavf_adapter *adapter) -{ - if (adapter->vf_res->num_queue_pairs > IAVF_MAX_REQ_QUEUES) { - struct virtchnl_vsi_resource *vsi_res; - int i; - - dev_info(&adapter->pdev->dev, "Received %d queues, but can only have a max of %d\n", - adapter->vf_res->num_queue_pairs, - IAVF_MAX_REQ_QUEUES); - dev_info(&adapter->pdev->dev, "Fixing by reducing queues to %d\n", - IAVF_MAX_REQ_QUEUES); - adapter->vf_res->num_queue_pairs = IAVF_MAX_REQ_QUEUES; - for (i = 0; i < adapter->vf_res->num_vsis; i++) { - vsi_res = &adapter->vf_res->vsi_res[i]; - vsi_res->num_queue_pairs = IAVF_MAX_REQ_QUEUES; - } - } -} - -/** - * iavf_get_vf_config - * @adapter: private adapter structure - * - * Get VF configuration from PF and populate hw structure. Must be called after - * admin queue is initialized. Busy waits until response is received from PF, - * with maximum timeout. Response from PF is returned in the buffer for further - * processing by the caller. - **/ -int iavf_get_vf_config(struct iavf_adapter *adapter) -{ - struct i40e_hw *hw = &adapter->hw; - struct i40e_arq_event_info event; - enum virtchnl_ops op; - iavf_status err; - u16 len; - - len = sizeof(struct virtchnl_vf_resource) + - I40E_MAX_VF_VSI * sizeof(struct virtchnl_vsi_resource); - event.buf_len = len; - event.msg_buf = kzalloc(event.buf_len, GFP_KERNEL); - if (!event.msg_buf) { - err = -ENOMEM; - goto out; - } - - while (1) { - /* When the AQ is empty, iavf_clean_arq_element will return - * nonzero and this loop will terminate. - */ - err = iavf_clean_arq_element(hw, &event, NULL); - if (err) - goto out_alloc; - op = - (enum virtchnl_ops)le32_to_cpu(event.desc.cookie_high); - if (op == VIRTCHNL_OP_GET_VF_RESOURCES) - break; - } - - err = (iavf_status)le32_to_cpu(event.desc.cookie_low); - memcpy(adapter->vf_res, event.msg_buf, min(event.msg_len, len)); - - /* some PFs send more queues than we should have so validate that - * we aren't getting too many queues - */ - if (!err) - iavf_validate_num_queues(adapter); - iavf_vf_parse_hw_config(hw, adapter->vf_res); -out_alloc: - kfree(event.msg_buf); -out: - return err; -} - -/** - * iavf_configure_queues - * @adapter: adapter structure - * - * Request that the PF set up our (previously allocated) queues. - **/ -void iavf_configure_queues(struct iavf_adapter *adapter) -{ - struct virtchnl_vsi_queue_config_info *vqci; - struct virtchnl_queue_pair_info *vqpi; - int pairs = adapter->num_active_queues; - int i, len, max_frame = I40E_MAX_RXBUFFER; - - if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) { - /* bail because we already have a command pending */ - dev_err(&adapter->pdev->dev, "Cannot configure queues, command %d pending\n", - adapter->current_op); - return; - } - adapter->current_op = VIRTCHNL_OP_CONFIG_VSI_QUEUES; - len = sizeof(struct virtchnl_vsi_queue_config_info) + - (sizeof(struct virtchnl_queue_pair_info) * pairs); - vqci = kzalloc(len, GFP_KERNEL); - if (!vqci) - return; - - /* Limit maximum frame size when jumbo frames is not enabled */ - if (!(adapter->flags & IAVF_FLAG_LEGACY_RX) && - (adapter->netdev->mtu <= ETH_DATA_LEN)) - max_frame = I40E_RXBUFFER_1536 - NET_IP_ALIGN; - - vqci->vsi_id = adapter->vsi_res->vsi_id; - vqci->num_queue_pairs = pairs; - vqpi = vqci->qpair; - /* Size check is not needed here - HW max is 16 queue pairs, and we - * can fit info for 31 of them into the AQ buffer before it overflows. 
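[The buffer sizing in iavf_get_vf_config() and iavf_configure_queues() follows the usual header-plus-flexible-array pattern. The struct below is illustrative; the real message layouts come from the virtchnl header:]

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

struct msg_hdr {
	uint16_t vsi_id;
	uint16_t num_elements;
	uint32_t elem[];	/* flexible array member */
};

int main(void)
{
	size_t n = 4;
	size_t len = sizeof(struct msg_hdr) + n * sizeof(uint32_t);
	struct msg_hdr *m = calloc(1, len);

	if (!m)
		return 1;
	m->num_elements = (uint16_t)n;
	printf("message length: %zu bytes\n", len);
	free(m);
	return 0;
}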
- */ - for (i = 0; i < pairs; i++) { - vqpi->txq.vsi_id = vqci->vsi_id; - vqpi->txq.queue_id = i; - vqpi->txq.ring_len = adapter->tx_rings[i].count; - vqpi->txq.dma_ring_addr = adapter->tx_rings[i].dma; - vqpi->rxq.vsi_id = vqci->vsi_id; - vqpi->rxq.queue_id = i; - vqpi->rxq.ring_len = adapter->rx_rings[i].count; - vqpi->rxq.dma_ring_addr = adapter->rx_rings[i].dma; - vqpi->rxq.max_pkt_size = max_frame; - vqpi->rxq.databuffer_size = - ALIGN(adapter->rx_rings[i].rx_buf_len, - BIT_ULL(I40E_RXQ_CTX_DBUFF_SHIFT)); - vqpi++; - } - - adapter->aq_required &= ~IAVF_FLAG_AQ_CONFIGURE_QUEUES; - iavf_send_pf_msg(adapter, VIRTCHNL_OP_CONFIG_VSI_QUEUES, - (u8 *)vqci, len); - kfree(vqci); -} - -/** - * iavf_enable_queues - * @adapter: adapter structure - * - * Request that the PF enable all of our queues. - **/ -void iavf_enable_queues(struct iavf_adapter *adapter) -{ - struct virtchnl_queue_select vqs; - - if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) { - /* bail because we already have a command pending */ - dev_err(&adapter->pdev->dev, "Cannot enable queues, command %d pending\n", - adapter->current_op); - return; - } - adapter->current_op = VIRTCHNL_OP_ENABLE_QUEUES; - vqs.vsi_id = adapter->vsi_res->vsi_id; - vqs.tx_queues = BIT(adapter->num_active_queues) - 1; - vqs.rx_queues = vqs.tx_queues; - adapter->aq_required &= ~IAVF_FLAG_AQ_ENABLE_QUEUES; - iavf_send_pf_msg(adapter, VIRTCHNL_OP_ENABLE_QUEUES, - (u8 *)&vqs, sizeof(vqs)); -} - -/** - * iavf_disable_queues - * @adapter: adapter structure - * - * Request that the PF disable all of our queues. - **/ -void iavf_disable_queues(struct iavf_adapter *adapter) -{ - struct virtchnl_queue_select vqs; - - if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) { - /* bail because we already have a command pending */ - dev_err(&adapter->pdev->dev, "Cannot disable queues, command %d pending\n", - adapter->current_op); - return; - } - adapter->current_op = VIRTCHNL_OP_DISABLE_QUEUES; - vqs.vsi_id = adapter->vsi_res->vsi_id; - vqs.tx_queues = BIT(adapter->num_active_queues) - 1; - vqs.rx_queues = vqs.tx_queues; - adapter->aq_required &= ~IAVF_FLAG_AQ_DISABLE_QUEUES; - iavf_send_pf_msg(adapter, VIRTCHNL_OP_DISABLE_QUEUES, - (u8 *)&vqs, sizeof(vqs)); -} - -/** - * iavf_map_queues - * @adapter: adapter structure - * - * Request that the PF map queues to interrupt vectors. Misc causes, including - * admin queue, are always mapped to vector 0. 
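[The queue selections in iavf_enable_queues()/iavf_disable_queues() are built as "BIT(num_active_queues) - 1": a power of two minus one sets exactly the n low bits, i.e. queues 0 through n-1:]

#include <stdio.h>

int main(void)
{
	unsigned int n;

	for (n = 1; n <= 8; n++)
		printf("%u queues -> mask 0x%02lx\n", n, (1UL << n) - 1);
	return 0;
}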
- **/ -void iavf_map_queues(struct iavf_adapter *adapter) -{ - struct virtchnl_irq_map_info *vimi; - struct virtchnl_vector_map *vecmap; - int v_idx, q_vectors, len; - struct i40e_q_vector *q_vector; - - if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) { - /* bail because we already have a command pending */ - dev_err(&adapter->pdev->dev, "Cannot map queues to vectors, command %d pending\n", - adapter->current_op); - return; - } - adapter->current_op = VIRTCHNL_OP_CONFIG_IRQ_MAP; - - q_vectors = adapter->num_msix_vectors - NONQ_VECS; - - len = sizeof(struct virtchnl_irq_map_info) + - (adapter->num_msix_vectors * - sizeof(struct virtchnl_vector_map)); - vimi = kzalloc(len, GFP_KERNEL); - if (!vimi) - return; - - vimi->num_vectors = adapter->num_msix_vectors; - /* Queue vectors first */ - for (v_idx = 0; v_idx < q_vectors; v_idx++) { - q_vector = &adapter->q_vectors[v_idx]; - vecmap = &vimi->vecmap[v_idx]; - - vecmap->vsi_id = adapter->vsi_res->vsi_id; - vecmap->vector_id = v_idx + NONQ_VECS; - vecmap->txq_map = q_vector->ring_mask; - vecmap->rxq_map = q_vector->ring_mask; - vecmap->rxitr_idx = I40E_RX_ITR; - vecmap->txitr_idx = I40E_TX_ITR; - } - /* Misc vector last - this is only for AdminQ messages */ - vecmap = &vimi->vecmap[v_idx]; - vecmap->vsi_id = adapter->vsi_res->vsi_id; - vecmap->vector_id = 0; - vecmap->txq_map = 0; - vecmap->rxq_map = 0; - - adapter->aq_required &= ~IAVF_FLAG_AQ_MAP_VECTORS; - iavf_send_pf_msg(adapter, VIRTCHNL_OP_CONFIG_IRQ_MAP, - (u8 *)vimi, len); - kfree(vimi); -} - -/** - * iavf_request_queues - * @adapter: adapter structure - * @num: number of requested queues - * - * We get a default number of queues from the PF. This enables us to request a - * different number. Returns 0 on success, negative on failure - **/ -int iavf_request_queues(struct iavf_adapter *adapter, int num) -{ - struct virtchnl_vf_res_request vfres; - - if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) { - /* bail because we already have a command pending */ - dev_err(&adapter->pdev->dev, "Cannot request queues, command %d pending\n", - adapter->current_op); - return -EBUSY; - } - - vfres.num_queue_pairs = num; - - adapter->current_op = VIRTCHNL_OP_REQUEST_QUEUES; - adapter->flags |= IAVF_FLAG_REINIT_ITR_NEEDED; - return iavf_send_pf_msg(adapter, VIRTCHNL_OP_REQUEST_QUEUES, - (u8 *)&vfres, sizeof(vfres)); -} - -/** - * iavf_add_ether_addrs - * @adapter: adapter structure - * - * Request that the PF add one or more addresses to our filters. 
- **/ -void iavf_add_ether_addrs(struct iavf_adapter *adapter) -{ - struct virtchnl_ether_addr_list *veal; - int len, i = 0, count = 0; - struct iavf_mac_filter *f; - bool more = false; - - if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) { - /* bail because we already have a command pending */ - dev_err(&adapter->pdev->dev, "Cannot add filters, command %d pending\n", - adapter->current_op); - return; - } - - spin_lock_bh(&adapter->mac_vlan_list_lock); - - list_for_each_entry(f, &adapter->mac_filter_list, list) { - if (f->add) - count++; - } - if (!count) { - adapter->aq_required &= ~IAVF_FLAG_AQ_ADD_MAC_FILTER; - spin_unlock_bh(&adapter->mac_vlan_list_lock); - return; - } - adapter->current_op = VIRTCHNL_OP_ADD_ETH_ADDR; - - len = sizeof(struct virtchnl_ether_addr_list) + - (count * sizeof(struct virtchnl_ether_addr)); - if (len > IAVF_MAX_AQ_BUF_SIZE) { - dev_warn(&adapter->pdev->dev, "Too many add MAC changes in one request\n"); - count = (IAVF_MAX_AQ_BUF_SIZE - - sizeof(struct virtchnl_ether_addr_list)) / - sizeof(struct virtchnl_ether_addr); - len = sizeof(struct virtchnl_ether_addr_list) + - (count * sizeof(struct virtchnl_ether_addr)); - more = true; - } - - veal = kzalloc(len, GFP_ATOMIC); - if (!veal) { - spin_unlock_bh(&adapter->mac_vlan_list_lock); - return; - } - - veal->vsi_id = adapter->vsi_res->vsi_id; - veal->num_elements = count; - list_for_each_entry(f, &adapter->mac_filter_list, list) { - if (f->add) { - ether_addr_copy(veal->list[i].addr, f->macaddr); - i++; - f->add = false; - if (i == count) - break; - } - } - if (!more) - adapter->aq_required &= ~IAVF_FLAG_AQ_ADD_MAC_FILTER; - - spin_unlock_bh(&adapter->mac_vlan_list_lock); - - iavf_send_pf_msg(adapter, VIRTCHNL_OP_ADD_ETH_ADDR, (u8 *)veal, len); - kfree(veal); -} - -/** - * iavf_del_ether_addrs - * @adapter: adapter structure - * - * Request that the PF remove one or more addresses from our filters. 
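[When more MAC filters are pending than fit in one admin-queue buffer, the code above truncates the message to what fits and leaves the AQ flag set so a follow-up message carries the rest. The arithmetic, with illustrative sizes in place of the virtchnl structs:]

#include <stdio.h>

#define AQ_BUF	4096	/* stand-in for IAVF_MAX_AQ_BUF_SIZE */
#define HDR	8	/* stand-in for the list header */
#define ELEM	8	/* stand-in for one ether-addr element */

int main(void)
{
	int pending = 1000;		/* filters waiting to be sent */

	while (pending > 0) {
		int count = pending;
		int more = 0;

		if (HDR + count * ELEM > AQ_BUF) {
			count = (AQ_BUF - HDR) / ELEM;
			more = 1;	/* leave the request flag set */
		}
		printf("send %d filters%s\n", count,
		       more ? " (more to follow)" : "");
		pending -= count;
	}
	return 0;
}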
- **/ -void iavf_del_ether_addrs(struct iavf_adapter *adapter) -{ - struct virtchnl_ether_addr_list *veal; - struct iavf_mac_filter *f, *ftmp; - int len, i = 0, count = 0; - bool more = false; - - if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) { - /* bail because we already have a command pending */ - dev_err(&adapter->pdev->dev, "Cannot remove filters, command %d pending\n", - adapter->current_op); - return; - } - - spin_lock_bh(&adapter->mac_vlan_list_lock); - - list_for_each_entry(f, &adapter->mac_filter_list, list) { - if (f->remove) - count++; - } - if (!count) { - adapter->aq_required &= ~IAVF_FLAG_AQ_DEL_MAC_FILTER; - spin_unlock_bh(&adapter->mac_vlan_list_lock); - return; - } - adapter->current_op = VIRTCHNL_OP_DEL_ETH_ADDR; - - len = sizeof(struct virtchnl_ether_addr_list) + - (count * sizeof(struct virtchnl_ether_addr)); - if (len > IAVF_MAX_AQ_BUF_SIZE) { - dev_warn(&adapter->pdev->dev, "Too many delete MAC changes in one request\n"); - count = (IAVF_MAX_AQ_BUF_SIZE - - sizeof(struct virtchnl_ether_addr_list)) / - sizeof(struct virtchnl_ether_addr); - len = sizeof(struct virtchnl_ether_addr_list) + - (count * sizeof(struct virtchnl_ether_addr)); - more = true; - } - veal = kzalloc(len, GFP_ATOMIC); - if (!veal) { - spin_unlock_bh(&adapter->mac_vlan_list_lock); - return; - } - - veal->vsi_id = adapter->vsi_res->vsi_id; - veal->num_elements = count; - list_for_each_entry_safe(f, ftmp, &adapter->mac_filter_list, list) { - if (f->remove) { - ether_addr_copy(veal->list[i].addr, f->macaddr); - i++; - list_del(&f->list); - kfree(f); - if (i == count) - break; - } - } - if (!more) - adapter->aq_required &= ~IAVF_FLAG_AQ_DEL_MAC_FILTER; - - spin_unlock_bh(&adapter->mac_vlan_list_lock); - - iavf_send_pf_msg(adapter, VIRTCHNL_OP_DEL_ETH_ADDR, (u8 *)veal, len); - kfree(veal); -} - -/** - * iavf_add_vlans - * @adapter: adapter structure - * - * Request that the PF add one or more VLAN filters to our VSI. 
- **/ -void iavf_add_vlans(struct iavf_adapter *adapter) -{ - struct virtchnl_vlan_filter_list *vvfl; - int len, i = 0, count = 0; - struct iavf_vlan_filter *f; - bool more = false; - - if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) { - /* bail because we already have a command pending */ - dev_err(&adapter->pdev->dev, "Cannot add VLANs, command %d pending\n", - adapter->current_op); - return; - } - - spin_lock_bh(&adapter->mac_vlan_list_lock); - - list_for_each_entry(f, &adapter->vlan_filter_list, list) { - if (f->add) - count++; - } - if (!count) { - adapter->aq_required &= ~IAVF_FLAG_AQ_ADD_VLAN_FILTER; - spin_unlock_bh(&adapter->mac_vlan_list_lock); - return; - } - adapter->current_op = VIRTCHNL_OP_ADD_VLAN; - - len = sizeof(struct virtchnl_vlan_filter_list) + - (count * sizeof(u16)); - if (len > IAVF_MAX_AQ_BUF_SIZE) { - dev_warn(&adapter->pdev->dev, "Too many add VLAN changes in one request\n"); - count = (IAVF_MAX_AQ_BUF_SIZE - - sizeof(struct virtchnl_vlan_filter_list)) / - sizeof(u16); - len = sizeof(struct virtchnl_vlan_filter_list) + - (count * sizeof(u16)); - more = true; - } - vvfl = kzalloc(len, GFP_ATOMIC); - if (!vvfl) { - spin_unlock_bh(&adapter->mac_vlan_list_lock); - return; - } - - vvfl->vsi_id = adapter->vsi_res->vsi_id; - vvfl->num_elements = count; - list_for_each_entry(f, &adapter->vlan_filter_list, list) { - if (f->add) { - vvfl->vlan_id[i] = f->vlan; - i++; - f->add = false; - if (i == count) - break; - } - } - if (!more) - adapter->aq_required &= ~IAVF_FLAG_AQ_ADD_VLAN_FILTER; - - spin_unlock_bh(&adapter->mac_vlan_list_lock); - - iavf_send_pf_msg(adapter, VIRTCHNL_OP_ADD_VLAN, (u8 *)vvfl, len); - kfree(vvfl); -} - -/** - * iavf_del_vlans - * @adapter: adapter structure - * - * Request that the PF remove one or more VLAN filters from our VSI. 
- **/ -void iavf_del_vlans(struct iavf_adapter *adapter) -{ - struct virtchnl_vlan_filter_list *vvfl; - struct iavf_vlan_filter *f, *ftmp; - int len, i = 0, count = 0; - bool more = false; - - if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) { - /* bail because we already have a command pending */ - dev_err(&adapter->pdev->dev, "Cannot remove VLANs, command %d pending\n", - adapter->current_op); - return; - } - - spin_lock_bh(&adapter->mac_vlan_list_lock); - - list_for_each_entry(f, &adapter->vlan_filter_list, list) { - if (f->remove) - count++; - } - if (!count) { - adapter->aq_required &= ~IAVF_FLAG_AQ_DEL_VLAN_FILTER; - spin_unlock_bh(&adapter->mac_vlan_list_lock); - return; - } - adapter->current_op = VIRTCHNL_OP_DEL_VLAN; - - len = sizeof(struct virtchnl_vlan_filter_list) + - (count * sizeof(u16)); - if (len > IAVF_MAX_AQ_BUF_SIZE) { - dev_warn(&adapter->pdev->dev, "Too many delete VLAN changes in one request\n"); - count = (IAVF_MAX_AQ_BUF_SIZE - - sizeof(struct virtchnl_vlan_filter_list)) / - sizeof(u16); - len = sizeof(struct virtchnl_vlan_filter_list) + - (count * sizeof(u16)); - more = true; - } - vvfl = kzalloc(len, GFP_ATOMIC); - if (!vvfl) { - spin_unlock_bh(&adapter->mac_vlan_list_lock); - return; - } - - vvfl->vsi_id = adapter->vsi_res->vsi_id; - vvfl->num_elements = count; - list_for_each_entry_safe(f, ftmp, &adapter->vlan_filter_list, list) { - if (f->remove) { - vvfl->vlan_id[i] = f->vlan; - i++; - list_del(&f->list); - kfree(f); - if (i == count) - break; - } - } - if (!more) - adapter->aq_required &= ~IAVF_FLAG_AQ_DEL_VLAN_FILTER; - - spin_unlock_bh(&adapter->mac_vlan_list_lock); - - iavf_send_pf_msg(adapter, VIRTCHNL_OP_DEL_VLAN, (u8 *)vvfl, len); - kfree(vvfl); -} - -/** - * iavf_set_promiscuous - * @adapter: adapter structure - * @flags: bitmask to control unicast/multicast promiscuous. - * - * Request that the PF enable promiscuous mode for our VSI. - **/ -void iavf_set_promiscuous(struct iavf_adapter *adapter, int flags) -{ - struct virtchnl_promisc_info vpi; - int promisc_all; - - if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) { - /* bail because we already have a command pending */ - dev_err(&adapter->pdev->dev, "Cannot set promiscuous mode, command %d pending\n", - adapter->current_op); - return; - } - - promisc_all = FLAG_VF_UNICAST_PROMISC | - FLAG_VF_MULTICAST_PROMISC; - if ((flags & promisc_all) == promisc_all) { - adapter->flags |= IAVF_FLAG_PROMISC_ON; - adapter->aq_required &= ~IAVF_FLAG_AQ_REQUEST_PROMISC; - dev_info(&adapter->pdev->dev, "Entering promiscuous mode\n"); - } - - if (flags & FLAG_VF_MULTICAST_PROMISC) { - adapter->flags |= IAVF_FLAG_ALLMULTI_ON; - adapter->aq_required &= ~IAVF_FLAG_AQ_REQUEST_ALLMULTI; - dev_info(&adapter->pdev->dev, "Entering multicast promiscuous mode\n"); - } - - if (!flags) { - adapter->flags &= ~(IAVF_FLAG_PROMISC_ON | - IAVF_FLAG_ALLMULTI_ON); - adapter->aq_required &= ~(IAVF_FLAG_AQ_RELEASE_PROMISC | - IAVF_FLAG_AQ_RELEASE_ALLMULTI); - dev_info(&adapter->pdev->dev, "Leaving promiscuous mode\n"); - } - - adapter->current_op = VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE; - vpi.vsi_id = adapter->vsi_res->vsi_id; - vpi.flags = flags; - iavf_send_pf_msg(adapter, VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE, - (u8 *)&vpi, sizeof(vpi)); -} - -/** - * iavf_request_stats - * @adapter: adapter structure - * - * Request VSI statistics from PF. 
- **/ -void iavf_request_stats(struct iavf_adapter *adapter) -{ - struct virtchnl_queue_select vqs; - - if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) { - /* no error message, this isn't crucial */ - return; - } - adapter->current_op = VIRTCHNL_OP_GET_STATS; - vqs.vsi_id = adapter->vsi_res->vsi_id; - /* queue maps are ignored for this message - only the vsi is used */ - if (iavf_send_pf_msg(adapter, VIRTCHNL_OP_GET_STATS, (u8 *)&vqs, - sizeof(vqs))) - /* if the request failed, don't lock out others */ - adapter->current_op = VIRTCHNL_OP_UNKNOWN; -} - -/** - * iavf_get_hena - * @adapter: adapter structure - * - * Request hash enable capabilities from PF - **/ -void iavf_get_hena(struct iavf_adapter *adapter) -{ - if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) { - /* bail because we already have a command pending */ - dev_err(&adapter->pdev->dev, "Cannot get RSS hash capabilities, command %d pending\n", - adapter->current_op); - return; - } - adapter->current_op = VIRTCHNL_OP_GET_RSS_HENA_CAPS; - adapter->aq_required &= ~IAVF_FLAG_AQ_GET_HENA; - iavf_send_pf_msg(adapter, VIRTCHNL_OP_GET_RSS_HENA_CAPS, NULL, 0); -} - -/** - * iavf_set_hena - * @adapter: adapter structure - * - * Request the PF to set our RSS hash capabilities - **/ -void iavf_set_hena(struct iavf_adapter *adapter) -{ - struct virtchnl_rss_hena vrh; - - if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) { - /* bail because we already have a command pending */ - dev_err(&adapter->pdev->dev, "Cannot set RSS hash enable, command %d pending\n", - adapter->current_op); - return; - } - vrh.hena = adapter->hena; - adapter->current_op = VIRTCHNL_OP_SET_RSS_HENA; - adapter->aq_required &= ~IAVF_FLAG_AQ_SET_HENA; - iavf_send_pf_msg(adapter, VIRTCHNL_OP_SET_RSS_HENA, (u8 *)&vrh, - sizeof(vrh)); -} - -/** - * iavf_set_rss_key - * @adapter: adapter structure - * - * Request the PF to set our RSS hash key - **/ -void iavf_set_rss_key(struct iavf_adapter *adapter) -{ - struct virtchnl_rss_key *vrk; - int len; - - if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) { - /* bail because we already have a command pending */ - dev_err(&adapter->pdev->dev, "Cannot set RSS key, command %d pending\n", - adapter->current_op); - return; - } - len = sizeof(struct virtchnl_rss_key) + - (adapter->rss_key_size * sizeof(u8)) - 1; - vrk = kzalloc(len, GFP_KERNEL); - if (!vrk) - return; - vrk->vsi_id = adapter->vsi.id; - vrk->key_len = adapter->rss_key_size; - memcpy(vrk->key, adapter->rss_key, adapter->rss_key_size); - - adapter->current_op = VIRTCHNL_OP_CONFIG_RSS_KEY; - adapter->aq_required &= ~IAVF_FLAG_AQ_SET_RSS_KEY; - iavf_send_pf_msg(adapter, VIRTCHNL_OP_CONFIG_RSS_KEY, (u8 *)vrk, len); - kfree(vrk); -} - -/** - * iavf_set_rss_lut - * @adapter: adapter structure - * - * Request the PF to set our RSS lookup table - **/ -void iavf_set_rss_lut(struct iavf_adapter *adapter) -{ - struct virtchnl_rss_lut *vrl; - int len; - - if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) { - /* bail because we already have a command pending */ - dev_err(&adapter->pdev->dev, "Cannot set RSS LUT, command %d pending\n", - adapter->current_op); - return; - } - len = sizeof(struct virtchnl_rss_lut) + - (adapter->rss_lut_size * sizeof(u8)) - 1; - vrl = kzalloc(len, GFP_KERNEL); - if (!vrl) - return; - vrl->vsi_id = adapter->vsi.id; - vrl->lut_entries = adapter->rss_lut_size; - memcpy(vrl->lut, adapter->rss_lut, adapter->rss_lut_size); - adapter->current_op = VIRTCHNL_OP_CONFIG_RSS_LUT; - adapter->aq_required &= ~IAVF_FLAG_AQ_SET_RSS_LUT; - iavf_send_pf_msg(adapter, 
			 VIRTCHNL_OP_CONFIG_RSS_LUT, (u8 *)vrl, len);
-	kfree(vrl);
-}
-
-/**
- * iavf_enable_vlan_stripping
- * @adapter: adapter structure
- *
- * Request VLAN header stripping to be enabled
- **/
-void iavf_enable_vlan_stripping(struct iavf_adapter *adapter)
-{
-	if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) {
-		/* bail because we already have a command pending */
-		dev_err(&adapter->pdev->dev, "Cannot enable stripping, command %d pending\n",
-			adapter->current_op);
-		return;
-	}
-	adapter->current_op = VIRTCHNL_OP_ENABLE_VLAN_STRIPPING;
-	adapter->aq_required &= ~IAVF_FLAG_AQ_ENABLE_VLAN_STRIPPING;
-	iavf_send_pf_msg(adapter, VIRTCHNL_OP_ENABLE_VLAN_STRIPPING, NULL, 0);
-}
-
-/**
- * iavf_disable_vlan_stripping
- * @adapter: adapter structure
- *
- * Request VLAN header stripping to be disabled
- **/
-void iavf_disable_vlan_stripping(struct iavf_adapter *adapter)
-{
-	if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) {
-		/* bail because we already have a command pending */
-		dev_err(&adapter->pdev->dev, "Cannot disable stripping, command %d pending\n",
-			adapter->current_op);
-		return;
-	}
-	adapter->current_op = VIRTCHNL_OP_DISABLE_VLAN_STRIPPING;
-	adapter->aq_required &= ~IAVF_FLAG_AQ_DISABLE_VLAN_STRIPPING;
-	iavf_send_pf_msg(adapter, VIRTCHNL_OP_DISABLE_VLAN_STRIPPING, NULL, 0);
-}
-
-/**
- * iavf_print_link_message - print link up or down
- * @adapter: adapter structure
- *
- * Log a message telling the world of our wondrous link status
- */
-static void iavf_print_link_message(struct iavf_adapter *adapter)
-{
-	struct net_device *netdev = adapter->netdev;
-	char *speed = "Unknown ";
-
-	if (!adapter->link_up) {
-		netdev_info(netdev, "NIC Link is Down\n");
-		return;
-	}
-
-	switch (adapter->link_speed) {
-	case I40E_LINK_SPEED_40GB:
-		speed = "40 G";
-		break;
-	case I40E_LINK_SPEED_25GB:
-		speed = "25 G";
-		break;
-	case I40E_LINK_SPEED_20GB:
-		speed = "20 G";
-		break;
-	case I40E_LINK_SPEED_10GB:
-		speed = "10 G";
-		break;
-	case I40E_LINK_SPEED_1GB:
-		speed = "1000 M";
-		break;
-	case I40E_LINK_SPEED_100MB:
-		speed = "100 M";
-		break;
-	default:
-		break;
-	}
-
-	netdev_info(netdev, "NIC Link is Up %sbps Full Duplex\n", speed);
-}
-
-/**
- * iavf_enable_channels
- * @adapter: adapter structure
- *
- * Request that the PF enable channels as specified by
- * the user via tc tool.
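 *
 * (Sketch, not part of this patch: for a hypothetical two-TC mqprio
 * configuration, the function below builds a message shaped like:
 *
 *	u16 len = sizeof(struct virtchnl_tc_info) +
 *		  2 * sizeof(struct virtchnl_channel_info);
 *	struct virtchnl_tc_info *vti = kzalloc(len, GFP_KERNEL);
 *
 *	if (vti) {
 *		vti->num_tc = 2;
 *		vti->list[0].count = 2;		first TC: 2 queues at 0
 *		vti->list[0].offset = 0;
 *		vti->list[1].count = 2;		second TC: 2 queues at 2
 *		vti->list[1].offset = 2;
 *	}
 *
 * reusing the same trailing-array sizing convention as the VLAN
 * filter messages earlier in this file.)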
- **/
-void iavf_enable_channels(struct iavf_adapter *adapter)
-{
-	struct virtchnl_tc_info *vti = NULL;
-	u16 len;
-	int i;
-
-	if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) {
-		/* bail because we already have a command pending */
-		dev_err(&adapter->pdev->dev, "Cannot configure mqprio, command %d pending\n",
-			adapter->current_op);
-		return;
-	}
-
-	len = (adapter->num_tc * sizeof(struct virtchnl_channel_info)) +
-	       sizeof(struct virtchnl_tc_info);
-
-	vti = kzalloc(len, GFP_KERNEL);
-	if (!vti)
-		return;
-	vti->num_tc = adapter->num_tc;
-	for (i = 0; i < vti->num_tc; i++) {
-		vti->list[i].count = adapter->ch_config.ch_info[i].count;
-		vti->list[i].offset = adapter->ch_config.ch_info[i].offset;
-		vti->list[i].pad = 0;
-		vti->list[i].max_tx_rate =
-				adapter->ch_config.ch_info[i].max_tx_rate;
-	}
-
-	adapter->ch_config.state = __IAVF_TC_RUNNING;
-	adapter->flags |= IAVF_FLAG_REINIT_ITR_NEEDED;
-	adapter->current_op = VIRTCHNL_OP_ENABLE_CHANNELS;
-	adapter->aq_required &= ~IAVF_FLAG_AQ_ENABLE_CHANNELS;
-	iavf_send_pf_msg(adapter, VIRTCHNL_OP_ENABLE_CHANNELS, (u8 *)vti, len);
-	kfree(vti);
-}
-
-/**
- * iavf_disable_channels
- * @adapter: adapter structure
- *
- * Request that the PF disable channels that are configured
- **/
-void iavf_disable_channels(struct iavf_adapter *adapter)
-{
-	if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) {
-		/* bail because we already have a command pending */
-		dev_err(&adapter->pdev->dev, "Cannot configure mqprio, command %d pending\n",
-			adapter->current_op);
-		return;
-	}
-
-	adapter->ch_config.state = __IAVF_TC_INVALID;
-	adapter->flags |= IAVF_FLAG_REINIT_ITR_NEEDED;
-	adapter->current_op = VIRTCHNL_OP_DISABLE_CHANNELS;
-	adapter->aq_required &= ~IAVF_FLAG_AQ_DISABLE_CHANNELS;
-	iavf_send_pf_msg(adapter, VIRTCHNL_OP_DISABLE_CHANNELS, NULL, 0);
-}
-
-/**
- * iavf_print_cloud_filter
- * @adapter: adapter structure
- * @f: cloud filter to print
- *
- * Print the cloud filter
- **/
-static void iavf_print_cloud_filter(struct iavf_adapter *adapter,
-				    struct virtchnl_filter *f)
-{
-	switch (f->flow_type) {
-	case VIRTCHNL_TCP_V4_FLOW:
-		dev_info(&adapter->pdev->dev, "dst_mac: %pM src_mac: %pM vlan_id: %hu dst_ip: %pI4 src_ip %pI4 dst_port %hu src_port %hu\n",
-			 &f->data.tcp_spec.dst_mac,
-			 &f->data.tcp_spec.src_mac,
-			 ntohs(f->data.tcp_spec.vlan_id),
-			 &f->data.tcp_spec.dst_ip[0],
-			 &f->data.tcp_spec.src_ip[0],
-			 ntohs(f->data.tcp_spec.dst_port),
-			 ntohs(f->data.tcp_spec.src_port));
-		break;
-	case VIRTCHNL_TCP_V6_FLOW:
-		dev_info(&adapter->pdev->dev, "dst_mac: %pM src_mac: %pM vlan_id: %hu dst_ip: %pI6 src_ip %pI6 dst_port %hu src_port %hu\n",
-			 &f->data.tcp_spec.dst_mac,
-			 &f->data.tcp_spec.src_mac,
-			 ntohs(f->data.tcp_spec.vlan_id),
-			 &f->data.tcp_spec.dst_ip,
-			 &f->data.tcp_spec.src_ip,
-			 ntohs(f->data.tcp_spec.dst_port),
-			 ntohs(f->data.tcp_spec.src_port));
-		break;
-	}
-}
-
-/**
- * iavf_add_cloud_filter
- * @adapter: adapter structure
- *
- * Request that the PF add cloud filters as specified
- * by the user via tc tool.
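 *
 * (Sketch, not part of this patch: a hypothetical tc flower rule that
 * redirects TCP traffic with destination port 80 to traffic class 1
 * would arrive here roughly as:
 *
 *	struct virtchnl_filter f = {};
 *
 *	f.flow_type = VIRTCHNL_TCP_V4_FLOW;
 *	f.action = VIRTCHNL_ACTION_TC_REDIRECT;
 *	f.action_meta = 1;
 *	f.data.tcp_spec.dst_port = htons(80);
 *	f.mask.tcp_spec.dst_port = htons(0xffff);
 *
 * Unmasked fields match anything; the PF applies the mask.)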
- **/ -void iavf_add_cloud_filter(struct iavf_adapter *adapter) -{ - struct iavf_cloud_filter *cf; - struct virtchnl_filter *f; - int len = 0, count = 0; - - if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) { - /* bail because we already have a command pending */ - dev_err(&adapter->pdev->dev, "Cannot add cloud filter, command %d pending\n", - adapter->current_op); - return; - } - list_for_each_entry(cf, &adapter->cloud_filter_list, list) { - if (cf->add) { - count++; - break; - } - } - if (!count) { - adapter->aq_required &= ~IAVF_FLAG_AQ_ADD_CLOUD_FILTER; - return; - } - adapter->current_op = VIRTCHNL_OP_ADD_CLOUD_FILTER; - - len = sizeof(struct virtchnl_filter); - f = kzalloc(len, GFP_KERNEL); - if (!f) - return; - - list_for_each_entry(cf, &adapter->cloud_filter_list, list) { - if (cf->add) { - memcpy(f, &cf->f, sizeof(struct virtchnl_filter)); - cf->add = false; - cf->state = __IAVF_CF_ADD_PENDING; - iavf_send_pf_msg(adapter, VIRTCHNL_OP_ADD_CLOUD_FILTER, - (u8 *)f, len); - } - } - kfree(f); -} - -/** - * iavf_del_cloud_filter - * @adapter: adapter structure - * - * Request that the PF delete cloud filters as specified - * by the user via tc tool. - **/ -void iavf_del_cloud_filter(struct iavf_adapter *adapter) -{ - struct iavf_cloud_filter *cf, *cftmp; - struct virtchnl_filter *f; - int len = 0, count = 0; - - if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) { - /* bail because we already have a command pending */ - dev_err(&adapter->pdev->dev, "Cannot remove cloud filter, command %d pending\n", - adapter->current_op); - return; - } - list_for_each_entry(cf, &adapter->cloud_filter_list, list) { - if (cf->del) { - count++; - break; - } - } - if (!count) { - adapter->aq_required &= ~IAVF_FLAG_AQ_DEL_CLOUD_FILTER; - return; - } - adapter->current_op = VIRTCHNL_OP_DEL_CLOUD_FILTER; - - len = sizeof(struct virtchnl_filter); - f = kzalloc(len, GFP_KERNEL); - if (!f) - return; - - list_for_each_entry_safe(cf, cftmp, &adapter->cloud_filter_list, list) { - if (cf->del) { - memcpy(f, &cf->f, sizeof(struct virtchnl_filter)); - cf->del = false; - cf->state = __IAVF_CF_DEL_PENDING; - iavf_send_pf_msg(adapter, VIRTCHNL_OP_DEL_CLOUD_FILTER, - (u8 *)f, len); - } - } - kfree(f); -} - -/** - * iavf_request_reset - * @adapter: adapter structure - * - * Request that the PF reset this VF. No response is expected. - **/ -void iavf_request_reset(struct iavf_adapter *adapter) -{ - /* Don't check CURRENT_OP - this is always higher priority */ - iavf_send_pf_msg(adapter, VIRTCHNL_OP_RESET_VF, NULL, 0); - adapter->current_op = VIRTCHNL_OP_UNKNOWN; -} - -/** - * iavf_virtchnl_completion - * @adapter: adapter structure - * @v_opcode: opcode sent by PF - * @v_retval: retval sent by PF - * @msg: message sent by PF - * @msglen: message length - * - * Asynchronous completion function for admin queue messages. Rather than busy - * wait, we fire off our requests and assume that no errors will be returned. - * This function handles the reply messages. 
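 *
 * (Sketch, not part of this patch, of the contract every request
 * helper in this file follows:
 *
 *	if (adapter->current_op != VIRTCHNL_OP_UNKNOWN)
 *		return;			one op in flight at a time
 *	adapter->current_op = op;
 *	adapter->aq_required &= ~matching_aq_flag;
 *	iavf_send_pf_msg(adapter, op, buf, len);
 *
 * The reply arrives here later, and the tail of this function resets
 * current_op to VIRTCHNL_OP_UNKNOWN so the watchdog can issue the
 * next queued request.)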
- **/
-void iavf_virtchnl_completion(struct iavf_adapter *adapter,
-			      enum virtchnl_ops v_opcode, iavf_status v_retval,
-			      u8 *msg, u16 msglen)
-{
-	struct net_device *netdev = adapter->netdev;
-
-	if (v_opcode == VIRTCHNL_OP_EVENT) {
-		struct virtchnl_pf_event *vpe =
-			(struct virtchnl_pf_event *)msg;
-		bool link_up = vpe->event_data.link_event.link_status;
-
-		switch (vpe->event) {
-		case VIRTCHNL_EVENT_LINK_CHANGE:
-			adapter->link_speed =
-				vpe->event_data.link_event.link_speed;
-
-			/* we've already got the right link status, bail */
-			if (adapter->link_up == link_up)
-				break;
-
-			if (link_up) {
-				/* If we get link up message and start queues
-				 * before our queues are configured it will
-				 * trigger a TX hang. In that case, just ignore
-				 * the link status message; we'll get another one
-				 * after we enable queues and are actually ready
-				 * to send traffic.
-				 */
-				if (adapter->state != __IAVF_RUNNING)
-					break;
-
-				/* For ADq enabled VF, we reconfigure VSIs and
-				 * re-allocate queues. Hence wait till all
-				 * queues are enabled.
-				 */
-				if (adapter->flags &
-				    IAVF_FLAG_QUEUES_DISABLED)
-					break;
-			}
-
-			adapter->link_up = link_up;
-			if (link_up) {
-				netif_tx_start_all_queues(netdev);
-				netif_carrier_on(netdev);
-			} else {
-				netif_tx_stop_all_queues(netdev);
-				netif_carrier_off(netdev);
-			}
-			iavf_print_link_message(adapter);
-			break;
-		case VIRTCHNL_EVENT_RESET_IMPENDING:
-			dev_info(&adapter->pdev->dev, "Reset warning received from the PF\n");
-			if (!(adapter->flags & IAVF_FLAG_RESET_PENDING)) {
-				adapter->flags |= IAVF_FLAG_RESET_PENDING;
-				dev_info(&adapter->pdev->dev, "Scheduling reset task\n");
-				schedule_work(&adapter->reset_task);
-			}
-			break;
-		default:
-			dev_err(&adapter->pdev->dev, "Unknown event %d from PF\n",
-				vpe->event);
-			break;
-		}
-		return;
-	}
-	if (v_retval) {
-		switch (v_opcode) {
-		case VIRTCHNL_OP_ADD_VLAN:
-			dev_err(&adapter->pdev->dev, "Failed to add VLAN filter, error %s\n",
-				iavf_stat_str(&adapter->hw, v_retval));
-			break;
-		case VIRTCHNL_OP_ADD_ETH_ADDR:
-			dev_err(&adapter->pdev->dev, "Failed to add MAC filter, error %s\n",
-				iavf_stat_str(&adapter->hw, v_retval));
-			break;
-		case VIRTCHNL_OP_DEL_VLAN:
-			dev_err(&adapter->pdev->dev, "Failed to delete VLAN filter, error %s\n",
-				iavf_stat_str(&adapter->hw, v_retval));
-			break;
-		case VIRTCHNL_OP_DEL_ETH_ADDR:
-			dev_err(&adapter->pdev->dev, "Failed to delete MAC filter, error %s\n",
-				iavf_stat_str(&adapter->hw, v_retval));
-			break;
-		case VIRTCHNL_OP_ENABLE_CHANNELS:
-			dev_err(&adapter->pdev->dev, "Failed to configure queue channels, error %s\n",
-				iavf_stat_str(&adapter->hw, v_retval));
-			adapter->flags &= ~IAVF_FLAG_REINIT_ITR_NEEDED;
-			adapter->ch_config.state = __IAVF_TC_INVALID;
-			netdev_reset_tc(netdev);
-			netif_tx_start_all_queues(netdev);
-			break;
-		case VIRTCHNL_OP_DISABLE_CHANNELS:
-			dev_err(&adapter->pdev->dev, "Failed to disable queue channels, error %s\n",
-				iavf_stat_str(&adapter->hw, v_retval));
-			adapter->flags &= ~IAVF_FLAG_REINIT_ITR_NEEDED;
-			adapter->ch_config.state = __IAVF_TC_RUNNING;
-			netif_tx_start_all_queues(netdev);
-			break;
-		case VIRTCHNL_OP_ADD_CLOUD_FILTER: {
-			struct iavf_cloud_filter *cf, *cftmp;
-
-			list_for_each_entry_safe(cf, cftmp,
-						 &adapter->cloud_filter_list,
-						 list) {
-				if (cf->state == __IAVF_CF_ADD_PENDING) {
-					cf->state = __IAVF_CF_INVALID;
-					dev_info(&adapter->pdev->dev, "Failed to add cloud filter, error %s\n",
-						 iavf_stat_str(&adapter->hw,
-							       v_retval));
-					iavf_print_cloud_filter(adapter,
-								&cf->f);
-					list_del(&cf->list);
-					kfree(cf);
-					adapter->num_cloud_filters--;
-				}
-			}
-
} - break; - case VIRTCHNL_OP_DEL_CLOUD_FILTER: { - struct iavf_cloud_filter *cf; - - list_for_each_entry(cf, &adapter->cloud_filter_list, - list) { - if (cf->state == __IAVF_CF_DEL_PENDING) { - cf->state = __IAVF_CF_ACTIVE; - dev_info(&adapter->pdev->dev, "Failed to del cloud filter, error %s\n", - iavf_stat_str(&adapter->hw, - v_retval)); - iavf_print_cloud_filter(adapter, - &cf->f); - } - } - } - break; - default: - dev_err(&adapter->pdev->dev, "PF returned error %d (%s) to our request %d\n", - v_retval, iavf_stat_str(&adapter->hw, v_retval), - v_opcode); - } - } - switch (v_opcode) { - case VIRTCHNL_OP_GET_STATS: { - struct i40e_eth_stats *stats = - (struct i40e_eth_stats *)msg; - netdev->stats.rx_packets = stats->rx_unicast + - stats->rx_multicast + - stats->rx_broadcast; - netdev->stats.tx_packets = stats->tx_unicast + - stats->tx_multicast + - stats->tx_broadcast; - netdev->stats.rx_bytes = stats->rx_bytes; - netdev->stats.tx_bytes = stats->tx_bytes; - netdev->stats.tx_errors = stats->tx_errors; - netdev->stats.rx_dropped = stats->rx_discards; - netdev->stats.tx_dropped = stats->tx_discards; - adapter->current_stats = *stats; - } - break; - case VIRTCHNL_OP_GET_VF_RESOURCES: { - u16 len = sizeof(struct virtchnl_vf_resource) + - I40E_MAX_VF_VSI * - sizeof(struct virtchnl_vsi_resource); - memcpy(adapter->vf_res, msg, min(msglen, len)); - iavf_validate_num_queues(adapter); - iavf_vf_parse_hw_config(&adapter->hw, adapter->vf_res); - if (is_zero_ether_addr(adapter->hw.mac.addr)) { - /* restore current mac address */ - ether_addr_copy(adapter->hw.mac.addr, netdev->dev_addr); - } else { - /* refresh current mac address if changed */ - ether_addr_copy(netdev->dev_addr, adapter->hw.mac.addr); - ether_addr_copy(netdev->perm_addr, - adapter->hw.mac.addr); - } - iavf_process_config(adapter); - } - break; - case VIRTCHNL_OP_ENABLE_QUEUES: - /* enable transmits */ - iavf_irq_enable(adapter, true); - adapter->flags &= ~IAVF_FLAG_QUEUES_DISABLED; - break; - case VIRTCHNL_OP_DISABLE_QUEUES: - iavf_free_all_tx_resources(adapter); - iavf_free_all_rx_resources(adapter); - if (adapter->state == __IAVF_DOWN_PENDING) { - adapter->state = __IAVF_DOWN; - wake_up(&adapter->down_waitqueue); - } - break; - case VIRTCHNL_OP_VERSION: - case VIRTCHNL_OP_CONFIG_IRQ_MAP: - /* Don't display an error if we get these out of sequence. - * If the firmware needed to get kicked, we'll get these and - * it's no problem. - */ - if (v_opcode != adapter->current_op) - return; - break; - case VIRTCHNL_OP_IWARP: - /* Gobble zero-length replies from the PF. They indicate that - * a previous message was received OK, and the client doesn't - * care about that. 
- */ - if (msglen && CLIENT_ENABLED(adapter)) - iavf_notify_client_message(&adapter->vsi, msg, msglen); - break; - - case VIRTCHNL_OP_CONFIG_IWARP_IRQ_MAP: - adapter->client_pending &= - ~(BIT(VIRTCHNL_OP_CONFIG_IWARP_IRQ_MAP)); - break; - case VIRTCHNL_OP_GET_RSS_HENA_CAPS: { - struct virtchnl_rss_hena *vrh = (struct virtchnl_rss_hena *)msg; - - if (msglen == sizeof(*vrh)) - adapter->hena = vrh->hena; - else - dev_warn(&adapter->pdev->dev, - "Invalid message %d from PF\n", v_opcode); - } - break; - case VIRTCHNL_OP_REQUEST_QUEUES: { - struct virtchnl_vf_res_request *vfres = - (struct virtchnl_vf_res_request *)msg; - - if (vfres->num_queue_pairs != adapter->num_req_queues) { - dev_info(&adapter->pdev->dev, - "Requested %d queues, PF can support %d\n", - adapter->num_req_queues, - vfres->num_queue_pairs); - adapter->num_req_queues = 0; - adapter->flags &= ~IAVF_FLAG_REINIT_ITR_NEEDED; - } - } - break; - case VIRTCHNL_OP_ADD_CLOUD_FILTER: { - struct iavf_cloud_filter *cf; - - list_for_each_entry(cf, &adapter->cloud_filter_list, list) { - if (cf->state == __IAVF_CF_ADD_PENDING) - cf->state = __IAVF_CF_ACTIVE; - } - } - break; - case VIRTCHNL_OP_DEL_CLOUD_FILTER: { - struct iavf_cloud_filter *cf, *cftmp; - - list_for_each_entry_safe(cf, cftmp, &adapter->cloud_filter_list, - list) { - if (cf->state == __IAVF_CF_DEL_PENDING) { - cf->state = __IAVF_CF_INVALID; - list_del(&cf->list); - kfree(cf); - adapter->num_cloud_filters--; - } - } - } - break; - default: - if (adapter->current_op && (v_opcode != adapter->current_op)) - dev_warn(&adapter->pdev->dev, "Expected response %d from PF, received %d\n", - adapter->current_op, v_opcode); - break; - } /* switch v_opcode */ - adapter->current_op = VIRTCHNL_OP_UNKNOWN; -} diff --git a/drivers/net/ethernet/intel/iavf/iavf.h b/drivers/net/ethernet/intel/iavf/iavf.h new file mode 100644 index 0000000000000..ab1f4f9f1a8fd --- /dev/null +++ b/drivers/net/ethernet/intel/iavf/iavf.h @@ -0,0 +1,427 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright(c) 2013 - 2018 Intel Corporation. */ + +#ifndef _IAVF_H_ +#define _IAVF_H_ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "i40e_type.h" +#include +#include "iavf_txrx.h" + +#define DEFAULT_DEBUG_LEVEL_SHIFT 3 +#define PFX "iavf: " + +/* VSI state flags shared with common code */ +enum iavf_vsi_state_t { + __I40E_VSI_DOWN, + /* This must be last as it determines the size of the BITMAP */ + __I40E_VSI_STATE_SIZE__, +}; + +/* dummy struct to make common code less painful */ +struct i40e_vsi { + struct iavf_adapter *back; + struct net_device *netdev; + unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)]; + u16 seid; + u16 id; + DECLARE_BITMAP(state, __I40E_VSI_STATE_SIZE__); + int base_vector; + u16 work_limit; + u16 qs_handle; + void *priv; /* client driver data reference. */ +}; + +/* How many Rx Buffers do we bundle into one write to the hardware ? 
*/ +#define IAVF_RX_BUFFER_WRITE 16 /* Must be power of 2 */ +#define IAVF_DEFAULT_TXD 512 +#define IAVF_DEFAULT_RXD 512 +#define IAVF_MAX_TXD 4096 +#define IAVF_MIN_TXD 64 +#define IAVF_MAX_RXD 4096 +#define IAVF_MIN_RXD 64 +#define IAVF_REQ_DESCRIPTOR_MULTIPLE 32 +#define IAVF_MAX_AQ_BUF_SIZE 4096 +#define IAVF_AQ_LEN 32 +#define IAVF_AQ_MAX_ERR 20 /* times to try before resetting AQ */ + +#define MAXIMUM_ETHERNET_VLAN_SIZE (VLAN_ETH_FRAME_LEN + ETH_FCS_LEN) + +#define I40E_RX_DESC(R, i) (&(((union i40e_32byte_rx_desc *)((R)->desc))[i])) +#define I40E_TX_DESC(R, i) (&(((struct i40e_tx_desc *)((R)->desc))[i])) +#define I40E_TX_CTXTDESC(R, i) \ + (&(((struct i40e_tx_context_desc *)((R)->desc))[i])) +#define IAVF_MAX_REQ_QUEUES 4 + +#define IAVF_HKEY_ARRAY_SIZE ((I40E_VFQF_HKEY_MAX_INDEX + 1) * 4) +#define IAVF_HLUT_ARRAY_SIZE ((I40E_VFQF_HLUT_MAX_INDEX + 1) * 4) +#define IAVF_MBPS_DIVISOR 125000 /* divisor to convert to Mbps */ + +/* MAX_MSIX_Q_VECTORS of these are allocated, + * but we only use one per queue-specific vector. + */ +struct i40e_q_vector { + struct iavf_adapter *adapter; + struct i40e_vsi *vsi; + struct napi_struct napi; + struct i40e_ring_container rx; + struct i40e_ring_container tx; + u32 ring_mask; + u8 itr_countdown; /* when 0 should adjust adaptive ITR */ + u8 num_ringpairs; /* total number of ring pairs in vector */ + u16 v_idx; /* index in the vsi->q_vector array. */ + u16 reg_idx; /* register index of the interrupt */ + char name[IFNAMSIZ + 15]; + bool arm_wb_state; + cpumask_t affinity_mask; + struct irq_affinity_notify affinity_notify; +}; + +/* Helper macros to switch between ints/sec and what the register uses. + * And yes, it's the same math going both ways. The lowest value + * supported by all of the i40e hardware is 8. + */ +#define EITR_INTS_PER_SEC_TO_REG(_eitr) \ + ((_eitr) ? (1000000000 / ((_eitr) * 256)) : 8) +#define EITR_REG_TO_INTS_PER_SEC EITR_INTS_PER_SEC_TO_REG + +#define IAVF_DESC_UNUSED(R) \ + ((((R)->next_to_clean > (R)->next_to_use) ? 
0 : (R)->count) + \ + (R)->next_to_clean - (R)->next_to_use - 1) + +#define IAVF_RX_DESC_ADV(R, i) \ + (&(((union i40e_adv_rx_desc *)((R).desc))[i])) +#define IAVF_TX_DESC_ADV(R, i) \ + (&(((union i40e_adv_tx_desc *)((R).desc))[i])) +#define IAVF_TX_CTXTDESC_ADV(R, i) \ + (&(((struct i40e_adv_tx_context_desc *)((R).desc))[i])) + +#define OTHER_VECTOR 1 +#define NONQ_VECS (OTHER_VECTOR) + +#define MIN_MSIX_Q_VECTORS 1 +#define MIN_MSIX_COUNT (MIN_MSIX_Q_VECTORS + NONQ_VECS) + +#define IAVF_QUEUE_END_OF_LIST 0x7FF +#define IAVF_FREE_VECTOR 0x7FFF +struct iavf_mac_filter { + struct list_head list; + u8 macaddr[ETH_ALEN]; + bool remove; /* filter needs to be removed */ + bool add; /* filter needs to be added */ +}; + +struct iavf_vlan_filter { + struct list_head list; + u16 vlan; + bool remove; /* filter needs to be removed */ + bool add; /* filter needs to be added */ +}; + +#define IAVF_MAX_TRAFFIC_CLASS 4 +/* State of traffic class creation */ +enum iavf_tc_state_t { + __IAVF_TC_INVALID, /* no traffic class, default state */ + __IAVF_TC_RUNNING, /* traffic classes have been created */ +}; + +/* channel info */ +struct iavf_channel_config { + struct virtchnl_channel_info ch_info[IAVF_MAX_TRAFFIC_CLASS]; + enum iavf_tc_state_t state; + u8 total_qps; +}; + +/* State of cloud filter */ +enum iavf_cloud_filter_state_t { + __IAVF_CF_INVALID, /* cloud filter not added */ + __IAVF_CF_ADD_PENDING, /* cloud filter pending add by the PF */ + __IAVF_CF_DEL_PENDING, /* cloud filter pending del by the PF */ + __IAVF_CF_ACTIVE, /* cloud filter is active */ +}; + +/* Driver state. The order of these is important! */ +enum iavf_state_t { + __IAVF_STARTUP, /* driver loaded, probe complete */ + __IAVF_REMOVE, /* driver is being unloaded */ + __IAVF_INIT_VERSION_CHECK, /* aq msg sent, awaiting reply */ + __IAVF_INIT_GET_RESOURCES, /* aq msg sent, awaiting reply */ + __IAVF_INIT_SW, /* got resources, setting up structs */ + __IAVF_RESETTING, /* in reset */ + /* Below here, watchdog is running */ + __IAVF_DOWN, /* ready, can be opened */ + __IAVF_DOWN_PENDING, /* descending, waiting for watchdog */ + __IAVF_TESTING, /* in ethtool self-test */ + __IAVF_RUNNING, /* opened, working */ +}; + +enum iavf_critical_section_t { + __IAVF_IN_CRITICAL_TASK, /* cannot be interrupted */ + __IAVF_IN_CLIENT_TASK, + __IAVF_IN_REMOVE_TASK, /* device being removed */ +}; + +#define IAVF_CLOUD_FIELD_OMAC 0x01 +#define IAVF_CLOUD_FIELD_IMAC 0x02 +#define IAVF_CLOUD_FIELD_IVLAN 0x04 +#define IAVF_CLOUD_FIELD_TEN_ID 0x08 +#define IAVF_CLOUD_FIELD_IIP 0x10 + +#define IAVF_CF_FLAGS_OMAC IAVF_CLOUD_FIELD_OMAC +#define IAVF_CF_FLAGS_IMAC IAVF_CLOUD_FIELD_IMAC +#define IAVF_CF_FLAGS_IMAC_IVLAN (IAVF_CLOUD_FIELD_IMAC |\ + IAVF_CLOUD_FIELD_IVLAN) +#define IAVF_CF_FLAGS_IMAC_TEN_ID (IAVF_CLOUD_FIELD_IMAC |\ + IAVF_CLOUD_FIELD_TEN_ID) +#define IAVF_CF_FLAGS_OMAC_TEN_ID_IMAC (IAVF_CLOUD_FIELD_OMAC |\ + IAVF_CLOUD_FIELD_IMAC |\ + IAVF_CLOUD_FIELD_TEN_ID) +#define IAVF_CF_FLAGS_IMAC_IVLAN_TEN_ID (IAVF_CLOUD_FIELD_IMAC |\ + IAVF_CLOUD_FIELD_IVLAN |\ + IAVF_CLOUD_FIELD_TEN_ID) +#define IAVF_CF_FLAGS_IIP I40E_CLOUD_FIELD_IIP + +/* bookkeeping of cloud filters */ +struct iavf_cloud_filter { + enum iavf_cloud_filter_state_t state; + struct list_head list; + struct virtchnl_filter f; + unsigned long cookie; + bool del; /* filter needs to be deleted */ + bool add; /* filter needs to be added */ +}; + +/* board specific private data structure */ +struct iavf_adapter { + struct timer_list watchdog_timer; + struct work_struct reset_task; + struct 
work_struct adminq_task; + struct delayed_work client_task; + struct delayed_work init_task; + wait_queue_head_t down_waitqueue; + struct i40e_q_vector *q_vectors; + struct list_head vlan_filter_list; + struct list_head mac_filter_list; + /* Lock to protect accesses to MAC and VLAN lists */ + spinlock_t mac_vlan_list_lock; + char misc_vector_name[IFNAMSIZ + 9]; + int num_active_queues; + int num_req_queues; + + /* TX */ + struct i40e_ring *tx_rings; + u32 tx_timeout_count; + u32 tx_desc_count; + + /* RX */ + struct i40e_ring *rx_rings; + u64 hw_csum_rx_error; + u32 rx_desc_count; + int num_msix_vectors; + int num_iwarp_msix; + int iwarp_base_vector; + u32 client_pending; + struct i40e_client_instance *cinst; + struct msix_entry *msix_entries; + + u32 flags; +#define IAVF_FLAG_RX_CSUM_ENABLED BIT(0) +#define IAVF_FLAG_PF_COMMS_FAILED BIT(3) +#define IAVF_FLAG_RESET_PENDING BIT(4) +#define IAVF_FLAG_RESET_NEEDED BIT(5) +#define IAVF_FLAG_WB_ON_ITR_CAPABLE BIT(6) +#define IAVF_FLAG_ADDR_SET_BY_PF BIT(8) +#define IAVF_FLAG_SERVICE_CLIENT_REQUESTED BIT(9) +#define IAVF_FLAG_CLIENT_NEEDS_OPEN BIT(10) +#define IAVF_FLAG_CLIENT_NEEDS_CLOSE BIT(11) +#define IAVF_FLAG_CLIENT_NEEDS_L2_PARAMS BIT(12) +#define IAVF_FLAG_PROMISC_ON BIT(13) +#define IAVF_FLAG_ALLMULTI_ON BIT(14) +#define IAVF_FLAG_LEGACY_RX BIT(15) +#define IAVF_FLAG_REINIT_ITR_NEEDED BIT(16) +#define IAVF_FLAG_QUEUES_DISABLED BIT(17) +/* duplicates for common code */ +#define I40E_FLAG_DCB_ENABLED 0 +#define I40E_FLAG_RX_CSUM_ENABLED IAVF_FLAG_RX_CSUM_ENABLED +#define I40E_FLAG_LEGACY_RX IAVF_FLAG_LEGACY_RX + /* flags for admin queue service task */ + u32 aq_required; +#define IAVF_FLAG_AQ_ENABLE_QUEUES BIT(0) +#define IAVF_FLAG_AQ_DISABLE_QUEUES BIT(1) +#define IAVF_FLAG_AQ_ADD_MAC_FILTER BIT(2) +#define IAVF_FLAG_AQ_ADD_VLAN_FILTER BIT(3) +#define IAVF_FLAG_AQ_DEL_MAC_FILTER BIT(4) +#define IAVF_FLAG_AQ_DEL_VLAN_FILTER BIT(5) +#define IAVF_FLAG_AQ_CONFIGURE_QUEUES BIT(6) +#define IAVF_FLAG_AQ_MAP_VECTORS BIT(7) +#define IAVF_FLAG_AQ_HANDLE_RESET BIT(8) +#define IAVF_FLAG_AQ_CONFIGURE_RSS BIT(9) /* direct AQ config */ +#define IAVF_FLAG_AQ_GET_CONFIG BIT(10) +/* Newer style, RSS done by the PF so we can ignore hardware vagaries. */ +#define IAVF_FLAG_AQ_GET_HENA BIT(11) +#define IAVF_FLAG_AQ_SET_HENA BIT(12) +#define IAVF_FLAG_AQ_SET_RSS_KEY BIT(13) +#define IAVF_FLAG_AQ_SET_RSS_LUT BIT(14) +#define IAVF_FLAG_AQ_REQUEST_PROMISC BIT(15) +#define IAVF_FLAG_AQ_RELEASE_PROMISC BIT(16) +#define IAVF_FLAG_AQ_REQUEST_ALLMULTI BIT(17) +#define IAVF_FLAG_AQ_RELEASE_ALLMULTI BIT(18) +#define IAVF_FLAG_AQ_ENABLE_VLAN_STRIPPING BIT(19) +#define IAVF_FLAG_AQ_DISABLE_VLAN_STRIPPING BIT(20) +#define IAVF_FLAG_AQ_ENABLE_CHANNELS BIT(21) +#define IAVF_FLAG_AQ_DISABLE_CHANNELS BIT(22) +#define IAVF_FLAG_AQ_ADD_CLOUD_FILTER BIT(23) +#define IAVF_FLAG_AQ_DEL_CLOUD_FILTER BIT(24) + + /* OS defined structs */ + struct net_device *netdev; + struct pci_dev *pdev; + + struct i40e_hw hw; /* defined in i40e_type.h */ + + enum iavf_state_t state; + unsigned long crit_section; + + struct work_struct watchdog_task; + bool netdev_registered; + bool link_up; + enum virtchnl_link_speed link_speed; + enum virtchnl_ops current_op; +#define CLIENT_ALLOWED(_a) ((_a)->vf_res ? \ + (_a)->vf_res->vf_cap_flags & \ + VIRTCHNL_VF_OFFLOAD_IWARP : \ + 0) +#define CLIENT_ENABLED(_a) ((_a)->cinst) +/* RSS by the PF should be preferred over RSS via other methods. 
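 * A sketch, not part of this patch, of the preference order the
 * capability macros below encode when the driver picks a
 * configuration method:
 *
 *	if (RSS_PF(adapter))
 *		use VIRTCHNL_OP_CONFIG_RSS_KEY / _LUT;
 *	else if (RSS_AQ(adapter))
 *		configure RSS through admin queue commands;
 *	else
 *		fall back to direct register writes (RSS_REG).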
 */
+#define RSS_PF(_a) ((_a)->vf_res->vf_cap_flags & \
+		    VIRTCHNL_VF_OFFLOAD_RSS_PF)
+#define RSS_AQ(_a) ((_a)->vf_res->vf_cap_flags & \
+		    VIRTCHNL_VF_OFFLOAD_RSS_AQ)
+#define RSS_REG(_a) (!((_a)->vf_res->vf_cap_flags & \
+		       (VIRTCHNL_VF_OFFLOAD_RSS_AQ | \
+			VIRTCHNL_VF_OFFLOAD_RSS_PF)))
+#define VLAN_ALLOWED(_a) ((_a)->vf_res->vf_cap_flags & \
+			  VIRTCHNL_VF_OFFLOAD_VLAN)
+	struct virtchnl_vf_resource *vf_res; /* incl. all VSIs */
+	struct virtchnl_vsi_resource *vsi_res; /* our LAN VSI */
+	struct virtchnl_version_info pf_version;
+#define PF_IS_V11(_a) (((_a)->pf_version.major == 1) && \
+		       ((_a)->pf_version.minor == 1))
+	u16 msg_enable;
+	struct i40e_eth_stats current_stats;
+	struct i40e_vsi vsi;
+	u32 aq_wait_count;
+	/* RSS stuff */
+	u64 hena;
+	u16 rss_key_size;
+	u16 rss_lut_size;
+	u8 *rss_key;
+	u8 *rss_lut;
+	/* ADQ related members */
+	struct iavf_channel_config ch_config;
+	u8 num_tc;
+	struct list_head cloud_filter_list;
+	/* lock to protect access to the cloud filter list */
+	spinlock_t cloud_filter_list_lock;
+	u16 num_cloud_filters;
+};
+
+
+/* Ethtool Private Flags */
+
+/* lan device */
+struct i40e_device {
+	struct list_head list;
+	struct iavf_adapter *vf;
+};
+
+/* needed by iavf_ethtool.c */
+extern char iavf_driver_name[];
+extern const char iavf_driver_version[];
+
+int iavf_up(struct iavf_adapter *adapter);
+void iavf_down(struct iavf_adapter *adapter);
+int iavf_process_config(struct iavf_adapter *adapter);
+void iavf_schedule_reset(struct iavf_adapter *adapter);
+void iavf_reset(struct iavf_adapter *adapter);
+void iavf_set_ethtool_ops(struct net_device *netdev);
+void iavf_update_stats(struct iavf_adapter *adapter);
+void iavf_reset_interrupt_capability(struct iavf_adapter *adapter);
+int iavf_init_interrupt_scheme(struct iavf_adapter *adapter);
+void iavf_irq_enable_queues(struct iavf_adapter *adapter, u32 mask);
+void iavf_free_all_tx_resources(struct iavf_adapter *adapter);
+void iavf_free_all_rx_resources(struct iavf_adapter *adapter);
+
+void i40e_napi_add_all(struct iavf_adapter *adapter);
+void i40e_napi_del_all(struct iavf_adapter *adapter);
+
+int iavf_send_api_ver(struct iavf_adapter *adapter);
+int iavf_verify_api_ver(struct iavf_adapter *adapter);
+int iavf_send_vf_config_msg(struct iavf_adapter *adapter);
+int iavf_get_vf_config(struct iavf_adapter *adapter);
+void iavf_irq_enable(struct iavf_adapter *adapter, bool flush);
+void iavf_configure_queues(struct iavf_adapter *adapter);
+void iavf_deconfigure_queues(struct iavf_adapter *adapter);
+void iavf_enable_queues(struct iavf_adapter *adapter);
+void iavf_disable_queues(struct iavf_adapter *adapter);
+void iavf_map_queues(struct iavf_adapter *adapter);
+int iavf_request_queues(struct iavf_adapter *adapter, int num);
+void iavf_add_ether_addrs(struct iavf_adapter *adapter);
+void iavf_del_ether_addrs(struct iavf_adapter *adapter);
+void iavf_add_vlans(struct iavf_adapter *adapter);
+void iavf_del_vlans(struct iavf_adapter *adapter);
+void iavf_set_promiscuous(struct iavf_adapter *adapter, int flags);
+void iavf_request_stats(struct iavf_adapter *adapter);
+void iavf_request_reset(struct iavf_adapter *adapter);
+void iavf_get_hena(struct iavf_adapter *adapter);
+void iavf_set_hena(struct iavf_adapter *adapter);
+void iavf_set_rss_key(struct iavf_adapter *adapter);
+void iavf_set_rss_lut(struct iavf_adapter *adapter);
+void iavf_enable_vlan_stripping(struct iavf_adapter *adapter);
+void iavf_disable_vlan_stripping(struct iavf_adapter *adapter);
+void iavf_virtchnl_completion(struct iavf_adapter
*adapter, + enum virtchnl_ops v_opcode, + iavf_status v_retval, u8 *msg, u16 msglen); +int iavf_config_rss(struct iavf_adapter *adapter); +int iavf_lan_add_device(struct iavf_adapter *adapter); +int iavf_lan_del_device(struct iavf_adapter *adapter); +void iavf_client_subtask(struct iavf_adapter *adapter); +void iavf_notify_client_message(struct i40e_vsi *vsi, u8 *msg, u16 len); +void iavf_notify_client_l2_params(struct i40e_vsi *vsi); +void iavf_notify_client_open(struct i40e_vsi *vsi); +void iavf_notify_client_close(struct i40e_vsi *vsi, bool reset); +void iavf_enable_channels(struct iavf_adapter *adapter); +void iavf_disable_channels(struct iavf_adapter *adapter); +void iavf_add_cloud_filter(struct iavf_adapter *adapter); +void iavf_del_cloud_filter(struct iavf_adapter *adapter); +#endif /* _IAVF_H_ */ diff --git a/drivers/net/ethernet/intel/iavf/iavf_client.c b/drivers/net/ethernet/intel/iavf/iavf_client.c new file mode 100644 index 0000000000000..500c090678221 --- /dev/null +++ b/drivers/net/ethernet/intel/iavf/iavf_client.c @@ -0,0 +1,579 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright(c) 2013 - 2018 Intel Corporation. */ + +#include +#include + +#include "iavf.h" +#include "i40e_prototype.h" +#include "iavf_client.h" + +static +const char iavf_client_interface_version_str[] = IAVF_CLIENT_VERSION_STR; +static struct i40e_client *vf_registered_client; +static LIST_HEAD(iavf_devices); +static DEFINE_MUTEX(iavf_device_mutex); + +static u32 iavf_client_virtchnl_send(struct i40e_info *ldev, + struct i40e_client *client, + u8 *msg, u16 len); + +static int iavf_client_setup_qvlist(struct i40e_info *ldev, + struct i40e_client *client, + struct i40e_qvlist_info *qvlist_info); + +static struct i40e_ops iavf_lan_ops = { + .virtchnl_send = iavf_client_virtchnl_send, + .setup_qvlist = iavf_client_setup_qvlist, +}; + +/** + * iavf_client_get_params - retrieve relevant client parameters + * @vsi: VSI with parameters + * @params: client param struct + **/ +static +void iavf_client_get_params(struct i40e_vsi *vsi, struct i40e_params *params) +{ + int i; + + memset(params, 0, sizeof(struct i40e_params)); + params->mtu = vsi->netdev->mtu; + params->link_up = vsi->back->link_up; + + for (i = 0; i < I40E_MAX_USER_PRIORITY; i++) { + params->qos.prio_qos[i].tc = 0; + params->qos.prio_qos[i].qs_handle = vsi->qs_handle; + } +} + +/** + * iavf_notify_client_message - call the client message receive callback + * @vsi: the VSI associated with this client + * @msg: message buffer + * @len: length of message + * + * If there is a client to this VSI, call the client + **/ +void iavf_notify_client_message(struct i40e_vsi *vsi, u8 *msg, u16 len) +{ + struct i40e_client_instance *cinst; + + if (!vsi) + return; + + cinst = vsi->back->cinst; + if (!cinst || !cinst->client || !cinst->client->ops || + !cinst->client->ops->virtchnl_receive) { + dev_dbg(&vsi->back->pdev->dev, + "Cannot locate client instance virtchnl_receive function\n"); + return; + } + cinst->client->ops->virtchnl_receive(&cinst->lan_info, cinst->client, + msg, len); +} + +/** + * iavf_notify_client_l2_params - call the client notify callback + * @vsi: the VSI with l2 param changes + * + * If there is a client to this VSI, call the client + **/ +void iavf_notify_client_l2_params(struct i40e_vsi *vsi) +{ + struct i40e_client_instance *cinst; + struct i40e_params params; + + if (!vsi) + return; + + cinst = vsi->back->cinst; + + if (!cinst || !cinst->client || !cinst->client->ops || + !cinst->client->ops->l2_param_change) { + 
dev_dbg(&vsi->back->pdev->dev, + "Cannot locate client instance l2_param_change function\n"); + return; + } + iavf_client_get_params(vsi, ¶ms); + cinst->lan_info.params = params; + cinst->client->ops->l2_param_change(&cinst->lan_info, cinst->client, + ¶ms); +} + +/** + * iavf_notify_client_open - call the client open callback + * @vsi: the VSI with netdev opened + * + * If there is a client to this netdev, call the client with open + **/ +void iavf_notify_client_open(struct i40e_vsi *vsi) +{ + struct iavf_adapter *adapter = vsi->back; + struct i40e_client_instance *cinst = adapter->cinst; + int ret; + + if (!cinst || !cinst->client || !cinst->client->ops || + !cinst->client->ops->open) { + dev_dbg(&vsi->back->pdev->dev, + "Cannot locate client instance open function\n"); + return; + } + if (!(test_bit(__I40E_CLIENT_INSTANCE_OPENED, &cinst->state))) { + ret = cinst->client->ops->open(&cinst->lan_info, cinst->client); + if (!ret) + set_bit(__I40E_CLIENT_INSTANCE_OPENED, &cinst->state); + } +} + +/** + * iavf_client_release_qvlist - send a message to the PF to release iwarp qv map + * @ldev: pointer to L2 context. + * + * Return 0 on success or < 0 on error + **/ +static int iavf_client_release_qvlist(struct i40e_info *ldev) +{ + struct iavf_adapter *adapter = ldev->vf; + iavf_status err; + + if (adapter->aq_required) + return -EAGAIN; + + err = iavf_aq_send_msg_to_pf(&adapter->hw, + VIRTCHNL_OP_RELEASE_IWARP_IRQ_MAP, + I40E_SUCCESS, NULL, 0, NULL); + + if (err) + dev_err(&adapter->pdev->dev, + "Unable to send iWarp vector release message to PF, error %d, aq status %d\n", + err, adapter->hw.aq.asq_last_status); + + return err; +} + +/** + * iavf_notify_client_close - call the client close callback + * @vsi: the VSI with netdev closed + * @reset: true when close called due to reset pending + * + * If there is a client to this netdev, call the client with close + **/ +void iavf_notify_client_close(struct i40e_vsi *vsi, bool reset) +{ + struct iavf_adapter *adapter = vsi->back; + struct i40e_client_instance *cinst = adapter->cinst; + + if (!cinst || !cinst->client || !cinst->client->ops || + !cinst->client->ops->close) { + dev_dbg(&vsi->back->pdev->dev, + "Cannot locate client instance close function\n"); + return; + } + cinst->client->ops->close(&cinst->lan_info, cinst->client, reset); + iavf_client_release_qvlist(&cinst->lan_info); + clear_bit(__I40E_CLIENT_INSTANCE_OPENED, &cinst->state); +} + +/** + * iavf_client_add_instance - add a client instance to the instance list + * @adapter: pointer to the board struct + * + * Returns cinst ptr on success, NULL on failure + **/ +static struct i40e_client_instance * +iavf_client_add_instance(struct iavf_adapter *adapter) +{ + struct i40e_client_instance *cinst = NULL; + struct i40e_vsi *vsi = &adapter->vsi; + struct netdev_hw_addr *mac = NULL; + struct i40e_params params; + + if (!vf_registered_client) + goto out; + + if (adapter->cinst) { + cinst = adapter->cinst; + goto out; + } + + cinst = kzalloc(sizeof(*cinst), GFP_KERNEL); + if (!cinst) + goto out; + + cinst->lan_info.vf = (void *)adapter; + cinst->lan_info.netdev = vsi->netdev; + cinst->lan_info.pcidev = adapter->pdev; + cinst->lan_info.fid = 0; + cinst->lan_info.ftype = I40E_CLIENT_FTYPE_VF; + cinst->lan_info.hw_addr = adapter->hw.hw_addr; + cinst->lan_info.ops = &iavf_lan_ops; + cinst->lan_info.version.major = IAVF_CLIENT_VERSION_MAJOR; + cinst->lan_info.version.minor = IAVF_CLIENT_VERSION_MINOR; + cinst->lan_info.version.build = IAVF_CLIENT_VERSION_BUILD; + iavf_client_get_params(vsi, ¶ms); 
+	cinst->lan_info.params = params;
+	set_bit(__I40E_CLIENT_INSTANCE_NONE, &cinst->state);
+
+	cinst->lan_info.msix_count = adapter->num_iwarp_msix;
+	cinst->lan_info.msix_entries =
+			&adapter->msix_entries[adapter->iwarp_base_vector];
+
+	mac = list_first_entry(&cinst->lan_info.netdev->dev_addrs.list,
+			       struct netdev_hw_addr, list);
+	if (mac)
+		ether_addr_copy(cinst->lan_info.lanmac, mac->addr);
+	else
+		dev_err(&adapter->pdev->dev, "MAC address list is empty!\n");
+
+	cinst->client = vf_registered_client;
+	adapter->cinst = cinst;
+out:
+	return cinst;
+}
+
+/**
+ * iavf_client_del_instance - removes a client instance from the list
+ * @adapter: pointer to the board struct
+ *
+ **/
+static
+void iavf_client_del_instance(struct iavf_adapter *adapter)
+{
+	kfree(adapter->cinst);
+	adapter->cinst = NULL;
+}
+
+/**
+ * iavf_client_subtask - client maintenance work
+ * @adapter: board private structure
+ **/
+void iavf_client_subtask(struct iavf_adapter *adapter)
+{
+	struct i40e_client *client = vf_registered_client;
+	struct i40e_client_instance *cinst;
+	int ret = 0;
+
+	if (adapter->state < __IAVF_DOWN)
+		return;
+
+	/* first check client is registered */
+	if (!client)
+		return;
+
+	/* Add the client instance to the instance list */
+	cinst = iavf_client_add_instance(adapter);
+	if (!cinst)
+		return;
+
+	dev_info(&adapter->pdev->dev, "Added instance of Client %s\n",
+		 client->name);
+
+	if (!test_bit(__I40E_CLIENT_INSTANCE_OPENED, &cinst->state)) {
+		/* Send an Open request to the client */
+
+		if (client->ops && client->ops->open)
+			ret = client->ops->open(&cinst->lan_info, client);
+		if (!ret)
+			set_bit(__I40E_CLIENT_INSTANCE_OPENED,
+				&cinst->state);
+		else
+			/* remove client instance */
+			iavf_client_del_instance(adapter);
+	}
+}
+
+/**
+ * iavf_lan_add_device - add a lan device struct to the list of lan devices
+ * @adapter: pointer to the board struct
+ *
+ * Returns 0 on success or non-zero on error
+ **/
+int iavf_lan_add_device(struct iavf_adapter *adapter)
+{
+	struct i40e_device *ldev;
+	int ret = 0;
+
+	mutex_lock(&iavf_device_mutex);
+	list_for_each_entry(ldev, &iavf_devices, list) {
+		if (ldev->vf == adapter) {
+			ret = -EEXIST;
+			goto out;
+		}
+	}
+	ldev = kzalloc(sizeof(*ldev), GFP_KERNEL);
+	if (!ldev) {
+		ret = -ENOMEM;
+		goto out;
+	}
+	ldev->vf = adapter;
+	INIT_LIST_HEAD(&ldev->list);
+	list_add(&ldev->list, &iavf_devices);
+	dev_info(&adapter->pdev->dev, "Added LAN device bus=0x%02x dev=0x%02x func=0x%02x\n",
+		 adapter->hw.bus.bus_id, adapter->hw.bus.device,
+		 adapter->hw.bus.func);
+
+	/* Since in some cases register may have happened before a device gets
+	 * added, we can schedule a subtask to go initiate the clients.
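	 * A sketch, not part of this patch, of the resulting sequence:
	 *
	 *	iavf_register_client()
	 *	  -> iavf_client_prepare()	sets the flag below
	 *	watchdog fires iavf_client_subtask()
	 *	  -> iavf_client_add_instance()
	 *	  -> client->ops->open()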
+ */ + adapter->flags |= IAVF_FLAG_SERVICE_CLIENT_REQUESTED; + +out: + mutex_unlock(&iavf_device_mutex); + return ret; +} + +/** + * iavf_lan_del_device - removes a lan device from the device list + * @adapter: pointer to the board struct + * + * Returns 0 on success or non-0 on error + **/ +int iavf_lan_del_device(struct iavf_adapter *adapter) +{ + struct i40e_device *ldev, *tmp; + int ret = -ENODEV; + + mutex_lock(&iavf_device_mutex); + list_for_each_entry_safe(ldev, tmp, &iavf_devices, list) { + if (ldev->vf == adapter) { + dev_info(&adapter->pdev->dev, + "Deleted LAN device bus=0x%02x dev=0x%02x func=0x%02x\n", + adapter->hw.bus.bus_id, adapter->hw.bus.device, + adapter->hw.bus.func); + list_del(&ldev->list); + kfree(ldev); + ret = 0; + break; + } + } + + mutex_unlock(&iavf_device_mutex); + return ret; +} + +/** + * iavf_client_release - release client specific resources + * @client: pointer to the registered client + * + **/ +static void iavf_client_release(struct i40e_client *client) +{ + struct i40e_client_instance *cinst; + struct i40e_device *ldev; + struct iavf_adapter *adapter; + + mutex_lock(&iavf_device_mutex); + list_for_each_entry(ldev, &iavf_devices, list) { + adapter = ldev->vf; + cinst = adapter->cinst; + if (!cinst) + continue; + if (test_bit(__I40E_CLIENT_INSTANCE_OPENED, &cinst->state)) { + if (client->ops && client->ops->close) + client->ops->close(&cinst->lan_info, client, + false); + iavf_client_release_qvlist(&cinst->lan_info); + clear_bit(__I40E_CLIENT_INSTANCE_OPENED, &cinst->state); + + dev_warn(&adapter->pdev->dev, + "Client %s instance closed\n", client->name); + } + /* delete the client instance */ + iavf_client_del_instance(adapter); + dev_info(&adapter->pdev->dev, "Deleted client instance of Client %s\n", + client->name); + } + mutex_unlock(&iavf_device_mutex); +} + +/** + * iavf_client_prepare - prepare client specific resources + * @client: pointer to the registered client + * + **/ +static void iavf_client_prepare(struct i40e_client *client) +{ + struct i40e_device *ldev; + struct iavf_adapter *adapter; + + mutex_lock(&iavf_device_mutex); + list_for_each_entry(ldev, &iavf_devices, list) { + adapter = ldev->vf; + /* Signal the watchdog to service the client */ + adapter->flags |= IAVF_FLAG_SERVICE_CLIENT_REQUESTED; + } + mutex_unlock(&iavf_device_mutex); +} + +/** + * iavf_client_virtchnl_send - send a message to the PF instance + * @ldev: pointer to L2 context. + * @client: Client pointer. + * @msg: pointer to message buffer + * @len: message length + * + * Return 0 on success or < 0 on error + **/ +static u32 iavf_client_virtchnl_send(struct i40e_info *ldev, + struct i40e_client *client, + u8 *msg, u16 len) +{ + struct iavf_adapter *adapter = ldev->vf; + iavf_status err; + + if (adapter->aq_required) + return -EAGAIN; + + err = iavf_aq_send_msg_to_pf(&adapter->hw, VIRTCHNL_OP_IWARP, + I40E_SUCCESS, msg, len, NULL); + if (err) + dev_err(&adapter->pdev->dev, "Unable to send iWarp message to PF, error %d, aq status %d\n", + err, adapter->hw.aq.asq_last_status); + + return err; +} + +/** + * iavf_client_setup_qvlist - send a message to the PF to setup iwarp qv map + * @ldev: pointer to L2 context. + * @client: Client pointer. 
+ * @qvlist_info: queue and vector list
+ *
+ * Return 0 on success or < 0 on error
+ **/
+static int iavf_client_setup_qvlist(struct i40e_info *ldev,
+				    struct i40e_client *client,
+				    struct i40e_qvlist_info *qvlist_info)
+{
+	struct virtchnl_iwarp_qvlist_info *v_qvlist_info;
+	struct iavf_adapter *adapter = ldev->vf;
+	struct i40e_qv_info *qv_info;
+	iavf_status err;
+	u32 v_idx, i;
+	u32 msg_size;
+
+	if (adapter->aq_required)
+		return -EAGAIN;
+
+	/* A quick check on whether the vectors belong to the client */
+	for (i = 0; i < qvlist_info->num_vectors; i++) {
+		qv_info = &qvlist_info->qv_info[i];
+		if (!qv_info)
+			continue;
+		v_idx = qv_info->v_idx;
+		if ((v_idx >=
+		    (adapter->iwarp_base_vector + adapter->num_iwarp_msix)) ||
+		    (v_idx < adapter->iwarp_base_vector))
+			return -EINVAL;
+	}
+
+	v_qvlist_info = (struct virtchnl_iwarp_qvlist_info *)qvlist_info;
+	msg_size = sizeof(struct virtchnl_iwarp_qvlist_info) +
+		(sizeof(struct virtchnl_iwarp_qv_info) *
+		(v_qvlist_info->num_vectors - 1));
+
+	adapter->client_pending |= BIT(VIRTCHNL_OP_CONFIG_IWARP_IRQ_MAP);
+	err = iavf_aq_send_msg_to_pf(&adapter->hw,
+			VIRTCHNL_OP_CONFIG_IWARP_IRQ_MAP, I40E_SUCCESS,
+			(u8 *)v_qvlist_info, msg_size, NULL);
+
+	if (err) {
+		dev_err(&adapter->pdev->dev,
+			"Unable to send iWarp vector config message to PF, error %d, aq status %d\n",
+			err, adapter->hw.aq.asq_last_status);
+		goto out;
+	}
+
+	err = -EBUSY;
+	for (i = 0; i < 5; i++) {
+		msleep(100);
+		if (!(adapter->client_pending &
+		      BIT(VIRTCHNL_OP_CONFIG_IWARP_IRQ_MAP))) {
+			err = 0;
+			break;
+		}
+	}
+out:
+	return err;
+}
+
+/**
+ * iavf_register_client - Register an i40e client driver with the L2 driver
+ * @client: pointer to the i40e_client struct
+ *
+ * Returns 0 on success or non-0 on error
+ **/
+int iavf_register_client(struct i40e_client *client)
+{
+	int ret = 0;
+
+	if (!client) {
+		ret = -EIO;
+		goto out;
+	}
+
+	if (strlen(client->name) == 0) {
+		pr_info("iavf: Failed to register client with no name\n");
+		ret = -EIO;
+		goto out;
+	}
+
+	if (vf_registered_client) {
+		pr_info("iavf: Client %s has already been registered!\n",
+			client->name);
+		ret = -EEXIST;
+		goto out;
+	}
+
+	if ((client->version.major != IAVF_CLIENT_VERSION_MAJOR) ||
+	    (client->version.minor != IAVF_CLIENT_VERSION_MINOR)) {
+		pr_info("iavf: Failed to register client %s due to mismatched client interface version\n",
+			client->name);
+		pr_info("Client is using version: %02d.%02d.%02d while LAN driver supports %s\n",
+			client->version.major, client->version.minor,
+			client->version.build,
+			iavf_client_interface_version_str);
+		ret = -EIO;
+		goto out;
+	}
+
+	vf_registered_client = client;
+
+	iavf_client_prepare(client);
+
+	pr_info("iavf: Registered client %s with return code %d\n",
+		client->name, ret);
+out:
+	return ret;
+}
+EXPORT_SYMBOL(iavf_register_client);
+
+/**
+ * iavf_unregister_client - Unregister an i40e client driver with the L2 driver
+ * @client: pointer to the i40e_client struct
+ *
+ * Returns 0 on success or non-0 on error
+ **/
+int iavf_unregister_client(struct i40e_client *client)
+{
+	int ret = 0;
+
+	/* When an unregister request comes through we would have to send
+	 * a close for each of the client instances that were opened.
+	 * client_release function is called to handle this.
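	 * A sketch, not part of this patch, of the expected pairing from
	 * a client module, with a hypothetical client definition:
	 *
	 *	static struct i40e_client my_client = {
	 *		.name = "i40iw",
	 *		.ops = &my_client_ops,
	 *		.type = I40E_CLIENT_IWARP,
	 *	};
	 *
	 *	iavf_register_client(&my_client);	at module init
	 *	iavf_unregister_client(&my_client);	at module exit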
+	 */
+	iavf_client_release(client);
+
+	if (vf_registered_client != client) {
+		pr_info("iavf: Client %s has not been registered\n",
+			client->name);
+		ret = -ENODEV;
+		goto out;
+	}
+	vf_registered_client = NULL;
+	pr_info("iavf: Unregistered client %s\n", client->name);
+out:
+	return ret;
+}
+EXPORT_SYMBOL(iavf_unregister_client);
diff --git a/drivers/net/ethernet/intel/iavf/iavf_client.h b/drivers/net/ethernet/intel/iavf/iavf_client.h
new file mode 100644
index 0000000000000..e216fc9dfd81a
--- /dev/null
+++ b/drivers/net/ethernet/intel/iavf/iavf_client.h
@@ -0,0 +1,169 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
+
+#ifndef _IAVF_CLIENT_H_
+#define _IAVF_CLIENT_H_
+
+#define IAVF_CLIENT_STR_LENGTH 10
+
+/* Client interface version should be updated anytime there is a change in the
+ * existing APIs or data structures.
+ */
+#define IAVF_CLIENT_VERSION_MAJOR 0
+#define IAVF_CLIENT_VERSION_MINOR 01
+#define IAVF_CLIENT_VERSION_BUILD 00
+#define IAVF_CLIENT_VERSION_STR \
+	__stringify(IAVF_CLIENT_VERSION_MAJOR) "." \
+	__stringify(IAVF_CLIENT_VERSION_MINOR) "." \
+	__stringify(IAVF_CLIENT_VERSION_BUILD)
+
+struct i40e_client_version {
+	u8 major;
+	u8 minor;
+	u8 build;
+	u8 rsvd;
+};
+
+enum i40e_client_state {
+	__I40E_CLIENT_NULL,
+	__I40E_CLIENT_REGISTERED
+};
+
+enum i40e_client_instance_state {
+	__I40E_CLIENT_INSTANCE_NONE,
+	__I40E_CLIENT_INSTANCE_OPENED,
+};
+
+struct i40e_ops;
+struct i40e_client;
+
+/* HW does not define a type value for AEQ; only for RX/TX and CEQ.
+ * In order for us to keep the interface simple, SW will define a
+ * unique type value for AEQ.
+ */
+#define I40E_QUEUE_TYPE_PE_AEQ 0x80
+#define I40E_QUEUE_INVALID_IDX 0xFFFF
+
+struct i40e_qv_info {
+	u32 v_idx; /* msix_vector */
+	u16 ceq_idx;
+	u16 aeq_idx;
+	u8 itr_idx;
+};
+
+struct i40e_qvlist_info {
+	u32 num_vectors;
+	struct i40e_qv_info qv_info[1];
+};
+
+#define I40E_CLIENT_MSIX_ALL 0xFFFFFFFF
+
+/* set of LAN parameters useful for clients managed by LAN */
+
+/* Struct to hold per priority info */
+struct i40e_prio_qos_params {
+	u16 qs_handle; /* qs handle for prio */
+	u8 tc; /* TC mapped to prio */
+	u8 reserved;
+};
+
+#define I40E_CLIENT_MAX_USER_PRIORITY 8
+/* Struct to hold Client QoS */
+struct i40e_qos_params {
+	struct i40e_prio_qos_params prio_qos[I40E_CLIENT_MAX_USER_PRIORITY];
+};
+
+struct i40e_params {
+	struct i40e_qos_params qos;
+	u16 mtu;
+	u16 link_up; /* boolean */
+};
+
+/* Structure to hold LAN device info for a client device */
+struct i40e_info {
+	struct i40e_client_version version;
+	u8 lanmac[6];
+	struct net_device *netdev;
+	struct pci_dev *pcidev;
+	u8 __iomem *hw_addr;
+	u8 fid;	/* function id, PF id or VF id */
+#define I40E_CLIENT_FTYPE_PF 0
+#define I40E_CLIENT_FTYPE_VF 1
+	u8 ftype; /* function type, PF or VF */
+	void *vf; /* cast to iavf_adapter */
+
+	/* All L2 params that could change during the life span of the device
+	 * and needs to be communicated to the client when they change
+	 */
+	struct i40e_params params;
+	struct i40e_ops *ops;
+
+	u16 msix_count;	 /* number of msix vectors */
+	/* Array down below will be dynamically allocated based on msix_count */
+	struct msix_entry *msix_entries;
+	u16 itr_index; /* Which ITR index the PE driver is supposed to use */
+};
+
+struct i40e_ops {
+	/* setup_q_vector_list enables queues with a particular vector */
+	int (*setup_qvlist)(struct i40e_info *ldev, struct i40e_client *client,
+			    struct i40e_qvlist_info *qv_info);
+
+	u32 (*virtchnl_send)(struct i40e_info *ldev,
struct i40e_client *client, + u8 *msg, u16 len); + + /* If the PE Engine is unresponsive, RDMA driver can request a reset.*/ + void (*request_reset)(struct i40e_info *ldev, + struct i40e_client *client); +}; + +struct i40e_client_ops { + /* Should be called from register_client() or whenever the driver is + * ready to create a specific client instance. + */ + int (*open)(struct i40e_info *ldev, struct i40e_client *client); + + /* Should be closed when netdev is unavailable or when unregister + * call comes in. If the close happens due to a reset, set the reset + * bit to true. + */ + void (*close)(struct i40e_info *ldev, struct i40e_client *client, + bool reset); + + /* called when some l2 managed parameters changes - mss */ + void (*l2_param_change)(struct i40e_info *ldev, + struct i40e_client *client, + struct i40e_params *params); + + /* called when a message is received from the PF */ + int (*virtchnl_receive)(struct i40e_info *ldev, + struct i40e_client *client, + u8 *msg, u16 len); +}; + +/* Client device */ +struct i40e_client_instance { + struct list_head list; + struct i40e_info lan_info; + struct i40e_client *client; + unsigned long state; +}; + +struct i40e_client { + struct list_head list; /* list of registered clients */ + char name[IAVF_CLIENT_STR_LENGTH]; + struct i40e_client_version version; + unsigned long state; /* client state */ + atomic_t ref_cnt; /* Count of all the client devices of this kind */ + u32 flags; +#define I40E_CLIENT_FLAGS_LAUNCH_ON_PROBE BIT(0) +#define I40E_TX_FLAGS_NOTIFY_OTHER_EVENTS BIT(2) + u8 type; +#define I40E_CLIENT_IWARP 0 + struct i40e_client_ops *ops; /* client ops provided by the client */ +}; + +/* used by clients */ +int iavf_register_client(struct i40e_client *client); +int iavf_unregister_client(struct i40e_client *client); +#endif /* _IAVF_CLIENT_H_ */ diff --git a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c new file mode 100644 index 0000000000000..57142bd5dc094 --- /dev/null +++ b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c @@ -0,0 +1,1042 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright(c) 2013 - 2018 Intel Corporation. */ + +/* ethtool support for iavf */ +#include "iavf.h" + +#include + +/* ethtool statistics helpers */ + +/** + * struct i40e_stats - definition for an ethtool statistic + * @stat_string: statistic name to display in ethtool -S output + * @sizeof_stat: the sizeof() the stat, must be no greater than sizeof(u64) + * @stat_offset: offsetof() the stat from a base pointer + * + * This structure defines a statistic to be added to the ethtool stats buffer. + * It defines a statistic as offset from a common base pointer. Stats should + * be defined in constant arrays using the I40E_STAT macro, with every element + * of the array using the same _type for calculating the sizeof_stat and + * stat_offset. + * + * The @sizeof_stat is expected to be sizeof(u8), sizeof(u16), sizeof(u32) or + * sizeof(u64). Other sizes are not expected and will produce a WARN_ONCE from + * the i40e_add_ethtool_stat() helper function. + * + * The @stat_string is interpreted as a format string, allowing formatted + * values to be inserted while looping over multiple structures for a given + * statistics array. Thus, every statistic string in an array should have the + * same type and number of format specifiers, to be formatted by variadic + * arguments to the i40e_add_stat_string() helper function. 
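 *
 * (Illustration, not part of this patch: formatting the queue pattern
 * "%s-%u.packets" below with ("tx", 0) yields the ethtool -S name
 * "tx-0.packets", while a plain netdev stat expands as
 *
 *	I40E_NETDEV_STAT(rx_packets) ==
 *		{ "rx_packets",
 *		  FIELD_SIZEOF(struct rtnl_link_stats64, rx_packets),
 *		  offsetof(struct rtnl_link_stats64, rx_packets) }
 *
 * so every entry carries its own size and offset metadata.)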
+ **/ +struct i40e_stats { + char stat_string[ETH_GSTRING_LEN]; + int sizeof_stat; + int stat_offset; +}; + +/* Helper macro to define an i40e_stat structure with proper size and type. + * Use this when defining constant statistics arrays. Note that @_type expects + * only a type name and is used multiple times. + */ +#define I40E_STAT(_type, _name, _stat) { \ + .stat_string = _name, \ + .sizeof_stat = FIELD_SIZEOF(_type, _stat), \ + .stat_offset = offsetof(_type, _stat) \ +} + +/* Helper macro for defining some statistics directly copied from the netdev + * stats structure. + */ +#define I40E_NETDEV_STAT(_net_stat) \ + I40E_STAT(struct rtnl_link_stats64, #_net_stat, _net_stat) + +/* Helper macro for defining some statistics related to queues */ +#define I40E_QUEUE_STAT(_name, _stat) \ + I40E_STAT(struct i40e_ring, _name, _stat) + +/* Stats associated with a Tx or Rx ring */ +static const struct i40e_stats i40e_gstrings_queue_stats[] = { + I40E_QUEUE_STAT("%s-%u.packets", stats.packets), + I40E_QUEUE_STAT("%s-%u.bytes", stats.bytes), +}; + +/** + * iavf_add_one_ethtool_stat - copy the stat into the supplied buffer + * @data: location to store the stat value + * @pointer: basis for where to copy from + * @stat: the stat definition + * + * Copies the stat data defined by the pointer and stat structure pair into + * the memory supplied as data. Used to implement i40e_add_ethtool_stats and + * iavf_add_queue_stats. If the pointer is null, data will be zero'd. + */ +static void +iavf_add_one_ethtool_stat(u64 *data, void *pointer, + const struct i40e_stats *stat) +{ + char *p; + + if (!pointer) { + /* ensure that the ethtool data buffer is zero'd for any stats + * which don't have a valid pointer. + */ + *data = 0; + return; + } + + p = (char *)pointer + stat->stat_offset; + switch (stat->sizeof_stat) { + case sizeof(u64): + *data = *((u64 *)p); + break; + case sizeof(u32): + *data = *((u32 *)p); + break; + case sizeof(u16): + *data = *((u16 *)p); + break; + case sizeof(u8): + *data = *((u8 *)p); + break; + default: + WARN_ONCE(1, "unexpected stat size for %s", + stat->stat_string); + *data = 0; + } +} + +/** + * __iavf_add_ethtool_stats - copy stats into the ethtool supplied buffer + * @data: ethtool stats buffer + * @pointer: location to copy stats from + * @stats: array of stats to copy + * @size: the size of the stats definition + * + * Copy the stats defined by the stats array using the pointer as a base into + * the data buffer supplied by ethtool. Updates the data pointer to point to + * the next empty location for successive calls to __iavf_add_ethtool_stats. + * If pointer is null, set the data values to zero and update the pointer to + * skip these stats. + **/ +static void +__iavf_add_ethtool_stats(u64 **data, void *pointer, + const struct i40e_stats stats[], + const unsigned int size) +{ + unsigned int i; + + for (i = 0; i < size; i++) + iavf_add_one_ethtool_stat((*data)++, pointer, &stats[i]); +} + +/** + * i40e_add_ethtool_stats - copy stats into ethtool supplied buffer + * @data: ethtool stats buffer + * @pointer: location where stats are stored + * @stats: static const array of stat definitions + * + * Macro to ease the use of __iavf_add_ethtool_stats by taking a static + * constant stats array and passing the ARRAY_SIZE(). This avoids typos by + * ensuring that we pass the size associated with the given stats array. + * + * The parameter @stats is evaluated twice, so parameters with side effects + * should be avoided. 
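+ *
+ * Typical use, as in iavf_get_ethtool_stats() later in this file:
+ *
+ *	i40e_add_ethtool_stats(&data, adapter, iavf_gstrings_stats);
+ *
+ * where ARRAY_SIZE(iavf_gstrings_stats) is supplied automatically and
+ * the data pointer is advanced past the copied values.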
+ **/ +#define i40e_add_ethtool_stats(data, pointer, stats) \ + __iavf_add_ethtool_stats(data, pointer, stats, ARRAY_SIZE(stats)) + +/** + * iavf_add_queue_stats - copy queue statistics into supplied buffer + * @data: ethtool stats buffer + * @ring: the ring to copy + * + * Queue statistics must be copied while protected by + * u64_stats_fetch_begin_irq, so we can't directly use i40e_add_ethtool_stats. + * Assumes that queue stats are defined in i40e_gstrings_queue_stats. If the + * ring pointer is null, zero out the queue stat values and update the data + * pointer. Otherwise safely copy the stats from the ring into the supplied + * buffer and update the data pointer when finished. + * + * This function expects to be called while under rcu_read_lock(). + **/ +static void +iavf_add_queue_stats(u64 **data, struct i40e_ring *ring) +{ + const unsigned int size = ARRAY_SIZE(i40e_gstrings_queue_stats); + const struct i40e_stats *stats = i40e_gstrings_queue_stats; + unsigned int start; + unsigned int i; + + /* To avoid invalid statistics values, ensure that we keep retrying + * the copy until we get a consistent value according to + * u64_stats_fetch_retry_irq. But first, make sure our ring is + * non-null before attempting to access its syncp. + */ + do { + start = !ring ? 0 : u64_stats_fetch_begin_irq(&ring->syncp); + for (i = 0; i < size; i++) + iavf_add_one_ethtool_stat(&(*data)[i], ring, &stats[i]); + } while (ring && u64_stats_fetch_retry_irq(&ring->syncp, start)); + + /* Once we successfully copy the stats in, update the data pointer */ + *data += size; +} + +/** + * __i40e_add_stat_strings - copy stat strings into ethtool buffer + * @p: ethtool supplied buffer + * @stats: stat definitions array + * @size: size of the stats array + * + * Format and copy the strings described by stats into the buffer pointed at + * by p. + **/ +static void __i40e_add_stat_strings(u8 **p, const struct i40e_stats stats[], + const unsigned int size, ...) +{ + unsigned int i; + + for (i = 0; i < size; i++) { + va_list args; + + va_start(args, size); + vsnprintf(*p, ETH_GSTRING_LEN, stats[i].stat_string, args); + *p += ETH_GSTRING_LEN; + va_end(args); + } +} + +/** + * i40e_add_stat_strings - copy stat strings into ethtool buffer + * @p: ethtool supplied buffer + * @stats: stat definitions array + * + * Format and copy the strings described by the const static stats value into + * the buffer pointed at by p. + * + * The parameter @stats is evaluated twice, so parameters with side effects + * should be avoided. Additionally, stats must be an array such that + * ARRAY_SIZE can be called on it. + **/ +#define i40e_add_stat_strings(p, stats, ...) 
\ + __i40e_add_stat_strings(p, stats, ARRAY_SIZE(stats), ## __VA_ARGS__) + +#define IAVF_STAT(_name, _stat) \ + I40E_STAT(struct iavf_adapter, _name, _stat) + +static const struct i40e_stats iavf_gstrings_stats[] = { + IAVF_STAT("rx_bytes", current_stats.rx_bytes), + IAVF_STAT("rx_unicast", current_stats.rx_unicast), + IAVF_STAT("rx_multicast", current_stats.rx_multicast), + IAVF_STAT("rx_broadcast", current_stats.rx_broadcast), + IAVF_STAT("rx_discards", current_stats.rx_discards), + IAVF_STAT("rx_unknown_protocol", current_stats.rx_unknown_protocol), + IAVF_STAT("tx_bytes", current_stats.tx_bytes), + IAVF_STAT("tx_unicast", current_stats.tx_unicast), + IAVF_STAT("tx_multicast", current_stats.tx_multicast), + IAVF_STAT("tx_broadcast", current_stats.tx_broadcast), + IAVF_STAT("tx_discards", current_stats.tx_discards), + IAVF_STAT("tx_errors", current_stats.tx_errors), +}; + +#define IAVF_STATS_LEN ARRAY_SIZE(iavf_gstrings_stats) + +#define IAVF_QUEUE_STATS_LEN ARRAY_SIZE(i40e_gstrings_queue_stats) + +/* For now we have one and only one private flag and it is only defined + * when we have support for the SKIP_CPU_SYNC DMA attribute. Instead + * of leaving all this code sitting around empty we will strip it unless + * our one private flag is actually available. + */ +struct iavf_priv_flags { + char flag_string[ETH_GSTRING_LEN]; + u32 flag; + bool read_only; +}; + +#define IAVF_PRIV_FLAG(_name, _flag, _read_only) { \ + .flag_string = _name, \ + .flag = _flag, \ + .read_only = _read_only, \ +} + +static const struct iavf_priv_flags iavf_gstrings_priv_flags[] = { + IAVF_PRIV_FLAG("legacy-rx", IAVF_FLAG_LEGACY_RX, 0), +}; + +#define IAVF_PRIV_FLAGS_STR_LEN ARRAY_SIZE(iavf_gstrings_priv_flags) + +/** + * iavf_get_link_ksettings - Get Link Speed and Duplex settings + * @netdev: network interface device structure + * @cmd: ethtool command + * + * Reports speed/duplex settings. Because this is a VF, we don't know what + * kind of link we really have, so we fake it. + **/ +static int iavf_get_link_ksettings(struct net_device *netdev, + struct ethtool_link_ksettings *cmd) +{ + struct iavf_adapter *adapter = netdev_priv(netdev); + + ethtool_link_ksettings_zero_link_mode(cmd, supported); + cmd->base.autoneg = AUTONEG_DISABLE; + cmd->base.port = PORT_NONE; + /* Set speed and duplex */ + switch (adapter->link_speed) { + case I40E_LINK_SPEED_40GB: + cmd->base.speed = SPEED_40000; + break; + case I40E_LINK_SPEED_25GB: +#ifdef SPEED_25000 + cmd->base.speed = SPEED_25000; +#else + netdev_info(netdev, + "Speed is 25G, display not supported by this version of ethtool.\n"); +#endif + break; + case I40E_LINK_SPEED_20GB: + cmd->base.speed = SPEED_20000; + break; + case I40E_LINK_SPEED_10GB: + cmd->base.speed = SPEED_10000; + break; + case I40E_LINK_SPEED_1GB: + cmd->base.speed = SPEED_1000; + break; + case I40E_LINK_SPEED_100MB: + cmd->base.speed = SPEED_100; + break; + default: + break; + } + cmd->base.duplex = DUPLEX_FULL; + + return 0; +} + +/** + * iavf_get_sset_count - Get length of string set + * @netdev: network interface device structure + * @sset: id of string set + * + * Reports size of various string tables. 
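+ *
+ * For ETH_SS_STATS this is IAVF_STATS_LEN plus four queue counters for
+ * every possible queue pair: packets and bytes (IAVF_QUEUE_STATS_LEN)
+ * for each of Tx and Rx, times IAVF_MAX_REQ_QUEUES.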
+ **/
+static int iavf_get_sset_count(struct net_device *netdev, int sset)
+{
+	if (sset == ETH_SS_STATS)
+		return IAVF_STATS_LEN +
+			(IAVF_QUEUE_STATS_LEN * 2 * IAVF_MAX_REQ_QUEUES);
+	else if (sset == ETH_SS_PRIV_FLAGS)
+		return IAVF_PRIV_FLAGS_STR_LEN;
+	else
+		return -EINVAL;
+}
+
+/**
+ * iavf_get_ethtool_stats - report device statistics
+ * @netdev: network interface device structure
+ * @stats: ethtool statistics structure
+ * @data: pointer to data buffer
+ *
+ * All statistics are added to the data buffer as an array of u64.
+ **/
+static void iavf_get_ethtool_stats(struct net_device *netdev,
+				   struct ethtool_stats *stats, u64 *data)
+{
+	struct iavf_adapter *adapter = netdev_priv(netdev);
+	unsigned int i;
+
+	i40e_add_ethtool_stats(&data, adapter, iavf_gstrings_stats);
+
+	rcu_read_lock();
+	for (i = 0; i < IAVF_MAX_REQ_QUEUES; i++) {
+		struct i40e_ring *ring;
+
+		/* Avoid accessing un-allocated queues */
+		ring = (i < adapter->num_active_queues ?
+			&adapter->tx_rings[i] : NULL);
+		iavf_add_queue_stats(&data, ring);
+
+		/* Avoid accessing un-allocated queues */
+		ring = (i < adapter->num_active_queues ?
+			&adapter->rx_rings[i] : NULL);
+		iavf_add_queue_stats(&data, ring);
+	}
+	rcu_read_unlock();
+}
+
+/**
+ * iavf_get_priv_flag_strings - Get private flag strings
+ * @netdev: network interface device structure
+ * @data: buffer for string data
+ *
+ * Builds the private flags string table
+ **/
+static void iavf_get_priv_flag_strings(struct net_device *netdev, u8 *data)
+{
+	unsigned int i;
+
+	for (i = 0; i < IAVF_PRIV_FLAGS_STR_LEN; i++) {
+		snprintf(data, ETH_GSTRING_LEN, "%s",
+			 iavf_gstrings_priv_flags[i].flag_string);
+		data += ETH_GSTRING_LEN;
+	}
+}
+
+/**
+ * iavf_get_stat_strings - Get stat strings
+ * @netdev: network interface device structure
+ * @data: buffer for string data
+ *
+ * Builds the statistics string table
+ **/
+static void iavf_get_stat_strings(struct net_device *netdev, u8 *data)
+{
+	unsigned int i;
+
+	i40e_add_stat_strings(&data, iavf_gstrings_stats);
+
+	/* Queues are always allocated in pairs, so we just use num_tx_queues
+	 * for both Tx and Rx queues.
+	 */
+	for (i = 0; i < netdev->num_tx_queues; i++) {
+		i40e_add_stat_strings(&data, i40e_gstrings_queue_stats,
+				      "tx", i);
+		i40e_add_stat_strings(&data, i40e_gstrings_queue_stats,
+				      "rx", i);
+	}
+}
+
+/**
+ * iavf_get_strings - Get string set
+ * @netdev: network interface device structure
+ * @sset: id of string set
+ * @data: buffer for string data
+ *
+ * Builds string tables for various string sets
+ **/
+static void iavf_get_strings(struct net_device *netdev, u32 sset, u8 *data)
+{
+	switch (sset) {
+	case ETH_SS_STATS:
+		iavf_get_stat_strings(netdev, data);
+		break;
+	case ETH_SS_PRIV_FLAGS:
+		iavf_get_priv_flag_strings(netdev, data);
+		break;
+	default:
+		break;
+	}
+}
+
+/**
+ * iavf_get_priv_flags - report device private flags
+ * @netdev: network interface device structure
+ *
+ * The string set and its count (see iavf_get_sset_count()) must be kept
+ * in sync with the flags reported here. Add new strings for each flag to
+ * the iavf_gstrings_priv_flags array.
+ *
+ * Returns a u32 bitmap of flags.
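+ *
+ * With the single "legacy-rx" flag defined at index 0, for example,
+ * bit 0 of the returned bitmap simply mirrors IAVF_FLAG_LEGACY_RX in
+ * adapter->flags.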
+ **/
+static u32 iavf_get_priv_flags(struct net_device *netdev)
+{
+	struct iavf_adapter *adapter = netdev_priv(netdev);
+	u32 i, ret_flags = 0;
+
+	for (i = 0; i < IAVF_PRIV_FLAGS_STR_LEN; i++) {
+		const struct iavf_priv_flags *priv_flags;
+
+		priv_flags = &iavf_gstrings_priv_flags[i];
+
+		if (priv_flags->flag & adapter->flags)
+			ret_flags |= BIT(i);
+	}
+
+	return ret_flags;
+}
+
+/**
+ * iavf_set_priv_flags - set private flags
+ * @netdev: network interface device structure
+ * @flags: bit flags to be set
+ **/
+static int iavf_set_priv_flags(struct net_device *netdev, u32 flags)
+{
+	struct iavf_adapter *adapter = netdev_priv(netdev);
+	u32 orig_flags, new_flags, changed_flags;
+	u32 i;
+
+	orig_flags = READ_ONCE(adapter->flags);
+	new_flags = orig_flags;
+
+	for (i = 0; i < IAVF_PRIV_FLAGS_STR_LEN; i++) {
+		const struct iavf_priv_flags *priv_flags;
+
+		priv_flags = &iavf_gstrings_priv_flags[i];
+
+		if (flags & BIT(i))
+			new_flags |= priv_flags->flag;
+		else
+			new_flags &= ~(priv_flags->flag);
+
+		if (priv_flags->read_only &&
+		    ((orig_flags ^ new_flags) & ~BIT(i)))
+			return -EOPNOTSUPP;
+	}
+
+	/* Before we finalize any flag changes, any checks which we need to
+	 * perform to determine if the new flags will be supported should go
+	 * here...
+	 */
+
+	/* Compare and exchange the new flags into place. If we failed, that
+	 * is, if cmpxchg returns anything but the old value, something else
+	 * must have modified the flags variable since we copied it. We'll
+	 * just punt with an error and log something in the message buffer.
+	 */
+	if (cmpxchg(&adapter->flags, orig_flags, new_flags) != orig_flags) {
+		dev_warn(&adapter->pdev->dev,
+			 "Unable to update adapter->flags as it was modified by another thread...\n");
+		return -EAGAIN;
+	}
+
+	changed_flags = orig_flags ^ new_flags;
+
+	/* Process any additional changes needed as a result of flag changes.
+	 * The changed_flags value reflects the list of bits that were changed
+	 * in the code above.
+	 */
+
+	/* issue a reset to force legacy-rx change to take effect */
+	if (changed_flags & IAVF_FLAG_LEGACY_RX) {
+		if (netif_running(netdev)) {
+			adapter->flags |= IAVF_FLAG_RESET_NEEDED;
+			schedule_work(&adapter->reset_task);
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * iavf_get_msglevel - Get debug message level
+ * @netdev: network interface device structure
+ *
+ * Returns current debug message level.
+ **/
+static u32 iavf_get_msglevel(struct net_device *netdev)
+{
+	struct iavf_adapter *adapter = netdev_priv(netdev);
+
+	return adapter->msg_enable;
+}
+
+/**
+ * iavf_set_msglevel - Set debug message level
+ * @netdev: network interface device structure
+ * @data: message level
+ *
+ * Set current debug message level. Higher values cause the driver to
+ * be noisier.
+ **/
+static void iavf_set_msglevel(struct net_device *netdev, u32 data)
+{
+	struct iavf_adapter *adapter = netdev_priv(netdev);
+
+	if (I40E_DEBUG_USER & data)
+		adapter->hw.debug_mask = data;
+	adapter->msg_enable = data;
+}
+
+/**
+ * iavf_get_drvinfo - Get driver info
+ * @netdev: network interface device structure
+ * @drvinfo: ethtool driver info structure
+ *
+ * Returns information about the driver and device for display to the user.
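+ *
+ * These are the fields that `ethtool -i <iface>` prints: the driver
+ * name and version, "N/A" for the firmware version (a VF has no direct
+ * access to device firmware) and the PCI bus address.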
+ **/ +static void iavf_get_drvinfo(struct net_device *netdev, + struct ethtool_drvinfo *drvinfo) +{ + struct iavf_adapter *adapter = netdev_priv(netdev); + + strlcpy(drvinfo->driver, iavf_driver_name, 32); + strlcpy(drvinfo->version, iavf_driver_version, 32); + strlcpy(drvinfo->fw_version, "N/A", 4); + strlcpy(drvinfo->bus_info, pci_name(adapter->pdev), 32); + drvinfo->n_priv_flags = IAVF_PRIV_FLAGS_STR_LEN; +} + +/** + * iavf_get_ringparam - Get ring parameters + * @netdev: network interface device structure + * @ring: ethtool ringparam structure + * + * Returns current ring parameters. TX and RX rings are reported separately, + * but the number of rings is not reported. + **/ +static void iavf_get_ringparam(struct net_device *netdev, + struct ethtool_ringparam *ring) +{ + struct iavf_adapter *adapter = netdev_priv(netdev); + + ring->rx_max_pending = IAVF_MAX_RXD; + ring->tx_max_pending = IAVF_MAX_TXD; + ring->rx_pending = adapter->rx_desc_count; + ring->tx_pending = adapter->tx_desc_count; +} + +/** + * iavf_set_ringparam - Set ring parameters + * @netdev: network interface device structure + * @ring: ethtool ringparam structure + * + * Sets ring parameters. TX and RX rings are controlled separately, but the + * number of rings is not specified, so all rings get the same settings. + **/ +static int iavf_set_ringparam(struct net_device *netdev, + struct ethtool_ringparam *ring) +{ + struct iavf_adapter *adapter = netdev_priv(netdev); + u32 new_rx_count, new_tx_count; + + if ((ring->rx_mini_pending) || (ring->rx_jumbo_pending)) + return -EINVAL; + + new_tx_count = clamp_t(u32, ring->tx_pending, + IAVF_MIN_TXD, + IAVF_MAX_TXD); + new_tx_count = ALIGN(new_tx_count, IAVF_REQ_DESCRIPTOR_MULTIPLE); + + new_rx_count = clamp_t(u32, ring->rx_pending, + IAVF_MIN_RXD, + IAVF_MAX_RXD); + new_rx_count = ALIGN(new_rx_count, IAVF_REQ_DESCRIPTOR_MULTIPLE); + + /* if nothing to do return success */ + if ((new_tx_count == adapter->tx_desc_count) && + (new_rx_count == adapter->rx_desc_count)) + return 0; + + adapter->tx_desc_count = new_tx_count; + adapter->rx_desc_count = new_rx_count; + + if (netif_running(netdev)) { + adapter->flags |= IAVF_FLAG_RESET_NEEDED; + schedule_work(&adapter->reset_task); + } + + return 0; +} + +/** + * __iavf_get_coalesce - get per-queue coalesce settings + * @netdev: the netdev to check + * @ec: ethtool coalesce data structure + * @queue: which queue to pick + * + * Gets the per-queue settings for coalescence. Specifically Rx and Tx usecs + * are per queue. If queue is <0 then we default to queue 0 as the + * representative value. + **/ +static int __iavf_get_coalesce(struct net_device *netdev, + struct ethtool_coalesce *ec, int queue) +{ + struct iavf_adapter *adapter = netdev_priv(netdev); + struct i40e_vsi *vsi = &adapter->vsi; + struct i40e_ring *rx_ring, *tx_ring; + + ec->tx_max_coalesced_frames = vsi->work_limit; + ec->rx_max_coalesced_frames = vsi->work_limit; + + /* Rx and Tx usecs per queue value. If user doesn't specify the + * queue, return queue 0's value to represent. 
+ */ + if (queue < 0) + queue = 0; + else if (queue >= adapter->num_active_queues) + return -EINVAL; + + rx_ring = &adapter->rx_rings[queue]; + tx_ring = &adapter->tx_rings[queue]; + + if (ITR_IS_DYNAMIC(rx_ring->itr_setting)) + ec->use_adaptive_rx_coalesce = 1; + + if (ITR_IS_DYNAMIC(tx_ring->itr_setting)) + ec->use_adaptive_tx_coalesce = 1; + + ec->rx_coalesce_usecs = rx_ring->itr_setting & ~I40E_ITR_DYNAMIC; + ec->tx_coalesce_usecs = tx_ring->itr_setting & ~I40E_ITR_DYNAMIC; + + return 0; +} + +/** + * iavf_get_coalesce - Get interrupt coalescing settings + * @netdev: network interface device structure + * @ec: ethtool coalesce structure + * + * Returns current coalescing settings. This is referred to elsewhere in the + * driver as Interrupt Throttle Rate, as this is how the hardware describes + * this functionality. Note that if per-queue settings have been modified this + * only represents the settings of queue 0. + **/ +static int iavf_get_coalesce(struct net_device *netdev, + struct ethtool_coalesce *ec) +{ + return __iavf_get_coalesce(netdev, ec, -1); +} + +/** + * iavf_get_per_queue_coalesce - get coalesce values for specific queue + * @netdev: netdev to read + * @ec: coalesce settings from ethtool + * @queue: the queue to read + * + * Read specific queue's coalesce settings. + **/ +static int iavf_get_per_queue_coalesce(struct net_device *netdev, u32 queue, + struct ethtool_coalesce *ec) +{ + return __iavf_get_coalesce(netdev, ec, queue); +} + +/** + * iavf_set_itr_per_queue - set ITR values for specific queue + * @adapter: the VF adapter struct to set values for + * @ec: coalesce settings from ethtool + * @queue: the queue to modify + * + * Change the ITR settings for a specific queue. + **/ +static void iavf_set_itr_per_queue(struct iavf_adapter *adapter, + struct ethtool_coalesce *ec, int queue) +{ + struct i40e_ring *rx_ring = &adapter->rx_rings[queue]; + struct i40e_ring *tx_ring = &adapter->tx_rings[queue]; + struct i40e_q_vector *q_vector; + + rx_ring->itr_setting = ITR_REG_ALIGN(ec->rx_coalesce_usecs); + tx_ring->itr_setting = ITR_REG_ALIGN(ec->tx_coalesce_usecs); + + rx_ring->itr_setting |= I40E_ITR_DYNAMIC; + if (!ec->use_adaptive_rx_coalesce) + rx_ring->itr_setting ^= I40E_ITR_DYNAMIC; + + tx_ring->itr_setting |= I40E_ITR_DYNAMIC; + if (!ec->use_adaptive_tx_coalesce) + tx_ring->itr_setting ^= I40E_ITR_DYNAMIC; + + q_vector = rx_ring->q_vector; + q_vector->rx.target_itr = ITR_TO_REG(rx_ring->itr_setting); + + q_vector = tx_ring->q_vector; + q_vector->tx.target_itr = ITR_TO_REG(tx_ring->itr_setting); + + /* The interrupt handler itself will take care of programming + * the Tx and Rx ITR values based on the values we have entered + * into the q_vector, no need to write the values now. + */ +} + +/** + * __iavf_set_coalesce - set coalesce settings for particular queue + * @netdev: the netdev to change + * @ec: ethtool coalesce settings + * @queue: the queue to change + * + * Sets the coalesce settings for a particular queue. 
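+ *
+ * Note the encoding applied by iavf_set_itr_per_queue() above: the
+ * microsecond value is aligned for the ITR register with
+ * ITR_REG_ALIGN() and the I40E_ITR_DYNAMIC bit is left set only when
+ * adaptive coalescing was requested for that direction.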
+ **/
+static int __iavf_set_coalesce(struct net_device *netdev,
+			       struct ethtool_coalesce *ec, int queue)
+{
+	struct iavf_adapter *adapter = netdev_priv(netdev);
+	struct i40e_vsi *vsi = &adapter->vsi;
+	int i;
+
+	if (ec->tx_max_coalesced_frames_irq || ec->rx_max_coalesced_frames_irq)
+		vsi->work_limit = ec->tx_max_coalesced_frames_irq;
+
+	/* Validate each direction independently, so that rx-usecs=0 (which
+	 * only requests an adaptive-rx disable) cannot short-circuit the
+	 * tx-usecs range check.
+	 */
+	if (ec->rx_coalesce_usecs == 0) {
+		if (ec->use_adaptive_rx_coalesce)
+			netif_info(adapter, drv, netdev, "rx-usecs=0, need to disable adaptive-rx for a complete disable\n");
+	} else if ((ec->rx_coalesce_usecs < I40E_MIN_ITR) ||
+		   (ec->rx_coalesce_usecs > I40E_MAX_ITR)) {
+		netif_info(adapter, drv, netdev, "Invalid value, rx-usecs range is 0-8160\n");
+		return -EINVAL;
+	}
+
+	if (ec->tx_coalesce_usecs == 0) {
+		if (ec->use_adaptive_tx_coalesce)
+			netif_info(adapter, drv, netdev, "tx-usecs=0, need to disable adaptive-tx for a complete disable\n");
+	} else if ((ec->tx_coalesce_usecs < I40E_MIN_ITR) ||
+		   (ec->tx_coalesce_usecs > I40E_MAX_ITR)) {
+		netif_info(adapter, drv, netdev, "Invalid value, tx-usecs range is 0-8160\n");
+		return -EINVAL;
+	}
+
+	/* Rx and Tx usecs have per-queue values. If the user doesn't specify
+	 * a queue, apply to all queues.
+	 */
+	if (queue < 0) {
+		for (i = 0; i < adapter->num_active_queues; i++)
+			iavf_set_itr_per_queue(adapter, ec, i);
+	} else if (queue < adapter->num_active_queues) {
+		iavf_set_itr_per_queue(adapter, ec, queue);
+	} else {
+		netif_info(adapter, drv, netdev, "Invalid queue value, queue range is 0 - %d\n",
+			   adapter->num_active_queues - 1);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/**
+ * iavf_set_coalesce - Set interrupt coalescing settings
+ * @netdev: network interface device structure
+ * @ec: ethtool coalesce structure
+ *
+ * Change current coalescing settings for every queue.
+ **/
+static int iavf_set_coalesce(struct net_device *netdev,
+			     struct ethtool_coalesce *ec)
+{
+	return __iavf_set_coalesce(netdev, ec, -1);
+}
+
+/**
+ * iavf_set_per_queue_coalesce - set specific queue's coalesce settings
+ * @netdev: the netdev to change
+ * @ec: ethtool's coalesce settings
+ * @queue: the queue to modify
+ *
+ * Modifies a specific queue's coalesce settings.
+ */
+static int iavf_set_per_queue_coalesce(struct net_device *netdev, u32 queue,
+				       struct ethtool_coalesce *ec)
+{
+	return __iavf_set_coalesce(netdev, ec, queue);
+}
+
+/**
+ * iavf_get_rxnfc - command to get RX flow classification rules
+ * @netdev: network interface device structure
+ * @cmd: ethtool rxnfc command
+ * @rule_locs: pointer to store rule locations
+ *
+ * Returns Success if the command is supported.
+ **/
+static int iavf_get_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd,
+			  u32 *rule_locs)
+{
+	struct iavf_adapter *adapter = netdev_priv(netdev);
+	int ret = -EOPNOTSUPP;
+
+	switch (cmd->cmd) {
+	case ETHTOOL_GRXRINGS:
+		cmd->data = adapter->num_active_queues;
+		ret = 0;
+		break;
+	case ETHTOOL_GRXFH:
+		netdev_info(netdev,
+			    "RSS hash info is not available to vf, use pf.\n");
+		break;
+	default:
+		break;
+	}
+
+	return ret;
+}
+
+/**
+ * iavf_get_channels: get the number of channels supported by the device
+ * @netdev: network interface device structure
+ * @ch: channel information structure
+ *
+ * For the purposes of our device, we only use combined channels, i.e. a tx/rx
+ * queue pair. Report one extra channel to match our "other" MSI-X vector.
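+ *
+ * For example, an adapter running four queue pairs reports
+ * combined_count = 4 and other_count = NONQ_VECS for the admin queue
+ * vector, with max_combined capped at IAVF_MAX_REQ_QUEUES.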
+ **/
+static void iavf_get_channels(struct net_device *netdev,
+			      struct ethtool_channels *ch)
+{
+	struct iavf_adapter *adapter = netdev_priv(netdev);
+
+	/* Report maximum channels */
+	ch->max_combined = IAVF_MAX_REQ_QUEUES;
+
+	ch->max_other = NONQ_VECS;
+	ch->other_count = NONQ_VECS;
+
+	ch->combined_count = adapter->num_active_queues;
+}
+
+/**
+ * iavf_set_channels: set the new channel count
+ * @netdev: network interface device structure
+ * @ch: channel information structure
+ *
+ * Negotiate a new number of channels with the PF then do a reset. During
+ * reset we'll realloc queues and fix the RSS table. Returns 0 on success,
+ * negative on failure.
+ **/
+static int iavf_set_channels(struct net_device *netdev,
+			     struct ethtool_channels *ch)
+{
+	struct iavf_adapter *adapter = netdev_priv(netdev);
+	int num_req = ch->combined_count;
+
+	if (num_req != adapter->num_active_queues &&
+	    !(adapter->vf_res->vf_cap_flags &
+	      VIRTCHNL_VF_OFFLOAD_REQ_QUEUES)) {
+		dev_info(&adapter->pdev->dev, "PF is not capable of queue negotiation.\n");
+		return -EINVAL;
+	}
+
+	if ((adapter->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_ADQ) &&
+	    adapter->num_tc) {
+		dev_info(&adapter->pdev->dev, "Cannot set channels since ADq is enabled.\n");
+		return -EINVAL;
+	}
+
+	/* All of these should have already been checked by ethtool before this
+	 * even gets to us, but just to be sure.
+	 */
+	if (num_req <= 0 || num_req > IAVF_MAX_REQ_QUEUES)
+		return -EINVAL;
+
+	if (ch->rx_count || ch->tx_count || ch->other_count != NONQ_VECS)
+		return -EINVAL;
+
+	adapter->num_req_queues = num_req;
+	return iavf_request_queues(adapter, num_req);
+}
+
+/**
+ * iavf_get_rxfh_key_size - get the RSS hash key size
+ * @netdev: network interface device structure
+ *
+ * Returns the size of the RSS hash key.
+ **/
+static u32 iavf_get_rxfh_key_size(struct net_device *netdev)
+{
+	struct iavf_adapter *adapter = netdev_priv(netdev);
+
+	return adapter->rss_key_size;
+}
+
+/**
+ * iavf_get_rxfh_indir_size - get the rx flow hash indirection table size
+ * @netdev: network interface device structure
+ *
+ * Returns the table size.
+ **/
+static u32 iavf_get_rxfh_indir_size(struct net_device *netdev)
+{
+	struct iavf_adapter *adapter = netdev_priv(netdev);
+
+	return adapter->rss_lut_size;
+}
+
+/**
+ * iavf_get_rxfh - get the rx flow hash indirection table
+ * @netdev: network interface device structure
+ * @indir: indirection table
+ * @key: hash key
+ * @hfunc: hash function in use
+ *
+ * Returns the cached hash key and indirection table from the adapter
+ * structure. Always returns 0.
+ **/
+static int iavf_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key,
+			 u8 *hfunc)
+{
+	struct iavf_adapter *adapter = netdev_priv(netdev);
+	u16 i;
+
+	if (hfunc)
+		*hfunc = ETH_RSS_HASH_TOP;
+	if (!indir)
+		return 0;
+
+	memcpy(key, adapter->rss_key, adapter->rss_key_size);
+
+	/* Each 32-bit word of 'indir' holds one LUT entry */
+	for (i = 0; i < adapter->rss_lut_size; i++)
+		indir[i] = (u32)adapter->rss_lut[i];
+
+	return 0;
+}
+
+/**
+ * iavf_set_rxfh - set the rx flow hash indirection table
+ * @netdev: network interface device structure
+ * @indir: indirection table
+ * @key: hash key
+ * @hfunc: hash function to use
+ *
+ * Returns -EOPNOTSUPP if the hash key or hash function would be changed,
+ * otherwise programs the indirection table and returns the result of
+ * iavf_config_rss().
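+ *
+ * Each LUT entry holds the queue index that receives traffic hashing
+ * to that entry; an indirection table of 0, 1, 2, 3, 0, 1, ... would
+ * spread flows round-robin across four queues (illustrative values
+ * only).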
+ **/
+static int iavf_set_rxfh(struct net_device *netdev, const u32 *indir,
+			 const u8 *key, const u8 hfunc)
+{
+	struct iavf_adapter *adapter = netdev_priv(netdev);
+	u16 i;
+
+	/* We do not allow change in unsupported parameters */
+	if (key ||
+	    (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP))
+		return -EOPNOTSUPP;
+	if (!indir)
+		return 0;
+
+	/* Each 32-bit word of 'indir' holds one LUT entry. Key updates are
+	 * rejected above, so only the indirection table is copied here.
+	 */
+	for (i = 0; i < adapter->rss_lut_size; i++)
+		adapter->rss_lut[i] = (u8)(indir[i]);
+
+	return iavf_config_rss(adapter);
+}
+
+static const struct ethtool_ops iavf_ethtool_ops = {
+	.get_drvinfo		= iavf_get_drvinfo,
+	.get_link		= ethtool_op_get_link,
+	.get_ringparam		= iavf_get_ringparam,
+	.set_ringparam		= iavf_set_ringparam,
+	.get_strings		= iavf_get_strings,
+	.get_ethtool_stats	= iavf_get_ethtool_stats,
+	.get_sset_count		= iavf_get_sset_count,
+	.get_priv_flags		= iavf_get_priv_flags,
+	.set_priv_flags		= iavf_set_priv_flags,
+	.get_msglevel		= iavf_get_msglevel,
+	.set_msglevel		= iavf_set_msglevel,
+	.get_coalesce		= iavf_get_coalesce,
+	.set_coalesce		= iavf_set_coalesce,
+	.get_per_queue_coalesce = iavf_get_per_queue_coalesce,
+	.set_per_queue_coalesce = iavf_set_per_queue_coalesce,
+	.get_rxnfc		= iavf_get_rxnfc,
+	.get_rxfh_indir_size	= iavf_get_rxfh_indir_size,
+	.get_rxfh		= iavf_get_rxfh,
+	.set_rxfh		= iavf_set_rxfh,
+	.get_channels		= iavf_get_channels,
+	.set_channels		= iavf_set_channels,
+	.get_rxfh_key_size	= iavf_get_rxfh_key_size,
+	.get_link_ksettings	= iavf_get_link_ksettings,
+};
+
+/**
+ * iavf_set_ethtool_ops - Initialize ethtool ops struct
+ * @netdev: network interface device structure
+ *
+ * Sets ethtool ops struct in our netdev so that ethtool can call
+ * our functions.
+ **/
+void iavf_set_ethtool_ops(struct net_device *netdev)
+{
+	netdev->ethtool_ops = &iavf_ethtool_ops;
+}
diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c
new file mode 100644
index 0000000000000..260d0150ce5dc
--- /dev/null
+++ b/drivers/net/ethernet/intel/iavf/iavf_main.c
@@ -0,0 +1,4015 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
+
+#include "iavf.h"
+#include "i40e_prototype.h"
+#include "iavf_client.h"
+/* All iavf tracepoints are defined by the include below, which must
+ * be included exactly once across the whole kernel with
+ * CREATE_TRACE_POINTS defined
+ */
+#define CREATE_TRACE_POINTS
+#include "i40e_trace.h"
+
+static int iavf_setup_all_tx_resources(struct iavf_adapter *adapter);
+static int iavf_setup_all_rx_resources(struct iavf_adapter *adapter);
+static int iavf_close(struct net_device *netdev);
+
+char iavf_driver_name[] = "iavf";
+static const char iavf_driver_string[] =
+	"Intel(R) Ethernet Adaptive Virtual Function Network Driver";
+
+#define DRV_KERN "-k"
+
+#define DRV_VERSION_MAJOR 3
+#define DRV_VERSION_MINOR 2
+#define DRV_VERSION_BUILD 3
+#define DRV_VERSION __stringify(DRV_VERSION_MAJOR) "." \
+	     __stringify(DRV_VERSION_MINOR) "." \
+	     __stringify(DRV_VERSION_BUILD) \
+	     DRV_KERN
+const char iavf_driver_version[] = DRV_VERSION;
+static const char iavf_copyright[] =
+	"Copyright (c) 2013 - 2018 Intel Corporation.";
+
+/* iavf_pci_tbl - PCI Device ID Table
+ *
+ * Wildcard entries (PCI_ANY_ID) should come last
+ * Last entry must be all 0s
+ *
+ * { Vendor ID, Device ID, SubVendor ID, SubDevice ID,
+ *   Class, Class Mask, private data (not used) }
+ */
+static const struct pci_device_id iavf_pci_tbl[] = {
+	{PCI_VDEVICE(INTEL, I40E_DEV_ID_VF), 0},
+	{PCI_VDEVICE(INTEL, I40E_DEV_ID_VF_HV), 0},
+	{PCI_VDEVICE(INTEL, I40E_DEV_ID_X722_VF), 0},
+	{PCI_VDEVICE(INTEL, I40E_DEV_ID_ADAPTIVE_VF), 0},
+	/* required last entry */
+	{0, }
+};
+
+MODULE_DEVICE_TABLE(pci, iavf_pci_tbl);
+
+MODULE_ALIAS("i40evf");
+MODULE_AUTHOR("Intel Corporation, <linux.nics@intel.com>");
+MODULE_DESCRIPTION("Intel(R) XL710 X710 Virtual Function Network Driver");
+MODULE_LICENSE("GPL");
+MODULE_VERSION(DRV_VERSION);
+
+static struct workqueue_struct *iavf_wq;
+
+/**
+ * iavf_allocate_dma_mem_d - OS specific memory alloc for shared code
+ * @hw: pointer to the HW structure
+ * @mem: ptr to mem struct to fill out
+ * @size: size of memory requested
+ * @alignment: what to align the allocation to
+ **/
+iavf_status iavf_allocate_dma_mem_d(struct i40e_hw *hw,
+				    struct i40e_dma_mem *mem,
+				    u64 size, u32 alignment)
+{
+	struct iavf_adapter *adapter = (struct iavf_adapter *)hw->back;
+
+	if (!mem)
+		return I40E_ERR_PARAM;
+
+	mem->size = ALIGN(size, alignment);
+	mem->va = dma_alloc_coherent(&adapter->pdev->dev, mem->size,
+				     (dma_addr_t *)&mem->pa, GFP_KERNEL);
+	if (mem->va)
+		return 0;
+	else
+		return I40E_ERR_NO_MEMORY;
+}
+
+/**
+ * iavf_free_dma_mem_d - OS specific memory free for shared code
+ * @hw: pointer to the HW structure
+ * @mem: ptr to mem struct to free
+ **/
+iavf_status iavf_free_dma_mem_d(struct i40e_hw *hw, struct i40e_dma_mem *mem)
+{
+	struct iavf_adapter *adapter = (struct iavf_adapter *)hw->back;
+
+	if (!mem || !mem->va)
+		return I40E_ERR_PARAM;
+	dma_free_coherent(&adapter->pdev->dev, mem->size,
+			  mem->va, (dma_addr_t)mem->pa);
+	return 0;
+}
+
+/**
+ * iavf_allocate_virt_mem_d - OS specific memory alloc for shared code
+ * @hw: pointer to the HW structure
+ * @mem: ptr to mem struct to fill out
+ * @size: size of memory requested
+ **/
+iavf_status iavf_allocate_virt_mem_d(struct i40e_hw *hw,
+				     struct i40e_virt_mem *mem, u32 size)
+{
+	if (!mem)
+		return I40E_ERR_PARAM;
+
+	mem->size = size;
+	mem->va = kzalloc(size, GFP_KERNEL);
+
+	if (mem->va)
+		return 0;
+	else
+		return I40E_ERR_NO_MEMORY;
+}
+
+/**
+ * iavf_free_virt_mem_d - OS specific memory free for shared code
+ * @hw: pointer to the HW structure
+ * @mem: ptr to mem struct to free
+ **/
+iavf_status iavf_free_virt_mem_d(struct i40e_hw *hw,
+				 struct i40e_virt_mem *mem)
+{
+	if (!mem)
+		return I40E_ERR_PARAM;
+
+	/* it's ok to kfree a NULL pointer */
+	kfree(mem->va);
+
+	return 0;
+}
+
+/**
+ * iavf_debug_d - OS dependent version of debug printing
+ * @hw: pointer to the HW structure
+ * @mask: debug level mask
+ * @fmt_str: printf-type format description
+ **/
+void iavf_debug_d(void *hw, u32 mask, char *fmt_str, ...)
+{ + char buf[512]; + va_list argptr; + + if (!(mask & ((struct i40e_hw *)hw)->debug_mask)) + return; + + va_start(argptr, fmt_str); + vsnprintf(buf, sizeof(buf), fmt_str, argptr); + va_end(argptr); + + /* the debug string is already formatted with a newline */ + pr_info("%s", buf); +} + +/** + * iavf_schedule_reset - Set the flags and schedule a reset event + * @adapter: board private structure + **/ +void iavf_schedule_reset(struct iavf_adapter *adapter) +{ + if (!(adapter->flags & + (IAVF_FLAG_RESET_PENDING | IAVF_FLAG_RESET_NEEDED))) { + adapter->flags |= IAVF_FLAG_RESET_NEEDED; + schedule_work(&adapter->reset_task); + } +} + +/** + * iavf_tx_timeout - Respond to a Tx Hang + * @netdev: network interface device structure + **/ +static void iavf_tx_timeout(struct net_device *netdev) +{ + struct iavf_adapter *adapter = netdev_priv(netdev); + + adapter->tx_timeout_count++; + iavf_schedule_reset(adapter); +} + +/** + * iavf_misc_irq_disable - Mask off interrupt generation on the NIC + * @adapter: board private structure + **/ +static void iavf_misc_irq_disable(struct iavf_adapter *adapter) +{ + struct i40e_hw *hw = &adapter->hw; + + if (!adapter->msix_entries) + return; + + wr32(hw, I40E_VFINT_DYN_CTL01, 0); + + /* read flush */ + rd32(hw, I40E_VFGEN_RSTAT); + + synchronize_irq(adapter->msix_entries[0].vector); +} + +/** + * iavf_misc_irq_enable - Enable default interrupt generation settings + * @adapter: board private structure + **/ +static void iavf_misc_irq_enable(struct iavf_adapter *adapter) +{ + struct i40e_hw *hw = &adapter->hw; + + wr32(hw, I40E_VFINT_DYN_CTL01, I40E_VFINT_DYN_CTL01_INTENA_MASK | + I40E_VFINT_DYN_CTL01_ITR_INDX_MASK); + wr32(hw, I40E_VFINT_ICR0_ENA1, I40E_VFINT_ICR0_ENA1_ADMINQ_MASK); + + /* read flush */ + rd32(hw, I40E_VFGEN_RSTAT); +} + +/** + * iavf_irq_disable - Mask off interrupt generation on the NIC + * @adapter: board private structure + **/ +static void iavf_irq_disable(struct iavf_adapter *adapter) +{ + int i; + struct i40e_hw *hw = &adapter->hw; + + if (!adapter->msix_entries) + return; + + for (i = 1; i < adapter->num_msix_vectors; i++) { + wr32(hw, I40E_VFINT_DYN_CTLN1(i - 1), 0); + synchronize_irq(adapter->msix_entries[i].vector); + } + /* read flush */ + rd32(hw, I40E_VFGEN_RSTAT); +} + +/** + * iavf_irq_enable_queues - Enable interrupt for specified queues + * @adapter: board private structure + * @mask: bitmap of queues to enable + **/ +void iavf_irq_enable_queues(struct iavf_adapter *adapter, u32 mask) +{ + struct i40e_hw *hw = &adapter->hw; + int i; + + for (i = 1; i < adapter->num_msix_vectors; i++) { + if (mask & BIT(i - 1)) { + wr32(hw, I40E_VFINT_DYN_CTLN1(i - 1), + I40E_VFINT_DYN_CTLN1_INTENA_MASK | + I40E_VFINT_DYN_CTLN1_ITR_INDX_MASK); + } + } +} + +/** + * iavf_irq_enable - Enable default interrupt generation settings + * @adapter: board private structure + * @flush: boolean value whether to run rd32() + **/ +void iavf_irq_enable(struct iavf_adapter *adapter, bool flush) +{ + struct i40e_hw *hw = &adapter->hw; + + iavf_misc_irq_enable(adapter); + iavf_irq_enable_queues(adapter, ~0); + + if (flush) + rd32(hw, I40E_VFGEN_RSTAT); +} + +/** + * iavf_msix_aq - Interrupt handler for vector 0 + * @irq: interrupt number + * @data: pointer to netdev + **/ +static irqreturn_t iavf_msix_aq(int irq, void *data) +{ + struct net_device *netdev = data; + struct iavf_adapter *adapter = netdev_priv(netdev); + struct i40e_hw *hw = &adapter->hw; + + /* handle non-queue interrupts, these reads clear the registers */ + rd32(hw, I40E_VFINT_ICR01); + rd32(hw, 
I40E_VFINT_ICR0_ENA1); + + /* schedule work on the private workqueue */ + schedule_work(&adapter->adminq_task); + + return IRQ_HANDLED; +} + +/** + * iavf_msix_clean_rings - MSIX mode Interrupt Handler + * @irq: interrupt number + * @data: pointer to a q_vector + **/ +static irqreturn_t iavf_msix_clean_rings(int irq, void *data) +{ + struct i40e_q_vector *q_vector = data; + + if (!q_vector->tx.ring && !q_vector->rx.ring) + return IRQ_HANDLED; + + napi_schedule_irqoff(&q_vector->napi); + + return IRQ_HANDLED; +} + +/** + * iavf_map_vector_to_rxq - associate irqs with rx queues + * @adapter: board private structure + * @v_idx: interrupt number + * @r_idx: queue number + **/ +static void +iavf_map_vector_to_rxq(struct iavf_adapter *adapter, int v_idx, int r_idx) +{ + struct i40e_q_vector *q_vector = &adapter->q_vectors[v_idx]; + struct i40e_ring *rx_ring = &adapter->rx_rings[r_idx]; + struct i40e_hw *hw = &adapter->hw; + + rx_ring->q_vector = q_vector; + rx_ring->next = q_vector->rx.ring; + rx_ring->vsi = &adapter->vsi; + q_vector->rx.ring = rx_ring; + q_vector->rx.count++; + q_vector->rx.next_update = jiffies + 1; + q_vector->rx.target_itr = ITR_TO_REG(rx_ring->itr_setting); + q_vector->ring_mask |= BIT(r_idx); + wr32(hw, I40E_VFINT_ITRN1(I40E_RX_ITR, q_vector->reg_idx), + q_vector->rx.current_itr); + q_vector->rx.current_itr = q_vector->rx.target_itr; +} + +/** + * iavf_map_vector_to_txq - associate irqs with tx queues + * @adapter: board private structure + * @v_idx: interrupt number + * @t_idx: queue number + **/ +static void +iavf_map_vector_to_txq(struct iavf_adapter *adapter, int v_idx, int t_idx) +{ + struct i40e_q_vector *q_vector = &adapter->q_vectors[v_idx]; + struct i40e_ring *tx_ring = &adapter->tx_rings[t_idx]; + struct i40e_hw *hw = &adapter->hw; + + tx_ring->q_vector = q_vector; + tx_ring->next = q_vector->tx.ring; + tx_ring->vsi = &adapter->vsi; + q_vector->tx.ring = tx_ring; + q_vector->tx.count++; + q_vector->tx.next_update = jiffies + 1; + q_vector->tx.target_itr = ITR_TO_REG(tx_ring->itr_setting); + q_vector->num_ringpairs++; + wr32(hw, I40E_VFINT_ITRN1(I40E_TX_ITR, q_vector->reg_idx), + q_vector->tx.target_itr); + q_vector->tx.current_itr = q_vector->tx.target_itr; +} + +/** + * iavf_map_rings_to_vectors - Maps descriptor rings to vectors + * @adapter: board private structure to initialize + * + * This function maps descriptor rings to the queue-specific vectors + * we were allotted through the MSI-X enabling code. Ideally, we'd have + * one vector per ring/queue, but on a constrained vector budget, we + * group the rings as "efficiently" as possible. You would add new + * mapping configurations in here. + **/ +static void iavf_map_rings_to_vectors(struct iavf_adapter *adapter) +{ + int rings_remaining = adapter->num_active_queues; + int ridx = 0, vidx = 0; + int q_vectors; + + q_vectors = adapter->num_msix_vectors - NONQ_VECS; + + for (; ridx < rings_remaining; ridx++) { + iavf_map_vector_to_rxq(adapter, vidx, ridx); + iavf_map_vector_to_txq(adapter, vidx, ridx); + + /* In the case where we have more queues than vectors, continue + * round-robin on vectors until all queues are mapped. + */ + if (++vidx >= q_vectors) + vidx = 0; + } + + adapter->aq_required |= IAVF_FLAG_AQ_MAP_VECTORS; +} + +#ifdef CONFIG_NET_POLL_CONTROLLER +/** + * iavf_netpoll - A Polling 'interrupt' handler + * @netdev: network interface device structure + * + * This is used by netconsole to send skbs without having to re-enable + * interrupts. 
It's not called while the normal interrupt routine is executing. + **/ +static void iavf_netpoll(struct net_device *netdev) +{ + struct iavf_adapter *adapter = netdev_priv(netdev); + int q_vectors = adapter->num_msix_vectors - NONQ_VECS; + int i; + + /* if interface is down do nothing */ + if (test_bit(__I40E_VSI_DOWN, adapter->vsi.state)) + return; + + for (i = 0; i < q_vectors; i++) + iavf_msix_clean_rings(0, &adapter->q_vectors[i]); +} + +#endif +/** + * iavf_irq_affinity_notify - Callback for affinity changes + * @notify: context as to what irq was changed + * @mask: the new affinity mask + * + * This is a callback function used by the irq_set_affinity_notifier function + * so that we may register to receive changes to the irq affinity masks. + **/ +static void iavf_irq_affinity_notify(struct irq_affinity_notify *notify, + const cpumask_t *mask) +{ + struct i40e_q_vector *q_vector = + container_of(notify, struct i40e_q_vector, affinity_notify); + + cpumask_copy(&q_vector->affinity_mask, mask); +} + +/** + * iavf_irq_affinity_release - Callback for affinity notifier release + * @ref: internal core kernel usage + * + * This is a callback function used by the irq_set_affinity_notifier function + * to inform the current notification subscriber that they will no longer + * receive notifications. + **/ +static void iavf_irq_affinity_release(struct kref *ref) {} + +/** + * iavf_request_traffic_irqs - Initialize MSI-X interrupts + * @adapter: board private structure + * @basename: device basename + * + * Allocates MSI-X vectors for tx and rx handling, and requests + * interrupts from the kernel. + **/ +static int +iavf_request_traffic_irqs(struct iavf_adapter *adapter, char *basename) +{ + unsigned int vector, q_vectors; + unsigned int rx_int_idx = 0, tx_int_idx = 0; + int irq_num, err; + int cpu; + + iavf_irq_disable(adapter); + /* Decrement for Other and TCP Timer vectors */ + q_vectors = adapter->num_msix_vectors - NONQ_VECS; + + for (vector = 0; vector < q_vectors; vector++) { + struct i40e_q_vector *q_vector = &adapter->q_vectors[vector]; + + irq_num = adapter->msix_entries[vector + NONQ_VECS].vector; + + if (q_vector->tx.ring && q_vector->rx.ring) { + snprintf(q_vector->name, sizeof(q_vector->name), + "iavf-%s-TxRx-%d", basename, rx_int_idx++); + tx_int_idx++; + } else if (q_vector->rx.ring) { + snprintf(q_vector->name, sizeof(q_vector->name), + "iavf-%s-rx-%d", basename, rx_int_idx++); + } else if (q_vector->tx.ring) { + snprintf(q_vector->name, sizeof(q_vector->name), + "iavf-%s-tx-%d", basename, tx_int_idx++); + } else { + /* skip this unused q_vector */ + continue; + } + err = request_irq(irq_num, + iavf_msix_clean_rings, + 0, + q_vector->name, + q_vector); + if (err) { + dev_info(&adapter->pdev->dev, + "Request_irq failed, error: %d\n", err); + goto free_queue_irqs; + } + /* register for affinity change notifications */ + q_vector->affinity_notify.notify = iavf_irq_affinity_notify; + q_vector->affinity_notify.release = + iavf_irq_affinity_release; + irq_set_affinity_notifier(irq_num, &q_vector->affinity_notify); + /* Spread the IRQ affinity hints across online CPUs. Note that + * get_cpu_mask returns a mask with a permanent lifetime so + * it's safe to use as a hint for irq_set_affinity_hint. 
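+		 * cpumask_local_spread() maps the vector index to an
+		 * online CPU, preferring NUMA-local CPUs first; the -1
+		 * below expresses no NUMA node preference.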
+ */ + cpu = cpumask_local_spread(q_vector->v_idx, -1); + irq_set_affinity_hint(irq_num, get_cpu_mask(cpu)); + } + + return 0; + +free_queue_irqs: + while (vector) { + vector--; + irq_num = adapter->msix_entries[vector + NONQ_VECS].vector; + irq_set_affinity_notifier(irq_num, NULL); + irq_set_affinity_hint(irq_num, NULL); + free_irq(irq_num, &adapter->q_vectors[vector]); + } + return err; +} + +/** + * iavf_request_misc_irq - Initialize MSI-X interrupts + * @adapter: board private structure + * + * Allocates MSI-X vector 0 and requests interrupts from the kernel. This + * vector is only for the admin queue, and stays active even when the netdev + * is closed. + **/ +static int iavf_request_misc_irq(struct iavf_adapter *adapter) +{ + struct net_device *netdev = adapter->netdev; + int err; + + snprintf(adapter->misc_vector_name, + sizeof(adapter->misc_vector_name) - 1, "iavf-%s:mbx", + dev_name(&adapter->pdev->dev)); + err = request_irq(adapter->msix_entries[0].vector, + &iavf_msix_aq, 0, + adapter->misc_vector_name, netdev); + if (err) { + dev_err(&adapter->pdev->dev, + "request_irq for %s failed: %d\n", + adapter->misc_vector_name, err); + free_irq(adapter->msix_entries[0].vector, netdev); + } + return err; +} + +/** + * iavf_free_traffic_irqs - Free MSI-X interrupts + * @adapter: board private structure + * + * Frees all MSI-X vectors other than 0. + **/ +static void iavf_free_traffic_irqs(struct iavf_adapter *adapter) +{ + int vector, irq_num, q_vectors; + + if (!adapter->msix_entries) + return; + + q_vectors = adapter->num_msix_vectors - NONQ_VECS; + + for (vector = 0; vector < q_vectors; vector++) { + irq_num = adapter->msix_entries[vector + NONQ_VECS].vector; + irq_set_affinity_notifier(irq_num, NULL); + irq_set_affinity_hint(irq_num, NULL); + free_irq(irq_num, &adapter->q_vectors[vector]); + } +} + +/** + * iavf_free_misc_irq - Free MSI-X miscellaneous vector + * @adapter: board private structure + * + * Frees MSI-X vector 0. + **/ +static void iavf_free_misc_irq(struct iavf_adapter *adapter) +{ + struct net_device *netdev = adapter->netdev; + + if (!adapter->msix_entries) + return; + + free_irq(adapter->msix_entries[0].vector, netdev); +} + +/** + * iavf_configure_tx - Configure Transmit Unit after Reset + * @adapter: board private structure + * + * Configure the Tx unit of the MAC after a reset. + **/ +static void iavf_configure_tx(struct iavf_adapter *adapter) +{ + struct i40e_hw *hw = &adapter->hw; + int i; + + for (i = 0; i < adapter->num_active_queues; i++) + adapter->tx_rings[i].tail = hw->hw_addr + I40E_QTX_TAIL1(i); +} + +/** + * iavf_configure_rx - Configure Receive Unit after Reset + * @adapter: board private structure + * + * Configure the Rx unit of the MAC after a reset. + **/ +static void iavf_configure_rx(struct iavf_adapter *adapter) +{ + unsigned int rx_buf_len = I40E_RXBUFFER_2048; + struct i40e_hw *hw = &adapter->hw; + int i; + + /* Legacy Rx will always default to a 2048 buffer size. */ +#if (PAGE_SIZE < 8192) + if (!(adapter->flags & IAVF_FLAG_LEGACY_RX)) { + struct net_device *netdev = adapter->netdev; + + /* For jumbo frames on systems with 4K pages we have to use + * an order 1 page, so we might as well increase the size + * of our Rx buffer to make better use of the available space + */ + rx_buf_len = I40E_RXBUFFER_3072; + + /* We use a 1536 buffer size for configurations with + * standard Ethernet mtu. On x86 this gives us enough room + * for shared info and 192 bytes of padding. 
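+		 * (Roughly: 1536 bytes of packet buffer plus ~320 bytes
+		 * of skb_shared_info plus the 192 bytes of padding fill
+		 * the 2048-byte slot.)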
+ */ + if (!I40E_2K_TOO_SMALL_WITH_PADDING && + (netdev->mtu <= ETH_DATA_LEN)) + rx_buf_len = I40E_RXBUFFER_1536 - NET_IP_ALIGN; + } +#endif + + for (i = 0; i < adapter->num_active_queues; i++) { + adapter->rx_rings[i].tail = hw->hw_addr + I40E_QRX_TAIL1(i); + adapter->rx_rings[i].rx_buf_len = rx_buf_len; + + if (adapter->flags & IAVF_FLAG_LEGACY_RX) + clear_ring_build_skb_enabled(&adapter->rx_rings[i]); + else + set_ring_build_skb_enabled(&adapter->rx_rings[i]); + } +} + +/** + * iavf_find_vlan - Search filter list for specific vlan filter + * @adapter: board private structure + * @vlan: vlan tag + * + * Returns ptr to the filter object or NULL. Must be called while holding the + * mac_vlan_list_lock. + **/ +static struct +iavf_vlan_filter *iavf_find_vlan(struct iavf_adapter *adapter, u16 vlan) +{ + struct iavf_vlan_filter *f; + + list_for_each_entry(f, &adapter->vlan_filter_list, list) { + if (vlan == f->vlan) + return f; + } + return NULL; +} + +/** + * iavf_add_vlan - Add a vlan filter to the list + * @adapter: board private structure + * @vlan: VLAN tag + * + * Returns ptr to the filter object or NULL when no memory available. + **/ +static struct +iavf_vlan_filter *iavf_add_vlan(struct iavf_adapter *adapter, u16 vlan) +{ + struct iavf_vlan_filter *f = NULL; + + spin_lock_bh(&adapter->mac_vlan_list_lock); + + f = iavf_find_vlan(adapter, vlan); + if (!f) { + f = kzalloc(sizeof(*f), GFP_KERNEL); + if (!f) + goto clearout; + + f->vlan = vlan; + + INIT_LIST_HEAD(&f->list); + list_add(&f->list, &adapter->vlan_filter_list); + f->add = true; + adapter->aq_required |= IAVF_FLAG_AQ_ADD_VLAN_FILTER; + } + +clearout: + spin_unlock_bh(&adapter->mac_vlan_list_lock); + return f; +} + +/** + * iavf_del_vlan - Remove a vlan filter from the list + * @adapter: board private structure + * @vlan: VLAN tag + **/ +static void iavf_del_vlan(struct iavf_adapter *adapter, u16 vlan) +{ + struct iavf_vlan_filter *f; + + spin_lock_bh(&adapter->mac_vlan_list_lock); + + f = iavf_find_vlan(adapter, vlan); + if (f) { + f->remove = true; + adapter->aq_required |= IAVF_FLAG_AQ_DEL_VLAN_FILTER; + } + + spin_unlock_bh(&adapter->mac_vlan_list_lock); +} + +/** + * iavf_vlan_rx_add_vid - Add a VLAN filter to a device + * @netdev: network device struct + * @proto: unused protocol data + * @vid: VLAN tag + **/ +static int iavf_vlan_rx_add_vid(struct net_device *netdev, + __always_unused __be16 proto, u16 vid) +{ + struct iavf_adapter *adapter = netdev_priv(netdev); + + if (!VLAN_ALLOWED(adapter)) + return -EIO; + if (iavf_add_vlan(adapter, vid) == NULL) + return -ENOMEM; + return 0; +} + +/** + * iavf_vlan_rx_kill_vid - Remove a VLAN filter from a device + * @netdev: network device struct + * @proto: unused protocol data + * @vid: VLAN tag + **/ +static int iavf_vlan_rx_kill_vid(struct net_device *netdev, + __always_unused __be16 proto, u16 vid) +{ + struct iavf_adapter *adapter = netdev_priv(netdev); + + if (VLAN_ALLOWED(adapter)) { + iavf_del_vlan(adapter, vid); + return 0; + } + return -EIO; +} + +/** + * iavf_find_filter - Search filter list for specific mac filter + * @adapter: board private structure + * @macaddr: the MAC address + * + * Returns ptr to the filter object or NULL. Must be called while holding the + * mac_vlan_list_lock. 
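+ *
+ * Callers take the lock around the lookup, as iavf_set_mac() below
+ * does:
+ *
+ *	spin_lock_bh(&adapter->mac_vlan_list_lock);
+ *	f = iavf_find_filter(adapter, hw->mac.addr);
+ *	...
+ *	spin_unlock_bh(&adapter->mac_vlan_list_lock);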
+ **/
+static struct
+iavf_mac_filter *iavf_find_filter(struct iavf_adapter *adapter,
+				  const u8 *macaddr)
+{
+	struct iavf_mac_filter *f;
+
+	if (!macaddr)
+		return NULL;
+
+	list_for_each_entry(f, &adapter->mac_filter_list, list) {
+		if (ether_addr_equal(macaddr, f->macaddr))
+			return f;
+	}
+	return NULL;
+}
+
+/**
+ * iavf_add_filter - Add a mac filter to the filter list
+ * @adapter: board private structure
+ * @macaddr: the MAC address
+ *
+ * Returns ptr to the filter object or NULL when no memory available.
+ **/
+static struct
+iavf_mac_filter *iavf_add_filter(struct iavf_adapter *adapter,
+				 const u8 *macaddr)
+{
+	struct iavf_mac_filter *f;
+
+	if (!macaddr)
+		return NULL;
+
+	f = iavf_find_filter(adapter, macaddr);
+	if (!f) {
+		f = kzalloc(sizeof(*f), GFP_ATOMIC);
+		if (!f)
+			return f;
+
+		ether_addr_copy(f->macaddr, macaddr);
+
+		list_add_tail(&f->list, &adapter->mac_filter_list);
+		f->add = true;
+		adapter->aq_required |= IAVF_FLAG_AQ_ADD_MAC_FILTER;
+	} else {
+		f->remove = false;
+	}
+
+	return f;
+}
+
+/**
+ * iavf_set_mac - NDO callback to set port mac address
+ * @netdev: network interface device structure
+ * @p: pointer to an address structure
+ *
+ * Returns 0 on success, negative on failure
+ **/
+static int iavf_set_mac(struct net_device *netdev, void *p)
+{
+	struct iavf_adapter *adapter = netdev_priv(netdev);
+	struct i40e_hw *hw = &adapter->hw;
+	struct iavf_mac_filter *f;
+	struct sockaddr *addr = p;
+
+	if (!is_valid_ether_addr(addr->sa_data))
+		return -EADDRNOTAVAIL;
+
+	if (ether_addr_equal(netdev->dev_addr, addr->sa_data))
+		return 0;
+
+	if (adapter->flags & IAVF_FLAG_ADDR_SET_BY_PF)
+		return -EPERM;
+
+	spin_lock_bh(&adapter->mac_vlan_list_lock);
+
+	f = iavf_find_filter(adapter, hw->mac.addr);
+	if (f) {
+		f->remove = true;
+		adapter->aq_required |= IAVF_FLAG_AQ_DEL_MAC_FILTER;
+	}
+
+	f = iavf_add_filter(adapter, addr->sa_data);
+
+	spin_unlock_bh(&adapter->mac_vlan_list_lock);
+
+	if (f) {
+		ether_addr_copy(hw->mac.addr, addr->sa_data);
+		ether_addr_copy(netdev->dev_addr, adapter->hw.mac.addr);
+	}
+
+	return (f == NULL) ? -ENOMEM : 0;
+}
+
+/**
+ * iavf_addr_sync - Callback for dev_(mc|uc)_sync to add address
+ * @netdev: the netdevice
+ * @addr: address to add
+ *
+ * Called by __dev_(mc|uc)_sync when an address needs to be added. We call
+ * __dev_(uc|mc)_sync from .set_rx_mode and guarantee to hold the hash lock.
+ */
+static int iavf_addr_sync(struct net_device *netdev, const u8 *addr)
+{
+	struct iavf_adapter *adapter = netdev_priv(netdev);
+
+	if (iavf_add_filter(adapter, addr))
+		return 0;
+	else
+		return -ENOMEM;
+}
+
+/**
+ * iavf_addr_unsync - Callback for dev_(mc|uc)_sync to remove address
+ * @netdev: the netdevice
+ * @addr: address to remove
+ *
+ * Called by __dev_(mc|uc)_sync when an address needs to be removed. We call
+ * __dev_(uc|mc)_sync from .set_rx_mode and guarantee to hold the hash lock.
+ */
+static int iavf_addr_unsync(struct net_device *netdev, const u8 *addr)
+{
+	struct iavf_adapter *adapter = netdev_priv(netdev);
+	struct iavf_mac_filter *f;
+
+	/* Under some circumstances, we might receive a request to delete
+	 * our own device address from our uc list. Because we store the
+	 * device address in the VSI's MAC/VLAN filter list, we need to ignore
+	 * such requests and not delete our device address from this list.
+	 */
+	if (ether_addr_equal(addr, netdev->dev_addr))
+		return 0;
+
+	f = iavf_find_filter(adapter, addr);
+	if (f) {
+		f->remove = true;
+		adapter->aq_required |= IAVF_FLAG_AQ_DEL_MAC_FILTER;
+	}
+	return 0;
+}
+
+/**
+ * iavf_set_rx_mode - NDO callback to set the netdev filters
+ * @netdev: network interface device structure
+ **/
+static void iavf_set_rx_mode(struct net_device *netdev)
+{
+	struct iavf_adapter *adapter = netdev_priv(netdev);
+
+	spin_lock_bh(&adapter->mac_vlan_list_lock);
+	__dev_uc_sync(netdev, iavf_addr_sync, iavf_addr_unsync);
+	__dev_mc_sync(netdev, iavf_addr_sync, iavf_addr_unsync);
+	spin_unlock_bh(&adapter->mac_vlan_list_lock);
+
+	if (netdev->flags & IFF_PROMISC &&
+	    !(adapter->flags & IAVF_FLAG_PROMISC_ON))
+		adapter->aq_required |= IAVF_FLAG_AQ_REQUEST_PROMISC;
+	else if (!(netdev->flags & IFF_PROMISC) &&
+		 adapter->flags & IAVF_FLAG_PROMISC_ON)
+		adapter->aq_required |= IAVF_FLAG_AQ_RELEASE_PROMISC;
+
+	if (netdev->flags & IFF_ALLMULTI &&
+	    !(adapter->flags & IAVF_FLAG_ALLMULTI_ON))
+		adapter->aq_required |= IAVF_FLAG_AQ_REQUEST_ALLMULTI;
+	else if (!(netdev->flags & IFF_ALLMULTI) &&
+		 adapter->flags & IAVF_FLAG_ALLMULTI_ON)
+		adapter->aq_required |= IAVF_FLAG_AQ_RELEASE_ALLMULTI;
+}
+
+/**
+ * iavf_napi_enable_all - enable NAPI on all queue vectors
+ * @adapter: board private structure
+ **/
+static void iavf_napi_enable_all(struct iavf_adapter *adapter)
+{
+	int q_idx;
+	struct i40e_q_vector *q_vector;
+	int q_vectors = adapter->num_msix_vectors - NONQ_VECS;
+
+	for (q_idx = 0; q_idx < q_vectors; q_idx++) {
+		struct napi_struct *napi;
+
+		q_vector = &adapter->q_vectors[q_idx];
+		napi = &q_vector->napi;
+		napi_enable(napi);
+	}
+}
+
+/**
+ * iavf_napi_disable_all - disable NAPI on all queue vectors
+ * @adapter: board private structure
+ **/
+static void iavf_napi_disable_all(struct iavf_adapter *adapter)
+{
+	int q_idx;
+	struct i40e_q_vector *q_vector;
+	int q_vectors = adapter->num_msix_vectors - NONQ_VECS;
+
+	for (q_idx = 0; q_idx < q_vectors; q_idx++) {
+		q_vector = &adapter->q_vectors[q_idx];
+		napi_disable(&q_vector->napi);
+	}
+}
+
+/**
+ * iavf_configure - set up transmit and receive data structures
+ * @adapter: board private structure
+ **/
+static void iavf_configure(struct iavf_adapter *adapter)
+{
+	struct net_device *netdev = adapter->netdev;
+	int i;
+
+	iavf_set_rx_mode(netdev);
+
+	iavf_configure_tx(adapter);
+	iavf_configure_rx(adapter);
+	adapter->aq_required |= IAVF_FLAG_AQ_CONFIGURE_QUEUES;
+
+	for (i = 0; i < adapter->num_active_queues; i++) {
+		struct i40e_ring *ring = &adapter->rx_rings[i];
+
+		iavf_alloc_rx_buffers(ring, I40E_DESC_UNUSED(ring));
+	}
+}
+
+/**
+ * iavf_up_complete - Finish the last steps of bringing up a connection
+ * @adapter: board private structure
+ *
+ * Expects to be called while holding the __IAVF_IN_CRITICAL_TASK bit lock.
+ **/
+static void iavf_up_complete(struct iavf_adapter *adapter)
+{
+	adapter->state = __IAVF_RUNNING;
+	clear_bit(__I40E_VSI_DOWN, adapter->vsi.state);
+
+	iavf_napi_enable_all(adapter);
+
+	adapter->aq_required |= IAVF_FLAG_AQ_ENABLE_QUEUES;
+	if (CLIENT_ENABLED(adapter))
+		adapter->flags |= IAVF_FLAG_CLIENT_NEEDS_OPEN;
+	mod_timer_pending(&adapter->watchdog_timer, jiffies + 1);
+}
+
+/**
+ * iavf_down - Shutdown the connection processing
+ * @adapter: board private structure
+ *
+ * Expects to be called while holding the __IAVF_IN_CRITICAL_TASK bit lock.
+ **/ +void iavf_down(struct iavf_adapter *adapter) +{ + struct net_device *netdev = adapter->netdev; + struct iavf_vlan_filter *vlf; + struct iavf_mac_filter *f; + struct iavf_cloud_filter *cf; + + if (adapter->state <= __IAVF_DOWN_PENDING) + return; + + netif_carrier_off(netdev); + netif_tx_disable(netdev); + adapter->link_up = false; + iavf_napi_disable_all(adapter); + iavf_irq_disable(adapter); + + spin_lock_bh(&adapter->mac_vlan_list_lock); + + /* clear the sync flag on all filters */ + __dev_uc_unsync(adapter->netdev, NULL); + __dev_mc_unsync(adapter->netdev, NULL); + + /* remove all MAC filters */ + list_for_each_entry(f, &adapter->mac_filter_list, list) { + f->remove = true; + } + + /* remove all VLAN filters */ + list_for_each_entry(vlf, &adapter->vlan_filter_list, list) { + vlf->remove = true; + } + + spin_unlock_bh(&adapter->mac_vlan_list_lock); + + /* remove all cloud filters */ + spin_lock_bh(&adapter->cloud_filter_list_lock); + list_for_each_entry(cf, &adapter->cloud_filter_list, list) { + cf->del = true; + } + spin_unlock_bh(&adapter->cloud_filter_list_lock); + + if (!(adapter->flags & IAVF_FLAG_PF_COMMS_FAILED) && + adapter->state != __IAVF_RESETTING) { + /* cancel any current operation */ + adapter->current_op = VIRTCHNL_OP_UNKNOWN; + /* Schedule operations to close down the HW. Don't wait + * here for this to complete. The watchdog is still running + * and it will take care of this. + */ + adapter->aq_required = IAVF_FLAG_AQ_DEL_MAC_FILTER; + adapter->aq_required |= IAVF_FLAG_AQ_DEL_VLAN_FILTER; + adapter->aq_required |= IAVF_FLAG_AQ_DEL_CLOUD_FILTER; + adapter->aq_required |= IAVF_FLAG_AQ_DISABLE_QUEUES; + } + + mod_timer_pending(&adapter->watchdog_timer, jiffies + 1); +} + +/** + * iavf_acquire_msix_vectors - Setup the MSIX capability + * @adapter: board private structure + * @vectors: number of vectors to request + * + * Work with the OS to set up the MSIX vectors needed. + * + * Returns 0 on success, negative on failure + **/ +static int +iavf_acquire_msix_vectors(struct iavf_adapter *adapter, int vectors) +{ + int err, vector_threshold; + + /* We'll want at least 3 (vector_threshold): + * 0) Other (Admin Queue and link, mostly) + * 1) TxQ[0] Cleanup + * 2) RxQ[0] Cleanup + */ + vector_threshold = MIN_MSIX_COUNT; + + /* The more we get, the more we will assign to Tx/Rx Cleanup + * for the separate queues...where Rx Cleanup >= Tx Cleanup. + * Right now, we simply care about how many we'll get; we'll + * set them up later while requesting irq's. + */ + err = pci_enable_msix_range(adapter->pdev, adapter->msix_entries, + vector_threshold, vectors); + if (err < 0) { + dev_err(&adapter->pdev->dev, "Unable to allocate MSI-X interrupts\n"); + kfree(adapter->msix_entries); + adapter->msix_entries = NULL; + return err; + } + + /* Adjust for only the vectors we'll use, which is minimum + * of max_msix_q_vectors + NONQ_VECS, or the number of + * vectors we were allocated. + */ + adapter->num_msix_vectors = err; + return 0; +} + +/** + * iavf_free_queues - Free memory for all rings + * @adapter: board private structure to initialize + * + * Free all of the memory associated with queue pairs. 
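+ * Note that only the ring arrays are freed here; descriptor memory is
+ * released separately by iavf_free_all_tx_resources() and
+ * iavf_free_all_rx_resources().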
+ **/
+static void iavf_free_queues(struct iavf_adapter *adapter)
+{
+	if (!adapter->vsi_res)
+		return;
+	adapter->num_active_queues = 0;
+	kfree(adapter->tx_rings);
+	adapter->tx_rings = NULL;
+	kfree(adapter->rx_rings);
+	adapter->rx_rings = NULL;
+}
+
+/**
+ * iavf_alloc_queues - Allocate memory for all rings
+ * @adapter: board private structure to initialize
+ *
+ * We allocate one ring per queue at run-time since we don't know the
+ * number of queues at compile-time.
+ **/
+static int iavf_alloc_queues(struct iavf_adapter *adapter)
+{
+	int i, num_active_queues;
+
+	/* If we're in reset reallocating queues we don't actually know yet for
+	 * certain the PF gave us the number of queues we asked for but we'll
+	 * assume it did. Once basic reset is finished we'll confirm once we
+	 * start negotiating config with PF.
+	 */
+	if (adapter->num_req_queues)
+		num_active_queues = adapter->num_req_queues;
+	else if ((adapter->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_ADQ) &&
+		 adapter->num_tc)
+		num_active_queues = adapter->ch_config.total_qps;
+	else
+		num_active_queues = min_t(int,
+					  adapter->vsi_res->num_queue_pairs,
+					  (int)(num_online_cpus()));
+
+	adapter->tx_rings = kcalloc(num_active_queues,
+				    sizeof(struct i40e_ring), GFP_KERNEL);
+	if (!adapter->tx_rings)
+		goto err_out;
+	adapter->rx_rings = kcalloc(num_active_queues,
+				    sizeof(struct i40e_ring), GFP_KERNEL);
+	if (!adapter->rx_rings)
+		goto err_out;
+
+	for (i = 0; i < num_active_queues; i++) {
+		struct i40e_ring *tx_ring;
+		struct i40e_ring *rx_ring;
+
+		tx_ring = &adapter->tx_rings[i];
+
+		tx_ring->queue_index = i;
+		tx_ring->netdev = adapter->netdev;
+		tx_ring->dev = &adapter->pdev->dev;
+		tx_ring->count = adapter->tx_desc_count;
+		tx_ring->itr_setting = I40E_ITR_TX_DEF;
+		if (adapter->flags & IAVF_FLAG_WB_ON_ITR_CAPABLE)
+			tx_ring->flags |= I40E_TXR_FLAGS_WB_ON_ITR;
+
+		rx_ring = &adapter->rx_rings[i];
+		rx_ring->queue_index = i;
+		rx_ring->netdev = adapter->netdev;
+		rx_ring->dev = &adapter->pdev->dev;
+		rx_ring->count = adapter->rx_desc_count;
+		rx_ring->itr_setting = I40E_ITR_RX_DEF;
+	}
+
+	adapter->num_active_queues = num_active_queues;
+
+	return 0;
+
+err_out:
+	iavf_free_queues(adapter);
+	return -ENOMEM;
+}
+
+/**
+ * iavf_set_interrupt_capability - set MSI-X or FAIL if not supported
+ * @adapter: board private structure to initialize
+ *
+ * Attempt to configure the interrupts using the best available
+ * capabilities of the hardware and the kernel.
+ **/
+static int iavf_set_interrupt_capability(struct iavf_adapter *adapter)
+{
+	int vector, v_budget;
+	int pairs = 0;
+	int err = 0;
+
+	if (!adapter->vsi_res) {
+		err = -EIO;
+		goto out;
+	}
+	pairs = adapter->num_active_queues;
+
+	/* It's easy to be greedy for MSI-X vectors, but it really doesn't do
+	 * us much good if we have more vectors than CPUs. However, we already
+	 * limit the total number of queues by the number of CPUs so we do not
+	 * need any further limiting here.
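+	 * For example, with 8 queue pairs we request at most 8 + NONQ_VECS
+	 * entries below, capped by the max_vectors value the PF reported
+	 * in the VF resources.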
+	 */
+	v_budget = min_t(int, pairs + NONQ_VECS,
+			 (int)adapter->vf_res->max_vectors);
+
+	adapter->msix_entries = kcalloc(v_budget,
+					sizeof(struct msix_entry), GFP_KERNEL);
+	if (!adapter->msix_entries) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	for (vector = 0; vector < v_budget; vector++)
+		adapter->msix_entries[vector].entry = vector;
+
+	err = iavf_acquire_msix_vectors(adapter, v_budget);
+
+out:
+	netif_set_real_num_rx_queues(adapter->netdev, pairs);
+	netif_set_real_num_tx_queues(adapter->netdev, pairs);
+	return err;
+}
+
+/**
+ * iavf_config_rss_aq - Configure RSS keys and lut by using AQ commands
+ * @adapter: board private structure
+ *
+ * Returns 0 on success, negative on failure
+ **/
+static int iavf_config_rss_aq(struct iavf_adapter *adapter)
+{
+	struct i40e_aqc_get_set_rss_key_data *rss_key =
+		(struct i40e_aqc_get_set_rss_key_data *)adapter->rss_key;
+	struct i40e_hw *hw = &adapter->hw;
+	int ret = 0;
+
+	if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) {
+		/* bail because we already have a command pending */
+		dev_err(&adapter->pdev->dev, "Cannot configure RSS, command %d pending\n",
+			adapter->current_op);
+		return -EBUSY;
+	}
+
+	ret = iavf_aq_set_rss_key(hw, adapter->vsi.id, rss_key);
+	if (ret) {
+		dev_err(&adapter->pdev->dev, "Cannot set RSS key, err %s aq_err %s\n",
+			iavf_stat_str(hw, ret),
+			iavf_aq_str(hw, hw->aq.asq_last_status));
+		return ret;
+	}
+
+	ret = iavf_aq_set_rss_lut(hw, adapter->vsi.id, false,
+				  adapter->rss_lut, adapter->rss_lut_size);
+	if (ret) {
+		dev_err(&adapter->pdev->dev, "Cannot set RSS lut, err %s aq_err %s\n",
+			iavf_stat_str(hw, ret),
+			iavf_aq_str(hw, hw->aq.asq_last_status));
+	}
+
+	return ret;
+}
+
+/**
+ * iavf_config_rss_reg - Configure RSS keys and lut by writing registers
+ * @adapter: board private structure
+ *
+ * Returns 0 on success, negative on failure
+ **/
+static int iavf_config_rss_reg(struct iavf_adapter *adapter)
+{
+	struct i40e_hw *hw = &adapter->hw;
+	u32 *dw;
+	u16 i;
+
+	dw = (u32 *)adapter->rss_key;
+	for (i = 0; i <= adapter->rss_key_size / 4; i++)
+		wr32(hw, I40E_VFQF_HKEY(i), dw[i]);
+
+	dw = (u32 *)adapter->rss_lut;
+	for (i = 0; i <= adapter->rss_lut_size / 4; i++)
+		wr32(hw, I40E_VFQF_HLUT(i), dw[i]);
+
+	i40e_flush(hw);
+
+	return 0;
+}
+
+/**
+ * iavf_config_rss - Configure RSS keys and lut
+ * @adapter: board private structure
+ *
+ * Returns 0 on success, negative on failure
+ **/
+int iavf_config_rss(struct iavf_adapter *adapter)
+{
+	if (RSS_PF(adapter)) {
+		adapter->aq_required |= IAVF_FLAG_AQ_SET_RSS_LUT |
+					IAVF_FLAG_AQ_SET_RSS_KEY;
+		return 0;
+	} else if (RSS_AQ(adapter)) {
+		return iavf_config_rss_aq(adapter);
+	} else {
+		return iavf_config_rss_reg(adapter);
+	}
+}
+
+/**
+ * iavf_fill_rss_lut - Fill the lut with default values
+ * @adapter: board private structure
+ **/
+static void iavf_fill_rss_lut(struct iavf_adapter *adapter)
+{
+	u16 i;
+
+	for (i = 0; i < adapter->rss_lut_size; i++)
+		adapter->rss_lut[i] = i % adapter->num_active_queues;
+}
+
+/**
+ * iavf_init_rss - Prepare for RSS
+ * @adapter: board private structure
+ *
+ * Returns 0 on success, negative on failure
+ **/
+static int iavf_init_rss(struct iavf_adapter *adapter)
+{
+	struct i40e_hw *hw = &adapter->hw;
+	int ret;
+
+	if (!RSS_PF(adapter)) {
+		/* Enable PCTYPES for RSS, TCP/UDP with IPv4/IPv6 */
+		if (adapter->vf_res->vf_cap_flags &
+		    VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2)
+			adapter->hena = I40E_DEFAULT_RSS_HENA_EXPANDED;
+		else
+			adapter->hena = I40E_DEFAULT_RSS_HENA;
+
+		wr32(hw, I40E_VFQF_HENA(0), (u32)adapter->hena);
+		
wr32(hw, I40E_VFQF_HENA(1), (u32)(adapter->hena >> 32));
+	}
+
+	iavf_fill_rss_lut(adapter);
+
+	netdev_rss_key_fill((void *)adapter->rss_key, adapter->rss_key_size);
+	ret = iavf_config_rss(adapter);
+
+	return ret;
+}
+
+/**
+ * iavf_alloc_q_vectors - Allocate memory for interrupt vectors
+ * @adapter: board private structure to initialize
+ *
+ * We allocate one q_vector per queue interrupt. If allocation fails we
+ * return -ENOMEM.
+ **/
+static int iavf_alloc_q_vectors(struct iavf_adapter *adapter)
+{
+	int q_idx = 0, num_q_vectors;
+	struct i40e_q_vector *q_vector;
+
+	num_q_vectors = adapter->num_msix_vectors - NONQ_VECS;
+	adapter->q_vectors = kcalloc(num_q_vectors, sizeof(*q_vector),
+				     GFP_KERNEL);
+	if (!adapter->q_vectors)
+		return -ENOMEM;
+
+	for (q_idx = 0; q_idx < num_q_vectors; q_idx++) {
+		q_vector = &adapter->q_vectors[q_idx];
+		q_vector->adapter = adapter;
+		q_vector->vsi = &adapter->vsi;
+		q_vector->v_idx = q_idx;
+		q_vector->reg_idx = q_idx;
+		cpumask_copy(&q_vector->affinity_mask, cpu_possible_mask);
+		netif_napi_add(adapter->netdev, &q_vector->napi,
+			       iavf_napi_poll, NAPI_POLL_WEIGHT);
+	}
+
+	return 0;
+}
+
+/**
+ * iavf_free_q_vectors - Free memory allocated for interrupt vectors
+ * @adapter: board private structure to initialize
+ *
+ * This function frees the memory allocated to the q_vectors. In addition if
+ * NAPI is enabled it will delete any references to the NAPI struct prior
+ * to freeing the q_vector.
+ **/
+static void iavf_free_q_vectors(struct iavf_adapter *adapter)
+{
+	int q_idx, num_q_vectors;
+	int napi_vectors;
+
+	if (!adapter->q_vectors)
+		return;
+
+	num_q_vectors = adapter->num_msix_vectors - NONQ_VECS;
+	napi_vectors = adapter->num_active_queues;
+
+	for (q_idx = 0; q_idx < num_q_vectors; q_idx++) {
+		struct i40e_q_vector *q_vector = &adapter->q_vectors[q_idx];
+
+		if (q_idx < napi_vectors)
+			netif_napi_del(&q_vector->napi);
+	}
+	kfree(adapter->q_vectors);
+	adapter->q_vectors = NULL;
+}
+
+/**
+ * iavf_reset_interrupt_capability - Reset MSIX setup
+ * @adapter: board private structure
+ **/
+void iavf_reset_interrupt_capability(struct iavf_adapter *adapter)
+{
+	if (!adapter->msix_entries)
+		return;
+
+	pci_disable_msix(adapter->pdev);
+	kfree(adapter->msix_entries);
+	adapter->msix_entries = NULL;
+}
+
+/**
+ * iavf_init_interrupt_scheme - Determine if MSIX is supported and init
+ * @adapter: board private structure to initialize
+ **/
+int iavf_init_interrupt_scheme(struct iavf_adapter *adapter)
+{
+	int err;
+
+	err = iavf_alloc_queues(adapter);
+	if (err) {
+		dev_err(&adapter->pdev->dev,
+			"Unable to allocate memory for queues\n");
+		goto err_alloc_queues;
+	}
+
+	rtnl_lock();
+	err = iavf_set_interrupt_capability(adapter);
+	rtnl_unlock();
+	if (err) {
+		dev_err(&adapter->pdev->dev,
+			"Unable to setup interrupt capabilities\n");
+		goto err_set_interrupt;
+	}
+
+	err = iavf_alloc_q_vectors(adapter);
+	if (err) {
+		dev_err(&adapter->pdev->dev,
+			"Unable to allocate memory for queue vectors\n");
+		goto err_alloc_q_vectors;
+	}
+
+	/* If we've made it this far with the ADq flag set, then we haven't
+	 * bailed out anywhere in the middle. And ADq isn't just enabled but
+	 * the actual resources have been allocated in the reset path.
+	 * Now we can truly claim that ADq is enabled.
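+	 * (In that case iavf_alloc_queues() sized the rings from
+	 * ch_config.total_qps rather than from the online CPU count.)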
+	 */
+	if ((adapter->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_ADQ) &&
+	    adapter->num_tc)
+		dev_info(&adapter->pdev->dev, "ADq Enabled, %u TCs created\n",
+			 adapter->num_tc);
+
+	dev_info(&adapter->pdev->dev, "Multiqueue %s: Queue pair count = %u\n",
+		 (adapter->num_active_queues > 1) ? "Enabled" : "Disabled",
+		 adapter->num_active_queues);
+
+	return 0;
+err_alloc_q_vectors:
+	iavf_reset_interrupt_capability(adapter);
+err_set_interrupt:
+	iavf_free_queues(adapter);
+err_alloc_queues:
+	return err;
+}
+
+/**
+ * iavf_free_rss - Free memory used by RSS structs
+ * @adapter: board private structure
+ **/
+static void iavf_free_rss(struct iavf_adapter *adapter)
+{
+	kfree(adapter->rss_key);
+	adapter->rss_key = NULL;
+
+	kfree(adapter->rss_lut);
+	adapter->rss_lut = NULL;
+}
+
+/**
+ * iavf_reinit_interrupt_scheme - Reallocate queues and vectors
+ * @adapter: board private structure
+ *
+ * Returns 0 on success, negative on failure
+ **/
+static int iavf_reinit_interrupt_scheme(struct iavf_adapter *adapter)
+{
+	struct net_device *netdev = adapter->netdev;
+	int err;
+
+	if (netif_running(netdev))
+		iavf_free_traffic_irqs(adapter);
+	iavf_free_misc_irq(adapter);
+	iavf_reset_interrupt_capability(adapter);
+	iavf_free_q_vectors(adapter);
+	iavf_free_queues(adapter);
+
+	err = iavf_init_interrupt_scheme(adapter);
+	if (err)
+		goto err;
+
+	netif_tx_stop_all_queues(netdev);
+
+	err = iavf_request_misc_irq(adapter);
+	if (err)
+		goto err;
+
+	set_bit(__I40E_VSI_DOWN, adapter->vsi.state);
+
+	iavf_map_rings_to_vectors(adapter);
+
+	if (RSS_AQ(adapter))
+		adapter->aq_required |= IAVF_FLAG_AQ_CONFIGURE_RSS;
+	else
+		err = iavf_init_rss(adapter);
+err:
+	return err;
+}
+
+/**
+ * iavf_watchdog_timer - Periodic call-back timer
+ * @t: pointer to the timer_list containing our adapter
+ **/
+static void iavf_watchdog_timer(struct timer_list *t)
+{
+	struct iavf_adapter *adapter = from_timer(adapter, t,
+						  watchdog_timer);
+
+	schedule_work(&adapter->watchdog_task);
+	/* timer will be rescheduled in watchdog task */
+}
+
+/**
+ * iavf_watchdog_task - Periodic call-back task
+ * @work: pointer to work_struct
+ **/
+static void iavf_watchdog_task(struct work_struct *work)
+{
+	struct iavf_adapter *adapter = container_of(work,
+						    struct iavf_adapter,
+						    watchdog_task);
+	struct i40e_hw *hw = &adapter->hw;
+	u32 reg_val;
+
+	if (test_and_set_bit(__IAVF_IN_CRITICAL_TASK, &adapter->crit_section))
+		goto restart_watchdog;
+
+	if (adapter->flags & IAVF_FLAG_PF_COMMS_FAILED) {
+		reg_val = rd32(hw, I40E_VFGEN_RSTAT) &
+			  I40E_VFGEN_RSTAT_VFR_STATE_MASK;
+		if ((reg_val == VIRTCHNL_VFR_VFACTIVE) ||
+		    (reg_val == VIRTCHNL_VFR_COMPLETED)) {
+			/* A chance for redemption! */
+			dev_err(&adapter->pdev->dev, "Hardware came out of reset. Attempting reinit.\n");
+			adapter->state = __IAVF_STARTUP;
+			adapter->flags &= ~IAVF_FLAG_PF_COMMS_FAILED;
+			schedule_delayed_work(&adapter->init_task, 10);
+			clear_bit(__IAVF_IN_CRITICAL_TASK,
+				  &adapter->crit_section);
+			/* Don't reschedule the watchdog, since we've restarted
+			 * the init task. When init_task contacts the PF and
+			 * gets everything set up again, it'll restart the
+			 * watchdog for us. Down, boy. Sit. Stay. Woof.
+ */ + return; + } + adapter->aq_required = 0; + adapter->current_op = VIRTCHNL_OP_UNKNOWN; + goto watchdog_done; + } + + if ((adapter->state < __IAVF_DOWN) || + (adapter->flags & IAVF_FLAG_RESET_PENDING)) + goto watchdog_done; + + /* check for reset */ + reg_val = rd32(hw, I40E_VF_ARQLEN1) & I40E_VF_ARQLEN1_ARQENABLE_MASK; + if (!(adapter->flags & IAVF_FLAG_RESET_PENDING) && !reg_val) { + adapter->state = __IAVF_RESETTING; + adapter->flags |= IAVF_FLAG_RESET_PENDING; + dev_err(&adapter->pdev->dev, "Hardware reset detected\n"); + schedule_work(&adapter->reset_task); + adapter->aq_required = 0; + adapter->current_op = VIRTCHNL_OP_UNKNOWN; + goto watchdog_done; + } + + /* Process admin queue tasks. After init, everything gets done + * here so we don't race on the admin queue. + */ + if (adapter->current_op) { + if (!iavf_asq_done(hw)) { + dev_dbg(&adapter->pdev->dev, "Admin queue timeout\n"); + iavf_send_api_ver(adapter); + } + goto watchdog_done; + } + if (adapter->aq_required & IAVF_FLAG_AQ_GET_CONFIG) { + iavf_send_vf_config_msg(adapter); + goto watchdog_done; + } + + if (adapter->aq_required & IAVF_FLAG_AQ_DISABLE_QUEUES) { + iavf_disable_queues(adapter); + goto watchdog_done; + } + + if (adapter->aq_required & IAVF_FLAG_AQ_MAP_VECTORS) { + iavf_map_queues(adapter); + goto watchdog_done; + } + + if (adapter->aq_required & IAVF_FLAG_AQ_ADD_MAC_FILTER) { + iavf_add_ether_addrs(adapter); + goto watchdog_done; + } + + if (adapter->aq_required & IAVF_FLAG_AQ_ADD_VLAN_FILTER) { + iavf_add_vlans(adapter); + goto watchdog_done; + } + + if (adapter->aq_required & IAVF_FLAG_AQ_DEL_MAC_FILTER) { + iavf_del_ether_addrs(adapter); + goto watchdog_done; + } + + if (adapter->aq_required & IAVF_FLAG_AQ_DEL_VLAN_FILTER) { + iavf_del_vlans(adapter); + goto watchdog_done; + } + + if (adapter->aq_required & IAVF_FLAG_AQ_ENABLE_VLAN_STRIPPING) { + iavf_enable_vlan_stripping(adapter); + goto watchdog_done; + } + + if (adapter->aq_required & IAVF_FLAG_AQ_DISABLE_VLAN_STRIPPING) { + iavf_disable_vlan_stripping(adapter); + goto watchdog_done; + } + + if (adapter->aq_required & IAVF_FLAG_AQ_CONFIGURE_QUEUES) { + iavf_configure_queues(adapter); + goto watchdog_done; + } + + if (adapter->aq_required & IAVF_FLAG_AQ_ENABLE_QUEUES) { + iavf_enable_queues(adapter); + goto watchdog_done; + } + + if (adapter->aq_required & IAVF_FLAG_AQ_CONFIGURE_RSS) { + /* This message goes straight to the firmware, not the + * PF, so we don't have to set current_op as we will + * not get a response through the ARQ. 
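+		 * (Both RSS paths bypass virtchnl: iavf_config_rss_reg()
+		 * writes the VFQF registers directly and iavf_config_rss_aq()
+		 * uses our own admin queue via iavf_aq_set_rss_key()/_lut().)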
+ */ + iavf_init_rss(adapter); + adapter->aq_required &= ~IAVF_FLAG_AQ_CONFIGURE_RSS; + goto watchdog_done; + } + if (adapter->aq_required & IAVF_FLAG_AQ_GET_HENA) { + iavf_get_hena(adapter); + goto watchdog_done; + } + if (adapter->aq_required & IAVF_FLAG_AQ_SET_HENA) { + iavf_set_hena(adapter); + goto watchdog_done; + } + if (adapter->aq_required & IAVF_FLAG_AQ_SET_RSS_KEY) { + iavf_set_rss_key(adapter); + goto watchdog_done; + } + if (adapter->aq_required & IAVF_FLAG_AQ_SET_RSS_LUT) { + iavf_set_rss_lut(adapter); + goto watchdog_done; + } + + if (adapter->aq_required & IAVF_FLAG_AQ_REQUEST_PROMISC) { + iavf_set_promiscuous(adapter, FLAG_VF_UNICAST_PROMISC | + FLAG_VF_MULTICAST_PROMISC); + goto watchdog_done; + } + + if (adapter->aq_required & IAVF_FLAG_AQ_REQUEST_ALLMULTI) { + iavf_set_promiscuous(adapter, FLAG_VF_MULTICAST_PROMISC); + goto watchdog_done; + } + + if ((adapter->aq_required & IAVF_FLAG_AQ_RELEASE_PROMISC) && + (adapter->aq_required & IAVF_FLAG_AQ_RELEASE_ALLMULTI)) { + iavf_set_promiscuous(adapter, 0); + goto watchdog_done; + } + + if (adapter->aq_required & IAVF_FLAG_AQ_ENABLE_CHANNELS) { + iavf_enable_channels(adapter); + goto watchdog_done; + } + + if (adapter->aq_required & IAVF_FLAG_AQ_DISABLE_CHANNELS) { + iavf_disable_channels(adapter); + goto watchdog_done; + } + + if (adapter->aq_required & IAVF_FLAG_AQ_ADD_CLOUD_FILTER) { + iavf_add_cloud_filter(adapter); + goto watchdog_done; + } + + if (adapter->aq_required & IAVF_FLAG_AQ_DEL_CLOUD_FILTER) { + iavf_del_cloud_filter(adapter); + goto watchdog_done; + } + + schedule_delayed_work(&adapter->client_task, msecs_to_jiffies(5)); + + if (adapter->state == __IAVF_RUNNING) + iavf_request_stats(adapter); +watchdog_done: + if (adapter->state == __IAVF_RUNNING) + iavf_detect_recover_hung(&adapter->vsi); + clear_bit(__IAVF_IN_CRITICAL_TASK, &adapter->crit_section); +restart_watchdog: + if (adapter->state == __IAVF_REMOVE) + return; + if (adapter->aq_required) + mod_timer(&adapter->watchdog_timer, + jiffies + msecs_to_jiffies(20)); + else + mod_timer(&adapter->watchdog_timer, jiffies + (HZ * 2)); + schedule_work(&adapter->adminq_task); +} + +static void iavf_disable_vf(struct iavf_adapter *adapter) +{ + struct iavf_mac_filter *f, *ftmp; + struct iavf_vlan_filter *fv, *fvtmp; + struct iavf_cloud_filter *cf, *cftmp; + + adapter->flags |= IAVF_FLAG_PF_COMMS_FAILED; + + /* We don't use netif_running() because it may be true prior to + * ndo_open() returning, so we can't assume it means all our open + * tasks have finished, since we're not holding the rtnl_lock here. 
+ */ + if (adapter->state == __IAVF_RUNNING) { + set_bit(__I40E_VSI_DOWN, adapter->vsi.state); + netif_carrier_off(adapter->netdev); + netif_tx_disable(adapter->netdev); + adapter->link_up = false; + iavf_napi_disable_all(adapter); + iavf_irq_disable(adapter); + iavf_free_traffic_irqs(adapter); + iavf_free_all_tx_resources(adapter); + iavf_free_all_rx_resources(adapter); + } + + spin_lock_bh(&adapter->mac_vlan_list_lock); + + /* Delete all of the filters */ + list_for_each_entry_safe(f, ftmp, &adapter->mac_filter_list, list) { + list_del(&f->list); + kfree(f); + } + + list_for_each_entry_safe(fv, fvtmp, &adapter->vlan_filter_list, list) { + list_del(&fv->list); + kfree(fv); + } + + spin_unlock_bh(&adapter->mac_vlan_list_lock); + + spin_lock_bh(&adapter->cloud_filter_list_lock); + list_for_each_entry_safe(cf, cftmp, &adapter->cloud_filter_list, list) { + list_del(&cf->list); + kfree(cf); + adapter->num_cloud_filters--; + } + spin_unlock_bh(&adapter->cloud_filter_list_lock); + + iavf_free_misc_irq(adapter); + iavf_reset_interrupt_capability(adapter); + iavf_free_queues(adapter); + iavf_free_q_vectors(adapter); + kfree(adapter->vf_res); + iavf_shutdown_adminq(&adapter->hw); + adapter->netdev->flags &= ~IFF_UP; + clear_bit(__IAVF_IN_CRITICAL_TASK, &adapter->crit_section); + adapter->flags &= ~IAVF_FLAG_RESET_PENDING; + adapter->state = __IAVF_DOWN; + wake_up(&adapter->down_waitqueue); + dev_info(&adapter->pdev->dev, "Reset task did not complete, VF disabled\n"); +} + +#define IAVF_RESET_WAIT_MS 10 +#define IAVF_RESET_WAIT_COUNT 500 +/** + * iavf_reset_task - Call-back task to handle hardware reset + * @work: pointer to work_struct + * + * During reset we need to shut down and reinitialize the admin queue + * before we can use it to communicate with the PF again. We also clear + * and reinit the rings because that context is lost as well. + **/ +static void iavf_reset_task(struct work_struct *work) +{ + struct iavf_adapter *adapter = container_of(work, + struct iavf_adapter, + reset_task); + struct virtchnl_vf_resource *vfres = adapter->vf_res; + struct net_device *netdev = adapter->netdev; + struct i40e_hw *hw = &adapter->hw; + struct iavf_vlan_filter *vlf; + struct iavf_cloud_filter *cf; + struct iavf_mac_filter *f; + u32 reg_val; + int i = 0, err; + bool running; + + /* When device is being removed it doesn't make sense to run the reset + * task, just return in such a case. + */ + if (test_bit(__IAVF_IN_REMOVE_TASK, &adapter->crit_section)) + return; + + while (test_and_set_bit(__IAVF_IN_CLIENT_TASK, + &adapter->crit_section)) + usleep_range(500, 1000); + if (CLIENT_ENABLED(adapter)) { + adapter->flags &= ~(IAVF_FLAG_CLIENT_NEEDS_OPEN | + IAVF_FLAG_CLIENT_NEEDS_CLOSE | + IAVF_FLAG_CLIENT_NEEDS_L2_PARAMS | + IAVF_FLAG_SERVICE_CLIENT_REQUESTED); + cancel_delayed_work_sync(&adapter->client_task); + iavf_notify_client_close(&adapter->vsi, true); + } + iavf_misc_irq_disable(adapter); + if (adapter->flags & IAVF_FLAG_RESET_NEEDED) { + adapter->flags &= ~IAVF_FLAG_RESET_NEEDED; + /* Restart the AQ here. If we have been reset but didn't + * detect it, or if the PF had to reinit, our AQ will be hosed. 
+ */ + iavf_shutdown_adminq(hw); + iavf_init_adminq(hw); + iavf_request_reset(adapter); + } + adapter->flags |= IAVF_FLAG_RESET_PENDING; + + /* poll until we see the reset actually happen */ + for (i = 0; i < IAVF_RESET_WAIT_COUNT; i++) { + reg_val = rd32(hw, I40E_VF_ARQLEN1) & + I40E_VF_ARQLEN1_ARQENABLE_MASK; + if (!reg_val) + break; + usleep_range(5000, 10000); + } + if (i == IAVF_RESET_WAIT_COUNT) { + dev_info(&adapter->pdev->dev, "Never saw reset\n"); + goto continue_reset; /* act like the reset happened */ + } + + /* wait until the reset is complete and the PF is responding to us */ + for (i = 0; i < IAVF_RESET_WAIT_COUNT; i++) { + /* sleep first to make sure a minimum wait time is met */ + msleep(IAVF_RESET_WAIT_MS); + + reg_val = rd32(hw, I40E_VFGEN_RSTAT) & + I40E_VFGEN_RSTAT_VFR_STATE_MASK; + if (reg_val == VIRTCHNL_VFR_VFACTIVE) + break; + } + + pci_set_master(adapter->pdev); + + if (i == IAVF_RESET_WAIT_COUNT) { + dev_err(&adapter->pdev->dev, "Reset never finished (%x)\n", + reg_val); + iavf_disable_vf(adapter); + clear_bit(__IAVF_IN_CLIENT_TASK, &adapter->crit_section); + return; /* Do not attempt to reinit. It's dead, Jim. */ + } + +continue_reset: + /* We don't use netif_running() because it may be true prior to + * ndo_open() returning, so we can't assume it means all our open + * tasks have finished, since we're not holding the rtnl_lock here. + */ + running = ((adapter->state == __IAVF_RUNNING) || + (adapter->state == __IAVF_RESETTING)); + + if (running) { + netif_carrier_off(netdev); + netif_tx_stop_all_queues(netdev); + adapter->link_up = false; + iavf_napi_disable_all(adapter); + } + iavf_irq_disable(adapter); + + adapter->state = __IAVF_RESETTING; + adapter->flags &= ~IAVF_FLAG_RESET_PENDING; + + /* free the Tx/Rx rings and descriptors, might be better to just + * re-use them sometime in the future + */ + iavf_free_all_rx_resources(adapter); + iavf_free_all_tx_resources(adapter); + + adapter->flags |= IAVF_FLAG_QUEUES_DISABLED; + /* kill and reinit the admin queue */ + iavf_shutdown_adminq(hw); + adapter->current_op = VIRTCHNL_OP_UNKNOWN; + err = iavf_init_adminq(hw); + if (err) + dev_info(&adapter->pdev->dev, "Failed to init adminq: %d\n", + err); + adapter->aq_required = 0; + + if (adapter->flags & IAVF_FLAG_REINIT_ITR_NEEDED) { + err = iavf_reinit_interrupt_scheme(adapter); + if (err) + goto reset_err; + } + + adapter->aq_required |= IAVF_FLAG_AQ_GET_CONFIG; + adapter->aq_required |= IAVF_FLAG_AQ_MAP_VECTORS; + + spin_lock_bh(&adapter->mac_vlan_list_lock); + + /* re-add all MAC filters */ + list_for_each_entry(f, &adapter->mac_filter_list, list) { + f->add = true; + } + /* re-add all VLAN filters */ + list_for_each_entry(vlf, &adapter->vlan_filter_list, list) { + vlf->add = true; + } + + spin_unlock_bh(&adapter->mac_vlan_list_lock); + + /* check if TCs are running and re-add all cloud filters */ + spin_lock_bh(&adapter->cloud_filter_list_lock); + if ((vfres->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_ADQ) && + adapter->num_tc) { + list_for_each_entry(cf, &adapter->cloud_filter_list, list) { + cf->add = true; + } + } + spin_unlock_bh(&adapter->cloud_filter_list_lock); + + adapter->aq_required |= IAVF_FLAG_AQ_ADD_MAC_FILTER; + adapter->aq_required |= IAVF_FLAG_AQ_ADD_VLAN_FILTER; + adapter->aq_required |= IAVF_FLAG_AQ_ADD_CLOUD_FILTER; + iavf_misc_irq_enable(adapter); + + mod_timer(&adapter->watchdog_timer, jiffies + 2); + + /* We were running when the reset started, so we need to restore some + * state here. 
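+	 * The sequence below mirrors iavf_open(): set up Tx/Rx resources,
+	 * re-request traffic IRQs if the interrupt scheme was rebuilt, then
+	 * iavf_configure() and iavf_up_complete().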
+ */ + if (running) { + /* allocate transmit descriptors */ + err = iavf_setup_all_tx_resources(adapter); + if (err) + goto reset_err; + + /* allocate receive descriptors */ + err = iavf_setup_all_rx_resources(adapter); + if (err) + goto reset_err; + + if (adapter->flags & IAVF_FLAG_REINIT_ITR_NEEDED) { + err = iavf_request_traffic_irqs(adapter, netdev->name); + if (err) + goto reset_err; + + adapter->flags &= ~IAVF_FLAG_REINIT_ITR_NEEDED; + } + + iavf_configure(adapter); + + iavf_up_complete(adapter); + + iavf_irq_enable(adapter, true); + } else { + adapter->state = __IAVF_DOWN; + wake_up(&adapter->down_waitqueue); + } + clear_bit(__IAVF_IN_CLIENT_TASK, &adapter->crit_section); + clear_bit(__IAVF_IN_CRITICAL_TASK, &adapter->crit_section); + + return; +reset_err: + clear_bit(__IAVF_IN_CLIENT_TASK, &adapter->crit_section); + clear_bit(__IAVF_IN_CRITICAL_TASK, &adapter->crit_section); + dev_err(&adapter->pdev->dev, "failed to allocate resources during reinit\n"); + iavf_close(netdev); +} + +/** + * iavf_adminq_task - worker thread to clean the admin queue + * @work: pointer to work_struct containing our data + **/ +static void iavf_adminq_task(struct work_struct *work) +{ + struct iavf_adapter *adapter = + container_of(work, struct iavf_adapter, adminq_task); + struct i40e_hw *hw = &adapter->hw; + struct i40e_arq_event_info event; + enum virtchnl_ops v_op; + iavf_status ret, v_ret; + u32 val, oldval; + u16 pending; + + if (adapter->flags & IAVF_FLAG_PF_COMMS_FAILED) + goto out; + + event.buf_len = IAVF_MAX_AQ_BUF_SIZE; + event.msg_buf = kzalloc(event.buf_len, GFP_KERNEL); + if (!event.msg_buf) + goto out; + + do { + ret = iavf_clean_arq_element(hw, &event, &pending); + v_op = (enum virtchnl_ops)le32_to_cpu(event.desc.cookie_high); + v_ret = (iavf_status)le32_to_cpu(event.desc.cookie_low); + + if (ret || !v_op) + break; /* No event to process or error cleaning ARQ */ + + iavf_virtchnl_completion(adapter, v_op, v_ret, event.msg_buf, + event.msg_len); + if (pending != 0) + memset(event.msg_buf, 0, IAVF_MAX_AQ_BUF_SIZE); + } while (pending); + + if ((adapter->flags & + (IAVF_FLAG_RESET_PENDING | IAVF_FLAG_RESET_NEEDED)) || + adapter->state == __IAVF_RESETTING) + goto freedom; + + /* check for error indications */ + val = rd32(hw, hw->aq.arq.len); + if (val == 0xdeadbeef) /* indicates device in reset */ + goto freedom; + oldval = val; + if (val & I40E_VF_ARQLEN1_ARQVFE_MASK) { + dev_info(&adapter->pdev->dev, "ARQ VF Error detected\n"); + val &= ~I40E_VF_ARQLEN1_ARQVFE_MASK; + } + if (val & I40E_VF_ARQLEN1_ARQOVFL_MASK) { + dev_info(&adapter->pdev->dev, "ARQ Overflow Error detected\n"); + val &= ~I40E_VF_ARQLEN1_ARQOVFL_MASK; + } + if (val & I40E_VF_ARQLEN1_ARQCRIT_MASK) { + dev_info(&adapter->pdev->dev, "ARQ Critical Error detected\n"); + val &= ~I40E_VF_ARQLEN1_ARQCRIT_MASK; + } + if (oldval != val) + wr32(hw, hw->aq.arq.len, val); + + val = rd32(hw, hw->aq.asq.len); + oldval = val; + if (val & I40E_VF_ATQLEN1_ATQVFE_MASK) { + dev_info(&adapter->pdev->dev, "ASQ VF Error detected\n"); + val &= ~I40E_VF_ATQLEN1_ATQVFE_MASK; + } + if (val & I40E_VF_ATQLEN1_ATQOVFL_MASK) { + dev_info(&adapter->pdev->dev, "ASQ Overflow Error detected\n"); + val &= ~I40E_VF_ATQLEN1_ATQOVFL_MASK; + } + if (val & I40E_VF_ATQLEN1_ATQCRIT_MASK) { + dev_info(&adapter->pdev->dev, "ASQ Critical Error detected\n"); + val &= ~I40E_VF_ATQLEN1_ATQCRIT_MASK; + } + if (oldval != val) + wr32(hw, hw->aq.asq.len, val); + +freedom: + kfree(event.msg_buf); +out: + /* re-enable Admin queue interrupt cause */ + 
iavf_misc_irq_enable(adapter);
+}
+
+/**
+ * iavf_client_task - worker thread to perform client work
+ * @work: pointer to work_struct containing our data
+ *
+ * This task handles client interactions. Because client calls can be
+ * reentrant, we can't handle them in the watchdog.
+ **/
+static void iavf_client_task(struct work_struct *work)
+{
+	struct iavf_adapter *adapter =
+		container_of(work, struct iavf_adapter, client_task.work);
+
+	/* If we can't get the client bit, just give up. We'll be rescheduled
+	 * later.
+	 */
+	if (test_and_set_bit(__IAVF_IN_CLIENT_TASK, &adapter->crit_section))
+		return;
+
+	if (adapter->flags & IAVF_FLAG_SERVICE_CLIENT_REQUESTED) {
+		iavf_client_subtask(adapter);
+		adapter->flags &= ~IAVF_FLAG_SERVICE_CLIENT_REQUESTED;
+		goto out;
+	}
+	if (adapter->flags & IAVF_FLAG_CLIENT_NEEDS_L2_PARAMS) {
+		iavf_notify_client_l2_params(&adapter->vsi);
+		adapter->flags &= ~IAVF_FLAG_CLIENT_NEEDS_L2_PARAMS;
+		goto out;
+	}
+	if (adapter->flags & IAVF_FLAG_CLIENT_NEEDS_CLOSE) {
+		iavf_notify_client_close(&adapter->vsi, false);
+		adapter->flags &= ~IAVF_FLAG_CLIENT_NEEDS_CLOSE;
+		goto out;
+	}
+	if (adapter->flags & IAVF_FLAG_CLIENT_NEEDS_OPEN) {
+		iavf_notify_client_open(&adapter->vsi);
+		adapter->flags &= ~IAVF_FLAG_CLIENT_NEEDS_OPEN;
+	}
+out:
+	clear_bit(__IAVF_IN_CLIENT_TASK, &adapter->crit_section);
+}
+
+/**
+ * iavf_free_all_tx_resources - Free Tx Resources for All Queues
+ * @adapter: board private structure
+ *
+ * Free all transmit software resources
+ **/
+void iavf_free_all_tx_resources(struct iavf_adapter *adapter)
+{
+	int i;
+
+	if (!adapter->tx_rings)
+		return;
+
+	for (i = 0; i < adapter->num_active_queues; i++)
+		if (adapter->tx_rings[i].desc)
+			iavf_free_tx_resources(&adapter->tx_rings[i]);
+}
+
+/**
+ * iavf_setup_all_tx_resources - allocate all queues Tx resources
+ * @adapter: board private structure
+ *
+ * If this function returns with an error, then it's possible one or
+ * more of the rings is populated (while the rest are not). It is the
+ * caller's duty to clean those orphaned rings.
+ *
+ * Return 0 on success, negative on failure
+ **/
+static int iavf_setup_all_tx_resources(struct iavf_adapter *adapter)
+{
+	int i, err = 0;
+
+	for (i = 0; i < adapter->num_active_queues; i++) {
+		adapter->tx_rings[i].count = adapter->tx_desc_count;
+		err = iavf_setup_tx_descriptors(&adapter->tx_rings[i]);
+		if (!err)
+			continue;
+		dev_err(&adapter->pdev->dev,
+			"Allocation for Tx Queue %u failed\n", i);
+		break;
+	}
+
+	return err;
+}
+
+/**
+ * iavf_setup_all_rx_resources - allocate all queues Rx resources
+ * @adapter: board private structure
+ *
+ * If this function returns with an error, then it's possible one or
+ * more of the rings is populated (while the rest are not). It is the
+ * caller's duty to clean those orphaned rings.
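+ * iavf_free_all_rx_resources() below skips rings whose descriptor
+ * memory was never allocated, so it is safe to use for that cleanup.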
+ *
+ * Return 0 on success, negative on failure
+ **/
+static int iavf_setup_all_rx_resources(struct iavf_adapter *adapter)
+{
+	int i, err = 0;
+
+	for (i = 0; i < adapter->num_active_queues; i++) {
+		adapter->rx_rings[i].count = adapter->rx_desc_count;
+		err = iavf_setup_rx_descriptors(&adapter->rx_rings[i]);
+		if (!err)
+			continue;
+		dev_err(&adapter->pdev->dev,
+			"Allocation for Rx Queue %u failed\n", i);
+		break;
+	}
+	return err;
+}
+
+/**
+ * iavf_free_all_rx_resources - Free Rx Resources for All Queues
+ * @adapter: board private structure
+ *
+ * Free all receive software resources
+ **/
+void iavf_free_all_rx_resources(struct iavf_adapter *adapter)
+{
+	int i;
+
+	if (!adapter->rx_rings)
+		return;
+
+	for (i = 0; i < adapter->num_active_queues; i++)
+		if (adapter->rx_rings[i].desc)
+			iavf_free_rx_resources(&adapter->rx_rings[i]);
+}
+
+/**
+ * iavf_validate_tx_bandwidth - validate the max Tx bandwidth
+ * @adapter: board private structure
+ * @max_tx_rate: max Tx bw for a tc
+ **/
+static int iavf_validate_tx_bandwidth(struct iavf_adapter *adapter,
+				      u64 max_tx_rate)
+{
+	int speed = 0, ret = 0;
+
+	switch (adapter->link_speed) {
+	case I40E_LINK_SPEED_40GB:
+		speed = 40000;
+		break;
+	case I40E_LINK_SPEED_25GB:
+		speed = 25000;
+		break;
+	case I40E_LINK_SPEED_20GB:
+		speed = 20000;
+		break;
+	case I40E_LINK_SPEED_10GB:
+		speed = 10000;
+		break;
+	case I40E_LINK_SPEED_1GB:
+		speed = 1000;
+		break;
+	case I40E_LINK_SPEED_100MB:
+		speed = 100;
+		break;
+	default:
+		break;
+	}
+
+	if (max_tx_rate > speed) {
+		dev_err(&adapter->pdev->dev,
+			"Invalid tx rate specified\n");
+		ret = -EINVAL;
+	}
+
+	return ret;
+}
+
+/**
+ * iavf_validate_ch_config - validate queue mapping info
+ * @adapter: board private structure
+ * @mqprio_qopt: queue parameters
+ *
+ * This function validates if the config provided by the user to
+ * configure queue channels is valid or not. Returns 0 on a valid
+ * config.
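+ *
+ * For example, num_tc = 2 with count = {4, 4} and offset = {0, 4}
+ * describes 8 contiguous queue pairs and passes (assuming it is within
+ * IAVF_MAX_REQ_QUEUES); the per-TC max_rate values are converted to
+ * Mbps, summed, and checked against the link speed by
+ * iavf_validate_tx_bandwidth().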
+ **/
+static int iavf_validate_ch_config(struct iavf_adapter *adapter,
+				   struct tc_mqprio_qopt_offload *mqprio_qopt)
+{
+	u64 total_max_rate = 0;
+	int i, num_qps = 0;
+	u64 tx_rate = 0;
+	int ret = 0;
+
+	if (mqprio_qopt->qopt.num_tc > IAVF_MAX_TRAFFIC_CLASS ||
+	    mqprio_qopt->qopt.num_tc < 1)
+		return -EINVAL;
+
+	for (i = 0; i <= mqprio_qopt->qopt.num_tc - 1; i++) {
+		if (!mqprio_qopt->qopt.count[i] ||
+		    mqprio_qopt->qopt.offset[i] != num_qps)
+			return -EINVAL;
+		if (mqprio_qopt->min_rate[i]) {
+			dev_err(&adapter->pdev->dev,
+				"Invalid min tx rate (greater than 0) specified\n");
+			return -EINVAL;
+		}
+		/* convert to Mbps */
+		tx_rate = div_u64(mqprio_qopt->max_rate[i],
+				  IAVF_MBPS_DIVISOR);
+		total_max_rate += tx_rate;
+		num_qps += mqprio_qopt->qopt.count[i];
+	}
+	if (num_qps > IAVF_MAX_REQ_QUEUES)
+		return -EINVAL;
+
+	ret = iavf_validate_tx_bandwidth(adapter, total_max_rate);
+	return ret;
+}
+
+/**
+ * iavf_del_all_cloud_filters - delete all cloud filters on the traffic classes
+ * @adapter: board private structure
+ **/
+static void iavf_del_all_cloud_filters(struct iavf_adapter *adapter)
+{
+	struct iavf_cloud_filter *cf, *cftmp;
+
+	spin_lock_bh(&adapter->cloud_filter_list_lock);
+	list_for_each_entry_safe(cf, cftmp, &adapter->cloud_filter_list,
+				 list) {
+		list_del(&cf->list);
+		kfree(cf);
+		adapter->num_cloud_filters--;
+	}
+	spin_unlock_bh(&adapter->cloud_filter_list_lock);
+}
+
+/**
+ * __iavf_setup_tc - configure multiple traffic classes
+ * @netdev: network interface device structure
+ * @type_data: tc offload data
+ *
+ * This function processes the config information provided by the
+ * user to configure traffic classes/queue channels and packages the
+ * information to request the PF to setup traffic classes.
+ *
+ * Returns 0 on success.
+ **/
+static int __iavf_setup_tc(struct net_device *netdev, void *type_data)
+{
+	struct tc_mqprio_qopt_offload *mqprio_qopt = type_data;
+	struct iavf_adapter *adapter = netdev_priv(netdev);
+	struct virtchnl_vf_resource *vfres = adapter->vf_res;
+	u8 num_tc = 0, total_qps = 0;
+	int ret = 0, netdev_tc = 0;
+	u64 max_tx_rate;
+	u16 mode;
+	int i;
+
+	num_tc = mqprio_qopt->qopt.num_tc;
+	mode = mqprio_qopt->mode;
+
+	/* delete queue_channel */
+	if (!mqprio_qopt->qopt.hw) {
+		if (adapter->ch_config.state == __IAVF_TC_RUNNING) {
+			/* reset the tc configuration */
+			netdev_reset_tc(netdev);
+			adapter->num_tc = 0;
+			netif_tx_stop_all_queues(netdev);
+			netif_tx_disable(netdev);
+			iavf_del_all_cloud_filters(adapter);
+			adapter->aq_required = IAVF_FLAG_AQ_DISABLE_CHANNELS;
+			goto exit;
+		} else {
+			return -EINVAL;
+		}
+	}
+
+	/* add queue channel */
+	if (mode == TC_MQPRIO_MODE_CHANNEL) {
+		if (!(vfres->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_ADQ)) {
+			dev_err(&adapter->pdev->dev, "ADq not supported\n");
+			return -EOPNOTSUPP;
+		}
+		if (adapter->ch_config.state != __IAVF_TC_INVALID) {
+			dev_err(&adapter->pdev->dev, "TC configuration already exists\n");
+			return -EINVAL;
+		}
+
+		ret = iavf_validate_ch_config(adapter, mqprio_qopt);
+		if (ret)
+			return ret;
+		/* Return if same TC config is requested */
+		if (adapter->num_tc == num_tc)
+			return 0;
+		adapter->num_tc = num_tc;
+
+		for (i = 0; i < IAVF_MAX_TRAFFIC_CLASS; i++) {
+			if (i < num_tc) {
+				adapter->ch_config.ch_info[i].count =
+					mqprio_qopt->qopt.count[i];
+				adapter->ch_config.ch_info[i].offset =
+					mqprio_qopt->qopt.offset[i];
+				total_qps += mqprio_qopt->qopt.count[i];
+				max_tx_rate = mqprio_qopt->max_rate[i];
+				/* convert to Mbps */
+				max_tx_rate = div_u64(max_tx_rate,
+						      IAVF_MBPS_DIVISOR);
+				
adapter->ch_config.ch_info[i].max_tx_rate =
+					max_tx_rate;
+			} else {
+				adapter->ch_config.ch_info[i].count = 1;
+				adapter->ch_config.ch_info[i].offset = 0;
+			}
+		}
+		adapter->ch_config.total_qps = total_qps;
+		netif_tx_stop_all_queues(netdev);
+		netif_tx_disable(netdev);
+		adapter->aq_required |= IAVF_FLAG_AQ_ENABLE_CHANNELS;
+		netdev_reset_tc(netdev);
+		/* Report the tc mapping up the stack */
+		netdev_set_num_tc(adapter->netdev, num_tc);
+		for (i = 0; i < IAVF_MAX_TRAFFIC_CLASS; i++) {
+			u16 qcount = mqprio_qopt->qopt.count[i];
+			u16 qoffset = mqprio_qopt->qopt.offset[i];
+
+			if (i < num_tc)
+				netdev_set_tc_queue(netdev, netdev_tc++, qcount,
+						    qoffset);
+		}
+	}
+exit:
+	return ret;
+}
+
+/**
+ * iavf_parse_cls_flower - Parse tc flower filters provided by kernel
+ * @adapter: board private structure
+ * @f: pointer to struct tc_cls_flower_offload
+ * @filter: pointer to cloud filter structure
+ */
+static int iavf_parse_cls_flower(struct iavf_adapter *adapter,
+				 struct tc_cls_flower_offload *f,
+				 struct iavf_cloud_filter *filter)
+{
+	u16 n_proto_mask = 0;
+	u16 n_proto_key = 0;
+	u8 field_flags = 0;
+	u16 addr_type = 0;
+	u16 n_proto = 0;
+	int i = 0;
+	struct virtchnl_filter *vf = &filter->f;
+
+	if (f->dissector->used_keys &
+	    ~(BIT(FLOW_DISSECTOR_KEY_CONTROL) |
+	      BIT(FLOW_DISSECTOR_KEY_BASIC) |
+	      BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) |
+	      BIT(FLOW_DISSECTOR_KEY_VLAN) |
+	      BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS) |
+	      BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS) |
+	      BIT(FLOW_DISSECTOR_KEY_PORTS) |
+	      BIT(FLOW_DISSECTOR_KEY_ENC_KEYID))) {
+		dev_err(&adapter->pdev->dev, "Unsupported key used: 0x%x\n",
+			f->dissector->used_keys);
+		return -EOPNOTSUPP;
+	}
+
+	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_KEYID)) {
+		struct flow_dissector_key_keyid *mask =
+			skb_flow_dissector_target(f->dissector,
+						  FLOW_DISSECTOR_KEY_ENC_KEYID,
+						  f->mask);
+
+		if (mask->keyid != 0)
+			field_flags |= IAVF_CLOUD_FIELD_TEN_ID;
+	}
+
+	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_BASIC)) {
+		struct flow_dissector_key_basic *key =
+			skb_flow_dissector_target(f->dissector,
+						  FLOW_DISSECTOR_KEY_BASIC,
+						  f->key);
+
+		struct flow_dissector_key_basic *mask =
+			skb_flow_dissector_target(f->dissector,
+						  FLOW_DISSECTOR_KEY_BASIC,
+						  f->mask);
+		n_proto_key = ntohs(key->n_proto);
+		n_proto_mask = ntohs(mask->n_proto);
+
+		if (n_proto_key == ETH_P_ALL) {
+			n_proto_key = 0;
+			n_proto_mask = 0;
+		}
+		n_proto = n_proto_key & n_proto_mask;
+		if (n_proto != ETH_P_IP && n_proto != ETH_P_IPV6)
+			return -EINVAL;
+		if (n_proto == ETH_P_IPV6) {
+			/* specify flow type as TCP IPv6 */
+			vf->flow_type = VIRTCHNL_TCP_V6_FLOW;
+		}
+
+		if (key->ip_proto != IPPROTO_TCP) {
+			dev_info(&adapter->pdev->dev, "Only TCP transport is supported\n");
+			return -EINVAL;
+		}
+	}
+
+	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
+		struct flow_dissector_key_eth_addrs *key =
+			skb_flow_dissector_target(f->dissector,
+						  FLOW_DISSECTOR_KEY_ETH_ADDRS,
+						  f->key);
+
+		struct flow_dissector_key_eth_addrs *mask =
+			skb_flow_dissector_target(f->dissector,
+						  FLOW_DISSECTOR_KEY_ETH_ADDRS,
+						  f->mask);
+		/* use is_broadcast and is_zero to check for all 0xf or 0 */
+		if (!is_zero_ether_addr(mask->dst)) {
+			if (is_broadcast_ether_addr(mask->dst)) {
+				field_flags |= IAVF_CLOUD_FIELD_OMAC;
+			} else {
+				dev_err(&adapter->pdev->dev, "Bad ether dest mask %pM\n",
+					mask->dst);
+				return I40E_ERR_CONFIG;
+			}
+		}
+
+		if (!is_zero_ether_addr(mask->src)) {
+			if (is_broadcast_ether_addr(mask->src)) {
+				field_flags |= IAVF_CLOUD_FIELD_IMAC;
+			}
else {
+				dev_err(&adapter->pdev->dev, "Bad ether src mask %pM\n",
+					mask->src);
+				return I40E_ERR_CONFIG;
+			}
+		}
+
+		if (!is_zero_ether_addr(key->dst))
+			if (is_valid_ether_addr(key->dst) ||
+			    is_multicast_ether_addr(key->dst)) {
+				/* set the mask if a valid dst_mac address */
+				for (i = 0; i < ETH_ALEN; i++)
+					vf->mask.tcp_spec.dst_mac[i] |= 0xff;
+				ether_addr_copy(vf->data.tcp_spec.dst_mac,
+						key->dst);
+			}
+
+		if (!is_zero_ether_addr(key->src))
+			if (is_valid_ether_addr(key->src) ||
+			    is_multicast_ether_addr(key->src)) {
+				/* set the mask if a valid src_mac address */
+				for (i = 0; i < ETH_ALEN; i++)
+					vf->mask.tcp_spec.src_mac[i] |= 0xff;
+				ether_addr_copy(vf->data.tcp_spec.src_mac,
+						key->src);
+			}
+	}
+
+	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_VLAN)) {
+		struct flow_dissector_key_vlan *key =
+			skb_flow_dissector_target(f->dissector,
+						  FLOW_DISSECTOR_KEY_VLAN,
+						  f->key);
+		struct flow_dissector_key_vlan *mask =
+			skb_flow_dissector_target(f->dissector,
+						  FLOW_DISSECTOR_KEY_VLAN,
+						  f->mask);
+
+		if (mask->vlan_id) {
+			if (mask->vlan_id == VLAN_VID_MASK) {
+				field_flags |= IAVF_CLOUD_FIELD_IVLAN;
+			} else {
+				dev_err(&adapter->pdev->dev, "Bad vlan mask %u\n",
+					mask->vlan_id);
+				return I40E_ERR_CONFIG;
+			}
+		}
+		vf->mask.tcp_spec.vlan_id |= cpu_to_be16(0xffff);
+		vf->data.tcp_spec.vlan_id = cpu_to_be16(key->vlan_id);
+	}
+
+	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_CONTROL)) {
+		struct flow_dissector_key_control *key =
+			skb_flow_dissector_target(f->dissector,
+						  FLOW_DISSECTOR_KEY_CONTROL,
+						  f->key);
+
+		addr_type = key->addr_type;
+	}
+
+	if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
+		struct flow_dissector_key_ipv4_addrs *key =
+			skb_flow_dissector_target(f->dissector,
+						  FLOW_DISSECTOR_KEY_IPV4_ADDRS,
+						  f->key);
+		struct flow_dissector_key_ipv4_addrs *mask =
+			skb_flow_dissector_target(f->dissector,
+						  FLOW_DISSECTOR_KEY_IPV4_ADDRS,
+						  f->mask);
+
+		if (mask->dst) {
+			if (mask->dst == cpu_to_be32(0xffffffff)) {
+				field_flags |= IAVF_CLOUD_FIELD_IIP;
+			} else {
+				dev_err(&adapter->pdev->dev, "Bad ip dst mask 0x%08x\n",
+					be32_to_cpu(mask->dst));
+				return I40E_ERR_CONFIG;
+			}
+		}
+
+		if (mask->src) {
+			if (mask->src == cpu_to_be32(0xffffffff)) {
+				field_flags |= IAVF_CLOUD_FIELD_IIP;
+			} else {
+				dev_err(&adapter->pdev->dev, "Bad ip src mask 0x%08x\n",
+					be32_to_cpu(mask->src));
+				return I40E_ERR_CONFIG;
+			}
+		}
+
+		if (field_flags & IAVF_CLOUD_FIELD_TEN_ID) {
+			dev_info(&adapter->pdev->dev, "Tenant id not allowed for ip filter\n");
+			return I40E_ERR_CONFIG;
+		}
+		if (key->dst) {
+			vf->mask.tcp_spec.dst_ip[0] |= cpu_to_be32(0xffffffff);
+			vf->data.tcp_spec.dst_ip[0] = key->dst;
+		}
+		if (key->src) {
+			vf->mask.tcp_spec.src_ip[0] |= cpu_to_be32(0xffffffff);
+			vf->data.tcp_spec.src_ip[0] = key->src;
+		}
+	}
+
+	if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
+		struct flow_dissector_key_ipv6_addrs *key =
+			skb_flow_dissector_target(f->dissector,
+						  FLOW_DISSECTOR_KEY_IPV6_ADDRS,
+						  f->key);
+		struct flow_dissector_key_ipv6_addrs *mask =
+			skb_flow_dissector_target(f->dissector,
+						  FLOW_DISSECTOR_KEY_IPV6_ADDRS,
+						  f->mask);
+
+		/* validate mask, make sure it is not IPV6_ADDR_ANY */
+		if (ipv6_addr_any(&mask->dst)) {
+			dev_err(&adapter->pdev->dev, "Bad ipv6 dst mask 0x%02x\n",
+				IPV6_ADDR_ANY);
+			return I40E_ERR_CONFIG;
+		}
+
+		/* src and dest IPv6 address should not be LOOPBACK
+		 * (0:0:0:0:0:0:0:1) which can be represented as ::1
+		 */
+		if (ipv6_addr_loopback(&key->dst) ||
+		    ipv6_addr_loopback(&key->src)) {
+			dev_err(&adapter->pdev->dev,
+				"ipv6 addr 
should not be loopback\n"); + return I40E_ERR_CONFIG; + } + if (!ipv6_addr_any(&mask->dst) || !ipv6_addr_any(&mask->src)) + field_flags |= IAVF_CLOUD_FIELD_IIP; + + for (i = 0; i < 4; i++) + vf->mask.tcp_spec.dst_ip[i] |= cpu_to_be32(0xffffffff); + memcpy(&vf->data.tcp_spec.dst_ip, &key->dst.s6_addr32, + sizeof(vf->data.tcp_spec.dst_ip)); + for (i = 0; i < 4; i++) + vf->mask.tcp_spec.src_ip[i] |= cpu_to_be32(0xffffffff); + memcpy(&vf->data.tcp_spec.src_ip, &key->src.s6_addr32, + sizeof(vf->data.tcp_spec.src_ip)); + } + if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_PORTS)) { + struct flow_dissector_key_ports *key = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_PORTS, + f->key); + struct flow_dissector_key_ports *mask = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_PORTS, + f->mask); + + if (mask->src) { + if (mask->src == cpu_to_be16(0xffff)) { + field_flags |= IAVF_CLOUD_FIELD_IIP; + } else { + dev_err(&adapter->pdev->dev, "Bad src port mask %u\n", + be16_to_cpu(mask->src)); + return I40E_ERR_CONFIG; + } + } + + if (mask->dst) { + if (mask->dst == cpu_to_be16(0xffff)) { + field_flags |= IAVF_CLOUD_FIELD_IIP; + } else { + dev_err(&adapter->pdev->dev, "Bad dst port mask %u\n", + be16_to_cpu(mask->dst)); + return I40E_ERR_CONFIG; + } + } + if (key->dst) { + vf->mask.tcp_spec.dst_port |= cpu_to_be16(0xffff); + vf->data.tcp_spec.dst_port = key->dst; + } + + if (key->src) { + vf->mask.tcp_spec.src_port |= cpu_to_be16(0xffff); + vf->data.tcp_spec.src_port = key->src; + } + } + vf->field_flags = field_flags; + + return 0; +} + +/** + * iavf_handle_tclass - Forward to a traffic class on the device + * @adapter: board private structure + * @tc: traffic class index on the device + * @filter: pointer to cloud filter structure + */ +static int iavf_handle_tclass(struct iavf_adapter *adapter, u32 tc, + struct iavf_cloud_filter *filter) +{ + if (tc == 0) + return 0; + if (tc < adapter->num_tc) { + if (!filter->f.data.tcp_spec.dst_port) { + dev_err(&adapter->pdev->dev, + "Specify destination port to redirect to traffic class other than TC0\n"); + return -EINVAL; + } + } + /* redirect to a traffic class on the same device */ + filter->f.action = VIRTCHNL_ACTION_TC_REDIRECT; + filter->f.action_meta = tc; + return 0; +} + +/** + * iavf_configure_clsflower - Add tc flower filters + * @adapter: board private structure + * @cls_flower: Pointer to struct tc_cls_flower_offload + */ +static int iavf_configure_clsflower(struct iavf_adapter *adapter, + struct tc_cls_flower_offload *cls_flower) +{ + int tc = tc_classid_to_hwtc(adapter->netdev, cls_flower->classid); + struct iavf_cloud_filter *filter = NULL; + int err = -EINVAL, count = 50; + + if (tc < 0) { + dev_err(&adapter->pdev->dev, "Invalid traffic class\n"); + return -EINVAL; + } + + filter = kzalloc(sizeof(*filter), GFP_KERNEL); + if (!filter) + return -ENOMEM; + + while (test_and_set_bit(__IAVF_IN_CRITICAL_TASK, + &adapter->crit_section)) { + if (--count == 0) + goto err; + udelay(1); + } + + filter->cookie = cls_flower->cookie; + + /* set the mask to all zeroes to begin with */ + memset(&filter->f.mask.tcp_spec, 0, sizeof(struct virtchnl_l4_spec)); + /* start out with flow type and eth type IPv4 to begin with */ + filter->f.flow_type = VIRTCHNL_TCP_V4_FLOW; + err = iavf_parse_cls_flower(adapter, cls_flower, filter); + if (err < 0) + goto err; + + err = iavf_handle_tclass(adapter, tc, filter); + if (err < 0) + goto err; + + /* add filter to the list */ + spin_lock_bh(&adapter->cloud_filter_list_lock); + 
list_add_tail(&filter->list, &adapter->cloud_filter_list);
+	adapter->num_cloud_filters++;
+	filter->add = true;
+	adapter->aq_required |= IAVF_FLAG_AQ_ADD_CLOUD_FILTER;
+	spin_unlock_bh(&adapter->cloud_filter_list_lock);
+err:
+	if (err)
+		kfree(filter);
+
+	clear_bit(__IAVF_IN_CRITICAL_TASK, &adapter->crit_section);
+	return err;
+}
+
+/* iavf_find_cf - Find the cloud filter in the list
+ * @adapter: Board private structure
+ * @cookie: filter specific cookie
+ *
+ * Returns ptr to the filter object or NULL. Must be called while holding the
+ * cloud_filter_list_lock.
+ */
+static struct iavf_cloud_filter *iavf_find_cf(struct iavf_adapter *adapter,
+					      unsigned long *cookie)
+{
+	struct iavf_cloud_filter *filter = NULL;
+
+	if (!cookie)
+		return NULL;
+
+	list_for_each_entry(filter, &adapter->cloud_filter_list, list) {
+		if (!memcmp(cookie, &filter->cookie, sizeof(filter->cookie)))
+			return filter;
+	}
+	return NULL;
+}
+
+/**
+ * iavf_delete_clsflower - Remove tc flower filters
+ * @adapter: board private structure
+ * @cls_flower: Pointer to struct tc_cls_flower_offload
+ */
+static int iavf_delete_clsflower(struct iavf_adapter *adapter,
+				 struct tc_cls_flower_offload *cls_flower)
+{
+	struct iavf_cloud_filter *filter = NULL;
+	int err = 0;
+
+	spin_lock_bh(&adapter->cloud_filter_list_lock);
+	filter = iavf_find_cf(adapter, &cls_flower->cookie);
+	if (filter) {
+		filter->del = true;
+		adapter->aq_required |= IAVF_FLAG_AQ_DEL_CLOUD_FILTER;
+	} else {
+		err = -EINVAL;
+	}
+	spin_unlock_bh(&adapter->cloud_filter_list_lock);
+
+	return err;
+}
+
+/**
+ * iavf_setup_tc_cls_flower - flower classifier offloads
+ * @adapter: board private structure
+ * @cls_flower: pointer to struct tc_cls_flower_offload
+ */
+static int iavf_setup_tc_cls_flower(struct iavf_adapter *adapter,
+				    struct tc_cls_flower_offload *cls_flower)
+{
+	if (cls_flower->common.chain_index)
+		return -EOPNOTSUPP;
+
+	switch (cls_flower->command) {
+	case TC_CLSFLOWER_REPLACE:
+		return iavf_configure_clsflower(adapter, cls_flower);
+	case TC_CLSFLOWER_DESTROY:
+		return iavf_delete_clsflower(adapter, cls_flower);
+	case TC_CLSFLOWER_STATS:
+		return -EOPNOTSUPP;
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+/**
+ * iavf_setup_tc_block_cb - block callback for tc
+ * @type: type of offload
+ * @type_data: offload data
+ * @cb_priv: adapter private structure registered at block bind time
+ *
+ * This function is the block callback for traffic classes
+ **/
+static int iavf_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
+				  void *cb_priv)
+{
+	switch (type) {
+	case TC_SETUP_CLSFLOWER:
+		return iavf_setup_tc_cls_flower(cb_priv, type_data);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+/**
+ * iavf_setup_tc_block - register callbacks for tc
+ * @dev: network interface device structure
+ * @f: tc offload data
+ *
+ * This function registers block callbacks for tc
+ * offloads
+ **/
+static int iavf_setup_tc_block(struct net_device *dev,
+			       struct tc_block_offload *f)
+{
+	struct iavf_adapter *adapter = netdev_priv(dev);
+
+	if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
+		return -EOPNOTSUPP;
+
+	switch (f->command) {
+	case TC_BLOCK_BIND:
+		return tcf_block_cb_register(f->block, iavf_setup_tc_block_cb,
+					     adapter, adapter, f->extack);
+	case TC_BLOCK_UNBIND:
+		tcf_block_cb_unregister(f->block, iavf_setup_tc_block_cb,
+					adapter);
+		return 0;
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+/**
+ * iavf_setup_tc - configure multiple traffic classes
+ * @netdev: network interface device structure
+ * @type: type of offload
+ * @type_data: tc offload data
+ *
+ * This function is the callback to ndo_setup_tc in 
the + * netdev_ops. + * + * Returns 0 on success + **/ +static int iavf_setup_tc(struct net_device *netdev, enum tc_setup_type type, + void *type_data) +{ + switch (type) { + case TC_SETUP_QDISC_MQPRIO: + return __iavf_setup_tc(netdev, type_data); + case TC_SETUP_BLOCK: + return iavf_setup_tc_block(netdev, type_data); + default: + return -EOPNOTSUPP; + } +} + +/** + * iavf_open - Called when a network interface is made active + * @netdev: network interface device structure + * + * Returns 0 on success, negative value on failure + * + * The open entry point is called when a network interface is made + * active by the system (IFF_UP). At this point all resources needed + * for transmit and receive operations are allocated, the interrupt + * handler is registered with the OS, the watchdog timer is started, + * and the stack is notified that the interface is ready. + **/ +static int iavf_open(struct net_device *netdev) +{ + struct iavf_adapter *adapter = netdev_priv(netdev); + int err; + + if (adapter->flags & IAVF_FLAG_PF_COMMS_FAILED) { + dev_err(&adapter->pdev->dev, "Unable to open device due to PF driver failure.\n"); + return -EIO; + } + + while (test_and_set_bit(__IAVF_IN_CRITICAL_TASK, + &adapter->crit_section)) + usleep_range(500, 1000); + + if (adapter->state != __IAVF_DOWN) { + err = -EBUSY; + goto err_unlock; + } + + /* allocate transmit descriptors */ + err = iavf_setup_all_tx_resources(adapter); + if (err) + goto err_setup_tx; + + /* allocate receive descriptors */ + err = iavf_setup_all_rx_resources(adapter); + if (err) + goto err_setup_rx; + + /* clear any pending interrupts, may auto mask */ + err = iavf_request_traffic_irqs(adapter, netdev->name); + if (err) + goto err_req_irq; + + spin_lock_bh(&adapter->mac_vlan_list_lock); + + iavf_add_filter(adapter, adapter->hw.mac.addr); + + spin_unlock_bh(&adapter->mac_vlan_list_lock); + + iavf_configure(adapter); + + iavf_up_complete(adapter); + + iavf_irq_enable(adapter, true); + + clear_bit(__IAVF_IN_CRITICAL_TASK, &adapter->crit_section); + + return 0; + +err_req_irq: + iavf_down(adapter); + iavf_free_traffic_irqs(adapter); +err_setup_rx: + iavf_free_all_rx_resources(adapter); +err_setup_tx: + iavf_free_all_tx_resources(adapter); +err_unlock: + clear_bit(__IAVF_IN_CRITICAL_TASK, &adapter->crit_section); + + return err; +} + +/** + * iavf_close - Disables a network interface + * @netdev: network interface device structure + * + * Returns 0, this is not allowed to fail + * + * The close entry point is called when an interface is de-activated + * by the OS. The hardware is still under the drivers control, but + * needs to be disabled. All IRQs except vector 0 (reserved for admin queue) + * are freed, along with all transmit and receive resources. + **/ +static int iavf_close(struct net_device *netdev) +{ + struct iavf_adapter *adapter = netdev_priv(netdev); + int status; + + if (adapter->state <= __IAVF_DOWN_PENDING) + return 0; + + while (test_and_set_bit(__IAVF_IN_CRITICAL_TASK, + &adapter->crit_section)) + usleep_range(500, 1000); + + set_bit(__I40E_VSI_DOWN, adapter->vsi.state); + if (CLIENT_ENABLED(adapter)) + adapter->flags |= IAVF_FLAG_CLIENT_NEEDS_CLOSE; + + iavf_down(adapter); + adapter->state = __IAVF_DOWN_PENDING; + iavf_free_traffic_irqs(adapter); + + clear_bit(__IAVF_IN_CRITICAL_TASK, &adapter->crit_section); + + /* We explicitly don't free resources here because the hardware is + * still active and can DMA into memory. 
Resources are cleared in
+	 * iavf_virtchnl_completion() after we get confirmation from the PF
+	 * driver that the rings have been stopped.
+	 *
+	 * Also, we wait for state to transition to __IAVF_DOWN before
+	 * returning. State change occurs in iavf_virtchnl_completion() after
+	 * VF resources are released (which occurs after PF driver processes and
+	 * responds to admin queue commands).
+	 */
+
+	status = wait_event_timeout(adapter->down_waitqueue,
+				    adapter->state == __IAVF_DOWN,
+				    msecs_to_jiffies(200));
+	if (!status)
+		netdev_warn(netdev, "Device resources not yet released\n");
+	return 0;
+}
+
+/**
+ * iavf_change_mtu - Change the Maximum Transfer Unit
+ * @netdev: network interface device structure
+ * @new_mtu: new value for maximum frame size
+ *
+ * Returns 0 on success, negative on failure
+ **/
+static int iavf_change_mtu(struct net_device *netdev, int new_mtu)
+{
+	struct iavf_adapter *adapter = netdev_priv(netdev);
+
+	netdev->mtu = new_mtu;
+	if (CLIENT_ENABLED(adapter)) {
+		iavf_notify_client_l2_params(&adapter->vsi);
+		adapter->flags |= IAVF_FLAG_SERVICE_CLIENT_REQUESTED;
+	}
+	adapter->flags |= IAVF_FLAG_RESET_NEEDED;
+	schedule_work(&adapter->reset_task);
+
+	return 0;
+}
+
+/**
+ * iavf_set_features - set the netdev feature flags
+ * @netdev: ptr to the netdev being adjusted
+ * @features: the feature set that the stack is suggesting
+ * Note: expects to be called while under rtnl_lock()
+ **/
+static int iavf_set_features(struct net_device *netdev,
+			     netdev_features_t features)
+{
+	struct iavf_adapter *adapter = netdev_priv(netdev);
+
+	/* Don't allow changing VLAN_RX flag when adapter is not capable
+	 * of VLAN offload
+	 */
+	if (!VLAN_ALLOWED(adapter)) {
+		if ((netdev->features ^ features) & NETIF_F_HW_VLAN_CTAG_RX)
+			return -EINVAL;
+	} else if ((netdev->features ^ features) & NETIF_F_HW_VLAN_CTAG_RX) {
+		if (features & NETIF_F_HW_VLAN_CTAG_RX)
+			adapter->aq_required |=
+				IAVF_FLAG_AQ_ENABLE_VLAN_STRIPPING;
+		else
+			adapter->aq_required |=
+				IAVF_FLAG_AQ_DISABLE_VLAN_STRIPPING;
+	}
+
+	return 0;
+}
+
+/**
+ * iavf_features_check - Validate encapsulated packet conforms to limits
+ * @skb: skb buff
+ * @dev: This physical port's netdev
+ * @features: Offload features that the stack believes apply
+ **/
+static netdev_features_t iavf_features_check(struct sk_buff *skb,
+					     struct net_device *dev,
+					     netdev_features_t features)
+{
+	size_t len;
+
+	/* No point in doing any of this if neither checksum nor GSO are
+	 * being requested for this frame. We can rule out both by just
+	 * checking for CHECKSUM_PARTIAL
+	 */
+	if (skb->ip_summed != CHECKSUM_PARTIAL)
+		return features;
+
+	/* We cannot support GSO if the MSS is going to be less than
+	 * 64 bytes. If it is then we need to drop support for GSO.
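+	 * (For instance, a 4 KB GSO payload with an MSS of 32 would expand
+	 * into 128 mostly-header segments; an illustrative figure, not a
+	 * limit taken from the datasheet.)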
+	 */
+	if (skb_is_gso(skb) && (skb_shinfo(skb)->gso_size < 64))
+		features &= ~NETIF_F_GSO_MASK;
+
+	/* MACLEN can support at most 63 words */
+	len = skb_network_header(skb) - skb->data;
+	if (len & ~(63 * 2))
+		goto out_err;
+
+	/* IPLEN and EIPLEN can support at most 127 dwords */
+	len = skb_transport_header(skb) - skb_network_header(skb);
+	if (len & ~(127 * 4))
+		goto out_err;
+
+	if (skb->encapsulation) {
+		/* L4TUNLEN can support 127 words */
+		len = skb_inner_network_header(skb) - skb_transport_header(skb);
+		if (len & ~(127 * 2))
+			goto out_err;
+
+		/* IPLEN can support at most 127 dwords */
+		len = skb_inner_transport_header(skb) -
+		      skb_inner_network_header(skb);
+		if (len & ~(127 * 4))
+			goto out_err;
+	}
+
+	/* No need to validate L4LEN as TCP is the only protocol with a
+	 * flexible value and we support all possible values supported
+	 * by TCP, which is at most 15 dwords
+	 */
+
+	return features;
+out_err:
+	return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
+}
+
+/**
+ * iavf_fix_features - fix up the netdev feature bits
+ * @netdev: our net device
+ * @features: desired feature bits
+ *
+ * Returns fixed-up features bits
+ **/
+static netdev_features_t iavf_fix_features(struct net_device *netdev,
+					   netdev_features_t features)
+{
+	struct iavf_adapter *adapter = netdev_priv(netdev);
+
+	if (!(adapter->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_VLAN))
+		features &= ~(NETIF_F_HW_VLAN_CTAG_TX |
+			      NETIF_F_HW_VLAN_CTAG_RX |
+			      NETIF_F_HW_VLAN_CTAG_FILTER);
+
+	return features;
+}
+
+static const struct net_device_ops iavf_netdev_ops = {
+	.ndo_open		= iavf_open,
+	.ndo_stop		= iavf_close,
+	.ndo_start_xmit		= iavf_xmit_frame,
+	.ndo_set_rx_mode	= iavf_set_rx_mode,
+	.ndo_validate_addr	= eth_validate_addr,
+	.ndo_set_mac_address	= iavf_set_mac,
+	.ndo_change_mtu		= iavf_change_mtu,
+	.ndo_tx_timeout		= iavf_tx_timeout,
+	.ndo_vlan_rx_add_vid	= iavf_vlan_rx_add_vid,
+	.ndo_vlan_rx_kill_vid	= iavf_vlan_rx_kill_vid,
+	.ndo_features_check	= iavf_features_check,
+	.ndo_fix_features	= iavf_fix_features,
+	.ndo_set_features	= iavf_set_features,
+#ifdef CONFIG_NET_POLL_CONTROLLER
+	.ndo_poll_controller	= iavf_netpoll,
+#endif
+	.ndo_setup_tc		= iavf_setup_tc,
+};
+
+/**
+ * iavf_check_reset_complete - check that VF reset is complete
+ * @hw: pointer to hw struct
+ *
+ * Returns 0 if device is ready to use, or -EBUSY if it's in reset.
+ **/
+static int iavf_check_reset_complete(struct i40e_hw *hw)
+{
+	u32 rstat;
+	int i;
+
+	for (i = 0; i < 100; i++) {
+		rstat = rd32(hw, I40E_VFGEN_RSTAT) &
+			I40E_VFGEN_RSTAT_VFR_STATE_MASK;
+		if ((rstat == VIRTCHNL_VFR_VFACTIVE) ||
+		    (rstat == VIRTCHNL_VFR_COMPLETED))
+			return 0;
+		usleep_range(10, 20);
+	}
+	return -EBUSY;
+}
+
+/**
+ * iavf_process_config - Process the config information we got from the PF
+ * @adapter: board private structure
+ *
+ * Verify that we have a valid config struct, and set up our netdev features
+ * and our VSI struct.
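+ *
+ * Returns 0 on success, or -ENODEV if no LAN VSI was found or if the PF
+ * handed back a different queue count than we requested (in which case
+ * a reset is scheduled).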
+ **/
+int iavf_process_config(struct iavf_adapter *adapter)
+{
+	struct virtchnl_vf_resource *vfres = adapter->vf_res;
+	int i, num_req_queues = adapter->num_req_queues;
+	struct net_device *netdev = adapter->netdev;
+	struct i40e_vsi *vsi = &adapter->vsi;
+	netdev_features_t hw_enc_features;
+	netdev_features_t hw_features;
+
+	/* got VF config message back from PF, now we can parse it */
+	for (i = 0; i < vfres->num_vsis; i++) {
+		if (vfres->vsi_res[i].vsi_type == VIRTCHNL_VSI_SRIOV)
+			adapter->vsi_res = &vfres->vsi_res[i];
+	}
+	if (!adapter->vsi_res) {
+		dev_err(&adapter->pdev->dev, "No LAN VSI found\n");
+		return -ENODEV;
+	}
+
+	if (num_req_queues &&
+	    num_req_queues != adapter->vsi_res->num_queue_pairs) {
+		/* Problem. The PF gave us fewer queues than what we had
+		 * negotiated in our request. Need a reset to see if we can
+		 * get back to a working state.
+		 */
+		dev_err(&adapter->pdev->dev,
+			"Requested %d queues, but PF only gave us %d.\n",
+			num_req_queues,
+			adapter->vsi_res->num_queue_pairs);
+		adapter->flags |= IAVF_FLAG_REINIT_ITR_NEEDED;
+		adapter->num_req_queues = adapter->vsi_res->num_queue_pairs;
+		iavf_schedule_reset(adapter);
+		return -ENODEV;
+	}
+	adapter->num_req_queues = 0;
+
+	hw_enc_features = NETIF_F_SG |
+			  NETIF_F_IP_CSUM |
+			  NETIF_F_IPV6_CSUM |
+			  NETIF_F_HIGHDMA |
+			  NETIF_F_SOFT_FEATURES |
+			  NETIF_F_TSO |
+			  NETIF_F_TSO_ECN |
+			  NETIF_F_TSO6 |
+			  NETIF_F_SCTP_CRC |
+			  NETIF_F_RXHASH |
+			  NETIF_F_RXCSUM |
+			  0;
+
+	/* advertise to stack only if offloads for encapsulated packets are
+	 * supported
+	 */
+	if (vfres->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_ENCAP) {
+		hw_enc_features |= NETIF_F_GSO_UDP_TUNNEL |
+				   NETIF_F_GSO_GRE |
+				   NETIF_F_GSO_GRE_CSUM |
+				   NETIF_F_GSO_IPXIP4 |
+				   NETIF_F_GSO_IPXIP6 |
+				   NETIF_F_GSO_UDP_TUNNEL_CSUM |
+				   NETIF_F_GSO_PARTIAL |
+				   0;
+
+		if (!(vfres->vf_cap_flags &
+		      VIRTCHNL_VF_OFFLOAD_ENCAP_CSUM))
+			netdev->gso_partial_features |=
+				NETIF_F_GSO_UDP_TUNNEL_CSUM;
+
+		netdev->gso_partial_features |= NETIF_F_GSO_GRE_CSUM;
+		netdev->hw_enc_features |= NETIF_F_TSO_MANGLEID;
+		netdev->hw_enc_features |= hw_enc_features;
+	}
+	/* record features VLANs can make use of */
+	netdev->vlan_features |= hw_enc_features | NETIF_F_TSO_MANGLEID;
+
+	/* Write features and hw_features separately to avoid polluting
+	 * with, or dropping, features that are set when we registered.
+	 */
+	hw_features = hw_enc_features;
+
+	/* Enable VLAN features if supported */
+	if (vfres->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_VLAN)
+		hw_features |= (NETIF_F_HW_VLAN_CTAG_TX |
+				NETIF_F_HW_VLAN_CTAG_RX);
+	/* Enable cloud filter if ADQ is supported */
+	if (vfres->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_ADQ)
+		hw_features |= NETIF_F_HW_TC;
+
+	netdev->hw_features |= hw_features;
+
+	netdev->features |= hw_features;
+
+	if (vfres->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_VLAN)
+		netdev->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
+
+	netdev->priv_flags |= IFF_UNICAST_FLT;
+
+	/* Do not turn on offloads when they are requested to be turned off.
+	 * TSO needs minimum 576 bytes to work correctly.
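+	 * (In practice this means TSO/TSO6 stay off whenever the MTU is
+	 * below 576 bytes, even if user space requested them.)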
+ */ + if (netdev->wanted_features) { + if (!(netdev->wanted_features & NETIF_F_TSO) || + netdev->mtu < 576) + netdev->features &= ~NETIF_F_TSO; + if (!(netdev->wanted_features & NETIF_F_TSO6) || + netdev->mtu < 576) + netdev->features &= ~NETIF_F_TSO6; + if (!(netdev->wanted_features & NETIF_F_TSO_ECN)) + netdev->features &= ~NETIF_F_TSO_ECN; + if (!(netdev->wanted_features & NETIF_F_GRO)) + netdev->features &= ~NETIF_F_GRO; + if (!(netdev->wanted_features & NETIF_F_GSO)) + netdev->features &= ~NETIF_F_GSO; + } + + adapter->vsi.id = adapter->vsi_res->vsi_id; + + adapter->vsi.back = adapter; + adapter->vsi.base_vector = 1; + adapter->vsi.work_limit = I40E_DEFAULT_IRQ_WORK; + vsi->netdev = adapter->netdev; + vsi->qs_handle = adapter->vsi_res->qset_handle; + if (vfres->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_RSS_PF) { + adapter->rss_key_size = vfres->rss_key_size; + adapter->rss_lut_size = vfres->rss_lut_size; + } else { + adapter->rss_key_size = IAVF_HKEY_ARRAY_SIZE; + adapter->rss_lut_size = IAVF_HLUT_ARRAY_SIZE; + } + + return 0; +} + +/** + * iavf_init_task - worker thread to perform delayed initialization + * @work: pointer to work_struct containing our data + * + * This task completes the work that was begun in probe. Due to the nature + * of VF-PF communications, we may need to wait tens of milliseconds to get + * responses back from the PF. Rather than busy-wait in probe and bog down the + * whole system, we'll do it in a task so we can sleep. + * This task only runs during driver init. Once we've established + * communications with the PF driver and set up our netdev, the watchdog + * takes over. + **/ +static void iavf_init_task(struct work_struct *work) +{ + struct iavf_adapter *adapter = container_of(work, + struct iavf_adapter, + init_task.work); + struct net_device *netdev = adapter->netdev; + struct i40e_hw *hw = &adapter->hw; + struct pci_dev *pdev = adapter->pdev; + int err, bufsz; + + switch (adapter->state) { + case __IAVF_STARTUP: + /* driver loaded, probe complete */ + adapter->flags &= ~IAVF_FLAG_PF_COMMS_FAILED; + adapter->flags &= ~IAVF_FLAG_RESET_PENDING; + err = i40e_set_mac_type(hw); + if (err) { + dev_err(&pdev->dev, "Failed to set MAC type (%d)\n", + err); + goto err; + } + err = iavf_check_reset_complete(hw); + if (err) { + dev_info(&pdev->dev, "Device is still in reset (%d), retrying\n", + err); + goto err; + } + hw->aq.num_arq_entries = IAVF_AQ_LEN; + hw->aq.num_asq_entries = IAVF_AQ_LEN; + hw->aq.arq_buf_size = IAVF_MAX_AQ_BUF_SIZE; + hw->aq.asq_buf_size = IAVF_MAX_AQ_BUF_SIZE; + + err = iavf_init_adminq(hw); + if (err) { + dev_err(&pdev->dev, "Failed to init Admin Queue (%d)\n", + err); + goto err; + } + err = iavf_send_api_ver(adapter); + if (err) { + dev_err(&pdev->dev, "Unable to send to PF (%d)\n", err); + iavf_shutdown_adminq(hw); + goto err; + } + adapter->state = __IAVF_INIT_VERSION_CHECK; + goto restart; + case __IAVF_INIT_VERSION_CHECK: + if (!iavf_asq_done(hw)) { + dev_err(&pdev->dev, "Admin queue command never completed\n"); + iavf_shutdown_adminq(hw); + adapter->state = __IAVF_STARTUP; + goto err; + } + + /* aq msg sent, awaiting reply */ + err = iavf_verify_api_ver(adapter); + if (err) { + if (err == I40E_ERR_ADMIN_QUEUE_NO_WORK) + err = iavf_send_api_ver(adapter); + else + dev_err(&pdev->dev, "Unsupported PF API version %d.%d, expected %d.%d\n", + adapter->pf_version.major, + adapter->pf_version.minor, + VIRTCHNL_VERSION_MAJOR, + VIRTCHNL_VERSION_MINOR); + goto err; + } + err = iavf_send_vf_config_msg(adapter); + if (err) { + dev_err(&pdev->dev, 
"Unable to send config request (%d)\n", + err); + goto err; + } + adapter->state = __IAVF_INIT_GET_RESOURCES; + goto restart; + case __IAVF_INIT_GET_RESOURCES: + /* aq msg sent, awaiting reply */ + if (!adapter->vf_res) { + bufsz = sizeof(struct virtchnl_vf_resource) + + (I40E_MAX_VF_VSI * + sizeof(struct virtchnl_vsi_resource)); + adapter->vf_res = kzalloc(bufsz, GFP_KERNEL); + if (!adapter->vf_res) + goto err; + } + err = iavf_get_vf_config(adapter); + if (err == I40E_ERR_ADMIN_QUEUE_NO_WORK) { + err = iavf_send_vf_config_msg(adapter); + goto err; + } else if (err == I40E_ERR_PARAM) { + /* We only get ERR_PARAM if the device is in a very bad + * state or if we've been disabled for previous bad + * behavior. Either way, we're done now. + */ + iavf_shutdown_adminq(hw); + dev_err(&pdev->dev, "Unable to get VF config due to PF error condition, not retrying\n"); + return; + } + if (err) { + dev_err(&pdev->dev, "Unable to get VF config (%d)\n", + err); + goto err_alloc; + } + adapter->state = __IAVF_INIT_SW; + break; + default: + goto err_alloc; + } + + if (iavf_process_config(adapter)) + goto err_alloc; + adapter->current_op = VIRTCHNL_OP_UNKNOWN; + + adapter->flags |= IAVF_FLAG_RX_CSUM_ENABLED; + + netdev->netdev_ops = &iavf_netdev_ops; + iavf_set_ethtool_ops(netdev); + netdev->watchdog_timeo = 5 * HZ; + + /* MTU range: 68 - 9710 */ + netdev->min_mtu = ETH_MIN_MTU; + netdev->max_mtu = I40E_MAX_RXBUFFER - I40E_PACKET_HDR_PAD; + + if (!is_valid_ether_addr(adapter->hw.mac.addr)) { + dev_info(&pdev->dev, "Invalid MAC address %pM, using random\n", + adapter->hw.mac.addr); + eth_hw_addr_random(netdev); + ether_addr_copy(adapter->hw.mac.addr, netdev->dev_addr); + } else { + adapter->flags |= IAVF_FLAG_ADDR_SET_BY_PF; + ether_addr_copy(netdev->dev_addr, adapter->hw.mac.addr); + ether_addr_copy(netdev->perm_addr, adapter->hw.mac.addr); + } + + timer_setup(&adapter->watchdog_timer, iavf_watchdog_timer, 0); + mod_timer(&adapter->watchdog_timer, jiffies + 1); + + adapter->tx_desc_count = IAVF_DEFAULT_TXD; + adapter->rx_desc_count = IAVF_DEFAULT_RXD; + err = iavf_init_interrupt_scheme(adapter); + if (err) + goto err_sw_init; + iavf_map_rings_to_vectors(adapter); + if (adapter->vf_res->vf_cap_flags & + VIRTCHNL_VF_OFFLOAD_WB_ON_ITR) + adapter->flags |= IAVF_FLAG_WB_ON_ITR_CAPABLE; + + err = iavf_request_misc_irq(adapter); + if (err) + goto err_sw_init; + + netif_carrier_off(netdev); + adapter->link_up = false; + + if (!adapter->netdev_registered) { + err = register_netdev(netdev); + if (err) + goto err_register; + } + + adapter->netdev_registered = true; + + netif_tx_stop_all_queues(netdev); + if (CLIENT_ALLOWED(adapter)) { + err = iavf_lan_add_device(adapter); + if (err) + dev_info(&pdev->dev, "Failed to add VF to client API service list: %d\n", + err); + } + + dev_info(&pdev->dev, "MAC address: %pM\n", adapter->hw.mac.addr); + if (netdev->features & NETIF_F_GRO) + dev_info(&pdev->dev, "GRO is enabled\n"); + + adapter->state = __IAVF_DOWN; + set_bit(__I40E_VSI_DOWN, adapter->vsi.state); + iavf_misc_irq_enable(adapter); + wake_up(&adapter->down_waitqueue); + + adapter->rss_key = kzalloc(adapter->rss_key_size, GFP_KERNEL); + adapter->rss_lut = kzalloc(adapter->rss_lut_size, GFP_KERNEL); + if (!adapter->rss_key || !adapter->rss_lut) + goto err_mem; + + if (RSS_AQ(adapter)) { + adapter->aq_required |= IAVF_FLAG_AQ_CONFIGURE_RSS; + mod_timer_pending(&adapter->watchdog_timer, jiffies + 1); + } else { + iavf_init_rss(adapter); + } + return; +restart: + schedule_delayed_work(&adapter->init_task, 
msecs_to_jiffies(30)); + return; +err_mem: + iavf_free_rss(adapter); +err_register: + iavf_free_misc_irq(adapter); +err_sw_init: + iavf_reset_interrupt_capability(adapter); +err_alloc: + kfree(adapter->vf_res); + adapter->vf_res = NULL; +err: + /* Things went into the weeds, so try again later */ + if (++adapter->aq_wait_count > IAVF_AQ_MAX_ERR) { + dev_err(&pdev->dev, "Failed to communicate with PF; waiting before retry\n"); + adapter->flags |= IAVF_FLAG_PF_COMMS_FAILED; + iavf_shutdown_adminq(hw); + adapter->state = __IAVF_STARTUP; + schedule_delayed_work(&adapter->init_task, HZ * 5); + return; + } + schedule_delayed_work(&adapter->init_task, HZ); +} + +/** + * iavf_shutdown - Shutdown the device in preparation for a reboot + * @pdev: pci device structure + **/ +static void iavf_shutdown(struct pci_dev *pdev) +{ + struct net_device *netdev = pci_get_drvdata(pdev); + struct iavf_adapter *adapter = netdev_priv(netdev); + + netif_device_detach(netdev); + + if (netif_running(netdev)) + iavf_close(netdev); + + /* Prevent the watchdog from running. */ + adapter->state = __IAVF_REMOVE; + adapter->aq_required = 0; + +#ifdef CONFIG_PM + pci_save_state(pdev); + +#endif + pci_disable_device(pdev); +} + +/** + * iavf_probe - Device Initialization Routine + * @pdev: PCI device information struct + * @ent: entry in iavf_pci_tbl + * + * Returns 0 on success, negative on failure + * + * iavf_probe initializes an adapter identified by a pci_dev structure. + * The OS initialization, configuring of the adapter private structure, + * and a hardware reset occur. + **/ +static int iavf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) +{ + struct net_device *netdev; + struct iavf_adapter *adapter = NULL; + struct i40e_hw *hw = NULL; + int err; + + err = pci_enable_device(pdev); + if (err) + return err; + + err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); + if (err) { + err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); + if (err) { + dev_err(&pdev->dev, + "DMA configuration failed: 0x%x\n", err); + goto err_dma; + } + } + + err = pci_request_regions(pdev, iavf_driver_name); + if (err) { + dev_err(&pdev->dev, + "pci_request_regions failed 0x%x\n", err); + goto err_pci_reg; + } + + pci_enable_pcie_error_reporting(pdev); + + pci_set_master(pdev); + + netdev = alloc_etherdev_mq(sizeof(struct iavf_adapter), + IAVF_MAX_REQ_QUEUES); + if (!netdev) { + err = -ENOMEM; + goto err_alloc_etherdev; + } + + SET_NETDEV_DEV(netdev, &pdev->dev); + + pci_set_drvdata(pdev, netdev); + adapter = netdev_priv(netdev); + + adapter->netdev = netdev; + adapter->pdev = pdev; + + hw = &adapter->hw; + hw->back = adapter; + + adapter->msg_enable = BIT(DEFAULT_DEBUG_LEVEL_SHIFT) - 1; + adapter->state = __IAVF_STARTUP; + + /* Call save state here because it relies on the adapter struct. 
*/ + pci_save_state(pdev); + + hw->hw_addr = ioremap(pci_resource_start(pdev, 0), + pci_resource_len(pdev, 0)); + if (!hw->hw_addr) { + err = -EIO; + goto err_ioremap; + } + hw->vendor_id = pdev->vendor; + hw->device_id = pdev->device; + pci_read_config_byte(pdev, PCI_REVISION_ID, &hw->revision_id); + hw->subsystem_vendor_id = pdev->subsystem_vendor; + hw->subsystem_device_id = pdev->subsystem_device; + hw->bus.device = PCI_SLOT(pdev->devfn); + hw->bus.func = PCI_FUNC(pdev->devfn); + hw->bus.bus_id = pdev->bus->number; + + /* set up the locks for the AQ, do this only once in probe + * and destroy them only once in remove + */ + mutex_init(&hw->aq.asq_mutex); + mutex_init(&hw->aq.arq_mutex); + + spin_lock_init(&adapter->mac_vlan_list_lock); + spin_lock_init(&adapter->cloud_filter_list_lock); + + INIT_LIST_HEAD(&adapter->mac_filter_list); + INIT_LIST_HEAD(&adapter->vlan_filter_list); + INIT_LIST_HEAD(&adapter->cloud_filter_list); + + INIT_WORK(&adapter->reset_task, iavf_reset_task); + INIT_WORK(&adapter->adminq_task, iavf_adminq_task); + INIT_WORK(&adapter->watchdog_task, iavf_watchdog_task); + INIT_DELAYED_WORK(&adapter->client_task, iavf_client_task); + INIT_DELAYED_WORK(&adapter->init_task, iavf_init_task); + schedule_delayed_work(&adapter->init_task, + msecs_to_jiffies(5 * (pdev->devfn & 0x07))); + + /* Setup the wait queue for indicating transition to down status */ + init_waitqueue_head(&adapter->down_waitqueue); + + return 0; + +err_ioremap: + free_netdev(netdev); +err_alloc_etherdev: + pci_release_regions(pdev); +err_pci_reg: +err_dma: + pci_disable_device(pdev); + return err; +} + +#ifdef CONFIG_PM +/** + * iavf_suspend - Power management suspend routine + * @pdev: PCI device information struct + * @state: unused + * + * Called when the system (VM) is entering sleep/suspend. + **/ +static int iavf_suspend(struct pci_dev *pdev, pm_message_t state) +{ + struct net_device *netdev = pci_get_drvdata(pdev); + struct iavf_adapter *adapter = netdev_priv(netdev); + int retval = 0; + + netif_device_detach(netdev); + + while (test_and_set_bit(__IAVF_IN_CRITICAL_TASK, + &adapter->crit_section)) + usleep_range(500, 1000); + + if (netif_running(netdev)) { + rtnl_lock(); + iavf_down(adapter); + rtnl_unlock(); + } + iavf_free_misc_irq(adapter); + iavf_reset_interrupt_capability(adapter); + + clear_bit(__IAVF_IN_CRITICAL_TASK, &adapter->crit_section); + + retval = pci_save_state(pdev); + if (retval) + return retval; + + pci_disable_device(pdev); + + return 0; +} + +/** + * iavf_resume - Power management resume routine + * @pdev: PCI device information struct + * + * Called when the system (VM) is resumed from sleep/suspend. + **/ +static int iavf_resume(struct pci_dev *pdev) +{ + struct iavf_adapter *adapter = pci_get_drvdata(pdev); + struct net_device *netdev = adapter->netdev; + u32 err; + + pci_set_power_state(pdev, PCI_D0); + pci_restore_state(pdev); + /* pci_restore_state clears dev->state_saved so call + * pci_save_state to restore it. 
+	 */
+	pci_save_state(pdev);
+
+	err = pci_enable_device_mem(pdev);
+	if (err) {
+		dev_err(&pdev->dev, "Cannot enable PCI device from suspend.\n");
+		return err;
+	}
+	pci_set_master(pdev);
+
+	rtnl_lock();
+	err = iavf_set_interrupt_capability(adapter);
+	if (err) {
+		rtnl_unlock();
+		dev_err(&pdev->dev, "Cannot enable MSI-X interrupts.\n");
+		return err;
+	}
+	err = iavf_request_misc_irq(adapter);
+	rtnl_unlock();
+	if (err) {
+		dev_err(&pdev->dev, "Cannot get interrupt vector.\n");
+		return err;
+	}
+
+	schedule_work(&adapter->reset_task);
+
+	netif_device_attach(netdev);
+
+	return err;
+}
+
+#endif /* CONFIG_PM */
+/**
+ * iavf_remove - Device Removal Routine
+ * @pdev: PCI device information struct
+ *
+ * iavf_remove is called by the PCI subsystem to alert the driver
+ * that it should release a PCI device. This could be caused by a
+ * Hot-Plug event, or because the driver is going to be removed from
+ * memory.
+ **/
+static void iavf_remove(struct pci_dev *pdev)
+{
+	struct net_device *netdev = pci_get_drvdata(pdev);
+	struct iavf_adapter *adapter = netdev_priv(netdev);
+	struct iavf_vlan_filter *vlf, *vlftmp;
+	struct iavf_mac_filter *f, *ftmp;
+	struct iavf_cloud_filter *cf, *cftmp;
+	struct i40e_hw *hw = &adapter->hw;
+	int err;
+	/* Indicate we are in remove and not to run reset_task */
+	set_bit(__IAVF_IN_REMOVE_TASK, &adapter->crit_section);
+	cancel_delayed_work_sync(&adapter->init_task);
+	cancel_work_sync(&adapter->reset_task);
+	cancel_delayed_work_sync(&adapter->client_task);
+	if (adapter->netdev_registered) {
+		unregister_netdev(netdev);
+		adapter->netdev_registered = false;
+	}
+	if (CLIENT_ALLOWED(adapter)) {
+		err = iavf_lan_del_device(adapter);
+		if (err)
+			dev_warn(&pdev->dev, "Failed to delete client device: %d\n",
+				 err);
+	}
+
+	/* Shut down all the garbage mashers on the detention level */
+	adapter->state = __IAVF_REMOVE;
+	adapter->aq_required = 0;
+	adapter->flags &= ~IAVF_FLAG_REINIT_ITR_NEEDED;
+	iavf_request_reset(adapter);
+	msleep(50);
+	/* If the FW isn't responding, kick it once, but only once. */
+	if (!iavf_asq_done(hw)) {
+		iavf_request_reset(adapter);
+		msleep(50);
+	}
+	iavf_free_all_tx_resources(adapter);
+	iavf_free_all_rx_resources(adapter);
+	iavf_misc_irq_disable(adapter);
+	iavf_free_misc_irq(adapter);
+	iavf_reset_interrupt_capability(adapter);
+	iavf_free_q_vectors(adapter);
+
+	if (adapter->watchdog_timer.function)
+		del_timer_sync(&adapter->watchdog_timer);
+
+	cancel_work_sync(&adapter->adminq_task);
+
+	iavf_free_rss(adapter);
+
+	if (hw->aq.asq.count)
+		iavf_shutdown_adminq(hw);
+
+	/* destroy the locks only once, here */
+	mutex_destroy(&hw->aq.arq_mutex);
+	mutex_destroy(&hw->aq.asq_mutex);
+
+	iounmap(hw->hw_addr);
+	pci_release_regions(pdev);
+	iavf_free_all_tx_resources(adapter);
+	iavf_free_all_rx_resources(adapter);
+	iavf_free_queues(adapter);
+	kfree(adapter->vf_res);
+	spin_lock_bh(&adapter->mac_vlan_list_lock);
+	/* If we got removed before an up/down sequence, we've got a filter
+	 * hanging out there that we need to get rid of.
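+	 * (e.g. the MAC filter that iavf_open() queues for our own address,
+	 * plus any VLAN or cloud filters that were never flushed to the PF)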
+	 */
+	list_for_each_entry_safe(f, ftmp, &adapter->mac_filter_list, list) {
+		list_del(&f->list);
+		kfree(f);
+	}
+	list_for_each_entry_safe(vlf, vlftmp, &adapter->vlan_filter_list,
+				 list) {
+		list_del(&vlf->list);
+		kfree(vlf);
+	}
+
+	spin_unlock_bh(&adapter->mac_vlan_list_lock);
+
+	spin_lock_bh(&adapter->cloud_filter_list_lock);
+	list_for_each_entry_safe(cf, cftmp, &adapter->cloud_filter_list, list) {
+		list_del(&cf->list);
+		kfree(cf);
+	}
+	spin_unlock_bh(&adapter->cloud_filter_list_lock);
+
+	free_netdev(netdev);
+
+	pci_disable_pcie_error_reporting(pdev);
+
+	pci_disable_device(pdev);
+}
+
+static struct pci_driver iavf_driver = {
+	.name     = iavf_driver_name,
+	.id_table = iavf_pci_tbl,
+	.probe    = iavf_probe,
+	.remove   = iavf_remove,
+#ifdef CONFIG_PM
+	.suspend  = iavf_suspend,
+	.resume   = iavf_resume,
+#endif
+	.shutdown = iavf_shutdown,
+};
+
+/**
+ * iavf_init_module - Driver Registration Routine
+ *
+ * iavf_init_module is the first routine called when the driver is
+ * loaded. It creates the driver workqueue and registers with the
+ * PCI subsystem.
+ **/
+static int __init iavf_init_module(void)
+{
+	int ret;
+
+	pr_info("iavf: %s - version %s\n", iavf_driver_string,
+		iavf_driver_version);
+
+	pr_info("%s\n", iavf_copyright);
+
+	iavf_wq = alloc_workqueue("%s", WQ_UNBOUND | WQ_MEM_RECLAIM, 1,
+				  iavf_driver_name);
+	if (!iavf_wq) {
+		pr_err("%s: Failed to create workqueue\n", iavf_driver_name);
+		return -ENOMEM;
+	}
+	ret = pci_register_driver(&iavf_driver);
+	return ret;
+}
+
+module_init(iavf_init_module);
+
+/**
+ * iavf_exit_module - Driver Exit Cleanup Routine
+ *
+ * iavf_exit_module is called just before the driver is removed
+ * from memory.
+ **/
+static void __exit iavf_exit_module(void)
+{
+	pci_unregister_driver(&iavf_driver);
+	destroy_workqueue(iavf_wq);
+}
+
+module_exit(iavf_exit_module);
+
+/* iavf_main.c */
diff --git a/drivers/net/ethernet/intel/iavf/iavf_txrx.c b/drivers/net/ethernet/intel/iavf/iavf_txrx.c
new file mode 100644
index 0000000000000..104f278b38d83
--- /dev/null
+++ b/drivers/net/ethernet/intel/iavf/iavf_txrx.c
@@ -0,0 +1,2508 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2013 - 2018 Intel Corporation.
*/
+
+#include <linux/prefetch.h>
+#include <net/busy_poll.h>
+
+#include "iavf.h"
+#include "i40e_trace.h"
+#include "i40e_prototype.h"
+
+static inline __le64 build_ctob(u32 td_cmd, u32 td_offset, unsigned int size,
+				u32 td_tag)
+{
+	return cpu_to_le64(I40E_TX_DESC_DTYPE_DATA |
+			   ((u64)td_cmd << I40E_TXD_QW1_CMD_SHIFT) |
+			   ((u64)td_offset << I40E_TXD_QW1_OFFSET_SHIFT) |
+			   ((u64)size << I40E_TXD_QW1_TX_BUF_SZ_SHIFT) |
+			   ((u64)td_tag << I40E_TXD_QW1_L2TAG1_SHIFT));
+}
+
+#define I40E_TXD_CMD (I40E_TX_DESC_CMD_EOP | I40E_TX_DESC_CMD_RS)
+
+/**
+ * i40e_unmap_and_free_tx_resource - Release a Tx buffer
+ * @ring: the ring that owns the buffer
+ * @tx_buffer: the buffer to free
+ **/
+static void i40e_unmap_and_free_tx_resource(struct i40e_ring *ring,
+					    struct i40e_tx_buffer *tx_buffer)
+{
+	if (tx_buffer->skb) {
+		if (tx_buffer->tx_flags & I40E_TX_FLAGS_FD_SB)
+			kfree(tx_buffer->raw_buf);
+		else
+			dev_kfree_skb_any(tx_buffer->skb);
+		if (dma_unmap_len(tx_buffer, len))
+			dma_unmap_single(ring->dev,
+					 dma_unmap_addr(tx_buffer, dma),
+					 dma_unmap_len(tx_buffer, len),
+					 DMA_TO_DEVICE);
+	} else if (dma_unmap_len(tx_buffer, len)) {
+		dma_unmap_page(ring->dev,
+			       dma_unmap_addr(tx_buffer, dma),
+			       dma_unmap_len(tx_buffer, len),
+			       DMA_TO_DEVICE);
+	}
+
+	tx_buffer->next_to_watch = NULL;
+	tx_buffer->skb = NULL;
+	dma_unmap_len_set(tx_buffer, len, 0);
+	/* tx_buffer must be completely set up in the transmit path */
+}
+
+/**
+ * iavf_clean_tx_ring - Free any empty Tx buffers
+ * @tx_ring: ring to be cleaned
+ **/
+void iavf_clean_tx_ring(struct i40e_ring *tx_ring)
+{
+	unsigned long bi_size;
+	u16 i;
+
+	/* ring already cleared, nothing to do */
+	if (!tx_ring->tx_bi)
+		return;
+
+	/* Free all the Tx ring sk_buffs */
+	for (i = 0; i < tx_ring->count; i++)
+		i40e_unmap_and_free_tx_resource(tx_ring, &tx_ring->tx_bi[i]);
+
+	bi_size = sizeof(struct i40e_tx_buffer) * tx_ring->count;
+	memset(tx_ring->tx_bi, 0, bi_size);
+
+	/* Zero out the descriptor ring */
+	memset(tx_ring->desc, 0, tx_ring->size);
+
+	tx_ring->next_to_use = 0;
+	tx_ring->next_to_clean = 0;
+
+	if (!tx_ring->netdev)
+		return;
+
+	/* cleanup Tx queue statistics */
+	netdev_tx_reset_queue(txring_txq(tx_ring));
+}
+
+/**
+ * iavf_free_tx_resources - Free Tx resources per queue
+ * @tx_ring: Tx descriptor ring for a specific queue
+ *
+ * Free all transmit software resources
+ **/
+void iavf_free_tx_resources(struct i40e_ring *tx_ring)
+{
+	iavf_clean_tx_ring(tx_ring);
+	kfree(tx_ring->tx_bi);
+	tx_ring->tx_bi = NULL;
+
+	if (tx_ring->desc) {
+		dma_free_coherent(tx_ring->dev, tx_ring->size,
+				  tx_ring->desc, tx_ring->dma);
+		tx_ring->desc = NULL;
+	}
+}
+
+/**
+ * iavf_get_tx_pending - how many Tx descriptors not processed
+ * @ring: the ring of descriptors
+ * @in_sw: is tx_pending being checked in SW or HW
+ *
+ * Since there is no access to the ring head register
+ * in XL710, we need to use our local copies
+ **/
+u32 iavf_get_tx_pending(struct i40e_ring *ring, bool in_sw)
+{
+	u32 head, tail;
+
+	head = ring->next_to_clean;
+	tail = readl(ring->tail);
+
+	if (head != tail)
+		return (head < tail) ?
+			tail - head : (tail + ring->count - head);
+
+	return 0;
+}
+
+/**
+ * iavf_detect_recover_hung - Function to detect and recover hung queues
+ * @vsi: pointer to vsi struct with tx queues
+ *
+ * VSI has netdev and netdev has TX queues. This function checks each of
+ * those TX queues and, if a queue appears hung, triggers recovery by
+ * issuing a SW interrupt.
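+ * The check is cheap: if a ring's packet count has not moved since the
+ * previous pass while descriptors are still pending, the queue is
+ * assumed stuck and iavf_force_wb() fires a SW interrupt for it.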
+ **/ +void iavf_detect_recover_hung(struct i40e_vsi *vsi) +{ + struct i40e_ring *tx_ring = NULL; + struct net_device *netdev; + unsigned int i; + int packets; + + if (!vsi) + return; + + if (test_bit(__I40E_VSI_DOWN, vsi->state)) + return; + + netdev = vsi->netdev; + if (!netdev) + return; + + if (!netif_carrier_ok(netdev)) + return; + + for (i = 0; i < vsi->back->num_active_queues; i++) { + tx_ring = &vsi->back->tx_rings[i]; + if (tx_ring && tx_ring->desc) { + /* If packet counter has not changed the queue is + * likely stalled, so force an interrupt for this + * queue. + * + * prev_pkt_ctr would be negative if there was no + * pending work. + */ + packets = tx_ring->stats.packets & INT_MAX; + if (tx_ring->tx_stats.prev_pkt_ctr == packets) { + iavf_force_wb(vsi, tx_ring->q_vector); + continue; + } + + /* Memory barrier between read of packet count and call + * to iavf_get_tx_pending() + */ + smp_rmb(); + tx_ring->tx_stats.prev_pkt_ctr = + iavf_get_tx_pending(tx_ring, true) ? packets : -1; + } + } +} + +#define WB_STRIDE 4 + +/** + * i40e_clean_tx_irq - Reclaim resources after transmit completes + * @vsi: the VSI we care about + * @tx_ring: Tx ring to clean + * @napi_budget: Used to determine if we are in netpoll + * + * Returns true if there's any budget left (e.g. the clean is finished) + **/ +static bool i40e_clean_tx_irq(struct i40e_vsi *vsi, + struct i40e_ring *tx_ring, int napi_budget) +{ + u16 i = tx_ring->next_to_clean; + struct i40e_tx_buffer *tx_buf; + struct i40e_tx_desc *tx_desc; + unsigned int total_bytes = 0, total_packets = 0; + unsigned int budget = vsi->work_limit; + + tx_buf = &tx_ring->tx_bi[i]; + tx_desc = I40E_TX_DESC(tx_ring, i); + i -= tx_ring->count; + + do { + struct i40e_tx_desc *eop_desc = tx_buf->next_to_watch; + + /* if next_to_watch is not set then there is no work pending */ + if (!eop_desc) + break; + + /* prevent any other reads prior to eop_desc */ + smp_rmb(); + + i40e_trace(clean_tx_irq, tx_ring, tx_desc, tx_buf); + /* if the descriptor isn't done, no work yet to do */ + if (!(eop_desc->cmd_type_offset_bsz & + cpu_to_le64(I40E_TX_DESC_DTYPE_DESC_DONE))) + break; + + /* clear next_to_watch to prevent false hangs */ + tx_buf->next_to_watch = NULL; + + /* update the statistics for this packet */ + total_bytes += tx_buf->bytecount; + total_packets += tx_buf->gso_segs; + + /* free the skb */ + napi_consume_skb(tx_buf->skb, napi_budget); + + /* unmap skb header data */ + dma_unmap_single(tx_ring->dev, + dma_unmap_addr(tx_buf, dma), + dma_unmap_len(tx_buf, len), + DMA_TO_DEVICE); + + /* clear tx_buffer data */ + tx_buf->skb = NULL; + dma_unmap_len_set(tx_buf, len, 0); + + /* unmap remaining buffers */ + while (tx_desc != eop_desc) { + i40e_trace(clean_tx_irq_unmap, + tx_ring, tx_desc, tx_buf); + + tx_buf++; + tx_desc++; + i++; + if (unlikely(!i)) { + i -= tx_ring->count; + tx_buf = tx_ring->tx_bi; + tx_desc = I40E_TX_DESC(tx_ring, 0); + } + + /* unmap any remaining paged data */ + if (dma_unmap_len(tx_buf, len)) { + dma_unmap_page(tx_ring->dev, + dma_unmap_addr(tx_buf, dma), + dma_unmap_len(tx_buf, len), + DMA_TO_DEVICE); + dma_unmap_len_set(tx_buf, len, 0); + } + } + + /* move us one more past the eop_desc for start of next pkt */ + tx_buf++; + tx_desc++; + i++; + if (unlikely(!i)) { + i -= tx_ring->count; + tx_buf = tx_ring->tx_bi; + tx_desc = I40E_TX_DESC(tx_ring, 0); + } + + prefetch(tx_desc); + + /* update budget accounting */ + budget--; + } while (likely(budget)); + + i += tx_ring->count; + tx_ring->next_to_clean = i; + 
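+	/* publish the cleaned byte and packet totals under the stats seqlock */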
u64_stats_update_begin(&tx_ring->syncp);
+	tx_ring->stats.bytes += total_bytes;
+	tx_ring->stats.packets += total_packets;
+	u64_stats_update_end(&tx_ring->syncp);
+	tx_ring->q_vector->tx.total_bytes += total_bytes;
+	tx_ring->q_vector->tx.total_packets += total_packets;
+
+	if (tx_ring->flags & I40E_TXR_FLAGS_WB_ON_ITR) {
+		/* check to see if there are < 4 descriptors
+		 * waiting to be written back, then kick the hardware to force
+		 * them to be written back in case we stay in NAPI.
+		 * In this mode on X722 we do not enable interrupts.
+		 */
+		unsigned int j = iavf_get_tx_pending(tx_ring, false);
+
+		if (budget &&
+		    ((j / WB_STRIDE) == 0) && (j > 0) &&
+		    !test_bit(__I40E_VSI_DOWN, vsi->state) &&
+		    (I40E_DESC_UNUSED(tx_ring) != tx_ring->count))
+			tx_ring->arm_wb = true;
+	}
+
+	/* notify netdev of completed buffers */
+	netdev_tx_completed_queue(txring_txq(tx_ring),
+				  total_packets, total_bytes);
+
+#define TX_WAKE_THRESHOLD ((s16)(DESC_NEEDED * 2))
+	if (unlikely(total_packets && netif_carrier_ok(tx_ring->netdev) &&
+		     (I40E_DESC_UNUSED(tx_ring) >= TX_WAKE_THRESHOLD))) {
+		/* Make sure that anybody stopping the queue after this
+		 * sees the new next_to_clean.
+		 */
+		smp_mb();
+		if (__netif_subqueue_stopped(tx_ring->netdev,
+					     tx_ring->queue_index) &&
+		    !test_bit(__I40E_VSI_DOWN, vsi->state)) {
+			netif_wake_subqueue(tx_ring->netdev,
+					    tx_ring->queue_index);
+			++tx_ring->tx_stats.restart_queue;
+		}
+	}
+
+	return !!budget;
+}
+
+/**
+ * i40e_enable_wb_on_itr - Arm hardware to do a wb, interrupts are not enabled
+ * @vsi: the VSI we care about
+ * @q_vector: the vector on which to enable writeback
+ *
+ **/
+static void i40e_enable_wb_on_itr(struct i40e_vsi *vsi,
+				  struct i40e_q_vector *q_vector)
+{
+	u16 flags = q_vector->tx.ring[0].flags;
+	u32 val;
+
+	if (!(flags & I40E_TXR_FLAGS_WB_ON_ITR))
+		return;
+
+	if (q_vector->arm_wb_state)
+		return;
+
+	val = I40E_VFINT_DYN_CTLN1_WB_ON_ITR_MASK |
+	      I40E_VFINT_DYN_CTLN1_ITR_INDX_MASK; /* set noitr */
+
+	wr32(&vsi->back->hw,
+	     I40E_VFINT_DYN_CTLN1(q_vector->reg_idx), val);
+	q_vector->arm_wb_state = true;
+}
+
+/**
+ * iavf_force_wb - Issue SW Interrupt so HW does a wb
+ * @vsi: the VSI we care about
+ * @q_vector: the vector on which to force writeback
+ *
+ **/
+void iavf_force_wb(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector)
+{
+	u32 val = I40E_VFINT_DYN_CTLN1_INTENA_MASK |
+		  I40E_VFINT_DYN_CTLN1_ITR_INDX_MASK | /* set noitr */
+		  I40E_VFINT_DYN_CTLN1_SWINT_TRIG_MASK |
+		  I40E_VFINT_DYN_CTLN1_SW_ITR_INDX_ENA_MASK
+		  /* allow 00 to be written to the index */;
+
+	wr32(&vsi->back->hw,
+	     I40E_VFINT_DYN_CTLN1(q_vector->reg_idx),
+	     val);
+}
+
+static inline bool i40e_container_is_rx(struct i40e_q_vector *q_vector,
+					struct i40e_ring_container *rc)
+{
+	return &q_vector->rx == rc;
+}
+
+static inline unsigned int i40e_itr_divisor(struct i40e_q_vector *q_vector)
+{
+	unsigned int divisor;
+
+	switch (q_vector->adapter->link_speed) {
+	case I40E_LINK_SPEED_40GB:
+		divisor = I40E_ITR_ADAPTIVE_MIN_INC * 1024;
+		break;
+	case I40E_LINK_SPEED_25GB:
+	case I40E_LINK_SPEED_20GB:
+		divisor = I40E_ITR_ADAPTIVE_MIN_INC * 512;
+		break;
+	default:
+	case I40E_LINK_SPEED_10GB:
+		divisor = I40E_ITR_ADAPTIVE_MIN_INC * 256;
+		break;
+	case I40E_LINK_SPEED_1GB:
+	case I40E_LINK_SPEED_100MB:
+		divisor = I40E_ITR_ADAPTIVE_MIN_INC * 32;
+		break;
+	}
+
+	return divisor;
+}
+
+/**
+ * i40e_update_itr - update the dynamic ITR value based on statistics
+ * @q_vector: structure containing interrupt and ring information
+ * @rc: structure containing ring performance data
+
* + * Stores a new ITR value based on packets and byte + * counts during the last interrupt. The advantage of per interrupt + * computation is faster updates and more accurate ITR for the current + * traffic pattern. Constants in this function were computed + * based on theoretical maximum wire speed and thresholds were set based + * on testing data as well as attempting to minimize response time + * while increasing bulk throughput. + **/ +static void i40e_update_itr(struct i40e_q_vector *q_vector, + struct i40e_ring_container *rc) +{ + unsigned int avg_wire_size, packets, bytes, itr; + unsigned long next_update = jiffies; + + /* If we don't have any rings just leave ourselves set for maximum + * possible latency so we take ourselves out of the equation. + */ + if (!rc->ring || !ITR_IS_DYNAMIC(rc->ring->itr_setting)) + return; + + /* For Rx we want to push the delay up and default to low latency. + * for Tx we want to pull the delay down and default to high latency. + */ + itr = i40e_container_is_rx(q_vector, rc) ? + I40E_ITR_ADAPTIVE_MIN_USECS | I40E_ITR_ADAPTIVE_LATENCY : + I40E_ITR_ADAPTIVE_MAX_USECS | I40E_ITR_ADAPTIVE_LATENCY; + + /* If we didn't update within up to 1 - 2 jiffies we can assume + * that either packets are coming in so slow there hasn't been + * any work, or that there is so much work that NAPI is dealing + * with interrupt moderation and we don't need to do anything. + */ + if (time_after(next_update, rc->next_update)) + goto clear_counts; + + /* If itr_countdown is set it means we programmed an ITR within + * the last 4 interrupt cycles. This has a side effect of us + * potentially firing an early interrupt. In order to work around + * this we need to throw out any data received for a few + * interrupts following the update. + */ + if (q_vector->itr_countdown) { + itr = rc->target_itr; + goto clear_counts; + } + + packets = rc->total_packets; + bytes = rc->total_bytes; + + if (i40e_container_is_rx(q_vector, rc)) { + /* If Rx there are 1 to 4 packets and bytes are less than + * 9000 assume insufficient data to use bulk rate limiting + * approach unless Tx is already in bulk rate limiting. We + * are likely latency driven. + */ + if (packets && packets < 4 && bytes < 9000 && + (q_vector->tx.target_itr & I40E_ITR_ADAPTIVE_LATENCY)) { + itr = I40E_ITR_ADAPTIVE_LATENCY; + goto adjust_by_size; + } + } else if (packets < 4) { + /* If we have Tx and Rx ITR maxed and Tx ITR is running in + * bulk mode and we are receiving 4 or fewer packets just + * reset the ITR_ADAPTIVE_LATENCY bit for latency mode so + * that the Rx can relax. + */ + if (rc->target_itr == I40E_ITR_ADAPTIVE_MAX_USECS && + (q_vector->rx.target_itr & I40E_ITR_MASK) == + I40E_ITR_ADAPTIVE_MAX_USECS) + goto clear_counts; + } else if (packets > 32) { + /* If we have processed over 32 packets in a single interrupt + * for Tx assume we need to switch over to "bulk" mode. + */ + rc->target_itr &= ~I40E_ITR_ADAPTIVE_LATENCY; + } + + /* We have no packets to actually measure against. This means + * either one of the other queues on this vector is active or + * we are a Tx queue doing TSO with too high of an interrupt rate. + * + * Between 4 and 56 we can assume that our current interrupt delay + * is only slightly too low. As such we should increase it by a small + * fixed amount. 
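+	 * (Assuming the stock i40e definitions, I40E_ITR_ADAPTIVE_MIN_INC is
+	 * 0x2, so each pass here nudges the delay up by 2 usecs.)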
+	 */
+	if (packets < 56) {
+		itr = rc->target_itr + I40E_ITR_ADAPTIVE_MIN_INC;
+		if ((itr & I40E_ITR_MASK) > I40E_ITR_ADAPTIVE_MAX_USECS) {
+			itr &= I40E_ITR_ADAPTIVE_LATENCY;
+			itr += I40E_ITR_ADAPTIVE_MAX_USECS;
+		}
+		goto clear_counts;
+	}
+
+	if (packets <= 256) {
+		itr = min(q_vector->tx.current_itr, q_vector->rx.current_itr);
+		itr &= I40E_ITR_MASK;
+
+		/* Between 56 and 112 is our "goldilocks" zone where we are
+		 * working out "just right". Just report that our current
+		 * ITR is good for us.
+		 */
+		if (packets <= 112)
+			goto clear_counts;
+
+		/* If packet count is 128 or greater we are likely looking
+		 * at a slight overrun of the delay we want. Try halving
+		 * our delay to see if that will cut the number of packets
+		 * in half per interrupt.
+		 */
+		itr /= 2;
+		itr &= I40E_ITR_MASK;
+		if (itr < I40E_ITR_ADAPTIVE_MIN_USECS)
+			itr = I40E_ITR_ADAPTIVE_MIN_USECS;
+
+		goto clear_counts;
+	}
+
+	/* The paths below assume we are dealing with a bulk ITR since
+	 * number of packets is greater than 256. We are just going to have
+	 * to compute a value and try to bring the count under control,
+	 * though for smaller packet sizes there isn't much we can do as
+	 * NAPI polling will likely be kicking in sooner rather than later.
+	 */
+	itr = I40E_ITR_ADAPTIVE_BULK;
+
+adjust_by_size:
+	/* If packet counts are 256 or greater we can assume we have a gross
+	 * overestimation of what the rate should be. Instead of trying to fine
+	 * tune it just use the formula below to try and dial in an exact value
+	 * given the current packet size of the frame.
+	 */
+	avg_wire_size = bytes / packets;
+
+	/* The following is a crude approximation of:
+	 *  wmem_default / (size + overhead) = desired_pkts_per_int
+	 *  rate / bits_per_byte / (size + ethernet overhead) = pkt_rate
+	 *  (desired_pkt_rate / pkt_rate) * usecs_per_sec = ITR value
+	 *
+	 * Assuming wmem_default is 212992 and overhead is 640 bytes per
+	 * packet, (256 skb, 64 headroom, 320 shared info), we can reduce the
+	 * formula down to
+	 *
+	 *      (170 * (size + 24)) / (size + 640) = ITR
+	 *
+	 * We first do some math on the packet size and then finally bitshift
+	 * by 8 after rounding up. We also have to account for PCIe link speed
+	 * difference as ITR scales based on this.
+	 */
+	if (avg_wire_size <= 60) {
+		/* Start at 250k ints/sec */
+		avg_wire_size = 4096;
+	} else if (avg_wire_size <= 380) {
+		/* 250K ints/sec to 60K ints/sec */
+		avg_wire_size *= 40;
+		avg_wire_size += 1696;
+	} else if (avg_wire_size <= 1084) {
+		/* 60K ints/sec to 36K ints/sec */
+		avg_wire_size *= 15;
+		avg_wire_size += 11452;
+	} else if (avg_wire_size <= 1980) {
+		/* 36K ints/sec to 30K ints/sec */
+		avg_wire_size *= 5;
+		avg_wire_size += 22420;
+	} else {
+		/* plateau at a limit of 30K ints/sec */
+		avg_wire_size = 32256;
+	}
+
+	/* If we are in low latency mode halve our delay which doubles the
+	 * rate to somewhere between 100K to 16K ints/sec
+	 */
+	if (itr & I40E_ITR_ADAPTIVE_LATENCY)
+		avg_wire_size /= 2;
+
+	/* Resultant value is 256 times larger than it needs to be. This
+	 * gives us room to adjust the value as needed to either increase
+	 * or decrease the value based on link speeds of 10G, 2.5G, 1G, etc.
+	 *
+	 * Use addition as we have already recorded the new latency flag
+	 * for the ITR value.
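+	 *
+	 * As a worked illustration (assuming the stock definitions, not text
+	 * from this patch): at 40GB link speed i40e_itr_divisor() returns
+	 * I40E_ITR_ADAPTIVE_MIN_INC * 1024 = 2048, so the <= 60 byte bucket
+	 * above (avg_wire_size = 4096) adds DIV_ROUND_UP(4096, 2048) * 2 =
+	 * 4 usecs, i.e. the 250K ints/sec starting point quoted earlier.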
+ */ + itr += DIV_ROUND_UP(avg_wire_size, i40e_itr_divisor(q_vector)) * + I40E_ITR_ADAPTIVE_MIN_INC; + + if ((itr & I40E_ITR_MASK) > I40E_ITR_ADAPTIVE_MAX_USECS) { + itr &= I40E_ITR_ADAPTIVE_LATENCY; + itr += I40E_ITR_ADAPTIVE_MAX_USECS; + } + +clear_counts: + /* write back value */ + rc->target_itr = itr; + + /* next update should occur within next jiffy */ + rc->next_update = next_update + 1; + + rc->total_bytes = 0; + rc->total_packets = 0; +} + +/** + * iavf_setup_tx_descriptors - Allocate the Tx descriptors + * @tx_ring: the tx ring to set up + * + * Return 0 on success, negative on error + **/ +int iavf_setup_tx_descriptors(struct i40e_ring *tx_ring) +{ + struct device *dev = tx_ring->dev; + int bi_size; + + if (!dev) + return -ENOMEM; + + /* warn if we are about to overwrite the pointer */ + WARN_ON(tx_ring->tx_bi); + bi_size = sizeof(struct i40e_tx_buffer) * tx_ring->count; + tx_ring->tx_bi = kzalloc(bi_size, GFP_KERNEL); + if (!tx_ring->tx_bi) + goto err; + + /* round up to nearest 4K */ + tx_ring->size = tx_ring->count * sizeof(struct i40e_tx_desc); + tx_ring->size = ALIGN(tx_ring->size, 4096); + tx_ring->desc = dma_alloc_coherent(dev, tx_ring->size, + &tx_ring->dma, GFP_KERNEL); + if (!tx_ring->desc) { + dev_info(dev, "Unable to allocate memory for the Tx descriptor ring, size=%d\n", + tx_ring->size); + goto err; + } + + tx_ring->next_to_use = 0; + tx_ring->next_to_clean = 0; + tx_ring->tx_stats.prev_pkt_ctr = -1; + return 0; + +err: + kfree(tx_ring->tx_bi); + tx_ring->tx_bi = NULL; + return -ENOMEM; +} + +/** + * iavf_clean_rx_ring - Free Rx buffers + * @rx_ring: ring to be cleaned + **/ +void iavf_clean_rx_ring(struct i40e_ring *rx_ring) +{ + unsigned long bi_size; + u16 i; + + /* ring already cleared, nothing to do */ + if (!rx_ring->rx_bi) + return; + + if (rx_ring->skb) { + dev_kfree_skb(rx_ring->skb); + rx_ring->skb = NULL; + } + + /* Free all the Rx ring sk_buffs */ + for (i = 0; i < rx_ring->count; i++) { + struct i40e_rx_buffer *rx_bi = &rx_ring->rx_bi[i]; + + if (!rx_bi->page) + continue; + + /* Invalidate cache lines that may have been written to by + * device so that we avoid corrupting memory. 
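+		 * (dma_sync_single_range_for_cpu() hands the buffer back to
+		 * the CPU before it is unmapped and its page reference is
+		 * dropped)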
+ */ + dma_sync_single_range_for_cpu(rx_ring->dev, + rx_bi->dma, + rx_bi->page_offset, + rx_ring->rx_buf_len, + DMA_FROM_DEVICE); + + /* free resources associated with mapping */ + dma_unmap_page_attrs(rx_ring->dev, rx_bi->dma, + i40e_rx_pg_size(rx_ring), + DMA_FROM_DEVICE, + I40E_RX_DMA_ATTR); + + __page_frag_cache_drain(rx_bi->page, rx_bi->pagecnt_bias); + + rx_bi->page = NULL; + rx_bi->page_offset = 0; + } + + bi_size = sizeof(struct i40e_rx_buffer) * rx_ring->count; + memset(rx_ring->rx_bi, 0, bi_size); + + /* Zero out the descriptor ring */ + memset(rx_ring->desc, 0, rx_ring->size); + + rx_ring->next_to_alloc = 0; + rx_ring->next_to_clean = 0; + rx_ring->next_to_use = 0; +} + +/** + * iavf_free_rx_resources - Free Rx resources + * @rx_ring: ring to clean the resources from + * + * Free all receive software resources + **/ +void iavf_free_rx_resources(struct i40e_ring *rx_ring) +{ + iavf_clean_rx_ring(rx_ring); + kfree(rx_ring->rx_bi); + rx_ring->rx_bi = NULL; + + if (rx_ring->desc) { + dma_free_coherent(rx_ring->dev, rx_ring->size, + rx_ring->desc, rx_ring->dma); + rx_ring->desc = NULL; + } +} + +/** + * iavf_setup_rx_descriptors - Allocate Rx descriptors + * @rx_ring: Rx descriptor ring (for a specific queue) to setup + * + * Returns 0 on success, negative on failure + **/ +int iavf_setup_rx_descriptors(struct i40e_ring *rx_ring) +{ + struct device *dev = rx_ring->dev; + int bi_size; + + /* warn if we are about to overwrite the pointer */ + WARN_ON(rx_ring->rx_bi); + bi_size = sizeof(struct i40e_rx_buffer) * rx_ring->count; + rx_ring->rx_bi = kzalloc(bi_size, GFP_KERNEL); + if (!rx_ring->rx_bi) + goto err; + + u64_stats_init(&rx_ring->syncp); + + /* Round up to nearest 4K */ + rx_ring->size = rx_ring->count * sizeof(union i40e_32byte_rx_desc); + rx_ring->size = ALIGN(rx_ring->size, 4096); + rx_ring->desc = dma_alloc_coherent(dev, rx_ring->size, + &rx_ring->dma, GFP_KERNEL); + + if (!rx_ring->desc) { + dev_info(dev, "Unable to allocate memory for the Rx descriptor ring, size=%d\n", + rx_ring->size); + goto err; + } + + rx_ring->next_to_alloc = 0; + rx_ring->next_to_clean = 0; + rx_ring->next_to_use = 0; + + return 0; +err: + kfree(rx_ring->rx_bi); + rx_ring->rx_bi = NULL; + return -ENOMEM; +} + +/** + * i40e_release_rx_desc - Store the new tail and head values + * @rx_ring: ring to bump + * @val: new head index + **/ +static inline void i40e_release_rx_desc(struct i40e_ring *rx_ring, u32 val) +{ + rx_ring->next_to_use = val; + + /* update next to alloc since we have filled the ring */ + rx_ring->next_to_alloc = val; + + /* Force memory writes to complete before letting h/w + * know there are new descriptors to fetch. (Only + * applicable for weak-ordered memory model archs, + * such as IA-64). + */ + wmb(); + writel(val, rx_ring->tail); +} + +/** + * i40e_rx_offset - Return expected offset into page to access data + * @rx_ring: Ring we are requesting offset of + * + * Returns the offset value for ring into the data buffer. + */ +static inline unsigned int i40e_rx_offset(struct i40e_ring *rx_ring) +{ + return ring_uses_build_skb(rx_ring) ? I40E_SKB_PAD : 0; +} + +/** + * i40e_alloc_mapped_page - recycle or make a new page + * @rx_ring: ring to use + * @bi: rx_buffer struct to modify + * + * Returns true if the page was successfully allocated or + * reused. 
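+ * Returns false, after counting an alloc_page_failed event, if either
+ * the page allocation or the DMA mapping fails.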
+ **/ +static bool i40e_alloc_mapped_page(struct i40e_ring *rx_ring, + struct i40e_rx_buffer *bi) +{ + struct page *page = bi->page; + dma_addr_t dma; + + /* since we are recycling buffers we should seldom need to alloc */ + if (likely(page)) { + rx_ring->rx_stats.page_reuse_count++; + return true; + } + + /* alloc new page for storage */ + page = dev_alloc_pages(i40e_rx_pg_order(rx_ring)); + if (unlikely(!page)) { + rx_ring->rx_stats.alloc_page_failed++; + return false; + } + + /* map page for use */ + dma = dma_map_page_attrs(rx_ring->dev, page, 0, + i40e_rx_pg_size(rx_ring), + DMA_FROM_DEVICE, + I40E_RX_DMA_ATTR); + + /* if mapping failed free memory back to system since + * there isn't much point in holding memory we can't use + */ + if (dma_mapping_error(rx_ring->dev, dma)) { + __free_pages(page, i40e_rx_pg_order(rx_ring)); + rx_ring->rx_stats.alloc_page_failed++; + return false; + } + + bi->dma = dma; + bi->page = page; + bi->page_offset = i40e_rx_offset(rx_ring); + + /* initialize pagecnt_bias to 1 representing we fully own page */ + bi->pagecnt_bias = 1; + + return true; +} + +/** + * i40e_receive_skb - Send a completed packet up the stack + * @rx_ring: rx ring in play + * @skb: packet to send up + * @vlan_tag: vlan tag for packet + **/ +static void i40e_receive_skb(struct i40e_ring *rx_ring, + struct sk_buff *skb, u16 vlan_tag) +{ + struct i40e_q_vector *q_vector = rx_ring->q_vector; + + if ((rx_ring->netdev->features & NETIF_F_HW_VLAN_CTAG_RX) && + (vlan_tag & VLAN_VID_MASK)) + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tag); + + napi_gro_receive(&q_vector->napi, skb); +} + +/** + * iavf_alloc_rx_buffers - Replace used receive buffers + * @rx_ring: ring to place buffers on + * @cleaned_count: number of buffers to replace + * + * Returns false if all allocations were successful, true if any fail + **/ +bool iavf_alloc_rx_buffers(struct i40e_ring *rx_ring, u16 cleaned_count) +{ + u16 ntu = rx_ring->next_to_use; + union i40e_rx_desc *rx_desc; + struct i40e_rx_buffer *bi; + + /* do nothing if no valid netdev defined */ + if (!rx_ring->netdev || !cleaned_count) + return false; + + rx_desc = I40E_RX_DESC(rx_ring, ntu); + bi = &rx_ring->rx_bi[ntu]; + + do { + if (!i40e_alloc_mapped_page(rx_ring, bi)) + goto no_buffers; + + /* sync the buffer for use by the device */ + dma_sync_single_range_for_device(rx_ring->dev, bi->dma, + bi->page_offset, + rx_ring->rx_buf_len, + DMA_FROM_DEVICE); + + /* Refresh the desc even if buffer_addrs didn't change + * because each write-back erases this info. 
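+		 * (the read and write-back formats share the same descriptor
+		 * memory, so a completed write-back clobbers the programmed
+		 * pkt_addr)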
+ */ + rx_desc->read.pkt_addr = cpu_to_le64(bi->dma + bi->page_offset); + + rx_desc++; + bi++; + ntu++; + if (unlikely(ntu == rx_ring->count)) { + rx_desc = I40E_RX_DESC(rx_ring, 0); + bi = rx_ring->rx_bi; + ntu = 0; + } + + /* clear the status bits for the next_to_use descriptor */ + rx_desc->wb.qword1.status_error_len = 0; + + cleaned_count--; + } while (cleaned_count); + + if (rx_ring->next_to_use != ntu) + i40e_release_rx_desc(rx_ring, ntu); + + return false; + +no_buffers: + if (rx_ring->next_to_use != ntu) + i40e_release_rx_desc(rx_ring, ntu); + + /* make sure to come back via polling to try again after + * allocation failure + */ + return true; +} + +/** + * i40e_rx_checksum - Indicate in skb if hw indicated a good cksum + * @vsi: the VSI we care about + * @skb: skb currently being received and modified + * @rx_desc: the receive descriptor + **/ +static inline void i40e_rx_checksum(struct i40e_vsi *vsi, + struct sk_buff *skb, + union i40e_rx_desc *rx_desc) +{ + struct i40e_rx_ptype_decoded decoded; + u32 rx_error, rx_status; + bool ipv4, ipv6; + u8 ptype; + u64 qword; + + qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len); + ptype = (qword & I40E_RXD_QW1_PTYPE_MASK) >> I40E_RXD_QW1_PTYPE_SHIFT; + rx_error = (qword & I40E_RXD_QW1_ERROR_MASK) >> + I40E_RXD_QW1_ERROR_SHIFT; + rx_status = (qword & I40E_RXD_QW1_STATUS_MASK) >> + I40E_RXD_QW1_STATUS_SHIFT; + decoded = decode_rx_desc_ptype(ptype); + + skb->ip_summed = CHECKSUM_NONE; + + skb_checksum_none_assert(skb); + + /* Rx csum enabled and ip headers found? */ + if (!(vsi->netdev->features & NETIF_F_RXCSUM)) + return; + + /* did the hardware decode the packet and checksum? */ + if (!(rx_status & BIT(I40E_RX_DESC_STATUS_L3L4P_SHIFT))) + return; + + /* both known and outer_ip must be set for the below code to work */ + if (!(decoded.known && decoded.outer_ip)) + return; + + ipv4 = (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP) && + (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV4); + ipv6 = (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP) && + (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6); + + if (ipv4 && + (rx_error & (BIT(I40E_RX_DESC_ERROR_IPE_SHIFT) | + BIT(I40E_RX_DESC_ERROR_EIPE_SHIFT)))) + goto checksum_fail; + + /* likely incorrect csum if alternate IP extension headers found */ + if (ipv6 && + rx_status & BIT(I40E_RX_DESC_STATUS_IPV6EXADD_SHIFT)) + /* don't increment checksum err here, non-fatal err */ + return; + + /* there was some L4 error, count error and punt packet to the stack */ + if (rx_error & BIT(I40E_RX_DESC_ERROR_L4E_SHIFT)) + goto checksum_fail; + + /* handle packets that were not able to be checksummed due + * to arrival speed, in this case the stack can compute + * the csum. 
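+	 * (skb->ip_summed was already set to CHECKSUM_NONE above, so simply
+	 * returning leaves the packet for software checksum validation)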
+	 */
+	if (rx_error & BIT(I40E_RX_DESC_ERROR_PPRS_SHIFT))
+		return;
+
+	/* Only report checksum unnecessary for TCP, UDP, or SCTP */
+	switch (decoded.inner_prot) {
+	case I40E_RX_PTYPE_INNER_PROT_TCP:
+	case I40E_RX_PTYPE_INNER_PROT_UDP:
+	case I40E_RX_PTYPE_INNER_PROT_SCTP:
+		skb->ip_summed = CHECKSUM_UNNECESSARY;
+		/* fall through */
+	default:
+		break;
+	}
+
+	return;
+
+checksum_fail:
+	vsi->back->hw_csum_rx_error++;
+}
+
+/**
+ * i40e_ptype_to_htype - get a hash type
+ * @ptype: the ptype value from the descriptor
+ *
+ * Returns a hash type to be used by skb_set_hash
+ **/
+static inline int i40e_ptype_to_htype(u8 ptype)
+{
+	struct i40e_rx_ptype_decoded decoded = decode_rx_desc_ptype(ptype);
+
+	if (!decoded.known)
+		return PKT_HASH_TYPE_NONE;
+
+	if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
+	    decoded.payload_layer == I40E_RX_PTYPE_PAYLOAD_LAYER_PAY4)
+		return PKT_HASH_TYPE_L4;
+	else if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
+		 decoded.payload_layer == I40E_RX_PTYPE_PAYLOAD_LAYER_PAY3)
+		return PKT_HASH_TYPE_L3;
+	else
+		return PKT_HASH_TYPE_L2;
+}
+
+/**
+ * i40e_rx_hash - set the hash value in the skb
+ * @ring: descriptor ring
+ * @rx_desc: specific descriptor
+ * @skb: skb currently being received and modified
+ * @rx_ptype: Rx packet type
+ **/
+static inline void i40e_rx_hash(struct i40e_ring *ring,
+				union i40e_rx_desc *rx_desc,
+				struct sk_buff *skb,
+				u8 rx_ptype)
+{
+	u32 hash;
+	const __le64 rss_mask =
+		cpu_to_le64((u64)I40E_RX_DESC_FLTSTAT_RSS_HASH <<
+			    I40E_RX_DESC_STATUS_FLTSTAT_SHIFT);
+
+	if (!(ring->netdev->features & NETIF_F_RXHASH))
+		return;
+
+	if ((rx_desc->wb.qword1.status_error_len & rss_mask) == rss_mask) {
+		hash = le32_to_cpu(rx_desc->wb.qword0.hi_dword.rss);
+		skb_set_hash(skb, hash, i40e_ptype_to_htype(rx_ptype));
+	}
+}
+
+/**
+ * iavf_process_skb_fields - Populate skb header fields from Rx descriptor
+ * @rx_ring: rx descriptor ring packet is being transacted on
+ * @rx_desc: pointer to the EOP Rx descriptor
+ * @skb: pointer to current skb being populated
+ * @rx_ptype: the packet type decoded by hardware
+ *
+ * This function checks the ring, descriptor, and packet information in
+ * order to populate the hash, checksum, VLAN, protocol, and
+ * other fields within the skb.
+ **/
+static inline
+void iavf_process_skb_fields(struct i40e_ring *rx_ring,
+			     union i40e_rx_desc *rx_desc, struct sk_buff *skb,
+			     u8 rx_ptype)
+{
+	i40e_rx_hash(rx_ring, rx_desc, skb, rx_ptype);
+
+	i40e_rx_checksum(rx_ring->vsi, skb, rx_desc);
+
+	skb_record_rx_queue(skb, rx_ring->queue_index);
+
+	/* modifies the skb - consumes the enet header */
+	skb->protocol = eth_type_trans(skb, rx_ring->netdev);
+}
+
+/**
+ * i40e_cleanup_headers - Correct empty headers
+ * @rx_ring: rx descriptor ring packet is being transacted on
+ * @skb: pointer to current skb being fixed
+ *
+ * Also address the case where we are pulling data in on pages only
+ * and as such no data is present in the skb header.
+ *
+ * In addition if skb is not at least 60 bytes we need to pad it so that
+ * it is large enough to qualify as a valid Ethernet frame.
+ *
+ * Returns true if an error was encountered and skb was freed.
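+ * (eth_skb_pad() zero-pads the frame to ETH_ZLEN and frees the skb on
+ * allocation failure, which is the only error case handled here)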
+ **/ +static bool i40e_cleanup_headers(struct i40e_ring *rx_ring, struct sk_buff *skb) +{ + /* if eth_skb_pad returns an error the skb was freed */ + if (eth_skb_pad(skb)) + return true; + + return false; +} + +/** + * i40e_reuse_rx_page - page flip buffer and store it back on the ring + * @rx_ring: rx descriptor ring to store buffers on + * @old_buff: donor buffer to have page reused + * + * Synchronizes page for reuse by the adapter + **/ +static void i40e_reuse_rx_page(struct i40e_ring *rx_ring, + struct i40e_rx_buffer *old_buff) +{ + struct i40e_rx_buffer *new_buff; + u16 nta = rx_ring->next_to_alloc; + + new_buff = &rx_ring->rx_bi[nta]; + + /* update, and store next to alloc */ + nta++; + rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0; + + /* transfer page from old buffer to new buffer */ + new_buff->dma = old_buff->dma; + new_buff->page = old_buff->page; + new_buff->page_offset = old_buff->page_offset; + new_buff->pagecnt_bias = old_buff->pagecnt_bias; +} + +/** + * i40e_page_is_reusable - check if any reuse is possible + * @page: page struct to check + * + * A page is not reusable if it was allocated under low memory + * conditions, or it's not in the same NUMA node as this CPU. + */ +static inline bool i40e_page_is_reusable(struct page *page) +{ + return (page_to_nid(page) == numa_mem_id()) && + !page_is_pfmemalloc(page); +} + +/** + * i40e_can_reuse_rx_page - Determine if this page can be reused by + * the adapter for another receive + * + * @rx_buffer: buffer containing the page + * + * If page is reusable, rx_buffer->page_offset is adjusted to point to + * an unused region in the page. + * + * For small pages, @truesize will be a constant value, half the size + * of the memory at page. We'll attempt to alternate between high and + * low halves of the page, with one half ready for use by the hardware + * and the other half being consumed by the stack. We use the page + * ref count to determine whether the stack has finished consuming the + * portion of this page that was passed up with a previous packet. If + * the page ref count is >1, we'll assume the "other" half page is + * still busy, and this page cannot be reused. + * + * For larger pages, @truesize will be the actual space used by the + * received packet (adjusted upward to an even multiple of the cache + * line size). This will advance through the page by the amount + * actually consumed by the received packets while there is still + * space for a buffer. Each region of larger pages will be used at + * most once, after which the page will not be reused. + * + * In either case, if the page is reusable its refcount is increased. + **/ +static bool i40e_can_reuse_rx_page(struct i40e_rx_buffer *rx_buffer) +{ + unsigned int pagecnt_bias = rx_buffer->pagecnt_bias; + struct page *page = rx_buffer->page; + + /* Is any reuse possible? */ + if (unlikely(!i40e_page_is_reusable(page))) + return false; + +#if (PAGE_SIZE < 8192) + /* if we are only owner of page we can reuse it */ + if (unlikely((page_count(page) - pagecnt_bias) > 1)) + return false; +#else +#define I40E_LAST_OFFSET \ + (SKB_WITH_OVERHEAD(PAGE_SIZE) - I40E_RXBUFFER_2048) + if (rx_buffer->page_offset > I40E_LAST_OFFSET) + return false; +#endif + + /* If we have drained the page fragment pool we need to update + * the pagecnt_bias and page count so that we fully restock the + * number of references the driver holds. 
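+	 * (For example, once the bias is exhausted, page_ref_add() below takes
+	 * USHRT_MAX page references in one shot, so each buffer subsequently
+	 * handed to the stack only costs a cheap pagecnt_bias decrement rather
+	 * than an atomic page refcount update.)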
+ */ + if (unlikely(!pagecnt_bias)) { + page_ref_add(page, USHRT_MAX); + rx_buffer->pagecnt_bias = USHRT_MAX; + } + + return true; +} + +/** + * i40e_add_rx_frag - Add contents of Rx buffer to sk_buff + * @rx_ring: rx descriptor ring to transact packets on + * @rx_buffer: buffer containing page to add + * @skb: sk_buff to place the data into + * @size: packet length from rx_desc + * + * This function will add the data contained in rx_buffer->page to the skb. + * It will just attach the page as a frag to the skb. + * + * The function will then update the page offset. + **/ +static void i40e_add_rx_frag(struct i40e_ring *rx_ring, + struct i40e_rx_buffer *rx_buffer, + struct sk_buff *skb, + unsigned int size) +{ +#if (PAGE_SIZE < 8192) + unsigned int truesize = i40e_rx_pg_size(rx_ring) / 2; +#else + unsigned int truesize = SKB_DATA_ALIGN(size + i40e_rx_offset(rx_ring)); +#endif + + skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_buffer->page, + rx_buffer->page_offset, size, truesize); + + /* page is being used so we must update the page offset */ +#if (PAGE_SIZE < 8192) + rx_buffer->page_offset ^= truesize; +#else + rx_buffer->page_offset += truesize; +#endif +} + +/** + * i40e_get_rx_buffer - Fetch Rx buffer and synchronize data for use + * @rx_ring: rx descriptor ring to transact packets on + * @size: size of buffer to add to skb + * + * This function will pull an Rx buffer from the ring and synchronize it + * for use by the CPU. + */ +static struct i40e_rx_buffer *i40e_get_rx_buffer(struct i40e_ring *rx_ring, + const unsigned int size) +{ + struct i40e_rx_buffer *rx_buffer; + + rx_buffer = &rx_ring->rx_bi[rx_ring->next_to_clean]; + prefetchw(rx_buffer->page); + + /* we are reusing so sync this buffer for CPU use */ + dma_sync_single_range_for_cpu(rx_ring->dev, + rx_buffer->dma, + rx_buffer->page_offset, + size, + DMA_FROM_DEVICE); + + /* We have pulled a buffer for use, so decrement pagecnt_bias */ + rx_buffer->pagecnt_bias--; + + return rx_buffer; +} + +/** + * i40e_construct_skb - Allocate skb and populate it + * @rx_ring: rx descriptor ring to transact packets on + * @rx_buffer: rx buffer to pull data from + * @size: size of buffer to add to skb + * + * This function allocates an skb. It then populates it with the page + * data from the current receive descriptor, taking care to set up the + * skb correctly. 
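+ * Returns the new skb, or NULL if the allocation failed; in that case the
+ * caller leaves the buffer on the ring and retries on a later poll.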
+ */ +static struct sk_buff *i40e_construct_skb(struct i40e_ring *rx_ring, + struct i40e_rx_buffer *rx_buffer, + unsigned int size) +{ + void *va = page_address(rx_buffer->page) + rx_buffer->page_offset; +#if (PAGE_SIZE < 8192) + unsigned int truesize = i40e_rx_pg_size(rx_ring) / 2; +#else + unsigned int truesize = SKB_DATA_ALIGN(size); +#endif + unsigned int headlen; + struct sk_buff *skb; + + /* prefetch first cache line of first page */ + prefetch(va); +#if L1_CACHE_BYTES < 128 + prefetch(va + L1_CACHE_BYTES); +#endif + + /* allocate a skb to store the frags */ + skb = __napi_alloc_skb(&rx_ring->q_vector->napi, + I40E_RX_HDR_SIZE, + GFP_ATOMIC | __GFP_NOWARN); + if (unlikely(!skb)) + return NULL; + + /* Determine available headroom for copy */ + headlen = size; + if (headlen > I40E_RX_HDR_SIZE) + headlen = eth_get_headlen(va, I40E_RX_HDR_SIZE); + + /* align pull length to size of long to optimize memcpy performance */ + memcpy(__skb_put(skb, headlen), va, ALIGN(headlen, sizeof(long))); + + /* update all of the pointers */ + size -= headlen; + if (size) { + skb_add_rx_frag(skb, 0, rx_buffer->page, + rx_buffer->page_offset + headlen, + size, truesize); + + /* buffer is used by skb, update page_offset */ +#if (PAGE_SIZE < 8192) + rx_buffer->page_offset ^= truesize; +#else + rx_buffer->page_offset += truesize; +#endif + } else { + /* buffer is unused, reset bias back to rx_buffer */ + rx_buffer->pagecnt_bias++; + } + + return skb; +} + +/** + * i40e_build_skb - Build skb around an existing buffer + * @rx_ring: Rx descriptor ring to transact packets on + * @rx_buffer: Rx buffer to pull data from + * @size: size of buffer to add to skb + * + * This function builds an skb around an existing Rx buffer, taking care + * to set up the skb correctly and avoid any memcpy overhead. + */ +static struct sk_buff *i40e_build_skb(struct i40e_ring *rx_ring, + struct i40e_rx_buffer *rx_buffer, + unsigned int size) +{ + void *va = page_address(rx_buffer->page) + rx_buffer->page_offset; +#if (PAGE_SIZE < 8192) + unsigned int truesize = i40e_rx_pg_size(rx_ring) / 2; +#else + unsigned int truesize = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) + + SKB_DATA_ALIGN(I40E_SKB_PAD + size); +#endif + struct sk_buff *skb; + + /* prefetch first cache line of first page */ + prefetch(va); +#if L1_CACHE_BYTES < 128 + prefetch(va + L1_CACHE_BYTES); +#endif + /* build an skb around the page buffer */ + skb = build_skb(va - I40E_SKB_PAD, truesize); + if (unlikely(!skb)) + return NULL; + + /* update pointers within the skb to store the data */ + skb_reserve(skb, I40E_SKB_PAD); + __skb_put(skb, size); + + /* buffer is used by skb, update page_offset */ +#if (PAGE_SIZE < 8192) + rx_buffer->page_offset ^= truesize; +#else + rx_buffer->page_offset += truesize; +#endif + + return skb; +} + +/** + * i40e_put_rx_buffer - Clean up used buffer and either recycle or free + * @rx_ring: rx descriptor ring to transact packets on + * @rx_buffer: rx buffer to pull data from + * + * This function will clean up the contents of the rx_buffer. It will + * either recycle the buffer or unmap it and free the associated resources. 
+ */ +static void i40e_put_rx_buffer(struct i40e_ring *rx_ring, + struct i40e_rx_buffer *rx_buffer) +{ + if (i40e_can_reuse_rx_page(rx_buffer)) { + /* hand second half of page back to the ring */ + i40e_reuse_rx_page(rx_ring, rx_buffer); + rx_ring->rx_stats.page_reuse_count++; + } else { + /* we are not reusing the buffer so unmap it */ + dma_unmap_page_attrs(rx_ring->dev, rx_buffer->dma, + i40e_rx_pg_size(rx_ring), + DMA_FROM_DEVICE, I40E_RX_DMA_ATTR); + __page_frag_cache_drain(rx_buffer->page, + rx_buffer->pagecnt_bias); + } + + /* clear contents of buffer_info */ + rx_buffer->page = NULL; +} + +/** + * i40e_is_non_eop - process handling of non-EOP buffers + * @rx_ring: Rx ring being processed + * @rx_desc: Rx descriptor for current buffer + * @skb: Current socket buffer containing buffer in progress + * + * This function updates next to clean. If the buffer is an EOP buffer + * this function exits returning false, otherwise it will place the + * sk_buff in the next buffer to be chained and return true indicating + * that this is in fact a non-EOP buffer. + **/ +static bool i40e_is_non_eop(struct i40e_ring *rx_ring, + union i40e_rx_desc *rx_desc, + struct sk_buff *skb) +{ + u32 ntc = rx_ring->next_to_clean + 1; + + /* fetch, update, and store next to clean */ + ntc = (ntc < rx_ring->count) ? ntc : 0; + rx_ring->next_to_clean = ntc; + + prefetch(I40E_RX_DESC(rx_ring, ntc)); + + /* if we are the last buffer then there is nothing else to do */ +#define I40E_RXD_EOF BIT(I40E_RX_DESC_STATUS_EOF_SHIFT) + if (likely(i40e_test_staterr(rx_desc, I40E_RXD_EOF))) + return false; + + rx_ring->rx_stats.non_eop_descs++; + + return true; +} + +/** + * i40e_clean_rx_irq - Clean completed descriptors from Rx ring - bounce buf + * @rx_ring: rx descriptor ring to transact packets on + * @budget: Total limit on number of packets to process + * + * This function provides a "bounce buffer" approach to Rx interrupt + * processing. The advantage to this is that on systems that have + * expensive overhead for IOMMU access this provides a means of avoiding + * it by maintaining the mapping of the page to the system. + * + * Returns amount of work completed + **/ +static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget) +{ + unsigned int total_rx_bytes = 0, total_rx_packets = 0; + struct sk_buff *skb = rx_ring->skb; + u16 cleaned_count = I40E_DESC_UNUSED(rx_ring); + bool failure = false; + + while (likely(total_rx_packets < (unsigned int)budget)) { + struct i40e_rx_buffer *rx_buffer; + union i40e_rx_desc *rx_desc; + unsigned int size; + u16 vlan_tag; + u8 rx_ptype; + u64 qword; + + /* return some buffers to hardware, one at a time is too slow */ + if (cleaned_count >= I40E_RX_BUFFER_WRITE) { + failure = failure || + iavf_alloc_rx_buffers(rx_ring, cleaned_count); + cleaned_count = 0; + } + + rx_desc = I40E_RX_DESC(rx_ring, rx_ring->next_to_clean); + + /* status_error_len will always be zero for unused descriptors + * because it's cleared in cleanup, and overlaps with hdr_addr + * which is always zero because packet split isn't used, if the + * hardware wrote DD then the length will be non-zero + */ + qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len); + + /* This memory barrier is needed to keep us from reading + * any other fields out of the rx_desc until we have + * verified the descriptor has been written back. 
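+		 * (For example, the l2tag1 and RSS hash words read further
+		 * down must not be loaded ahead of the DD/length check above.)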
+		 */
+		dma_rmb();
+
+		size = (qword & I40E_RXD_QW1_LENGTH_PBUF_MASK) >>
+		       I40E_RXD_QW1_LENGTH_PBUF_SHIFT;
+		if (!size)
+			break;
+
+		i40e_trace(clean_rx_irq, rx_ring, rx_desc, skb);
+		rx_buffer = i40e_get_rx_buffer(rx_ring, size);
+
+		/* retrieve a buffer from the ring */
+		if (skb)
+			i40e_add_rx_frag(rx_ring, rx_buffer, skb, size);
+		else if (ring_uses_build_skb(rx_ring))
+			skb = i40e_build_skb(rx_ring, rx_buffer, size);
+		else
+			skb = i40e_construct_skb(rx_ring, rx_buffer, size);
+
+		/* exit if we failed to retrieve a buffer */
+		if (!skb) {
+			rx_ring->rx_stats.alloc_buff_failed++;
+			rx_buffer->pagecnt_bias++;
+			break;
+		}
+
+		i40e_put_rx_buffer(rx_ring, rx_buffer);
+		cleaned_count++;
+
+		if (i40e_is_non_eop(rx_ring, rx_desc, skb))
+			continue;
+
+		/* ERR_MASK will only have valid bits if EOP set, and
+		 * what we are doing here is actually checking
+		 * I40E_RX_DESC_ERROR_RXE_SHIFT, since it is the zeroth bit in
+		 * the error field
+		 */
+		if (unlikely(i40e_test_staterr(rx_desc, BIT(I40E_RXD_QW1_ERROR_SHIFT)))) {
+			dev_kfree_skb_any(skb);
+			skb = NULL;
+			continue;
+		}
+
+		if (i40e_cleanup_headers(rx_ring, skb)) {
+			skb = NULL;
+			continue;
+		}
+
+		/* probably a little skewed due to removing CRC */
+		total_rx_bytes += skb->len;
+
+		qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
+		rx_ptype = (qword & I40E_RXD_QW1_PTYPE_MASK) >>
+			   I40E_RXD_QW1_PTYPE_SHIFT;
+
+		/* populate checksum, VLAN, and protocol */
+		iavf_process_skb_fields(rx_ring, rx_desc, skb, rx_ptype);
+
+		vlan_tag = (qword & BIT(I40E_RX_DESC_STATUS_L2TAG1P_SHIFT)) ?
+			   le16_to_cpu(rx_desc->wb.qword0.lo_dword.l2tag1) : 0;
+
+		i40e_trace(clean_rx_irq_rx, rx_ring, rx_desc, skb);
+		i40e_receive_skb(rx_ring, skb, vlan_tag);
+		skb = NULL;
+
+		/* update budget accounting */
+		total_rx_packets++;
+	}
+
+	rx_ring->skb = skb;
+
+	u64_stats_update_begin(&rx_ring->syncp);
+	rx_ring->stats.packets += total_rx_packets;
+	rx_ring->stats.bytes += total_rx_bytes;
+	u64_stats_update_end(&rx_ring->syncp);
+	rx_ring->q_vector->rx.total_packets += total_rx_packets;
+	rx_ring->q_vector->rx.total_bytes += total_rx_bytes;
+
+	/* guarantee a trip back through this routine if there was a failure */
+	return failure ? budget : (int)total_rx_packets;
+}
+
+static inline u32 i40e_buildreg_itr(const int type, u16 itr)
+{
+	u32 val;
+
+	/* We don't bother with setting the CLEARPBA bit as the data sheet
+	 * points out doing so is "meaningless since it was already
+	 * auto-cleared". The auto-clearing happens when the interrupt is
+	 * asserted.
+	 *
+	 * Hardware errata 28 also indicates that writing to a
+	 * xxINT_DYN_CTLx CSR with INTENA_MSK (bit 31) set to 0 will clear
+	 * an event in the PBA anyway so we need to rely on the automask
+	 * to hold pending events for us until the interrupt is re-enabled
+	 *
+	 * The itr value is reported in microseconds, and the register
+	 * value is recorded in 2 microsecond units. For this reason we
+	 * only need to shift by the interval shift - 1 instead of the
+	 * full value.
+	 */
+	itr &= I40E_ITR_MASK;
+
+	val = I40E_VFINT_DYN_CTLN1_INTENA_MASK |
+	      (type << I40E_VFINT_DYN_CTLN1_ITR_INDX_SHIFT) |
+	      (itr << (I40E_VFINT_DYN_CTLN1_INTERVAL_SHIFT - 1));
+
+	return val;
+}
+
+/* a small macro to shorten up some long lines */
+#define INTREG I40E_VFINT_DYN_CTLN1
+
+/* The act of updating the ITR will cause it to immediately trigger. In order
+ * to prevent this from throwing off adaptive update statistics we defer the
+ * update so that it can only happen so often.
+ * So after either Tx or Rx are updated we make the adaptive scheme wait
+ * until either the ITR completely expires via the next_update expiration or
+ * we have been through at least 3 interrupts.
+ */
+#define ITR_COUNTDOWN_START 3
+
+/**
+ * i40e_update_enable_itr - Update itr and re-enable MSIX interrupt
+ * @vsi: the VSI we care about
+ * @q_vector: q_vector for which itr is being updated and interrupt enabled
+ **/
+static inline void i40e_update_enable_itr(struct i40e_vsi *vsi,
+					  struct i40e_q_vector *q_vector)
+{
+	struct i40e_hw *hw = &vsi->back->hw;
+	u32 intval;
+
+	/* These will do nothing if dynamic updates are not enabled */
+	i40e_update_itr(q_vector, &q_vector->tx);
+	i40e_update_itr(q_vector, &q_vector->rx);
+
+	/* This block of logic allows us to get away with only updating
+	 * one ITR value with each interrupt. The idea is to perform a
+	 * pseudo-lazy update with the following criteria.
+	 *
+	 * 1. Rx is given higher priority than Tx if both are in same state
+	 * 2. If we must reduce an ITR, that reduction is given highest priority
+	 * 3. We then give priority to increasing ITR based on amount
+	 */
+	if (q_vector->rx.target_itr < q_vector->rx.current_itr) {
+		/* Rx ITR needs to be reduced, this is highest priority */
+		intval = i40e_buildreg_itr(I40E_RX_ITR,
+					   q_vector->rx.target_itr);
+		q_vector->rx.current_itr = q_vector->rx.target_itr;
+		q_vector->itr_countdown = ITR_COUNTDOWN_START;
+	} else if ((q_vector->tx.target_itr < q_vector->tx.current_itr) ||
+		   ((q_vector->rx.target_itr - q_vector->rx.current_itr) <
+		    (q_vector->tx.target_itr - q_vector->tx.current_itr))) {
+		/* Tx ITR needs to be reduced, this is second priority
+		 * Tx ITR needs to be increased more than Rx, fourth priority
+		 */
+		intval = i40e_buildreg_itr(I40E_TX_ITR,
+					   q_vector->tx.target_itr);
+		q_vector->tx.current_itr = q_vector->tx.target_itr;
+		q_vector->itr_countdown = ITR_COUNTDOWN_START;
+	} else if (q_vector->rx.current_itr != q_vector->rx.target_itr) {
+		/* Rx ITR needs to be increased, third priority */
+		intval = i40e_buildreg_itr(I40E_RX_ITR,
+					   q_vector->rx.target_itr);
+		q_vector->rx.current_itr = q_vector->rx.target_itr;
+		q_vector->itr_countdown = ITR_COUNTDOWN_START;
+	} else {
+		/* No ITR update, lowest priority */
+		intval = i40e_buildreg_itr(I40E_ITR_NONE, 0);
+		if (q_vector->itr_countdown)
+			q_vector->itr_countdown--;
+	}
+
+	if (!test_bit(__I40E_VSI_DOWN, vsi->state))
+		wr32(hw, INTREG(q_vector->reg_idx), intval);
+}
+
+/**
+ * iavf_napi_poll - NAPI polling Rx/Tx cleanup routine
+ * @napi: napi struct with our devices info in it
+ * @budget: amount of work driver is allowed to do this pass, in packets
+ *
+ * This function will clean all queues associated with a q_vector.
+ *
+ * Returns the amount of work done
+ **/
+int iavf_napi_poll(struct napi_struct *napi, int budget)
+{
+	struct i40e_q_vector *q_vector =
+			       container_of(napi, struct i40e_q_vector, napi);
+	struct i40e_vsi *vsi = q_vector->vsi;
+	struct i40e_ring *ring;
+	bool clean_complete = true;
+	bool arm_wb = false;
+	int budget_per_ring;
+	int work_done = 0;
+
+	if (test_bit(__I40E_VSI_DOWN, vsi->state)) {
+		napi_complete(napi);
+		return 0;
+	}
+
+	/* Since the actual Tx work is minimal, we can give the Tx a larger
+	 * budget and be more aggressive about cleaning up the Tx descriptors.
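+	 * (Each Tx ring is cleaned with the full @budget here, while the Rx
+	 * rings below split the budget between them.)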
+	 */
+	i40e_for_each_ring(ring, q_vector->tx) {
+		if (!i40e_clean_tx_irq(vsi, ring, budget)) {
+			clean_complete = false;
+			continue;
+		}
+		arm_wb |= ring->arm_wb;
+		ring->arm_wb = false;
+	}
+
+	/* Handle case where we are called by netpoll with a budget of 0 */
+	if (budget <= 0)
+		goto tx_only;
+
+	/* We attempt to distribute budget to each Rx queue fairly, but don't
+	 * allow the budget to go below 1 because that would exit polling early.
+	 */
+	budget_per_ring = max(budget/q_vector->num_ringpairs, 1);
+
+	i40e_for_each_ring(ring, q_vector->rx) {
+		int cleaned = i40e_clean_rx_irq(ring, budget_per_ring);
+
+		work_done += cleaned;
+		/* if we clean as many as budgeted, we must not be done */
+		if (cleaned >= budget_per_ring)
+			clean_complete = false;
+	}
+
+	/* If work not completed, return budget and polling will return */
+	if (!clean_complete) {
+		int cpu_id = smp_processor_id();
+
+		/* It is possible that the interrupt affinity has changed but,
+		 * if the cpu is pegged at 100%, polling will never exit while
+		 * traffic continues and the interrupt will be stuck on this
+		 * cpu. We check to make sure affinity is correct before we
+		 * continue to poll, otherwise we must stop polling so the
+		 * interrupt can move to the correct cpu.
+		 */
+		if (!cpumask_test_cpu(cpu_id, &q_vector->affinity_mask)) {
+			/* Tell napi that we are done polling */
+			napi_complete_done(napi, work_done);
+
+			/* Force an interrupt */
+			iavf_force_wb(vsi, q_vector);
+
+			/* Return budget-1 so that polling stops */
+			return budget - 1;
+		}
+tx_only:
+		if (arm_wb) {
+			q_vector->tx.ring[0].tx_stats.tx_force_wb++;
+			i40e_enable_wb_on_itr(vsi, q_vector);
+		}
+		return budget;
+	}
+
+	if (vsi->back->flags & I40E_TXR_FLAGS_WB_ON_ITR)
+		q_vector->arm_wb_state = false;
+
+	/* Work is done so exit the polling mode and re-enable the interrupt */
+	napi_complete_done(napi, work_done);
+
+	i40e_update_enable_itr(vsi, q_vector);
+
+	return min(work_done, budget - 1);
+}
+
+/**
+ * iavf_tx_prepare_vlan_flags - prepare generic TX VLAN tagging flags for HW
+ * @skb: send buffer
+ * @tx_ring: ring to send buffer on
+ * @flags: the tx flags to be set
+ *
+ * Checks the skb and sets up correspondingly several generic transmit flags
+ * related to VLAN tagging for the HW, such as VLAN, DCB, etc.
+ *
+ * Returns an error code to indicate the frame should be dropped upon error,
+ * otherwise returns 0 to indicate the flags have been set properly.
+ **/
+static inline int iavf_tx_prepare_vlan_flags(struct sk_buff *skb,
+					     struct i40e_ring *tx_ring,
+					     u32 *flags)
+{
+	__be16 protocol = skb->protocol;
+	u32 tx_flags = 0;
+
+	if (protocol == htons(ETH_P_8021Q) &&
+	    !(tx_ring->netdev->features & NETIF_F_HW_VLAN_CTAG_TX)) {
+		/* When HW VLAN acceleration is turned off by the user the
+		 * stack sets the protocol to 8021q so that the driver
+		 * can take any steps required to support the SW only
+		 * VLAN handling. In our case the driver doesn't need
+		 * to take any further steps so just set the protocol
+		 * to the encapsulated ethertype.
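+		 * For example, with CTAG Tx offload disabled via ethtool, a
+		 * tagged frame reaches here with skb->protocol set to
+		 * ETH_P_8021Q and the 802.1Q header still in the packet data.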
+ */ + skb->protocol = vlan_get_protocol(skb); + goto out; + } + + /* if we have a HW VLAN tag being added, default to the HW one */ + if (skb_vlan_tag_present(skb)) { + tx_flags |= skb_vlan_tag_get(skb) << I40E_TX_FLAGS_VLAN_SHIFT; + tx_flags |= I40E_TX_FLAGS_HW_VLAN; + /* else if it is a SW VLAN, check the next protocol and store the tag */ + } else if (protocol == htons(ETH_P_8021Q)) { + struct vlan_hdr *vhdr, _vhdr; + + vhdr = skb_header_pointer(skb, ETH_HLEN, sizeof(_vhdr), &_vhdr); + if (!vhdr) + return -EINVAL; + + protocol = vhdr->h_vlan_encapsulated_proto; + tx_flags |= ntohs(vhdr->h_vlan_TCI) << I40E_TX_FLAGS_VLAN_SHIFT; + tx_flags |= I40E_TX_FLAGS_SW_VLAN; + } + +out: + *flags = tx_flags; + return 0; +} + +/** + * i40e_tso - set up the tso context descriptor + * @first: pointer to first Tx buffer for xmit + * @hdr_len: ptr to the size of the packet header + * @cd_type_cmd_tso_mss: Quad Word 1 + * + * Returns 0 if no TSO can happen, 1 if tso is going, or error + **/ +static int i40e_tso(struct i40e_tx_buffer *first, u8 *hdr_len, + u64 *cd_type_cmd_tso_mss) +{ + struct sk_buff *skb = first->skb; + u64 cd_cmd, cd_tso_len, cd_mss; + union { + struct iphdr *v4; + struct ipv6hdr *v6; + unsigned char *hdr; + } ip; + union { + struct tcphdr *tcp; + struct udphdr *udp; + unsigned char *hdr; + } l4; + u32 paylen, l4_offset; + u16 gso_segs, gso_size; + int err; + + if (skb->ip_summed != CHECKSUM_PARTIAL) + return 0; + + if (!skb_is_gso(skb)) + return 0; + + err = skb_cow_head(skb, 0); + if (err < 0) + return err; + + ip.hdr = skb_network_header(skb); + l4.hdr = skb_transport_header(skb); + + /* initialize outer IP header fields */ + if (ip.v4->version == 4) { + ip.v4->tot_len = 0; + ip.v4->check = 0; + } else { + ip.v6->payload_len = 0; + } + + if (skb_shinfo(skb)->gso_type & (SKB_GSO_GRE | + SKB_GSO_GRE_CSUM | + SKB_GSO_IPXIP4 | + SKB_GSO_IPXIP6 | + SKB_GSO_UDP_TUNNEL | + SKB_GSO_UDP_TUNNEL_CSUM)) { + if (!(skb_shinfo(skb)->gso_type & SKB_GSO_PARTIAL) && + (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM)) { + l4.udp->len = 0; + + /* determine offset of outer transport header */ + l4_offset = l4.hdr - skb->data; + + /* remove payload length from outer checksum */ + paylen = skb->len - l4_offset; + csum_replace_by_diff(&l4.udp->check, + (__force __wsum)htonl(paylen)); + } + + /* reset pointers to inner headers */ + ip.hdr = skb_inner_network_header(skb); + l4.hdr = skb_inner_transport_header(skb); + + /* initialize inner IP header fields */ + if (ip.v4->version == 4) { + ip.v4->tot_len = 0; + ip.v4->check = 0; + } else { + ip.v6->payload_len = 0; + } + } + + /* determine offset of inner transport header */ + l4_offset = l4.hdr - skb->data; + + /* remove payload length from inner checksum */ + paylen = skb->len - l4_offset; + csum_replace_by_diff(&l4.tcp->check, (__force __wsum)htonl(paylen)); + + /* compute length of segmentation header */ + *hdr_len = (l4.tcp->doff * 4) + l4_offset; + + /* pull values out of skb_shinfo */ + gso_size = skb_shinfo(skb)->gso_size; + gso_segs = skb_shinfo(skb)->gso_segs; + + /* update GSO size and bytecount with header size */ + first->gso_segs = gso_segs; + first->bytecount += (first->gso_segs - 1) * *hdr_len; + + /* find the field values */ + cd_cmd = I40E_TX_CTX_DESC_TSO; + cd_tso_len = skb->len - *hdr_len; + cd_mss = gso_size; + *cd_type_cmd_tso_mss |= (cd_cmd << I40E_TXD_CTX_QW1_CMD_SHIFT) | + (cd_tso_len << I40E_TXD_CTX_QW1_TSO_LEN_SHIFT) | + (cd_mss << I40E_TXD_CTX_QW1_MSS_SHIFT); + return 1; +} + +/** + * i40e_tx_enable_csum - Enable Tx checksum 
offloads + * @skb: send buffer + * @tx_flags: pointer to Tx flags currently set + * @td_cmd: Tx descriptor command bits to set + * @td_offset: Tx descriptor header offsets to set + * @tx_ring: Tx descriptor ring + * @cd_tunneling: ptr to context desc bits + **/ +static int i40e_tx_enable_csum(struct sk_buff *skb, u32 *tx_flags, + u32 *td_cmd, u32 *td_offset, + struct i40e_ring *tx_ring, + u32 *cd_tunneling) +{ + union { + struct iphdr *v4; + struct ipv6hdr *v6; + unsigned char *hdr; + } ip; + union { + struct tcphdr *tcp; + struct udphdr *udp; + unsigned char *hdr; + } l4; + unsigned char *exthdr; + u32 offset, cmd = 0; + __be16 frag_off; + u8 l4_proto = 0; + + if (skb->ip_summed != CHECKSUM_PARTIAL) + return 0; + + ip.hdr = skb_network_header(skb); + l4.hdr = skb_transport_header(skb); + + /* compute outer L2 header size */ + offset = ((ip.hdr - skb->data) / 2) << I40E_TX_DESC_LENGTH_MACLEN_SHIFT; + + if (skb->encapsulation) { + u32 tunnel = 0; + /* define outer network header type */ + if (*tx_flags & I40E_TX_FLAGS_IPV4) { + tunnel |= (*tx_flags & I40E_TX_FLAGS_TSO) ? + I40E_TX_CTX_EXT_IP_IPV4 : + I40E_TX_CTX_EXT_IP_IPV4_NO_CSUM; + + l4_proto = ip.v4->protocol; + } else if (*tx_flags & I40E_TX_FLAGS_IPV6) { + tunnel |= I40E_TX_CTX_EXT_IP_IPV6; + + exthdr = ip.hdr + sizeof(*ip.v6); + l4_proto = ip.v6->nexthdr; + if (l4.hdr != exthdr) + ipv6_skip_exthdr(skb, exthdr - skb->data, + &l4_proto, &frag_off); + } + + /* define outer transport */ + switch (l4_proto) { + case IPPROTO_UDP: + tunnel |= I40E_TXD_CTX_UDP_TUNNELING; + *tx_flags |= I40E_TX_FLAGS_VXLAN_TUNNEL; + break; + case IPPROTO_GRE: + tunnel |= I40E_TXD_CTX_GRE_TUNNELING; + *tx_flags |= I40E_TX_FLAGS_VXLAN_TUNNEL; + break; + case IPPROTO_IPIP: + case IPPROTO_IPV6: + *tx_flags |= I40E_TX_FLAGS_VXLAN_TUNNEL; + l4.hdr = skb_inner_network_header(skb); + break; + default: + if (*tx_flags & I40E_TX_FLAGS_TSO) + return -1; + + skb_checksum_help(skb); + return 0; + } + + /* compute outer L3 header size */ + tunnel |= ((l4.hdr - ip.hdr) / 4) << + I40E_TXD_CTX_QW0_EXT_IPLEN_SHIFT; + + /* switch IP header pointer from outer to inner header */ + ip.hdr = skb_inner_network_header(skb); + + /* compute tunnel header size */ + tunnel |= ((ip.hdr - l4.hdr) / 2) << + I40E_TXD_CTX_QW0_NATLEN_SHIFT; + + /* indicate if we need to offload outer UDP header */ + if ((*tx_flags & I40E_TX_FLAGS_TSO) && + !(skb_shinfo(skb)->gso_type & SKB_GSO_PARTIAL) && + (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM)) + tunnel |= I40E_TXD_CTX_QW0_L4T_CS_MASK; + + /* record tunnel offload values */ + *cd_tunneling |= tunnel; + + /* switch L4 header pointer from outer to inner */ + l4.hdr = skb_inner_transport_header(skb); + l4_proto = 0; + + /* reset type as we transition from outer to inner headers */ + *tx_flags &= ~(I40E_TX_FLAGS_IPV4 | I40E_TX_FLAGS_IPV6); + if (ip.v4->version == 4) + *tx_flags |= I40E_TX_FLAGS_IPV4; + if (ip.v6->version == 6) + *tx_flags |= I40E_TX_FLAGS_IPV6; + } + + /* Enable IP checksum offloads */ + if (*tx_flags & I40E_TX_FLAGS_IPV4) { + l4_proto = ip.v4->protocol; + /* the stack computes the IP header already, the only time we + * need the hardware to recompute it is in the case of TSO. + */ + cmd |= (*tx_flags & I40E_TX_FLAGS_TSO) ? 
+			I40E_TX_DESC_CMD_IIPT_IPV4_CSUM :
+			I40E_TX_DESC_CMD_IIPT_IPV4;
+	} else if (*tx_flags & I40E_TX_FLAGS_IPV6) {
+		cmd |= I40E_TX_DESC_CMD_IIPT_IPV6;
+
+		exthdr = ip.hdr + sizeof(*ip.v6);
+		l4_proto = ip.v6->nexthdr;
+		if (l4.hdr != exthdr)
+			ipv6_skip_exthdr(skb, exthdr - skb->data,
+					 &l4_proto, &frag_off);
+	}
+
+	/* compute inner L3 header size */
+	offset |= ((l4.hdr - ip.hdr) / 4) << I40E_TX_DESC_LENGTH_IPLEN_SHIFT;
+
+	/* Enable L4 checksum offloads */
+	switch (l4_proto) {
+	case IPPROTO_TCP:
+		/* enable checksum offloads */
+		cmd |= I40E_TX_DESC_CMD_L4T_EOFT_TCP;
+		offset |= l4.tcp->doff << I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
+		break;
+	case IPPROTO_SCTP:
+		/* enable SCTP checksum offload */
+		cmd |= I40E_TX_DESC_CMD_L4T_EOFT_SCTP;
+		offset |= (sizeof(struct sctphdr) >> 2) <<
+			  I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
+		break;
+	case IPPROTO_UDP:
+		/* enable UDP checksum offload */
+		cmd |= I40E_TX_DESC_CMD_L4T_EOFT_UDP;
+		offset |= (sizeof(struct udphdr) >> 2) <<
+			  I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
+		break;
+	default:
+		if (*tx_flags & I40E_TX_FLAGS_TSO)
+			return -1;
+		skb_checksum_help(skb);
+		return 0;
+	}
+
+	*td_cmd |= cmd;
+	*td_offset |= offset;
+
+	return 1;
+}
+
+/**
+ * i40e_create_tx_ctx - Build the Tx context descriptor
+ * @tx_ring: ring to create the descriptor on
+ * @cd_type_cmd_tso_mss: Quad Word 1
+ * @cd_tunneling: Quad Word 0 - bits 0-31
+ * @cd_l2tag2: Quad Word 0 - bits 32-63
+ **/
+static void i40e_create_tx_ctx(struct i40e_ring *tx_ring,
+			       const u64 cd_type_cmd_tso_mss,
+			       const u32 cd_tunneling, const u32 cd_l2tag2)
+{
+	struct i40e_tx_context_desc *context_desc;
+	int i = tx_ring->next_to_use;
+
+	if ((cd_type_cmd_tso_mss == I40E_TX_DESC_DTYPE_CONTEXT) &&
+	    !cd_tunneling && !cd_l2tag2)
+		return;
+
+	/* grab the next descriptor */
+	context_desc = I40E_TX_CTXTDESC(tx_ring, i);
+
+	i++;
+	tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;
+
+	/* cpu_to_le32 and assign to struct fields */
+	context_desc->tunneling_params = cpu_to_le32(cd_tunneling);
+	context_desc->l2tag2 = cpu_to_le16(cd_l2tag2);
+	context_desc->rsvd = cpu_to_le16(0);
+	context_desc->type_cmd_tso_mss = cpu_to_le64(cd_type_cmd_tso_mss);
+}
+
+/**
+ * __iavf_chk_linearize - Check if there are more than 8 buffers per packet
+ * @skb: send buffer
+ *
+ * Note: Our HW can't DMA more than 8 buffers to build a packet on the wire
+ * and so we need to figure out the cases where we need to linearize the skb.
+ *
+ * For TSO we need to count the TSO header and segment payload separately.
+ * As such we need to check cases where we have 7 fragments or more as we
+ * can potentially require 9 DMA transactions, 1 for the TSO header, 1 for
+ * the segment payload in the first descriptor, and another 7 for the
+ * fragments.
+ **/
+bool __iavf_chk_linearize(struct sk_buff *skb)
+{
+	const struct skb_frag_struct *frag, *stale;
+	int nr_frags, sum;
+
+	/* no need to check if number of frags is less than 7 */
+	nr_frags = skb_shinfo(skb)->nr_frags;
+	if (nr_frags < (I40E_MAX_BUFFER_TXD - 1))
+		return false;
+
+	/* We need to walk through the list and validate that each group
+	 * of 6 fragments totals at least gso_size.
+	 */
+	nr_frags -= I40E_MAX_BUFFER_TXD - 2;
+	frag = &skb_shinfo(skb)->frags[0];
+
+	/* Initialize size to the negative value of gso_size minus 1. We
+	 * use this as the worst case scenario in which the frag ahead
+	 * of us only provides one byte which is why we are limited to 6
+	 * descriptors for a single transmit as the header and previous
+	 * fragment are already consuming 2 descriptors.
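+	 * For example, with gso_size = 16000 and seven 2000 byte fragments,
+	 * any six consecutive fragments only total 12000 bytes, so the sum
+	 * below goes negative and the skb must be linearized.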
+ */ + sum = 1 - skb_shinfo(skb)->gso_size; + + /* Add size of frags 0 through 4 to create our initial sum */ + sum += skb_frag_size(frag++); + sum += skb_frag_size(frag++); + sum += skb_frag_size(frag++); + sum += skb_frag_size(frag++); + sum += skb_frag_size(frag++); + + /* Walk through fragments adding latest fragment, testing it, and + * then removing stale fragments from the sum. + */ + for (stale = &skb_shinfo(skb)->frags[0];; stale++) { + int stale_size = skb_frag_size(stale); + + sum += skb_frag_size(frag++); + + /* The stale fragment may present us with a smaller + * descriptor than the actual fragment size. To account + * for that we need to remove all the data on the front and + * figure out what the remainder would be in the last + * descriptor associated with the fragment. + */ + if (stale_size > I40E_MAX_DATA_PER_TXD) { + int align_pad = -(stale->page_offset) & + (I40E_MAX_READ_REQ_SIZE - 1); + + sum -= align_pad; + stale_size -= align_pad; + + do { + sum -= I40E_MAX_DATA_PER_TXD_ALIGNED; + stale_size -= I40E_MAX_DATA_PER_TXD_ALIGNED; + } while (stale_size > I40E_MAX_DATA_PER_TXD); + } + + /* if sum is negative we failed to make sufficient progress */ + if (sum < 0) + return true; + + if (!nr_frags--) + break; + + sum -= stale_size; + } + + return false; +} + +/** + * __iavf_maybe_stop_tx - 2nd level check for tx stop conditions + * @tx_ring: the ring to be checked + * @size: the size buffer we want to assure is available + * + * Returns -EBUSY if a stop is needed, else 0 + **/ +int __iavf_maybe_stop_tx(struct i40e_ring *tx_ring, int size) +{ + netif_stop_subqueue(tx_ring->netdev, tx_ring->queue_index); + /* Memory barrier before checking head and tail */ + smp_mb(); + + /* Check again in a case another CPU has just made room available. */ + if (likely(I40E_DESC_UNUSED(tx_ring) < size)) + return -EBUSY; + + /* A reprieve! 
- use start_queue because it doesn't call schedule */ + netif_start_subqueue(tx_ring->netdev, tx_ring->queue_index); + ++tx_ring->tx_stats.restart_queue; + return 0; +} + +/** + * iavf_tx_map - Build the Tx descriptor + * @tx_ring: ring to send buffer on + * @skb: send buffer + * @first: first buffer info buffer to use + * @tx_flags: collected send information + * @hdr_len: size of the packet header + * @td_cmd: the command field in the descriptor + * @td_offset: offset for checksum or crc + **/ +static inline void iavf_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb, + struct i40e_tx_buffer *first, u32 tx_flags, + const u8 hdr_len, u32 td_cmd, u32 td_offset) +{ + unsigned int data_len = skb->data_len; + unsigned int size = skb_headlen(skb); + struct skb_frag_struct *frag; + struct i40e_tx_buffer *tx_bi; + struct i40e_tx_desc *tx_desc; + u16 i = tx_ring->next_to_use; + u32 td_tag = 0; + dma_addr_t dma; + + if (tx_flags & I40E_TX_FLAGS_HW_VLAN) { + td_cmd |= I40E_TX_DESC_CMD_IL2TAG1; + td_tag = (tx_flags & I40E_TX_FLAGS_VLAN_MASK) >> + I40E_TX_FLAGS_VLAN_SHIFT; + } + + first->tx_flags = tx_flags; + + dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE); + + tx_desc = I40E_TX_DESC(tx_ring, i); + tx_bi = first; + + for (frag = &skb_shinfo(skb)->frags[0];; frag++) { + unsigned int max_data = I40E_MAX_DATA_PER_TXD_ALIGNED; + + if (dma_mapping_error(tx_ring->dev, dma)) + goto dma_error; + + /* record length, and DMA address */ + dma_unmap_len_set(tx_bi, len, size); + dma_unmap_addr_set(tx_bi, dma, dma); + + /* align size to end of page */ + max_data += -dma & (I40E_MAX_READ_REQ_SIZE - 1); + tx_desc->buffer_addr = cpu_to_le64(dma); + + while (unlikely(size > I40E_MAX_DATA_PER_TXD)) { + tx_desc->cmd_type_offset_bsz = + build_ctob(td_cmd, td_offset, + max_data, td_tag); + + tx_desc++; + i++; + + if (i == tx_ring->count) { + tx_desc = I40E_TX_DESC(tx_ring, 0); + i = 0; + } + + dma += max_data; + size -= max_data; + + max_data = I40E_MAX_DATA_PER_TXD_ALIGNED; + tx_desc->buffer_addr = cpu_to_le64(dma); + } + + if (likely(!data_len)) + break; + + tx_desc->cmd_type_offset_bsz = build_ctob(td_cmd, td_offset, + size, td_tag); + + tx_desc++; + i++; + + if (i == tx_ring->count) { + tx_desc = I40E_TX_DESC(tx_ring, 0); + i = 0; + } + + size = skb_frag_size(frag); + data_len -= size; + + dma = skb_frag_dma_map(tx_ring->dev, frag, 0, size, + DMA_TO_DEVICE); + + tx_bi = &tx_ring->tx_bi[i]; + } + + netdev_tx_sent_queue(txring_txq(tx_ring), first->bytecount); + + i++; + if (i == tx_ring->count) + i = 0; + + tx_ring->next_to_use = i; + + i40e_maybe_stop_tx(tx_ring, DESC_NEEDED); + + /* write last descriptor with RS and EOP bits */ + td_cmd |= I40E_TXD_CMD; + tx_desc->cmd_type_offset_bsz = + build_ctob(td_cmd, td_offset, size, td_tag); + + /* Force memory writes to complete before letting h/w know there + * are new descriptors to fetch. + * + * We also use this memory barrier to make certain all of the + * status bits have been updated before next_to_watch is written. 
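+	 * (The Tx cleanup path keys off next_to_watch, so it must never see
+	 * it pointing at a descriptor whose fields are still being written.)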
+ */ + wmb(); + + /* set next_to_watch value indicating a packet is present */ + first->next_to_watch = tx_desc; + + /* notify HW of packet */ + if (netif_xmit_stopped(txring_txq(tx_ring)) || !skb->xmit_more) { + writel(i, tx_ring->tail); + + /* we need this if more than one processor can write to our tail + * at a time, it synchronizes IO on IA64/Altix systems + */ + mmiowb(); + } + + return; + +dma_error: + dev_info(tx_ring->dev, "TX DMA map failed\n"); + + /* clear dma mappings for failed tx_bi map */ + for (;;) { + tx_bi = &tx_ring->tx_bi[i]; + i40e_unmap_and_free_tx_resource(tx_ring, tx_bi); + if (tx_bi == first) + break; + if (i == 0) + i = tx_ring->count; + i--; + } + + tx_ring->next_to_use = i; +} + +/** + * i40e_xmit_frame_ring - Sends buffer on Tx ring + * @skb: send buffer + * @tx_ring: ring to send buffer on + * + * Returns NETDEV_TX_OK if sent, else an error code + **/ +static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb, + struct i40e_ring *tx_ring) +{ + u64 cd_type_cmd_tso_mss = I40E_TX_DESC_DTYPE_CONTEXT; + u32 cd_tunneling = 0, cd_l2tag2 = 0; + struct i40e_tx_buffer *first; + u32 td_offset = 0; + u32 tx_flags = 0; + __be16 protocol; + u32 td_cmd = 0; + u8 hdr_len = 0; + int tso, count; + + /* prefetch the data, we'll need it later */ + prefetch(skb->data); + + i40e_trace(xmit_frame_ring, skb, tx_ring); + + count = i40e_xmit_descriptor_count(skb); + if (i40e_chk_linearize(skb, count)) { + if (__skb_linearize(skb)) { + dev_kfree_skb_any(skb); + return NETDEV_TX_OK; + } + count = i40e_txd_use_count(skb->len); + tx_ring->tx_stats.tx_linearize++; + } + + /* need: 1 descriptor per page * PAGE_SIZE/I40E_MAX_DATA_PER_TXD, + * + 1 desc for skb_head_len/I40E_MAX_DATA_PER_TXD, + * + 4 desc gap to avoid the cache line where head is, + * + 1 desc for context descriptor, + * otherwise try next time + */ + if (i40e_maybe_stop_tx(tx_ring, count + 4 + 1)) { + tx_ring->tx_stats.tx_busy++; + return NETDEV_TX_BUSY; + } + + /* record the location of the first descriptor for this packet */ + first = &tx_ring->tx_bi[tx_ring->next_to_use]; + first->skb = skb; + first->bytecount = skb->len; + first->gso_segs = 1; + + /* prepare the xmit flags */ + if (iavf_tx_prepare_vlan_flags(skb, tx_ring, &tx_flags)) + goto out_drop; + + /* obtain protocol of skb */ + protocol = vlan_get_protocol(skb); + + /* setup IPv4/IPv6 offloads */ + if (protocol == htons(ETH_P_IP)) + tx_flags |= I40E_TX_FLAGS_IPV4; + else if (protocol == htons(ETH_P_IPV6)) + tx_flags |= I40E_TX_FLAGS_IPV6; + + tso = i40e_tso(first, &hdr_len, &cd_type_cmd_tso_mss); + + if (tso < 0) + goto out_drop; + else if (tso) + tx_flags |= I40E_TX_FLAGS_TSO; + + /* Always offload the checksum, since it's in the data descriptor */ + tso = i40e_tx_enable_csum(skb, &tx_flags, &td_cmd, &td_offset, + tx_ring, &cd_tunneling); + if (tso < 0) + goto out_drop; + + skb_tx_timestamp(skb); + + /* always enable CRC insertion offload */ + td_cmd |= I40E_TX_DESC_CMD_ICRC; + + i40e_create_tx_ctx(tx_ring, cd_type_cmd_tso_mss, + cd_tunneling, cd_l2tag2); + + iavf_tx_map(tx_ring, skb, first, tx_flags, hdr_len, + td_cmd, td_offset); + + return NETDEV_TX_OK; + +out_drop: + i40e_trace(xmit_frame_ring_drop, first->skb, tx_ring); + dev_kfree_skb_any(first->skb); + first->skb = NULL; + return NETDEV_TX_OK; +} + +/** + * iavf_xmit_frame - Selects the correct VSI and Tx queue to send buffer + * @skb: send buffer + * @netdev: network interface device structure + * + * Returns NETDEV_TX_OK if sent, else an error code + **/ +netdev_tx_t iavf_xmit_frame(struct sk_buff *skb, 
struct net_device *netdev)
+{
+	struct iavf_adapter *adapter = netdev_priv(netdev);
+	struct i40e_ring *tx_ring = &adapter->tx_rings[skb->queue_mapping];
+
+	/* hardware can't handle really short frames, hardware padding works
+	 * beyond this point
+	 */
+	if (unlikely(skb->len < I40E_MIN_TX_LEN)) {
+		if (skb_pad(skb, I40E_MIN_TX_LEN - skb->len))
+			return NETDEV_TX_OK;
+		skb->len = I40E_MIN_TX_LEN;
+		skb_set_tail_pointer(skb, I40E_MIN_TX_LEN);
+	}
+
+	return i40e_xmit_frame_ring(skb, tx_ring);
+}
diff --git a/drivers/net/ethernet/intel/iavf/iavf_txrx.h b/drivers/net/ethernet/intel/iavf/iavf_txrx.h
new file mode 100644
index 0000000000000..5dbb1cd52febd
--- /dev/null
+++ b/drivers/net/ethernet/intel/iavf/iavf_txrx.h
@@ -0,0 +1,524 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
+
+#ifndef _I40E_TXRX_H_
+#define _I40E_TXRX_H_
+
+/* Interrupt Throttling and Rate Limiting Goodies */
+#define I40E_DEFAULT_IRQ_WORK      256
+
+/* The datasheets for the X710 and XL710 indicate that the maximum value for
+ * the ITR is 8160usec which is then called out as 0xFF0 with a 2usec
+ * resolution. 8160 is 0x1FE0 when written out in hex. So instead of storing
+ * the register value which is divided by 2 let's use the actual values and
+ * avoid an excessive amount of translation.
+ */
+#define I40E_ITR_DYNAMIC	0x8000	/* use top bit as a flag */
+#define I40E_ITR_MASK		0x1FFE	/* mask for ITR register value */
+#define I40E_MIN_ITR		     2	/* reg uses 2 usec resolution */
+#define I40E_ITR_100K		    10	/* all values below must be even */
+#define I40E_ITR_50K		    20
+#define I40E_ITR_20K		    50
+#define I40E_ITR_18K		    60
+#define I40E_ITR_8K		   122
+#define I40E_MAX_ITR		  8160	/* maximum value as per datasheet */
+#define ITR_TO_REG(setting) ((setting) & ~I40E_ITR_DYNAMIC)
+#define ITR_REG_ALIGN(setting) __ALIGN_MASK(setting, ~I40E_ITR_MASK)
+#define ITR_IS_DYNAMIC(setting) (!!((setting) & I40E_ITR_DYNAMIC))
+
+#define I40E_ITR_RX_DEF		(I40E_ITR_20K | I40E_ITR_DYNAMIC)
+#define I40E_ITR_TX_DEF		(I40E_ITR_20K | I40E_ITR_DYNAMIC)
+
+/* 0x40 is the enable bit for interrupt rate limiting, and must be set if
+ * the value of the rate limit is non-zero
+ */
+#define INTRL_ENA		BIT(6)
+#define I40E_MAX_INTRL		0x3B	/* reg uses 4 usec resolution */
+#define INTRL_REG_TO_USEC(intrl) ((intrl & ~INTRL_ENA) << 2)
+#define INTRL_USEC_TO_REG(set) ((set) ? ((set) >> 2) | INTRL_ENA : 0)
+#define I40E_INTRL_8K		125	/* 8000 ints/sec */
+#define I40E_INTRL_62K		16	/* 62500 ints/sec */
+#define I40E_INTRL_83K		12	/* 83333 ints/sec */
+
+#define I40E_QUEUE_END_OF_LIST 0x7FF
+
+/* this enum matches hardware bits and is meant to be used by DYN_CTLN
+ * registers and QINT registers or more generally anywhere in the manual
+ * mentioning ITR_INDX, ITR_NONE cannot be used as an index 'n' into any
+ * register but instead is a special value meaning "don't update" ITR0/1/2.
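+ * For example, i40e_buildreg_itr() places one of these indexes in the
+ * ITR_INDX field of the DYN_CTLN1 register, and uses I40E_ITR_NONE when
+ * re-enabling the interrupt without updating any ITR.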
+ */ +enum i40e_dyn_idx_t { + I40E_IDX_ITR0 = 0, + I40E_IDX_ITR1 = 1, + I40E_IDX_ITR2 = 2, + I40E_ITR_NONE = 3 /* ITR_NONE must not be used as an index */ +}; + +/* these are indexes into ITRN registers */ +#define I40E_RX_ITR I40E_IDX_ITR0 +#define I40E_TX_ITR I40E_IDX_ITR1 +#define I40E_PE_ITR I40E_IDX_ITR2 + +/* Supported RSS offloads */ +#define I40E_DEFAULT_RSS_HENA ( \ + BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV4_UDP) | \ + BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV4_SCTP) | \ + BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV4_TCP) | \ + BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV4_OTHER) | \ + BIT_ULL(I40E_FILTER_PCTYPE_FRAG_IPV4) | \ + BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV6_UDP) | \ + BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV6_TCP) | \ + BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV6_SCTP) | \ + BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV6_OTHER) | \ + BIT_ULL(I40E_FILTER_PCTYPE_FRAG_IPV6) | \ + BIT_ULL(I40E_FILTER_PCTYPE_L2_PAYLOAD)) + +#define I40E_DEFAULT_RSS_HENA_EXPANDED (I40E_DEFAULT_RSS_HENA | \ + BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV4_TCP_SYN_NO_ACK) | \ + BIT_ULL(I40E_FILTER_PCTYPE_NONF_UNICAST_IPV4_UDP) | \ + BIT_ULL(I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV4_UDP) | \ + BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV6_TCP_SYN_NO_ACK) | \ + BIT_ULL(I40E_FILTER_PCTYPE_NONF_UNICAST_IPV6_UDP) | \ + BIT_ULL(I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV6_UDP)) + +/* Supported Rx Buffer Sizes (a multiple of 128) */ +#define I40E_RXBUFFER_256 256 +#define I40E_RXBUFFER_1536 1536 /* 128B aligned standard Ethernet frame */ +#define I40E_RXBUFFER_2048 2048 +#define I40E_RXBUFFER_3072 3072 /* Used for large frames w/ padding */ +#define I40E_MAX_RXBUFFER 9728 /* largest size for single descriptor */ + +/* NOTE: netdev_alloc_skb reserves up to 64 bytes, NET_IP_ALIGN means we + * reserve 2 more, and skb_shared_info adds an additional 384 bytes more, + * this adds up to 512 bytes of extra data meaning the smallest allocation + * we could have is 1K. + * i.e. RXBUFFER_256 --> 960 byte skb (size-1024 slab) + * i.e. RXBUFFER_512 --> 1216 byte skb (size-2048 slab) + */ +#define I40E_RX_HDR_SIZE I40E_RXBUFFER_256 +#define I40E_PACKET_HDR_PAD (ETH_HLEN + ETH_FCS_LEN + (VLAN_HLEN * 2)) +#define i40e_rx_desc i40e_32byte_rx_desc + +#define I40E_RX_DMA_ATTR \ + (DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING) + +/* Attempt to maximize the headroom available for incoming frames. We + * use a 2K buffer for receives and need 1536/1534 to store the data for + * the frame. This leaves us with 512 bytes of room. From that we need + * to deduct the space needed for the shared info and the padding needed + * to IP align the frame. + * + * Note: For cache line sizes 256 or larger this value is going to end + * up negative. In these cases we should fall back to the legacy + * receive path. + */ +#if (PAGE_SIZE < 8192) +#define I40E_2K_TOO_SMALL_WITH_PADDING \ +((NET_SKB_PAD + I40E_RXBUFFER_1536) > SKB_WITH_OVERHEAD(I40E_RXBUFFER_2048)) + +static inline int i40e_compute_pad(int rx_buf_len) +{ + int page_size, pad_size; + + page_size = ALIGN(rx_buf_len, PAGE_SIZE / 2); + pad_size = SKB_WITH_OVERHEAD(page_size) - rx_buf_len; + + return pad_size; +} + +static inline int i40e_skb_pad(void) +{ + int rx_buf_len; + + /* If a 2K buffer cannot handle a standard Ethernet frame then + * optimize padding for a 3K buffer instead of a 1.5K buffer. + * + * For a 3K buffer we need to add enough padding to allow for + * tailroom due to NET_IP_ALIGN possibly shifting us out of + * cache-line alignment. 
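+	 * For example, with 4K pages and no 2K shortfall, rx_buf_len ends up
+	 * as I40E_RXBUFFER_1536 - NET_IP_ALIGN = 1534, and i40e_compute_pad()
+	 * aligns that to a half page (2048), leaving SKB_WITH_OVERHEAD(2048) -
+	 * 1534 = 130 bytes of headroom (assuming the 384 byte skb_shared_info
+	 * noted earlier and a NET_IP_ALIGN of 2).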
+ */ + if (I40E_2K_TOO_SMALL_WITH_PADDING) + rx_buf_len = I40E_RXBUFFER_3072 + SKB_DATA_ALIGN(NET_IP_ALIGN); + else + rx_buf_len = I40E_RXBUFFER_1536; + + /* if needed make room for NET_IP_ALIGN */ + rx_buf_len -= NET_IP_ALIGN; + + return i40e_compute_pad(rx_buf_len); +} + +#define I40E_SKB_PAD i40e_skb_pad() +#else +#define I40E_2K_TOO_SMALL_WITH_PADDING false +#define I40E_SKB_PAD (NET_SKB_PAD + NET_IP_ALIGN) +#endif + +/** + * i40e_test_staterr - tests bits in Rx descriptor status and error fields + * @rx_desc: pointer to receive descriptor (in le64 format) + * @stat_err_bits: value to mask + * + * This function does some fast chicanery in order to return the + * value of the mask which is really only used for boolean tests. + * The status_error_len doesn't need to be shifted because it begins + * at offset zero. + */ +static inline bool i40e_test_staterr(union i40e_rx_desc *rx_desc, + const u64 stat_err_bits) +{ + return !!(rx_desc->wb.qword1.status_error_len & + cpu_to_le64(stat_err_bits)); +} + +/* How many Rx Buffers do we bundle into one write to the hardware ? */ +#define I40E_RX_BUFFER_WRITE 32 /* Must be power of 2 */ +#define I40E_RX_INCREMENT(r, i) \ + do { \ + (i)++; \ + if ((i) == (r)->count) \ + i = 0; \ + r->next_to_clean = i; \ + } while (0) + +#define I40E_RX_NEXT_DESC(r, i, n) \ + do { \ + (i)++; \ + if ((i) == (r)->count) \ + i = 0; \ + (n) = I40E_RX_DESC((r), (i)); \ + } while (0) + +#define I40E_RX_NEXT_DESC_PREFETCH(r, i, n) \ + do { \ + I40E_RX_NEXT_DESC((r), (i), (n)); \ + prefetch((n)); \ + } while (0) + +#define I40E_MAX_BUFFER_TXD 8 +#define I40E_MIN_TX_LEN 17 + +/* The size limit for a transmit buffer in a descriptor is (16K - 1). + * In order to align with the read requests we will align the value to + * the nearest 4K which represents our maximum read request size. + */ +#define I40E_MAX_READ_REQ_SIZE 4096 +#define I40E_MAX_DATA_PER_TXD (16 * 1024 - 1) +#define I40E_MAX_DATA_PER_TXD_ALIGNED \ + (I40E_MAX_DATA_PER_TXD & ~(I40E_MAX_READ_REQ_SIZE - 1)) + +/** + * i40e_txd_use_count - estimate the number of descriptors needed for Tx + * @size: transmit request size in bytes + * + * Due to hardware alignment restrictions (4K alignment), we need to + * assume that we can have no more than 12K of data per descriptor, even + * though each descriptor can take up to 16K - 1 bytes of aligned memory. + * Thus, we need to divide by 12K. But division is slow! Instead, + * we decompose the operation into shifts and one relatively cheap + * multiply operation. + * + * To divide by 12K, we first divide by 4K, then divide by 3: + * To divide by 4K, shift right by 12 bits + * To divide by 3, multiply by 85, then divide by 256 + * (Divide by 256 is done by shifting right by 8 bits) + * Finally, we add one to round up. Because 256 isn't an exact multiple of + * 3, we'll underestimate near each multiple of 12K. This is actually more + * accurate as we have 4K - 1 of wiggle room that we can fit into the last + * segment. For our purposes this is accurate out to 1M which is orders of + * magnitude greater than our largest possible GSO size. 
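+ * For example, size = 60000 gives ((60000 * 85) >> 20) + 1 = 5, which
+ * matches the exact DIV_ROUND_UP(60000, 12K) = 5 descriptors.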
+ * + * This would then be implemented as: + * return (((size >> 12) * 85) >> 8) + 1; + * + * Since multiplication and division are commutative, we can reorder + * operations into: + * return ((size * 85) >> 20) + 1; + */ +static inline unsigned int i40e_txd_use_count(unsigned int size) +{ + return ((size * 85) >> 20) + 1; +} + +/* Tx Descriptors needed, worst case */ +#define DESC_NEEDED (MAX_SKB_FRAGS + 6) +#define I40E_MIN_DESC_PENDING 4 + +#define I40E_TX_FLAGS_HW_VLAN BIT(1) +#define I40E_TX_FLAGS_SW_VLAN BIT(2) +#define I40E_TX_FLAGS_TSO BIT(3) +#define I40E_TX_FLAGS_IPV4 BIT(4) +#define I40E_TX_FLAGS_IPV6 BIT(5) +#define I40E_TX_FLAGS_FCCRC BIT(6) +#define I40E_TX_FLAGS_FSO BIT(7) +#define I40E_TX_FLAGS_FD_SB BIT(9) +#define I40E_TX_FLAGS_VXLAN_TUNNEL BIT(10) +#define I40E_TX_FLAGS_VLAN_MASK 0xffff0000 +#define I40E_TX_FLAGS_VLAN_PRIO_MASK 0xe0000000 +#define I40E_TX_FLAGS_VLAN_PRIO_SHIFT 29 +#define I40E_TX_FLAGS_VLAN_SHIFT 16 + +struct i40e_tx_buffer { + struct i40e_tx_desc *next_to_watch; + union { + struct sk_buff *skb; + void *raw_buf; + }; + unsigned int bytecount; + unsigned short gso_segs; + + DEFINE_DMA_UNMAP_ADDR(dma); + DEFINE_DMA_UNMAP_LEN(len); + u32 tx_flags; +}; + +struct i40e_rx_buffer { + dma_addr_t dma; + struct page *page; +#if (BITS_PER_LONG > 32) || (PAGE_SIZE >= 65536) + __u32 page_offset; +#else + __u16 page_offset; +#endif + __u16 pagecnt_bias; +}; + +struct i40e_queue_stats { + u64 packets; + u64 bytes; +}; + +struct i40e_tx_queue_stats { + u64 restart_queue; + u64 tx_busy; + u64 tx_done_old; + u64 tx_linearize; + u64 tx_force_wb; + int prev_pkt_ctr; + u64 tx_lost_interrupt; +}; + +struct i40e_rx_queue_stats { + u64 non_eop_descs; + u64 alloc_page_failed; + u64 alloc_buff_failed; + u64 page_reuse_count; + u64 realloc_count; +}; + +enum i40e_ring_state_t { + __I40E_TX_FDIR_INIT_DONE, + __I40E_TX_XPS_INIT_DONE, + __I40E_RING_STATE_NBITS /* must be last */ +}; + +/* some useful defines for virtchannel interface, which + * is the only remaining user of header split + */ +#define I40E_RX_DTYPE_NO_SPLIT 0 +#define I40E_RX_DTYPE_HEADER_SPLIT 1 +#define I40E_RX_DTYPE_SPLIT_ALWAYS 2 +#define I40E_RX_SPLIT_L2 0x1 +#define I40E_RX_SPLIT_IP 0x2 +#define I40E_RX_SPLIT_TCP_UDP 0x4 +#define I40E_RX_SPLIT_SCTP 0x8 + +/* struct that defines a descriptor ring, associated with a VSI */ +struct i40e_ring { + struct i40e_ring *next; /* pointer to next ring in q_vector */ + void *desc; /* Descriptor ring memory */ + struct device *dev; /* Used for DMA mapping */ + struct net_device *netdev; /* netdev ring maps to */ + union { + struct i40e_tx_buffer *tx_bi; + struct i40e_rx_buffer *rx_bi; + }; + DECLARE_BITMAP(state, __I40E_RING_STATE_NBITS); + u16 queue_index; /* Queue number of ring */ + u8 dcb_tc; /* Traffic class of ring */ + u8 __iomem *tail; + + /* high bit set means dynamic, use accessors routines to read/write. + * hardware only supports 2us resolution for the ITR registers. + * these values always store the USER setting, and must be converted + * before programming to a register. 
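+	 * For example, the default I40E_ITR_TX_DEF setting is
+	 * I40E_ITR_20K | I40E_ITR_DYNAMIC; ITR_TO_REG() strips the dynamic
+	 * flag to yield 50 usecs, which i40e_buildreg_itr() then encodes
+	 * in the register's 2 usec units.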
+ */ + u16 itr_setting; + + u16 count; /* Number of descriptors */ + u16 reg_idx; /* HW register index of the ring */ + u16 rx_buf_len; + + /* used in interrupt processing */ + u16 next_to_use; + u16 next_to_clean; + + u8 atr_sample_rate; + u8 atr_count; + + bool ring_active; /* is ring online or not */ + bool arm_wb; /* do something to arm write back */ + u8 packet_stride; + + u16 flags; +#define I40E_TXR_FLAGS_WB_ON_ITR BIT(0) +#define I40E_RXR_FLAGS_BUILD_SKB_ENABLED BIT(1) + + /* stats structs */ + struct i40e_queue_stats stats; + struct u64_stats_sync syncp; + union { + struct i40e_tx_queue_stats tx_stats; + struct i40e_rx_queue_stats rx_stats; + }; + + unsigned int size; /* length of descriptor ring in bytes */ + dma_addr_t dma; /* physical address of ring */ + + struct i40e_vsi *vsi; /* Backreference to associated VSI */ + struct i40e_q_vector *q_vector; /* Backreference to associated vector */ + + struct rcu_head rcu; /* to avoid race on free */ + u16 next_to_alloc; + struct sk_buff *skb; /* When iavf_clean_rx_ring_irq() must + * return before it sees the EOP for + * the current packet, we save that skb + * here and resume receiving this + * packet the next time + * iavf_clean_rx_ring_irq() is called + * for this ring. + */ +} ____cacheline_internodealigned_in_smp; + +static inline bool ring_uses_build_skb(struct i40e_ring *ring) +{ + return !!(ring->flags & I40E_RXR_FLAGS_BUILD_SKB_ENABLED); +} + +static inline void set_ring_build_skb_enabled(struct i40e_ring *ring) +{ + ring->flags |= I40E_RXR_FLAGS_BUILD_SKB_ENABLED; +} + +static inline void clear_ring_build_skb_enabled(struct i40e_ring *ring) +{ + ring->flags &= ~I40E_RXR_FLAGS_BUILD_SKB_ENABLED; +} + +#define I40E_ITR_ADAPTIVE_MIN_INC 0x0002 +#define I40E_ITR_ADAPTIVE_MIN_USECS 0x0002 +#define I40E_ITR_ADAPTIVE_MAX_USECS 0x007e +#define I40E_ITR_ADAPTIVE_LATENCY 0x8000 +#define I40E_ITR_ADAPTIVE_BULK 0x0000 +#define ITR_IS_BULK(x) (!((x) & I40E_ITR_ADAPTIVE_LATENCY)) + +struct i40e_ring_container { + struct i40e_ring *ring; /* pointer to linked list of ring(s) */ + unsigned long next_update; /* jiffies value of next update */ + unsigned int total_bytes; /* total bytes processed this int */ + unsigned int total_packets; /* total packets processed this int */ + u16 count; + u16 target_itr; /* target ITR setting for ring(s) */ + u16 current_itr; /* current ITR setting for ring(s) */ +}; + +/* iterator for handling rings in ring container */ +#define i40e_for_each_ring(pos, head) \ + for (pos = (head).ring; pos != NULL; pos = pos->next) + +static inline unsigned int i40e_rx_pg_order(struct i40e_ring *ring) +{ +#if (PAGE_SIZE < 8192) + if (ring->rx_buf_len > (PAGE_SIZE / 2)) + return 1; +#endif + return 0; +} + +#define i40e_rx_pg_size(_ring) (PAGE_SIZE << i40e_rx_pg_order(_ring)) + +bool iavf_alloc_rx_buffers(struct i40e_ring *rxr, u16 cleaned_count); +netdev_tx_t iavf_xmit_frame(struct sk_buff *skb, struct net_device *netdev); +void iavf_clean_tx_ring(struct i40e_ring *tx_ring); +void iavf_clean_rx_ring(struct i40e_ring *rx_ring); +int iavf_setup_tx_descriptors(struct i40e_ring *tx_ring); +int iavf_setup_rx_descriptors(struct i40e_ring *rx_ring); +void iavf_free_tx_resources(struct i40e_ring *tx_ring); +void iavf_free_rx_resources(struct i40e_ring *rx_ring); +int iavf_napi_poll(struct napi_struct *napi, int budget); +void iavf_force_wb(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector); +u32 iavf_get_tx_pending(struct i40e_ring *ring, bool in_sw); +void iavf_detect_recover_hung(struct i40e_vsi *vsi); +int 
__iavf_maybe_stop_tx(struct i40e_ring *tx_ring, int size);
+bool __iavf_chk_linearize(struct sk_buff *skb);
+
+/**
+ * i40e_xmit_descriptor_count - calculate number of Tx descriptors needed
+ * @skb: send buffer
+ *
+ * Returns number of data descriptors needed for this skb. Returns 0 to indicate
+ * there are not enough descriptors available in this ring since we need at least
+ * one descriptor.
+ **/
+static inline int i40e_xmit_descriptor_count(struct sk_buff *skb)
+{
+ const struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0];
+ unsigned int nr_frags = skb_shinfo(skb)->nr_frags;
+ int count = 0, size = skb_headlen(skb);
+
+ for (;;) {
+ count += i40e_txd_use_count(size);
+
+ if (!nr_frags--)
+ break;
+
+ size = skb_frag_size(frag++);
+ }
+
+ return count;
+}
+
+/**
+ * i40e_maybe_stop_tx - 1st level check for Tx stop conditions
+ * @tx_ring: the ring to be checked
+ * @size: the size buffer we want to assure is available
+ *
+ * Returns 0 if stop is not needed
+ **/
+static inline int i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size)
+{
+ if (likely(I40E_DESC_UNUSED(tx_ring) >= size))
+ return 0;
+ return __iavf_maybe_stop_tx(tx_ring, size);
+}
+
+/**
+ * i40e_chk_linearize - Check if there are more than 8 fragments per packet
+ * @skb: send buffer
+ * @count: number of buffers used
+ *
+ * Note: Our HW can't scatter-gather more than 8 fragments to build
+ * a packet on the wire and so we need to figure out the cases where we
+ * need to linearize the skb.
+ **/
+static inline bool i40e_chk_linearize(struct sk_buff *skb, int count)
+{
+ /* Both TSO and single send will work if count is less than 8 */
+ if (likely(count < I40E_MAX_BUFFER_TXD))
+ return false;
+
+ if (skb_is_gso(skb))
+ return __iavf_chk_linearize(skb);
+
+ /* we can support up to 8 data buffers for a single send */
+ return count != I40E_MAX_BUFFER_TXD;
+}
+
+/**
+ * txring_txq - Find the netdev Tx ring based on the i40e Tx ring
+ * @ring: Tx ring to find the netdev equivalent of
+ **/
+static inline struct netdev_queue *txring_txq(const struct i40e_ring *ring)
+{
+ return netdev_get_tx_queue(ring->netdev, ring->queue_index);
+}
+#endif /* _I40E_TXRX_H_ */
diff --git a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c
new file mode 100644
index 0000000000000..ebfef5fe346c1
--- /dev/null
+++ b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c
@@ -0,0 +1,1452 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
+
+#include "iavf.h"
+#include "i40e_prototype.h"
+#include "iavf_client.h"
+
+/* busy wait delay in msec */
+#define IAVF_BUSY_WAIT_DELAY 10
+#define IAVF_BUSY_WAIT_COUNT 50
+
+/**
+ * iavf_send_pf_msg
+ * @adapter: adapter structure
+ * @op: virtual channel opcode
+ * @msg: pointer to message buffer
+ * @len: message length
+ *
+ * Send message to PF and print status if failure.
+ **/
+static int iavf_send_pf_msg(struct iavf_adapter *adapter,
+ enum virtchnl_ops op, u8 *msg, u16 len)
+{
+ struct i40e_hw *hw = &adapter->hw;
+ iavf_status err;
+
+ if (adapter->flags & IAVF_FLAG_PF_COMMS_FAILED)
+ return 0; /* nothing to see here, move along */
+
+ err = iavf_aq_send_msg_to_pf(hw, op, 0, msg, len, NULL);
+ if (err)
+ dev_dbg(&adapter->pdev->dev, "Unable to send opcode %d to PF, err %s, aq_err %s\n",
+ op, iavf_stat_str(hw, err),
+ iavf_aq_str(hw, hw->aq.asq_last_status));
+ return err;
+}
+
+/**
+ * iavf_send_api_ver
+ * @adapter: adapter structure
+ *
+ * Send API version admin queue message to the PF. The reply is not checked
+ * in this function. 
Returns 0 if the message was successfully + * sent, or one of the I40E_ADMIN_QUEUE_ERROR_ statuses if not. + **/ +int iavf_send_api_ver(struct iavf_adapter *adapter) +{ + struct virtchnl_version_info vvi; + + vvi.major = VIRTCHNL_VERSION_MAJOR; + vvi.minor = VIRTCHNL_VERSION_MINOR; + + return iavf_send_pf_msg(adapter, VIRTCHNL_OP_VERSION, (u8 *)&vvi, + sizeof(vvi)); +} + +/** + * iavf_verify_api_ver + * @adapter: adapter structure + * + * Compare API versions with the PF. Must be called after admin queue is + * initialized. Returns 0 if API versions match, -EIO if they do not, + * I40E_ERR_ADMIN_QUEUE_NO_WORK if the admin queue is empty, and any errors + * from the firmware are propagated. + **/ +int iavf_verify_api_ver(struct iavf_adapter *adapter) +{ + struct virtchnl_version_info *pf_vvi; + struct i40e_hw *hw = &adapter->hw; + struct i40e_arq_event_info event; + enum virtchnl_ops op; + iavf_status err; + + event.buf_len = IAVF_MAX_AQ_BUF_SIZE; + event.msg_buf = kzalloc(event.buf_len, GFP_KERNEL); + if (!event.msg_buf) { + err = -ENOMEM; + goto out; + } + + while (1) { + err = iavf_clean_arq_element(hw, &event, NULL); + /* When the AQ is empty, iavf_clean_arq_element will return + * nonzero and this loop will terminate. + */ + if (err) + goto out_alloc; + op = + (enum virtchnl_ops)le32_to_cpu(event.desc.cookie_high); + if (op == VIRTCHNL_OP_VERSION) + break; + } + + + err = (iavf_status)le32_to_cpu(event.desc.cookie_low); + if (err) + goto out_alloc; + + if (op != VIRTCHNL_OP_VERSION) { + dev_info(&adapter->pdev->dev, "Invalid reply type %d from PF\n", + op); + err = -EIO; + goto out_alloc; + } + + pf_vvi = (struct virtchnl_version_info *)event.msg_buf; + adapter->pf_version = *pf_vvi; + + if ((pf_vvi->major > VIRTCHNL_VERSION_MAJOR) || + ((pf_vvi->major == VIRTCHNL_VERSION_MAJOR) && + (pf_vvi->minor > VIRTCHNL_VERSION_MINOR))) + err = -EIO; + +out_alloc: + kfree(event.msg_buf); +out: + return err; +} + +/** + * iavf_send_vf_config_msg + * @adapter: adapter structure + * + * Send VF configuration request admin queue message to the PF. The reply + * is not checked in this function. Returns 0 if the message was + * successfully sent, or one of the I40E_ADMIN_QUEUE_ERROR_ statuses if not. + **/ +int iavf_send_vf_config_msg(struct iavf_adapter *adapter) +{ + u32 caps; + + caps = VIRTCHNL_VF_OFFLOAD_L2 | + VIRTCHNL_VF_OFFLOAD_RSS_PF | + VIRTCHNL_VF_OFFLOAD_RSS_AQ | + VIRTCHNL_VF_OFFLOAD_RSS_REG | + VIRTCHNL_VF_OFFLOAD_VLAN | + VIRTCHNL_VF_OFFLOAD_WB_ON_ITR | + VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2 | + VIRTCHNL_VF_OFFLOAD_ENCAP | + VIRTCHNL_VF_OFFLOAD_ENCAP_CSUM | + VIRTCHNL_VF_OFFLOAD_REQ_QUEUES | + VIRTCHNL_VF_OFFLOAD_ADQ; + + adapter->current_op = VIRTCHNL_OP_GET_VF_RESOURCES; + adapter->aq_required &= ~IAVF_FLAG_AQ_GET_CONFIG; + if (PF_IS_V11(adapter)) + return iavf_send_pf_msg(adapter, VIRTCHNL_OP_GET_VF_RESOURCES, + (u8 *)&caps, sizeof(caps)); + else + return iavf_send_pf_msg(adapter, VIRTCHNL_OP_GET_VF_RESOURCES, + NULL, 0); +} + +/** + * iavf_validate_num_queues + * @adapter: adapter structure + * + * Validate that the number of queues the PF has sent in + * VIRTCHNL_OP_GET_VF_RESOURCES is not larger than the VF can handle. 
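+ * If it is larger, the count is capped at IAVF_MAX_REQ_QUEUES, both for
+ * the VF as a whole and for each VSI the PF reported.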
+ **/
+static void iavf_validate_num_queues(struct iavf_adapter *adapter)
+{
+ if (adapter->vf_res->num_queue_pairs > IAVF_MAX_REQ_QUEUES) {
+ struct virtchnl_vsi_resource *vsi_res;
+ int i;
+
+ dev_info(&adapter->pdev->dev, "Received %d queues, but can only have a max of %d\n",
+ adapter->vf_res->num_queue_pairs,
+ IAVF_MAX_REQ_QUEUES);
+ dev_info(&adapter->pdev->dev, "Fixing by reducing queues to %d\n",
+ IAVF_MAX_REQ_QUEUES);
+ adapter->vf_res->num_queue_pairs = IAVF_MAX_REQ_QUEUES;
+ for (i = 0; i < adapter->vf_res->num_vsis; i++) {
+ vsi_res = &adapter->vf_res->vsi_res[i];
+ vsi_res->num_queue_pairs = IAVF_MAX_REQ_QUEUES;
+ }
+ }
+}
+
+/**
+ * iavf_get_vf_config
+ * @adapter: private adapter structure
+ *
+ * Get VF configuration from PF and populate hw structure. Must be called after
+ * admin queue is initialized. Busy waits until response is received from PF,
+ * with maximum timeout. Response from PF is returned in the buffer for further
+ * processing by the caller.
+ **/
+int iavf_get_vf_config(struct iavf_adapter *adapter)
+{
+ struct i40e_hw *hw = &adapter->hw;
+ struct i40e_arq_event_info event;
+ enum virtchnl_ops op;
+ iavf_status err;
+ u16 len;
+
+ len = sizeof(struct virtchnl_vf_resource) +
+ I40E_MAX_VF_VSI * sizeof(struct virtchnl_vsi_resource);
+ event.buf_len = len;
+ event.msg_buf = kzalloc(event.buf_len, GFP_KERNEL);
+ if (!event.msg_buf) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ while (1) {
+ /* When the AQ is empty, iavf_clean_arq_element will return
+ * nonzero and this loop will terminate.
+ */
+ err = iavf_clean_arq_element(hw, &event, NULL);
+ if (err)
+ goto out_alloc;
+ op =
+ (enum virtchnl_ops)le32_to_cpu(event.desc.cookie_high);
+ if (op == VIRTCHNL_OP_GET_VF_RESOURCES)
+ break;
+ }
+
+ err = (iavf_status)le32_to_cpu(event.desc.cookie_low);
+ memcpy(adapter->vf_res, event.msg_buf, min(event.msg_len, len));
+
+ /* some PFs send more queues than we should have so validate that
+ * we aren't getting too many queues
+ */
+ if (!err)
+ iavf_validate_num_queues(adapter);
+ iavf_vf_parse_hw_config(hw, adapter->vf_res);
+out_alloc:
+ kfree(event.msg_buf);
+out:
+ return err;
+}
+
+/**
+ * iavf_configure_queues
+ * @adapter: adapter structure
+ *
+ * Request that the PF set up our (previously allocated) queues.
+ **/
+void iavf_configure_queues(struct iavf_adapter *adapter)
+{
+ struct virtchnl_vsi_queue_config_info *vqci;
+ struct virtchnl_queue_pair_info *vqpi;
+ int pairs = adapter->num_active_queues;
+ int i, len, max_frame = I40E_MAX_RXBUFFER;
+
+ if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) {
+ /* bail because we already have a command pending */
+ dev_err(&adapter->pdev->dev, "Cannot configure queues, command %d pending\n",
+ adapter->current_op);
+ return;
+ }
+ adapter->current_op = VIRTCHNL_OP_CONFIG_VSI_QUEUES;
+ len = sizeof(struct virtchnl_vsi_queue_config_info) +
+ (sizeof(struct virtchnl_queue_pair_info) * pairs);
+ vqci = kzalloc(len, GFP_KERNEL);
+ if (!vqci)
+ return;
+
+ /* Limit maximum frame size when jumbo frames are not enabled */
+ if (!(adapter->flags & IAVF_FLAG_LEGACY_RX) &&
+ (adapter->netdev->mtu <= ETH_DATA_LEN))
+ max_frame = I40E_RXBUFFER_1536 - NET_IP_ALIGN;
+
+ vqci->vsi_id = adapter->vsi_res->vsi_id;
+ vqci->num_queue_pairs = pairs;
+ vqpi = vqci->qpair;
+ /* Size check is not needed here - HW max is 16 queue pairs, and we
+ * can fit info for 31 of them into the AQ buffer before it overflows.
+ */ + for (i = 0; i < pairs; i++) { + vqpi->txq.vsi_id = vqci->vsi_id; + vqpi->txq.queue_id = i; + vqpi->txq.ring_len = adapter->tx_rings[i].count; + vqpi->txq.dma_ring_addr = adapter->tx_rings[i].dma; + vqpi->rxq.vsi_id = vqci->vsi_id; + vqpi->rxq.queue_id = i; + vqpi->rxq.ring_len = adapter->rx_rings[i].count; + vqpi->rxq.dma_ring_addr = adapter->rx_rings[i].dma; + vqpi->rxq.max_pkt_size = max_frame; + vqpi->rxq.databuffer_size = + ALIGN(adapter->rx_rings[i].rx_buf_len, + BIT_ULL(I40E_RXQ_CTX_DBUFF_SHIFT)); + vqpi++; + } + + adapter->aq_required &= ~IAVF_FLAG_AQ_CONFIGURE_QUEUES; + iavf_send_pf_msg(adapter, VIRTCHNL_OP_CONFIG_VSI_QUEUES, + (u8 *)vqci, len); + kfree(vqci); +} + +/** + * iavf_enable_queues + * @adapter: adapter structure + * + * Request that the PF enable all of our queues. + **/ +void iavf_enable_queues(struct iavf_adapter *adapter) +{ + struct virtchnl_queue_select vqs; + + if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) { + /* bail because we already have a command pending */ + dev_err(&adapter->pdev->dev, "Cannot enable queues, command %d pending\n", + adapter->current_op); + return; + } + adapter->current_op = VIRTCHNL_OP_ENABLE_QUEUES; + vqs.vsi_id = adapter->vsi_res->vsi_id; + vqs.tx_queues = BIT(adapter->num_active_queues) - 1; + vqs.rx_queues = vqs.tx_queues; + adapter->aq_required &= ~IAVF_FLAG_AQ_ENABLE_QUEUES; + iavf_send_pf_msg(adapter, VIRTCHNL_OP_ENABLE_QUEUES, + (u8 *)&vqs, sizeof(vqs)); +} + +/** + * iavf_disable_queues + * @adapter: adapter structure + * + * Request that the PF disable all of our queues. + **/ +void iavf_disable_queues(struct iavf_adapter *adapter) +{ + struct virtchnl_queue_select vqs; + + if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) { + /* bail because we already have a command pending */ + dev_err(&adapter->pdev->dev, "Cannot disable queues, command %d pending\n", + adapter->current_op); + return; + } + adapter->current_op = VIRTCHNL_OP_DISABLE_QUEUES; + vqs.vsi_id = adapter->vsi_res->vsi_id; + vqs.tx_queues = BIT(adapter->num_active_queues) - 1; + vqs.rx_queues = vqs.tx_queues; + adapter->aq_required &= ~IAVF_FLAG_AQ_DISABLE_QUEUES; + iavf_send_pf_msg(adapter, VIRTCHNL_OP_DISABLE_QUEUES, + (u8 *)&vqs, sizeof(vqs)); +} + +/** + * iavf_map_queues + * @adapter: adapter structure + * + * Request that the PF map queues to interrupt vectors. Misc causes, including + * admin queue, are always mapped to vector 0. 
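+ /* Queue vectors are therefore numbered starting at NONQ_VECS in the
+ * map sent to the PF; see iavf_map_queues() below.
+ */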
+ **/ +void iavf_map_queues(struct iavf_adapter *adapter) +{ + struct virtchnl_irq_map_info *vimi; + struct virtchnl_vector_map *vecmap; + int v_idx, q_vectors, len; + struct i40e_q_vector *q_vector; + + if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) { + /* bail because we already have a command pending */ + dev_err(&adapter->pdev->dev, "Cannot map queues to vectors, command %d pending\n", + adapter->current_op); + return; + } + adapter->current_op = VIRTCHNL_OP_CONFIG_IRQ_MAP; + + q_vectors = adapter->num_msix_vectors - NONQ_VECS; + + len = sizeof(struct virtchnl_irq_map_info) + + (adapter->num_msix_vectors * + sizeof(struct virtchnl_vector_map)); + vimi = kzalloc(len, GFP_KERNEL); + if (!vimi) + return; + + vimi->num_vectors = adapter->num_msix_vectors; + /* Queue vectors first */ + for (v_idx = 0; v_idx < q_vectors; v_idx++) { + q_vector = &adapter->q_vectors[v_idx]; + vecmap = &vimi->vecmap[v_idx]; + + vecmap->vsi_id = adapter->vsi_res->vsi_id; + vecmap->vector_id = v_idx + NONQ_VECS; + vecmap->txq_map = q_vector->ring_mask; + vecmap->rxq_map = q_vector->ring_mask; + vecmap->rxitr_idx = I40E_RX_ITR; + vecmap->txitr_idx = I40E_TX_ITR; + } + /* Misc vector last - this is only for AdminQ messages */ + vecmap = &vimi->vecmap[v_idx]; + vecmap->vsi_id = adapter->vsi_res->vsi_id; + vecmap->vector_id = 0; + vecmap->txq_map = 0; + vecmap->rxq_map = 0; + + adapter->aq_required &= ~IAVF_FLAG_AQ_MAP_VECTORS; + iavf_send_pf_msg(adapter, VIRTCHNL_OP_CONFIG_IRQ_MAP, + (u8 *)vimi, len); + kfree(vimi); +} + +/** + * iavf_request_queues + * @adapter: adapter structure + * @num: number of requested queues + * + * We get a default number of queues from the PF. This enables us to request a + * different number. Returns 0 on success, negative on failure + **/ +int iavf_request_queues(struct iavf_adapter *adapter, int num) +{ + struct virtchnl_vf_res_request vfres; + + if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) { + /* bail because we already have a command pending */ + dev_err(&adapter->pdev->dev, "Cannot request queues, command %d pending\n", + adapter->current_op); + return -EBUSY; + } + + vfres.num_queue_pairs = num; + + adapter->current_op = VIRTCHNL_OP_REQUEST_QUEUES; + adapter->flags |= IAVF_FLAG_REINIT_ITR_NEEDED; + return iavf_send_pf_msg(adapter, VIRTCHNL_OP_REQUEST_QUEUES, + (u8 *)&vfres, sizeof(vfres)); +} + +/** + * iavf_add_ether_addrs + * @adapter: adapter structure + * + * Request that the PF add one or more addresses to our filters. 
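+ * If more filters are marked for addition than fit in one AQ buffer,
+ * the request is truncated and the AQ flag is left set so that this
+ * function runs again for the remainder.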
+ **/ +void iavf_add_ether_addrs(struct iavf_adapter *adapter) +{ + struct virtchnl_ether_addr_list *veal; + int len, i = 0, count = 0; + struct iavf_mac_filter *f; + bool more = false; + + if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) { + /* bail because we already have a command pending */ + dev_err(&adapter->pdev->dev, "Cannot add filters, command %d pending\n", + adapter->current_op); + return; + } + + spin_lock_bh(&adapter->mac_vlan_list_lock); + + list_for_each_entry(f, &adapter->mac_filter_list, list) { + if (f->add) + count++; + } + if (!count) { + adapter->aq_required &= ~IAVF_FLAG_AQ_ADD_MAC_FILTER; + spin_unlock_bh(&adapter->mac_vlan_list_lock); + return; + } + adapter->current_op = VIRTCHNL_OP_ADD_ETH_ADDR; + + len = sizeof(struct virtchnl_ether_addr_list) + + (count * sizeof(struct virtchnl_ether_addr)); + if (len > IAVF_MAX_AQ_BUF_SIZE) { + dev_warn(&adapter->pdev->dev, "Too many add MAC changes in one request\n"); + count = (IAVF_MAX_AQ_BUF_SIZE - + sizeof(struct virtchnl_ether_addr_list)) / + sizeof(struct virtchnl_ether_addr); + len = sizeof(struct virtchnl_ether_addr_list) + + (count * sizeof(struct virtchnl_ether_addr)); + more = true; + } + + veal = kzalloc(len, GFP_ATOMIC); + if (!veal) { + spin_unlock_bh(&adapter->mac_vlan_list_lock); + return; + } + + veal->vsi_id = adapter->vsi_res->vsi_id; + veal->num_elements = count; + list_for_each_entry(f, &adapter->mac_filter_list, list) { + if (f->add) { + ether_addr_copy(veal->list[i].addr, f->macaddr); + i++; + f->add = false; + if (i == count) + break; + } + } + if (!more) + adapter->aq_required &= ~IAVF_FLAG_AQ_ADD_MAC_FILTER; + + spin_unlock_bh(&adapter->mac_vlan_list_lock); + + iavf_send_pf_msg(adapter, VIRTCHNL_OP_ADD_ETH_ADDR, (u8 *)veal, len); + kfree(veal); +} + +/** + * iavf_del_ether_addrs + * @adapter: adapter structure + * + * Request that the PF remove one or more addresses from our filters. 
+ **/ +void iavf_del_ether_addrs(struct iavf_adapter *adapter) +{ + struct virtchnl_ether_addr_list *veal; + struct iavf_mac_filter *f, *ftmp; + int len, i = 0, count = 0; + bool more = false; + + if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) { + /* bail because we already have a command pending */ + dev_err(&adapter->pdev->dev, "Cannot remove filters, command %d pending\n", + adapter->current_op); + return; + } + + spin_lock_bh(&adapter->mac_vlan_list_lock); + + list_for_each_entry(f, &adapter->mac_filter_list, list) { + if (f->remove) + count++; + } + if (!count) { + adapter->aq_required &= ~IAVF_FLAG_AQ_DEL_MAC_FILTER; + spin_unlock_bh(&adapter->mac_vlan_list_lock); + return; + } + adapter->current_op = VIRTCHNL_OP_DEL_ETH_ADDR; + + len = sizeof(struct virtchnl_ether_addr_list) + + (count * sizeof(struct virtchnl_ether_addr)); + if (len > IAVF_MAX_AQ_BUF_SIZE) { + dev_warn(&adapter->pdev->dev, "Too many delete MAC changes in one request\n"); + count = (IAVF_MAX_AQ_BUF_SIZE - + sizeof(struct virtchnl_ether_addr_list)) / + sizeof(struct virtchnl_ether_addr); + len = sizeof(struct virtchnl_ether_addr_list) + + (count * sizeof(struct virtchnl_ether_addr)); + more = true; + } + veal = kzalloc(len, GFP_ATOMIC); + if (!veal) { + spin_unlock_bh(&adapter->mac_vlan_list_lock); + return; + } + + veal->vsi_id = adapter->vsi_res->vsi_id; + veal->num_elements = count; + list_for_each_entry_safe(f, ftmp, &adapter->mac_filter_list, list) { + if (f->remove) { + ether_addr_copy(veal->list[i].addr, f->macaddr); + i++; + list_del(&f->list); + kfree(f); + if (i == count) + break; + } + } + if (!more) + adapter->aq_required &= ~IAVF_FLAG_AQ_DEL_MAC_FILTER; + + spin_unlock_bh(&adapter->mac_vlan_list_lock); + + iavf_send_pf_msg(adapter, VIRTCHNL_OP_DEL_ETH_ADDR, (u8 *)veal, len); + kfree(veal); +} + +/** + * iavf_add_vlans + * @adapter: adapter structure + * + * Request that the PF add one or more VLAN filters to our VSI. 
+ **/ +void iavf_add_vlans(struct iavf_adapter *adapter) +{ + struct virtchnl_vlan_filter_list *vvfl; + int len, i = 0, count = 0; + struct iavf_vlan_filter *f; + bool more = false; + + if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) { + /* bail because we already have a command pending */ + dev_err(&adapter->pdev->dev, "Cannot add VLANs, command %d pending\n", + adapter->current_op); + return; + } + + spin_lock_bh(&adapter->mac_vlan_list_lock); + + list_for_each_entry(f, &adapter->vlan_filter_list, list) { + if (f->add) + count++; + } + if (!count) { + adapter->aq_required &= ~IAVF_FLAG_AQ_ADD_VLAN_FILTER; + spin_unlock_bh(&adapter->mac_vlan_list_lock); + return; + } + adapter->current_op = VIRTCHNL_OP_ADD_VLAN; + + len = sizeof(struct virtchnl_vlan_filter_list) + + (count * sizeof(u16)); + if (len > IAVF_MAX_AQ_BUF_SIZE) { + dev_warn(&adapter->pdev->dev, "Too many add VLAN changes in one request\n"); + count = (IAVF_MAX_AQ_BUF_SIZE - + sizeof(struct virtchnl_vlan_filter_list)) / + sizeof(u16); + len = sizeof(struct virtchnl_vlan_filter_list) + + (count * sizeof(u16)); + more = true; + } + vvfl = kzalloc(len, GFP_ATOMIC); + if (!vvfl) { + spin_unlock_bh(&adapter->mac_vlan_list_lock); + return; + } + + vvfl->vsi_id = adapter->vsi_res->vsi_id; + vvfl->num_elements = count; + list_for_each_entry(f, &adapter->vlan_filter_list, list) { + if (f->add) { + vvfl->vlan_id[i] = f->vlan; + i++; + f->add = false; + if (i == count) + break; + } + } + if (!more) + adapter->aq_required &= ~IAVF_FLAG_AQ_ADD_VLAN_FILTER; + + spin_unlock_bh(&adapter->mac_vlan_list_lock); + + iavf_send_pf_msg(adapter, VIRTCHNL_OP_ADD_VLAN, (u8 *)vvfl, len); + kfree(vvfl); +} + +/** + * iavf_del_vlans + * @adapter: adapter structure + * + * Request that the PF remove one or more VLAN filters from our VSI. 
+ **/ +void iavf_del_vlans(struct iavf_adapter *adapter) +{ + struct virtchnl_vlan_filter_list *vvfl; + struct iavf_vlan_filter *f, *ftmp; + int len, i = 0, count = 0; + bool more = false; + + if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) { + /* bail because we already have a command pending */ + dev_err(&adapter->pdev->dev, "Cannot remove VLANs, command %d pending\n", + adapter->current_op); + return; + } + + spin_lock_bh(&adapter->mac_vlan_list_lock); + + list_for_each_entry(f, &adapter->vlan_filter_list, list) { + if (f->remove) + count++; + } + if (!count) { + adapter->aq_required &= ~IAVF_FLAG_AQ_DEL_VLAN_FILTER; + spin_unlock_bh(&adapter->mac_vlan_list_lock); + return; + } + adapter->current_op = VIRTCHNL_OP_DEL_VLAN; + + len = sizeof(struct virtchnl_vlan_filter_list) + + (count * sizeof(u16)); + if (len > IAVF_MAX_AQ_BUF_SIZE) { + dev_warn(&adapter->pdev->dev, "Too many delete VLAN changes in one request\n"); + count = (IAVF_MAX_AQ_BUF_SIZE - + sizeof(struct virtchnl_vlan_filter_list)) / + sizeof(u16); + len = sizeof(struct virtchnl_vlan_filter_list) + + (count * sizeof(u16)); + more = true; + } + vvfl = kzalloc(len, GFP_ATOMIC); + if (!vvfl) { + spin_unlock_bh(&adapter->mac_vlan_list_lock); + return; + } + + vvfl->vsi_id = adapter->vsi_res->vsi_id; + vvfl->num_elements = count; + list_for_each_entry_safe(f, ftmp, &adapter->vlan_filter_list, list) { + if (f->remove) { + vvfl->vlan_id[i] = f->vlan; + i++; + list_del(&f->list); + kfree(f); + if (i == count) + break; + } + } + if (!more) + adapter->aq_required &= ~IAVF_FLAG_AQ_DEL_VLAN_FILTER; + + spin_unlock_bh(&adapter->mac_vlan_list_lock); + + iavf_send_pf_msg(adapter, VIRTCHNL_OP_DEL_VLAN, (u8 *)vvfl, len); + kfree(vvfl); +} + +/** + * iavf_set_promiscuous + * @adapter: adapter structure + * @flags: bitmask to control unicast/multicast promiscuous. + * + * Request that the PF enable promiscuous mode for our VSI. + **/ +void iavf_set_promiscuous(struct iavf_adapter *adapter, int flags) +{ + struct virtchnl_promisc_info vpi; + int promisc_all; + + if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) { + /* bail because we already have a command pending */ + dev_err(&adapter->pdev->dev, "Cannot set promiscuous mode, command %d pending\n", + adapter->current_op); + return; + } + + promisc_all = FLAG_VF_UNICAST_PROMISC | + FLAG_VF_MULTICAST_PROMISC; + if ((flags & promisc_all) == promisc_all) { + adapter->flags |= IAVF_FLAG_PROMISC_ON; + adapter->aq_required &= ~IAVF_FLAG_AQ_REQUEST_PROMISC; + dev_info(&adapter->pdev->dev, "Entering promiscuous mode\n"); + } + + if (flags & FLAG_VF_MULTICAST_PROMISC) { + adapter->flags |= IAVF_FLAG_ALLMULTI_ON; + adapter->aq_required &= ~IAVF_FLAG_AQ_REQUEST_ALLMULTI; + dev_info(&adapter->pdev->dev, "Entering multicast promiscuous mode\n"); + } + + if (!flags) { + adapter->flags &= ~(IAVF_FLAG_PROMISC_ON | + IAVF_FLAG_ALLMULTI_ON); + adapter->aq_required &= ~(IAVF_FLAG_AQ_RELEASE_PROMISC | + IAVF_FLAG_AQ_RELEASE_ALLMULTI); + dev_info(&adapter->pdev->dev, "Leaving promiscuous mode\n"); + } + + adapter->current_op = VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE; + vpi.vsi_id = adapter->vsi_res->vsi_id; + vpi.flags = flags; + iavf_send_pf_msg(adapter, VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE, + (u8 *)&vpi, sizeof(vpi)); +} + +/** + * iavf_request_stats + * @adapter: adapter structure + * + * Request VSI statistics from PF. 
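+ * Best-effort: if another command is pending the request is skipped
+ * quietly, and a send failure releases current_op so other commands
+ * can proceed.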
+ **/ +void iavf_request_stats(struct iavf_adapter *adapter) +{ + struct virtchnl_queue_select vqs; + + if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) { + /* no error message, this isn't crucial */ + return; + } + adapter->current_op = VIRTCHNL_OP_GET_STATS; + vqs.vsi_id = adapter->vsi_res->vsi_id; + /* queue maps are ignored for this message - only the vsi is used */ + if (iavf_send_pf_msg(adapter, VIRTCHNL_OP_GET_STATS, (u8 *)&vqs, + sizeof(vqs))) + /* if the request failed, don't lock out others */ + adapter->current_op = VIRTCHNL_OP_UNKNOWN; +} + +/** + * iavf_get_hena + * @adapter: adapter structure + * + * Request hash enable capabilities from PF + **/ +void iavf_get_hena(struct iavf_adapter *adapter) +{ + if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) { + /* bail because we already have a command pending */ + dev_err(&adapter->pdev->dev, "Cannot get RSS hash capabilities, command %d pending\n", + adapter->current_op); + return; + } + adapter->current_op = VIRTCHNL_OP_GET_RSS_HENA_CAPS; + adapter->aq_required &= ~IAVF_FLAG_AQ_GET_HENA; + iavf_send_pf_msg(adapter, VIRTCHNL_OP_GET_RSS_HENA_CAPS, NULL, 0); +} + +/** + * iavf_set_hena + * @adapter: adapter structure + * + * Request the PF to set our RSS hash capabilities + **/ +void iavf_set_hena(struct iavf_adapter *adapter) +{ + struct virtchnl_rss_hena vrh; + + if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) { + /* bail because we already have a command pending */ + dev_err(&adapter->pdev->dev, "Cannot set RSS hash enable, command %d pending\n", + adapter->current_op); + return; + } + vrh.hena = adapter->hena; + adapter->current_op = VIRTCHNL_OP_SET_RSS_HENA; + adapter->aq_required &= ~IAVF_FLAG_AQ_SET_HENA; + iavf_send_pf_msg(adapter, VIRTCHNL_OP_SET_RSS_HENA, (u8 *)&vrh, + sizeof(vrh)); +} + +/** + * iavf_set_rss_key + * @adapter: adapter structure + * + * Request the PF to set our RSS hash key + **/ +void iavf_set_rss_key(struct iavf_adapter *adapter) +{ + struct virtchnl_rss_key *vrk; + int len; + + if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) { + /* bail because we already have a command pending */ + dev_err(&adapter->pdev->dev, "Cannot set RSS key, command %d pending\n", + adapter->current_op); + return; + } + len = sizeof(struct virtchnl_rss_key) + + (adapter->rss_key_size * sizeof(u8)) - 1; + vrk = kzalloc(len, GFP_KERNEL); + if (!vrk) + return; + vrk->vsi_id = adapter->vsi.id; + vrk->key_len = adapter->rss_key_size; + memcpy(vrk->key, adapter->rss_key, adapter->rss_key_size); + + adapter->current_op = VIRTCHNL_OP_CONFIG_RSS_KEY; + adapter->aq_required &= ~IAVF_FLAG_AQ_SET_RSS_KEY; + iavf_send_pf_msg(adapter, VIRTCHNL_OP_CONFIG_RSS_KEY, (u8 *)vrk, len); + kfree(vrk); +} + +/** + * iavf_set_rss_lut + * @adapter: adapter structure + * + * Request the PF to set our RSS lookup table + **/ +void iavf_set_rss_lut(struct iavf_adapter *adapter) +{ + struct virtchnl_rss_lut *vrl; + int len; + + if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) { + /* bail because we already have a command pending */ + dev_err(&adapter->pdev->dev, "Cannot set RSS LUT, command %d pending\n", + adapter->current_op); + return; + } + len = sizeof(struct virtchnl_rss_lut) + + (adapter->rss_lut_size * sizeof(u8)) - 1; + vrl = kzalloc(len, GFP_KERNEL); + if (!vrl) + return; + vrl->vsi_id = adapter->vsi.id; + vrl->lut_entries = adapter->rss_lut_size; + memcpy(vrl->lut, adapter->rss_lut, adapter->rss_lut_size); + adapter->current_op = VIRTCHNL_OP_CONFIG_RSS_LUT; + adapter->aq_required &= ~IAVF_FLAG_AQ_SET_RSS_LUT; + iavf_send_pf_msg(adapter, 
VIRTCHNL_OP_CONFIG_RSS_LUT, (u8 *)vrl, len);
+ kfree(vrl);
+}
+
+/**
+ * iavf_enable_vlan_stripping
+ * @adapter: adapter structure
+ *
+ * Request VLAN header stripping to be enabled
+ **/
+void iavf_enable_vlan_stripping(struct iavf_adapter *adapter)
+{
+ if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) {
+ /* bail because we already have a command pending */
+ dev_err(&adapter->pdev->dev, "Cannot enable stripping, command %d pending\n",
+ adapter->current_op);
+ return;
+ }
+ adapter->current_op = VIRTCHNL_OP_ENABLE_VLAN_STRIPPING;
+ adapter->aq_required &= ~IAVF_FLAG_AQ_ENABLE_VLAN_STRIPPING;
+ iavf_send_pf_msg(adapter, VIRTCHNL_OP_ENABLE_VLAN_STRIPPING, NULL, 0);
+}
+
+/**
+ * iavf_disable_vlan_stripping
+ * @adapter: adapter structure
+ *
+ * Request VLAN header stripping to be disabled
+ **/
+void iavf_disable_vlan_stripping(struct iavf_adapter *adapter)
+{
+ if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) {
+ /* bail because we already have a command pending */
+ dev_err(&adapter->pdev->dev, "Cannot disable stripping, command %d pending\n",
+ adapter->current_op);
+ return;
+ }
+ adapter->current_op = VIRTCHNL_OP_DISABLE_VLAN_STRIPPING;
+ adapter->aq_required &= ~IAVF_FLAG_AQ_DISABLE_VLAN_STRIPPING;
+ iavf_send_pf_msg(adapter, VIRTCHNL_OP_DISABLE_VLAN_STRIPPING, NULL, 0);
+}
+
+/**
+ * iavf_print_link_message - print link up or down
+ * @adapter: adapter structure
+ *
+ * Log a message telling the world of our wondrous link status
+ */
+static void iavf_print_link_message(struct iavf_adapter *adapter)
+{
+ struct net_device *netdev = adapter->netdev;
+ char *speed = "Unknown ";
+
+ if (!adapter->link_up) {
+ netdev_info(netdev, "NIC Link is Down\n");
+ return;
+ }
+
+ switch (adapter->link_speed) {
+ case I40E_LINK_SPEED_40GB:
+ speed = "40 G";
+ break;
+ case I40E_LINK_SPEED_25GB:
+ speed = "25 G";
+ break;
+ case I40E_LINK_SPEED_20GB:
+ speed = "20 G";
+ break;
+ case I40E_LINK_SPEED_10GB:
+ speed = "10 G";
+ break;
+ case I40E_LINK_SPEED_1GB:
+ speed = "1000 M";
+ break;
+ case I40E_LINK_SPEED_100MB:
+ speed = "100 M";
+ break;
+ default:
+ break;
+ }
+
+ netdev_info(netdev, "NIC Link is Up %sbps Full Duplex\n", speed);
+}
+
+/**
+ * iavf_enable_channels
+ * @adapter: adapter structure
+ *
+ * Request that the PF enable channels as specified by
+ * the user via tc tool.
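+ * One virtchnl_channel_info entry is sent per TC, carrying the queue
+ * count, queue offset and max Tx rate configured for that TC.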
+ **/
+void iavf_enable_channels(struct iavf_adapter *adapter)
+{
+ struct virtchnl_tc_info *vti = NULL;
+ u16 len;
+ int i;
+
+ if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) {
+ /* bail because we already have a command pending */
+ dev_err(&adapter->pdev->dev, "Cannot configure mqprio, command %d pending\n",
+ adapter->current_op);
+ return;
+ }
+
+ len = (adapter->num_tc * sizeof(struct virtchnl_channel_info)) +
+ sizeof(struct virtchnl_tc_info);
+
+ vti = kzalloc(len, GFP_KERNEL);
+ if (!vti)
+ return;
+ vti->num_tc = adapter->num_tc;
+ for (i = 0; i < vti->num_tc; i++) {
+ vti->list[i].count = adapter->ch_config.ch_info[i].count;
+ vti->list[i].offset = adapter->ch_config.ch_info[i].offset;
+ vti->list[i].pad = 0;
+ vti->list[i].max_tx_rate =
+ adapter->ch_config.ch_info[i].max_tx_rate;
+ }
+
+ adapter->ch_config.state = __IAVF_TC_RUNNING;
+ adapter->flags |= IAVF_FLAG_REINIT_ITR_NEEDED;
+ adapter->current_op = VIRTCHNL_OP_ENABLE_CHANNELS;
+ adapter->aq_required &= ~IAVF_FLAG_AQ_ENABLE_CHANNELS;
+ iavf_send_pf_msg(adapter, VIRTCHNL_OP_ENABLE_CHANNELS, (u8 *)vti, len);
+ kfree(vti);
+}
+
+/**
+ * iavf_disable_channels
+ * @adapter: adapter structure
+ *
+ * Request that the PF disable channels that are configured
+ **/
+void iavf_disable_channels(struct iavf_adapter *adapter)
+{
+ if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) {
+ /* bail because we already have a command pending */
+ dev_err(&adapter->pdev->dev, "Cannot configure mqprio, command %d pending\n",
+ adapter->current_op);
+ return;
+ }
+
+ adapter->ch_config.state = __IAVF_TC_INVALID;
+ adapter->flags |= IAVF_FLAG_REINIT_ITR_NEEDED;
+ adapter->current_op = VIRTCHNL_OP_DISABLE_CHANNELS;
+ adapter->aq_required &= ~IAVF_FLAG_AQ_DISABLE_CHANNELS;
+ iavf_send_pf_msg(adapter, VIRTCHNL_OP_DISABLE_CHANNELS, NULL, 0);
+}
+
+/**
+ * iavf_print_cloud_filter
+ * @adapter: adapter structure
+ * @f: cloud filter to print
+ *
+ * Print the cloud filter
+ **/
+static void iavf_print_cloud_filter(struct iavf_adapter *adapter,
+ struct virtchnl_filter *f)
+{
+ switch (f->flow_type) {
+ case VIRTCHNL_TCP_V4_FLOW:
+ dev_info(&adapter->pdev->dev, "dst_mac: %pM src_mac: %pM vlan_id: %hu dst_ip: %pI4 src_ip %pI4 dst_port %hu src_port %hu\n",
+ &f->data.tcp_spec.dst_mac,
+ &f->data.tcp_spec.src_mac,
+ ntohs(f->data.tcp_spec.vlan_id),
+ &f->data.tcp_spec.dst_ip[0],
+ &f->data.tcp_spec.src_ip[0],
+ ntohs(f->data.tcp_spec.dst_port),
+ ntohs(f->data.tcp_spec.src_port));
+ break;
+ case VIRTCHNL_TCP_V6_FLOW:
+ dev_info(&adapter->pdev->dev, "dst_mac: %pM src_mac: %pM vlan_id: %hu dst_ip: %pI6 src_ip %pI6 dst_port %hu src_port %hu\n",
+ &f->data.tcp_spec.dst_mac,
+ &f->data.tcp_spec.src_mac,
+ ntohs(f->data.tcp_spec.vlan_id),
+ &f->data.tcp_spec.dst_ip,
+ &f->data.tcp_spec.src_ip,
+ ntohs(f->data.tcp_spec.dst_port),
+ ntohs(f->data.tcp_spec.src_port));
+ break;
+ }
+}
+
+/**
+ * iavf_add_cloud_filter
+ * @adapter: adapter structure
+ *
+ * Request that the PF add cloud filters as specified
+ * by the user via tc tool.
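+ * Each pending filter is sent in its own message and left in the
+ * __IAVF_CF_ADD_PENDING state until the PF replies.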
+ **/ +void iavf_add_cloud_filter(struct iavf_adapter *adapter) +{ + struct iavf_cloud_filter *cf; + struct virtchnl_filter *f; + int len = 0, count = 0; + + if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) { + /* bail because we already have a command pending */ + dev_err(&adapter->pdev->dev, "Cannot add cloud filter, command %d pending\n", + adapter->current_op); + return; + } + list_for_each_entry(cf, &adapter->cloud_filter_list, list) { + if (cf->add) { + count++; + break; + } + } + if (!count) { + adapter->aq_required &= ~IAVF_FLAG_AQ_ADD_CLOUD_FILTER; + return; + } + adapter->current_op = VIRTCHNL_OP_ADD_CLOUD_FILTER; + + len = sizeof(struct virtchnl_filter); + f = kzalloc(len, GFP_KERNEL); + if (!f) + return; + + list_for_each_entry(cf, &adapter->cloud_filter_list, list) { + if (cf->add) { + memcpy(f, &cf->f, sizeof(struct virtchnl_filter)); + cf->add = false; + cf->state = __IAVF_CF_ADD_PENDING; + iavf_send_pf_msg(adapter, VIRTCHNL_OP_ADD_CLOUD_FILTER, + (u8 *)f, len); + } + } + kfree(f); +} + +/** + * iavf_del_cloud_filter + * @adapter: adapter structure + * + * Request that the PF delete cloud filters as specified + * by the user via tc tool. + **/ +void iavf_del_cloud_filter(struct iavf_adapter *adapter) +{ + struct iavf_cloud_filter *cf, *cftmp; + struct virtchnl_filter *f; + int len = 0, count = 0; + + if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) { + /* bail because we already have a command pending */ + dev_err(&adapter->pdev->dev, "Cannot remove cloud filter, command %d pending\n", + adapter->current_op); + return; + } + list_for_each_entry(cf, &adapter->cloud_filter_list, list) { + if (cf->del) { + count++; + break; + } + } + if (!count) { + adapter->aq_required &= ~IAVF_FLAG_AQ_DEL_CLOUD_FILTER; + return; + } + adapter->current_op = VIRTCHNL_OP_DEL_CLOUD_FILTER; + + len = sizeof(struct virtchnl_filter); + f = kzalloc(len, GFP_KERNEL); + if (!f) + return; + + list_for_each_entry_safe(cf, cftmp, &adapter->cloud_filter_list, list) { + if (cf->del) { + memcpy(f, &cf->f, sizeof(struct virtchnl_filter)); + cf->del = false; + cf->state = __IAVF_CF_DEL_PENDING; + iavf_send_pf_msg(adapter, VIRTCHNL_OP_DEL_CLOUD_FILTER, + (u8 *)f, len); + } + } + kfree(f); +} + +/** + * iavf_request_reset + * @adapter: adapter structure + * + * Request that the PF reset this VF. No response is expected. + **/ +void iavf_request_reset(struct iavf_adapter *adapter) +{ + /* Don't check CURRENT_OP - this is always higher priority */ + iavf_send_pf_msg(adapter, VIRTCHNL_OP_RESET_VF, NULL, 0); + adapter->current_op = VIRTCHNL_OP_UNKNOWN; +} + +/** + * iavf_virtchnl_completion + * @adapter: adapter structure + * @v_opcode: opcode sent by PF + * @v_retval: retval sent by PF + * @msg: message sent by PF + * @msglen: message length + * + * Asynchronous completion function for admin queue messages. Rather than busy + * wait, we fire off our requests and assume that no errors will be returned. + * This function handles the reply messages. 
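+ * It also handles unsolicited VIRTCHNL_OP_EVENT messages from the PF,
+ * such as link change and reset warnings.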
+ **/
+void iavf_virtchnl_completion(struct iavf_adapter *adapter,
+ enum virtchnl_ops v_opcode, iavf_status v_retval,
+ u8 *msg, u16 msglen)
+{
+ struct net_device *netdev = adapter->netdev;
+
+ if (v_opcode == VIRTCHNL_OP_EVENT) {
+ struct virtchnl_pf_event *vpe =
+ (struct virtchnl_pf_event *)msg;
+ bool link_up = vpe->event_data.link_event.link_status;
+
+ switch (vpe->event) {
+ case VIRTCHNL_EVENT_LINK_CHANGE:
+ adapter->link_speed =
+ vpe->event_data.link_event.link_speed;
+
+ /* we've already got the right link status, bail */
+ if (adapter->link_up == link_up)
+ break;
+
+ if (link_up) {
+ /* If we get a link up message and start queues
+ * before our queues are configured it will
+ * trigger a TX hang. In that case, just ignore
+ * the link status message, we'll get another one
+ * after we enable queues and are actually prepared
+ * to send traffic.
+ */
+ if (adapter->state != __IAVF_RUNNING)
+ break;
+
+ /* For ADq enabled VF, we reconfigure VSIs and
+ * re-allocate queues. Hence wait till all
+ * queues are enabled.
+ */
+ if (adapter->flags &
+ IAVF_FLAG_QUEUES_DISABLED)
+ break;
+ }
+
+ adapter->link_up = link_up;
+ if (link_up) {
+ netif_tx_start_all_queues(netdev);
+ netif_carrier_on(netdev);
+ } else {
+ netif_tx_stop_all_queues(netdev);
+ netif_carrier_off(netdev);
+ }
+ iavf_print_link_message(adapter);
+ break;
+ case VIRTCHNL_EVENT_RESET_IMPENDING:
+ dev_info(&adapter->pdev->dev, "Reset warning received from the PF\n");
+ if (!(adapter->flags & IAVF_FLAG_RESET_PENDING)) {
+ adapter->flags |= IAVF_FLAG_RESET_PENDING;
+ dev_info(&adapter->pdev->dev, "Scheduling reset task\n");
+ schedule_work(&adapter->reset_task);
+ }
+ break;
+ default:
+ dev_err(&adapter->pdev->dev, "Unknown event %d from PF\n",
+ vpe->event);
+ break;
+ }
+ return;
+ }
+ if (v_retval) {
+ switch (v_opcode) {
+ case VIRTCHNL_OP_ADD_VLAN:
+ dev_err(&adapter->pdev->dev, "Failed to add VLAN filter, error %s\n",
+ iavf_stat_str(&adapter->hw, v_retval));
+ break;
+ case VIRTCHNL_OP_ADD_ETH_ADDR:
+ dev_err(&adapter->pdev->dev, "Failed to add MAC filter, error %s\n",
+ iavf_stat_str(&adapter->hw, v_retval));
+ break;
+ case VIRTCHNL_OP_DEL_VLAN:
+ dev_err(&adapter->pdev->dev, "Failed to delete VLAN filter, error %s\n",
+ iavf_stat_str(&adapter->hw, v_retval));
+ break;
+ case VIRTCHNL_OP_DEL_ETH_ADDR:
+ dev_err(&adapter->pdev->dev, "Failed to delete MAC filter, error %s\n",
+ iavf_stat_str(&adapter->hw, v_retval));
+ break;
+ case VIRTCHNL_OP_ENABLE_CHANNELS:
+ dev_err(&adapter->pdev->dev, "Failed to configure queue channels, error %s\n",
+ iavf_stat_str(&adapter->hw, v_retval));
+ adapter->flags &= ~IAVF_FLAG_REINIT_ITR_NEEDED;
+ adapter->ch_config.state = __IAVF_TC_INVALID;
+ netdev_reset_tc(netdev);
+ netif_tx_start_all_queues(netdev);
+ break;
+ case VIRTCHNL_OP_DISABLE_CHANNELS:
+ dev_err(&adapter->pdev->dev, "Failed to disable queue channels, error %s\n",
+ iavf_stat_str(&adapter->hw, v_retval));
+ adapter->flags &= ~IAVF_FLAG_REINIT_ITR_NEEDED;
+ adapter->ch_config.state = __IAVF_TC_RUNNING;
+ netif_tx_start_all_queues(netdev);
+ break;
+ case VIRTCHNL_OP_ADD_CLOUD_FILTER: {
+ struct iavf_cloud_filter *cf, *cftmp;
+
+ list_for_each_entry_safe(cf, cftmp,
+ &adapter->cloud_filter_list,
+ list) {
+ if (cf->state == __IAVF_CF_ADD_PENDING) {
+ cf->state = __IAVF_CF_INVALID;
+ dev_info(&adapter->pdev->dev, "Failed to add cloud filter, error %s\n",
+ iavf_stat_str(&adapter->hw,
+ v_retval));
+ iavf_print_cloud_filter(adapter,
+ &cf->f);
+ list_del(&cf->list);
+ kfree(cf);
+ adapter->num_cloud_filters--;
+ }
+ }
+ 
} + break; + case VIRTCHNL_OP_DEL_CLOUD_FILTER: { + struct iavf_cloud_filter *cf; + + list_for_each_entry(cf, &adapter->cloud_filter_list, + list) { + if (cf->state == __IAVF_CF_DEL_PENDING) { + cf->state = __IAVF_CF_ACTIVE; + dev_info(&adapter->pdev->dev, "Failed to del cloud filter, error %s\n", + iavf_stat_str(&adapter->hw, + v_retval)); + iavf_print_cloud_filter(adapter, + &cf->f); + } + } + } + break; + default: + dev_err(&adapter->pdev->dev, "PF returned error %d (%s) to our request %d\n", + v_retval, iavf_stat_str(&adapter->hw, v_retval), + v_opcode); + } + } + switch (v_opcode) { + case VIRTCHNL_OP_GET_STATS: { + struct i40e_eth_stats *stats = + (struct i40e_eth_stats *)msg; + netdev->stats.rx_packets = stats->rx_unicast + + stats->rx_multicast + + stats->rx_broadcast; + netdev->stats.tx_packets = stats->tx_unicast + + stats->tx_multicast + + stats->tx_broadcast; + netdev->stats.rx_bytes = stats->rx_bytes; + netdev->stats.tx_bytes = stats->tx_bytes; + netdev->stats.tx_errors = stats->tx_errors; + netdev->stats.rx_dropped = stats->rx_discards; + netdev->stats.tx_dropped = stats->tx_discards; + adapter->current_stats = *stats; + } + break; + case VIRTCHNL_OP_GET_VF_RESOURCES: { + u16 len = sizeof(struct virtchnl_vf_resource) + + I40E_MAX_VF_VSI * + sizeof(struct virtchnl_vsi_resource); + memcpy(adapter->vf_res, msg, min(msglen, len)); + iavf_validate_num_queues(adapter); + iavf_vf_parse_hw_config(&adapter->hw, adapter->vf_res); + if (is_zero_ether_addr(adapter->hw.mac.addr)) { + /* restore current mac address */ + ether_addr_copy(adapter->hw.mac.addr, netdev->dev_addr); + } else { + /* refresh current mac address if changed */ + ether_addr_copy(netdev->dev_addr, adapter->hw.mac.addr); + ether_addr_copy(netdev->perm_addr, + adapter->hw.mac.addr); + } + iavf_process_config(adapter); + } + break; + case VIRTCHNL_OP_ENABLE_QUEUES: + /* enable transmits */ + iavf_irq_enable(adapter, true); + adapter->flags &= ~IAVF_FLAG_QUEUES_DISABLED; + break; + case VIRTCHNL_OP_DISABLE_QUEUES: + iavf_free_all_tx_resources(adapter); + iavf_free_all_rx_resources(adapter); + if (adapter->state == __IAVF_DOWN_PENDING) { + adapter->state = __IAVF_DOWN; + wake_up(&adapter->down_waitqueue); + } + break; + case VIRTCHNL_OP_VERSION: + case VIRTCHNL_OP_CONFIG_IRQ_MAP: + /* Don't display an error if we get these out of sequence. + * If the firmware needed to get kicked, we'll get these and + * it's no problem. + */ + if (v_opcode != adapter->current_op) + return; + break; + case VIRTCHNL_OP_IWARP: + /* Gobble zero-length replies from the PF. They indicate that + * a previous message was received OK, and the client doesn't + * care about that. 
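+ * Non-zero-length replies are forwarded to the client if one is
+ * registered.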
+ */ + if (msglen && CLIENT_ENABLED(adapter)) + iavf_notify_client_message(&adapter->vsi, msg, msglen); + break; + + case VIRTCHNL_OP_CONFIG_IWARP_IRQ_MAP: + adapter->client_pending &= + ~(BIT(VIRTCHNL_OP_CONFIG_IWARP_IRQ_MAP)); + break; + case VIRTCHNL_OP_GET_RSS_HENA_CAPS: { + struct virtchnl_rss_hena *vrh = (struct virtchnl_rss_hena *)msg; + + if (msglen == sizeof(*vrh)) + adapter->hena = vrh->hena; + else + dev_warn(&adapter->pdev->dev, + "Invalid message %d from PF\n", v_opcode); + } + break; + case VIRTCHNL_OP_REQUEST_QUEUES: { + struct virtchnl_vf_res_request *vfres = + (struct virtchnl_vf_res_request *)msg; + + if (vfres->num_queue_pairs != adapter->num_req_queues) { + dev_info(&adapter->pdev->dev, + "Requested %d queues, PF can support %d\n", + adapter->num_req_queues, + vfres->num_queue_pairs); + adapter->num_req_queues = 0; + adapter->flags &= ~IAVF_FLAG_REINIT_ITR_NEEDED; + } + } + break; + case VIRTCHNL_OP_ADD_CLOUD_FILTER: { + struct iavf_cloud_filter *cf; + + list_for_each_entry(cf, &adapter->cloud_filter_list, list) { + if (cf->state == __IAVF_CF_ADD_PENDING) + cf->state = __IAVF_CF_ACTIVE; + } + } + break; + case VIRTCHNL_OP_DEL_CLOUD_FILTER: { + struct iavf_cloud_filter *cf, *cftmp; + + list_for_each_entry_safe(cf, cftmp, &adapter->cloud_filter_list, + list) { + if (cf->state == __IAVF_CF_DEL_PENDING) { + cf->state = __IAVF_CF_INVALID; + list_del(&cf->list); + kfree(cf); + adapter->num_cloud_filters--; + } + } + } + break; + default: + if (adapter->current_op && (v_opcode != adapter->current_op)) + dev_warn(&adapter->pdev->dev, "Expected response %d from PF, received %d\n", + adapter->current_op, v_opcode); + break; + } /* switch v_opcode */ + adapter->current_op = VIRTCHNL_OP_UNKNOWN; +}