__napi_alloc_skb() is napi_alloc_skb() with the added flexibility
of choosing gfp_mask. This is a NAPI function, so GFP_ATOMIC is
implied. The only practical choice the caller has is whether to
set __GFP_NOWARN. But that's a false choice, too: allocation failures
in atomic context will happen, and printing warnings in the logs,
effectively for a packet drop, is both too much and very likely
non-actionable.
This leads me to the conclusion that most uses of napi_alloc_skb()
are simply misguided and should use __GFP_NOWARN in the first
place. We also have a "standard" way of reporting allocation
failures via the queue stat API (qstats::rx-alloc-fail).
The direct motivation for this patch is that one of the drivers
used at Meta calls napi_alloc_skb() (so, prior to this patch, without
__GFP_NOWARN), and the resulting OOM warning is the top networking
warning in our fleet.
Reviewed-by: Alexander Lobakin <aleksander.lobakin@intel.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Link: https://lore.kernel.org/r/20240327040213.3153864-1-kuba@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
The network stack uses two separate caches per CPU to handle fragment
allocation. The netdev_alloc_cache is used by callers making use of the
netdev_alloc_frag and __netdev_alloc_skb calls. The napi_alloc_cache is
-used by callers of the __napi_alloc_frag and __napi_alloc_skb calls. The
+used by callers of the __napi_alloc_frag and napi_alloc_skb calls. The
main difference between these two calls is the context in which they may be
called. The "netdev" prefixed functions are usable in any context as these
functions will disable interrupts, while the "napi" prefixed functions are
网络堆栈在每个CPU使用两个独立的缓存来处理碎片分配。netdev_alloc_cache被使用
netdev_alloc_frag和__netdev_alloc_skb调用的调用者使用。napi_alloc_cache
-被调用__napi_alloc_frag和__napi_alloc_skb的调用者使用。这两个调用的主要区别是
+被调用__napi_alloc_frag和napi_alloc_skb的调用者使用。这两个调用的主要区别是
它们可能被调用的环境。“netdev” 前缀的函数可以在任何上下文中使用,因为这些函数
将禁用中断,而 ”napi“ 前缀的函数只可以在softirq上下文中使用。
*/
/* allocate a skb to store the frags */
- skb = __napi_alloc_skb(&rx_ring->q_vector->napi,
- I40E_RX_HDR_SIZE,
- GFP_ATOMIC | __GFP_NOWARN);
+ skb = napi_alloc_skb(&rx_ring->q_vector->napi, I40E_RX_HDR_SIZE);
if (unlikely(!skb))
return NULL;
net_prefetch(xdp->data_meta);
/* allocate a skb to store the frags */
- skb = __napi_alloc_skb(&rx_ring->q_vector->napi, totalsize,
- GFP_ATOMIC | __GFP_NOWARN);
+ skb = napi_alloc_skb(&rx_ring->q_vector->napi, totalsize);
if (unlikely(!skb))
goto out;
net_prefetch(va);
/* allocate a skb to store the frags */
- skb = __napi_alloc_skb(&rx_ring->q_vector->napi,
- IAVF_RX_HDR_SIZE,
- GFP_ATOMIC | __GFP_NOWARN);
+ skb = napi_alloc_skb(&rx_ring->q_vector->napi, IAVF_RX_HDR_SIZE);
if (unlikely(!skb))
return NULL;
}
/* allocate a skb to store the frags */
- skb = __napi_alloc_skb(&rx_ring->q_vector->napi, ICE_RX_HDR_SIZE,
- GFP_ATOMIC | __GFP_NOWARN);
+ skb = napi_alloc_skb(&rx_ring->q_vector->napi, ICE_RX_HDR_SIZE);
if (unlikely(!skb))
return NULL;
}
net_prefetch(xdp->data_meta);
- skb = __napi_alloc_skb(&rx_ring->q_vector->napi, totalsize,
- GFP_ATOMIC | __GFP_NOWARN);
+ skb = napi_alloc_skb(&rx_ring->q_vector->napi, totalsize);
if (unlikely(!skb))
return NULL;
/* prefetch first cache line of first page */
net_prefetch(va);
/* allocate a skb to store the frags */
- skb = __napi_alloc_skb(&rxq->q_vector->napi, IDPF_RX_HDR_SIZE,
- GFP_ATOMIC);
+ skb = napi_alloc_skb(&rxq->q_vector->napi, IDPF_RX_HDR_SIZE);
if (unlikely(!skb)) {
idpf_rx_put_page(rx_buf);
struct sk_buff *skb;
/* allocate a skb to store the frags */
- skb = __napi_alloc_skb(&rxq->q_vector->napi, size, GFP_ATOMIC);
+ skb = napi_alloc_skb(&rxq->q_vector->napi, size);
if (unlikely(!skb))
return NULL;
net_prefetch(xdp->data_meta);
- skb = __napi_alloc_skb(&ring->q_vector->napi, totalsize,
- GFP_ATOMIC | __GFP_NOWARN);
+ skb = napi_alloc_skb(&ring->q_vector->napi, totalsize);
if (unlikely(!skb))
return NULL;
net_prefetch(xdp->data_meta);
/* allocate a skb to store the frags */
- skb = __napi_alloc_skb(&rx_ring->q_vector->napi, totalsize,
- GFP_ATOMIC | __GFP_NOWARN);
+ skb = napi_alloc_skb(&rx_ring->q_vector->napi, totalsize);
if (unlikely(!skb))
return NULL;
unsigned int datasize = xdp->data_end - xdp->data;
struct sk_buff *skb;
- skb = __napi_alloc_skb(&ch->rxtx_napi,
- xdp->data_end - xdp->data_hard_start,
- GFP_ATOMIC | __GFP_NOWARN);
+ skb = napi_alloc_skb(&ch->rxtx_napi,
+ xdp->data_end - xdp->data_hard_start);
if (unlikely(!skb))
return NULL;
return __napi_alloc_frag_align(fragsz, -align);
}
-struct sk_buff *__napi_alloc_skb(struct napi_struct *napi,
- unsigned int length, gfp_t gfp_mask);
-static inline struct sk_buff *napi_alloc_skb(struct napi_struct *napi,
- unsigned int length)
-{
- return __napi_alloc_skb(napi, length, GFP_ATOMIC);
-}
+struct sk_buff *napi_alloc_skb(struct napi_struct *napi, unsigned int length);
void napi_consume_skb(struct sk_buff *skb, int budget);
void napi_skb_free_stolen_head(struct sk_buff *skb);
EXPORT_SYMBOL(__netdev_alloc_skb);
/**
- * __napi_alloc_skb - allocate skbuff for rx in a specific NAPI instance
+ * napi_alloc_skb - allocate skbuff for rx in a specific NAPI instance
* @napi: napi instance this buffer was allocated for
* @len: length to allocate
- * @gfp_mask: get_free_pages mask, passed to alloc_skb and alloc_pages
*
* Allocate a new sk_buff for use in NAPI receive. This buffer will
* attempt to allocate the head from a special reserved region used
*
* %NULL is returned if there is no free memory.
*/
-struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len,
- gfp_t gfp_mask)
+struct sk_buff *napi_alloc_skb(struct napi_struct *napi, unsigned int len)
{
+ gfp_t gfp_mask = GFP_ATOMIC | __GFP_NOWARN;
struct napi_alloc_cache *nc;
struct sk_buff *skb;
bool pfmemalloc;
skb_fail:
return skb;
}
-EXPORT_SYMBOL(__napi_alloc_skb);
+EXPORT_SYMBOL(napi_alloc_skb);
void skb_add_rx_frag_netmem(struct sk_buff *skb, int i, netmem_ref netmem,
int off, int size, unsigned int truesize)