bpf, cpumap: Bulk skb using netif_receive_skb_list
author     Lorenzo Bianconi <lorenzo@kernel.org>
           Fri, 23 Apr 2021 09:27:27 +0000 (11:27 +0200)
committer  Daniel Borkmann <daniel@iogearbox.net>
           Tue, 27 Apr 2021 15:13:49 +0000 (17:13 +0200)
Rely on the netif_receive_skb_list routine to send skbs converted from
xdp_frames in cpu_map_kthread_run into the networking stack, in order
to improve i-cache usage.
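
For illustration, a minimal sketch of the batching pattern the patch
adopts (a standalone sketch, not the exact cpumap loop;
build_skb_for() is a hypothetical stand-in for
__xdp_build_skb_from_frame()):

  LIST_HEAD(list);
  int i;

  for (i = 0; i < n; i++) {
          struct sk_buff *skb = build_skb_for(frames[i]); /* hypothetical */

          if (unlikely(!skb))
                  continue;
          list_add_tail(&skb->list, &list);
  }
  /* One entry into the stack for the whole batch instead of one
   * netif_receive_skb_core() call per packet; processing the list in
   * a single pass is what improves i-cache usage.
   */
  netif_receive_skb_list(&list);
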
The proposed patch has been tested by running the xdp_redirect_cpu bpf
sample available in the kernel tree, which redirects UDP frames from
the ixgbe driver to a cpumap entry and then into the networking stack.
UDP frames are generated using pktgen and are eventually discarded by
the UDP layer.

$ xdp_redirect_cpu --cpu <cpu> --progname xdp_cpu_map0 --dev <eth>

bpf-next: ~2.35Mpps
bpf-next + cpumap skb-list: ~2.72Mpps

Rename the drops counter to kmem_alloc_drops since it now reports only
kmem_cache_alloc_bulk failures; netif_receive_skb_list() returns void,
so the per-packet NET_RX_DROP verdict previously provided by
netif_receive_skb_core() is no longer available.
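
For context, a condensed sketch of the accounting the renamed counter
covers (mirroring the patched batch loop below; kmem_cache_alloc_bulk()
is all-or-nothing, returning either the full count or 0, so there is no
partial-batch case to handle):

  m = kmem_cache_alloc_bulk(skbuff_head_cache, gfp, nframes, skbs);
  if (unlikely(m == 0)) {
          /* Poison every slot so that __xdp_build_skb_from_frame()
           * returns NULL and the frame is recycled via
           * xdp_return_frame(); the whole batch is accounted as
           * kmem_alloc_drops.
           */
          for (i = 0; i < nframes; i++)
                  skbs[i] = NULL;
          kmem_alloc_drops += nframes;
  }
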

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Jesper Dangaard Brouer <brouer@redhat.com>
Link: https://lore.kernel.org/bpf/c729f83e5d7482d9329e0f165bdbe5adcefd1510.1619169700.git.lorenzo@kernel.org
diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c
index 0cf2791d5099b90553a785ecfa18bf9af3f4081c..5dd3e866599a7a74b6d902d87299745c011cfcb1 100644
--- a/kernel/bpf/cpumap.c
+++ b/kernel/bpf/cpumap.c
@@ -27,7 +27,7 @@
 #include <linux/capability.h>
 #include <trace/events/xdp.h>
 
-#include <linux/netdevice.h>   /* netif_receive_skb_core */
+#include <linux/netdevice.h>   /* netif_receive_skb_list */
 #include <linux/etherdevice.h> /* eth_type_trans */
 
 /* General idea: XDP packets getting XDP redirected to another CPU,
@@ -252,11 +252,12 @@ static int cpu_map_kthread_run(void *data)
         */
        while (!kthread_should_stop() || !__ptr_ring_empty(rcpu->queue)) {
                struct xdp_cpumap_stats stats = {}; /* zero stats */
+               unsigned int kmem_alloc_drops = 0, sched = 0;
                gfp_t gfp = __GFP_ZERO | GFP_ATOMIC;
-               unsigned int drops = 0, sched = 0;
                void *frames[CPUMAP_BATCH];
                void *skbs[CPUMAP_BATCH];
                int i, n, m, nframes;
+               LIST_HEAD(list);
 
                /* Release CPU reschedule checks */
                if (__ptr_ring_empty(rcpu->queue)) {
@@ -297,7 +298,7 @@ static int cpu_map_kthread_run(void *data)
                        if (unlikely(m == 0)) {
                                for (i = 0; i < nframes; i++)
                                        skbs[i] = NULL; /* effect: xdp_return_frame */
-                               drops += nframes;
+                               kmem_alloc_drops += nframes;
                        }
                }
 
@@ -305,7 +306,6 @@ static int cpu_map_kthread_run(void *data)
                for (i = 0; i < nframes; i++) {
                        struct xdp_frame *xdpf = frames[i];
                        struct sk_buff *skb = skbs[i];
-                       int ret;
 
                        skb = __xdp_build_skb_from_frame(xdpf, skb,
                                                         xdpf->dev_rx);
@@ -314,13 +314,13 @@ static int cpu_map_kthread_run(void *data)
                                continue;
                        }
 
-                       /* Inject into network stack */
-                       ret = netif_receive_skb_core(skb);
-                       if (ret == NET_RX_DROP)
-                               drops++;
+                       list_add_tail(&skb->list, &list);
                }
+               netif_receive_skb_list(&list);
+
                /* Feedback loop via tracepoint */
-               trace_xdp_cpumap_kthread(rcpu->map_id, n, drops, sched, &stats);
+               trace_xdp_cpumap_kthread(rcpu->map_id, n, kmem_alloc_drops,
+                                        sched, &stats);
 
                local_bh_enable(); /* resched point, may call do_softirq() */
        }