tcp: read multiple skbs in tcp_read_skb()
author Cong Wang <cong.wang@bytedance.com>
Mon, 12 Sep 2022 17:35:53 +0000 (10:35 -0700)
committer Paolo Abeni <pabeni@redhat.com>
Tue, 20 Sep 2022 12:47:21 +0000 (14:47 +0200)
Before we switched to ->read_skb(), ->read_sock() was passed with
desc.count=1, which technically indicates we only read one skb per
->sk_data_ready() call. However, for TCP, this is not true.

TCP at least has sk_rcvlowat, which intentionally holds skbs in the
receive queue until this watermark is reached. This means that when
->sk_data_ready() is invoked there could be multiple skbs in the
queue; therefore we have to read multiple skbs in tcp_read_skb()
instead of one.

Fixes: 965b57b469a5 ("net: Introduce a new proto_ops ->read_skb()")
Reported-by: Peilin Ye <peilin.ye@bytedance.com>
Cc: John Fastabend <john.fastabend@gmail.com>
Cc: Jakub Sitnicki <jakub@cloudflare.com>
Cc: Eric Dumazet <edumazet@google.com>
Signed-off-by: Cong Wang <cong.wang@bytedance.com>
Link: https://lore.kernel.org/r/20220912173553.235838-1-xiyou.wangcong@gmail.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
net/ipv4/tcp.c

index 3488388eea5dd0d4c5fa03c1b4ec2bcaf30a1120..e373dde1f46f7b286c01aef022117cb10ba64e45 100644 (file)
@@ -1761,19 +1761,28 @@ int tcp_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
        if (sk->sk_state == TCP_LISTEN)
                return -ENOTCONN;
 
-       skb = tcp_recv_skb(sk, seq, &offset);
-       if (!skb)
-               return 0;
+       while ((skb = tcp_recv_skb(sk, seq, &offset)) != NULL) {
+               u8 tcp_flags;
+               int used;
 
-       __skb_unlink(skb, &sk->sk_receive_queue);
-       WARN_ON_ONCE(!skb_set_owner_sk_safe(skb, sk));
-       copied = recv_actor(sk, skb);
-       if (copied >= 0) {
-               seq += copied;
-               if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
+               __skb_unlink(skb, &sk->sk_receive_queue);
+               WARN_ON_ONCE(!skb_set_owner_sk_safe(skb, sk));
+               tcp_flags = TCP_SKB_CB(skb)->tcp_flags;
+               used = recv_actor(sk, skb);
+               consume_skb(skb);
+               if (used < 0) {
+                       if (!copied)
+                               copied = used;
+                       break;
+               }
+               seq += used;
+               copied += used;
+
+               if (tcp_flags & TCPHDR_FIN) {
                        ++seq;
+                       break;
+               }
        }
-       consume_skb(skb);
        WRITE_ONCE(tp->copied_seq, seq);
 
        tcp_rcv_space_adjust(sk);