ipv4, ipv6: Use splice_eof() to flush
author David Howells <dhowells@redhat.com>
Wed, 7 Jun 2023 18:19:13 +0000 (19:19 +0100)
committer Jakub Kicinski <kuba@kernel.org>
Fri, 9 Jun 2023 02:40:30 +0000 (19:40 -0700)
Allow splice to undo the effects of MSG_MORE after prematurely ending a
splice/sendfile due to getting an EOF condition (->splice_read() returned
0) after splice had called sendmsg() with MSG_MORE set when the user didn't
set MSG_MORE.

For UDP, a pending packet will not be emitted if the socket is closed
before it is flushed; with this change, it will be flushed by ->splice_eof().

For TCP, it's not clear that MSG_MORE is actually effective.

Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
Link: https://lore.kernel.org/r/CAHk-=wh=V579PDYvkpnTobCLGczbgxpMgGmmhqiTyE34Cpi5Gg@mail.gmail.com/
Signed-off-by: David Howells <dhowells@redhat.com>
cc: Kuniyuki Iwashima <kuniyu@amazon.com>
cc: Willem de Bruijn <willemdebruijn.kernel@gmail.com>
cc: David Ahern <dsahern@kernel.org>
cc: Jens Axboe <axboe@kernel.dk>
cc: Matthew Wilcox <willy@infradead.org>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
include/net/inet_common.h
include/net/tcp.h
include/net/udp.h
net/ipv4/af_inet.c
net/ipv4/tcp.c
net/ipv4/tcp_ipv4.c
net/ipv4/udp.c
net/ipv6/af_inet6.c
net/ipv6/tcp_ipv6.c
net/ipv6/udp.c

index 77f4b0ef5b9277471f556a9e2972423fb889b752..a75333342c4ec5da036f4bcbb04bbe567b0e53db 100644 (file)
@@ -35,6 +35,7 @@ void __inet_accept(struct socket *sock, struct socket *newsock,
                   struct sock *newsk);
 int inet_send_prepare(struct sock *sk);
 int inet_sendmsg(struct socket *sock, struct msghdr *msg, size_t size);
+void inet_splice_eof(struct socket *sock);
 ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset,
                      size_t size, int flags);
 int inet_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
index 68990a8f556af1461da1c7af617580dd424c3a80..49611af31bb7693cbc18cba61fe8ae2fd1d8695f 100644 (file)
@@ -327,6 +327,7 @@ int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size);
 int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size);
 int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, int *copied,
                         size_t size, struct ubuf_info *uarg);
+void tcp_splice_eof(struct socket *sock);
 int tcp_sendpage(struct sock *sk, struct page *page, int offset, size_t size,
                 int flags);
 int tcp_sendpage_locked(struct sock *sk, struct page *page, int offset,
index 5cad44318d71c11e179cecc748b0de6a66cf6fb6..4ed0b47c5582505ae021cd698971c77c08be2747 100644 (file)
@@ -278,6 +278,7 @@ int udp_get_port(struct sock *sk, unsigned short snum,
 int udp_err(struct sk_buff *, u32);
 int udp_abort(struct sock *sk, int err);
 int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len);
+void udp_splice_eof(struct socket *sock);
 int udp_push_pending_frames(struct sock *sk);
 void udp_flush_pending_frames(struct sock *sk);
 int udp_cmsg_send(struct sock *sk, struct msghdr *msg, u16 *gso_size);
index b5735b3551cfcd7cc7adbd3b3379931ce688d7ee..fd233c4195acc2f3148253576bcede80299495f4 100644 (file)
@@ -831,6 +831,21 @@ int inet_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
 }
 EXPORT_SYMBOL(inet_sendmsg);
 
+void inet_splice_eof(struct socket *sock)
+{
+       const struct proto *prot;
+       struct sock *sk = sock->sk;
+
+       if (unlikely(inet_send_prepare(sk)))    /* non-zero result: give up */
+               return;
+       /* Relay the splice EOF to the protocol's ->splice_eof() hook, if set. */
+       /* IPV6_ADDRFORM can change sk->sk_prot under us. */
+       prot = READ_ONCE(sk->sk_prot);
+       if (prot->splice_eof)
+               prot->splice_eof(sock);
+}
+EXPORT_SYMBOL_GPL(inet_splice_eof);
+
 ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset,
                      size_t size, int flags)
 {
@@ -1050,6 +1065,7 @@ const struct proto_ops inet_stream_ops = {
 #ifdef CONFIG_MMU
        .mmap              = tcp_mmap,
 #endif
+       .splice_eof        = inet_splice_eof,
        .sendpage          = inet_sendpage,
        .splice_read       = tcp_splice_read,
        .read_sock         = tcp_read_sock,
@@ -1084,6 +1100,7 @@ const struct proto_ops inet_dgram_ops = {
        .read_skb          = udp_read_skb,
        .recvmsg           = inet_recvmsg,
        .mmap              = sock_no_mmap,
+       .splice_eof        = inet_splice_eof,
        .sendpage          = inet_sendpage,
        .set_peek_off      = sk_set_peek_off,
 #ifdef CONFIG_COMPAT
@@ -1115,6 +1132,7 @@ static const struct proto_ops inet_sockraw_ops = {
        .sendmsg           = inet_sendmsg,
        .recvmsg           = inet_recvmsg,
        .mmap              = sock_no_mmap,
+       .splice_eof        = inet_splice_eof,
        .sendpage          = inet_sendpage,
 #ifdef CONFIG_COMPAT
        .compat_ioctl      = inet_compat_ioctl,
index 53b7751b68e1652d81e4af89658e2e9f7284d828..09f03221a6f1597114162360a4c1aeed0758812d 100644 (file)
@@ -1371,6 +1371,22 @@ int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
 }
 EXPORT_SYMBOL(tcp_sendmsg);
 
+void tcp_splice_eof(struct socket *sock)
+{
+       struct sock *sk = sock->sk;
+       struct tcp_sock *tp = tcp_sk(sk);
+       int mss_now, size_goal;
+
+       if (!tcp_write_queue_tail(sk))  /* checked without the socket lock */
+               return;
+       /* Push under the socket lock; tcp_push()'s 0 is presumably "no MSG_MORE" - TODO confirm. */
+       lock_sock(sk);
+       mss_now = tcp_send_mss(sk, &size_goal, 0);
+       tcp_push(sk, 0, mss_now, tp->nonagle, size_goal);
+       release_sock(sk);
+}
+EXPORT_SYMBOL_GPL(tcp_splice_eof);
+
 /*
  *     Handle reading urgent data. BSD has very simple semantics for
  *     this, no blocking and very strange errors 8)
index 53e9ce2f05bb6e63b8c86bf830f860959e0e29ca..84a5d557dc1a1de16644ac6a14bc1ab70e1292d7 100644 (file)
@@ -3116,6 +3116,7 @@ struct proto tcp_prot = {
        .keepalive              = tcp_set_keepalive,
        .recvmsg                = tcp_recvmsg,
        .sendmsg                = tcp_sendmsg,
+       .splice_eof             = tcp_splice_eof,
        .sendpage               = tcp_sendpage,
        .backlog_rcv            = tcp_v4_do_rcv,
        .release_cb             = tcp_release_cb,
index fd3dae081f3a6ca22d9ba574d83f9a1ba1563698..df5e407286d7c43c926c9947fb0cd27ec69f54fc 100644 (file)
@@ -1324,6 +1324,21 @@ do_confirm:
 }
 EXPORT_SYMBOL(udp_sendmsg);
 
+void udp_splice_eof(struct socket *sock)
+{
+       struct sock *sk = sock->sk;
+       struct udp_sock *up = udp_sk(sk);
+
+       if (!READ_ONCE(up->pending) || READ_ONCE(up->corkflag)) /* lockless peek */
+               return;
+       /* Take the lock, re-check, then flush the packet MSG_MORE left pending. */
+       lock_sock(sk);
+       if (up->pending && !READ_ONCE(up->corkflag))
+               udp_push_pending_frames(sk);
+       release_sock(sk);
+}
+EXPORT_SYMBOL_GPL(udp_splice_eof);
+
 int udp_sendpage(struct sock *sk, struct page *page, int offset,
                 size_t size, int flags)
 {
@@ -2918,6 +2933,7 @@ struct proto udp_prot = {
        .getsockopt             = udp_getsockopt,
        .sendmsg                = udp_sendmsg,
        .recvmsg                = udp_recvmsg,
+       .splice_eof             = udp_splice_eof,
        .sendpage               = udp_sendpage,
        .release_cb             = ip4_datagram_release_cb,
        .hash                   = udp_lib_hash,
index 2bbf13216a3dd4f1a6d03220d9bb210a39e14a10..564942bee0679bbad15a838aebd1625f438d6cef 100644 (file)
@@ -695,6 +695,7 @@ const struct proto_ops inet6_stream_ops = {
 #ifdef CONFIG_MMU
        .mmap              = tcp_mmap,
 #endif
+       .splice_eof        = inet_splice_eof,
        .sendpage          = inet_sendpage,
        .sendmsg_locked    = tcp_sendmsg_locked,
        .sendpage_locked   = tcp_sendpage_locked,
index d657713d1c71df880fccfbee891039444a53e750..c17c8ff94b7971ad745ad8dd727cd2bba59fb2a1 100644 (file)
@@ -2150,6 +2150,7 @@ struct proto tcpv6_prot = {
        .keepalive              = tcp_set_keepalive,
        .recvmsg                = tcp_recvmsg,
        .sendmsg                = tcp_sendmsg,
+       .splice_eof             = tcp_splice_eof,
        .sendpage               = tcp_sendpage,
        .backlog_rcv            = tcp_v6_do_rcv,
        .release_cb             = tcp_release_cb,
index e5a337e6b97050d4be2e5b9d15678760679b8516..317b01c9bc39f9283c2e81ceaaecd26bc3532b80 100644 (file)
@@ -1653,6 +1653,20 @@ do_confirm:
 }
 EXPORT_SYMBOL(udpv6_sendmsg);
 
+static void udpv6_splice_eof(struct socket *sock)
+{
+       struct sock *sk = sock->sk;
+       struct udp_sock *up = udp_sk(sk);
+
+       if (!READ_ONCE(up->pending) || READ_ONCE(up->corkflag)) /* lockless peek */
+               return;
+       /* Take the lock, re-check, then flush the packet MSG_MORE left pending. */
+       lock_sock(sk);
+       if (up->pending && !READ_ONCE(up->corkflag))
+               udp_v6_push_pending_frames(sk);
+       release_sock(sk);
+}
+
 void udpv6_destroy_sock(struct sock *sk)
 {
        struct udp_sock *up = udp_sk(sk);
@@ -1764,6 +1778,7 @@ struct proto udpv6_prot = {
        .getsockopt             = udpv6_getsockopt,
        .sendmsg                = udpv6_sendmsg,
        .recvmsg                = udpv6_recvmsg,
+       .splice_eof             = udpv6_splice_eof,
        .release_cb             = ip6_datagram_release_cb,
        .hash                   = udp_lib_hash,
        .unhash                 = udp_lib_unhash,