From: Jens Axboe <axboe@kernel.dk>
Date: Tue, 26 Apr 2022 23:39:50 +0000 (-0600)
Subject: io_uring: return hint on whether more data is available after receive
X-Git-Url: http://git.maquefel.me/?a=commitdiff_plain;h=f548a12efd5ab97e6b1fb332e5634ce44b3d9328;p=linux.git

io_uring: return hint on whether more data is available after receive

For now, just use a CQE flag for this. With big CQE support, we could
return the actual number of bytes left.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 5a0388bac42c5..20c5d29e5b6c9 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -5948,6 +5948,7 @@ static int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)
 	struct io_sr_msg *sr = &req->sr_msg;
 	struct socket *sock;
 	struct io_buffer *kbuf;
+	unsigned int cflags;
 	unsigned flags;
 	int ret, min_ret = 0;
 	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
@@ -5981,6 +5982,8 @@ static int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)
 	if (flags & MSG_WAITALL)
 		min_ret = iov_iter_count(&kmsg->msg.msg_iter);
 
+	kmsg->msg.msg_get_inq = 1;
+
 	ret = __sys_recvmsg_sock(sock, &kmsg->msg, req->sr_msg.umsg,
 					kmsg->uaddr, flags);
 	if (ret < min_ret) {
@@ -6006,7 +6009,10 @@ static int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)
 		ret += sr->done_io;
 	else if (sr->done_io)
 		ret = sr->done_io;
-	__io_req_complete(req, issue_flags, ret, io_put_kbuf(req, issue_flags));
+	cflags = io_put_kbuf(req, issue_flags);
+	if (kmsg->msg.msg_inq)
+		cflags |= IORING_CQE_F_SOCK_NONEMPTY;
+	__io_req_complete(req, issue_flags, ret, cflags);
 	return 0;
 }
 
@@ -6018,6 +6024,7 @@ static int io_recv(struct io_kiocb *req, unsigned int issue_flags)
 	void __user *buf = sr->buf;
 	struct socket *sock;
 	struct iovec iov;
+	unsigned int cflags;
 	unsigned flags;
 	int ret, min_ret = 0;
 	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
@@ -6038,11 +6045,12 @@ static int io_recv(struct io_kiocb *req, unsigned int issue_flags)
 		goto out_free;
 
 	msg.msg_name = NULL;
+	msg.msg_namelen = 0;
 	msg.msg_control = NULL;
+	msg.msg_get_inq = 1;
+	msg.msg_flags = 0;
 	msg.msg_controllen = 0;
-	msg.msg_namelen = 0;
 	msg.msg_iocb = NULL;
-	msg.msg_flags = 0;
 
 	flags = req->sr_msg.msg_flags;
 	if (force_nonblock)
@@ -6073,7 +6081,10 @@ out_free:
 		ret += sr->done_io;
 	else if (sr->done_io)
 		ret = sr->done_io;
-	__io_req_complete(req, issue_flags, ret, io_put_kbuf(req, issue_flags));
+	cflags = io_put_kbuf(req, issue_flags);
+	if (msg.msg_inq)
+		cflags |= IORING_CQE_F_SOCK_NONEMPTY;
+	__io_req_complete(req, issue_flags, ret, cflags);
 	return 0;
 }
 
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 49d1f3994f8d6..92d1799892b27 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -220,9 +220,11 @@ struct io_uring_cqe {
  *
  * IORING_CQE_F_BUFFER	If set, the upper 16 bits are the buffer ID
  * IORING_CQE_F_MORE	If set, parent SQE will generate more CQE entries
+ * IORING_CQE_F_SOCK_NONEMPTY	If set, more data to read after socket recv
  */
 #define IORING_CQE_F_BUFFER		(1U << 0)
 #define IORING_CQE_F_MORE		(1U << 1)
+#define IORING_CQE_F_SOCK_NONEMPTY	(1U << 2)
 
 enum {
 	IORING_CQE_BUFFER_SHIFT		= 16,