From: Linus Torvalds Date: Mon, 23 May 2022 19:42:33 +0000 (-0700) Subject: Merge tag 'for-5.19/io_uring-socket-2022-05-22' of git://git.kernel.dk/linux-block X-Git-Url: http://git.maquefel.me/?a=commitdiff_plain;h=368da430d04dbe31aded44e5f5c255ff0899ad1d;p=linux.git Merge tag 'for-5.19/io_uring-socket-2022-05-22' of git://git.kernel.dk/linux-block Pull io_uring socket() support from Jens Axboe: "This adds support for socket(2) for io_uring. This is handy when using direct / registered file descriptors with io_uring. Outside of those two patches, a small series from Dylan on top that improves the tracing by providing a text representation of the opcode rather than needing to decode this by reading the header file every time. That sits in this branch as it was the last opcode added (until it wasn't...)" * tag 'for-5.19/io_uring-socket-2022-05-22' of git://git.kernel.dk/linux-block: io_uring: use the text representation of ops in trace io_uring: rename op -> opcode io_uring: add io_uring_get_opcode io_uring: add type to op enum io_uring: add socket(2) support net: add __sys_socket_file() --- 368da430d04dbe31aded44e5f5c255ff0899ad1d diff --cc fs/io_uring.c index e4c9f776919b5,dfebbf3a272a3..d9529275a0306 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@@ -6211,32 -6125,69 +6326,88 @@@ retry fd_install(fd, file); ret = fd; } else { - ret = io_install_fixed_file(req, file, issue_flags, - accept->file_slot - 1); + ret = io_fixed_fd_install(req, issue_flags, file, + accept->file_slot); } - __io_req_complete(req, issue_flags, ret, 0); - return 0; + + if (!(req->flags & REQ_F_APOLL_MULTISHOT)) { + __io_req_complete(req, issue_flags, ret, 0); + return 0; + } + if (ret >= 0) { + bool filled; + + spin_lock(&ctx->completion_lock); + filled = io_fill_cqe_aux(ctx, req->cqe.user_data, ret, + IORING_CQE_F_MORE); + io_commit_cqring(ctx); + spin_unlock(&ctx->completion_lock); + if (filled) { + io_cqring_ev_posted(ctx); + goto retry; + } + ret = -ECANCELED; + } + + return ret; } + static int io_socket_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) + { + struct io_socket *sock = &req->sock; + + if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) + return -EINVAL; + if (sqe->ioprio || sqe->addr || sqe->rw_flags || sqe->buf_index) + return -EINVAL; + + sock->domain = READ_ONCE(sqe->fd); + sock->type = READ_ONCE(sqe->off); + sock->protocol = READ_ONCE(sqe->len); + sock->file_slot = READ_ONCE(sqe->file_index); + sock->nofile = rlimit(RLIMIT_NOFILE); + + sock->flags = sock->type & ~SOCK_TYPE_MASK; + if (sock->file_slot && (sock->flags & SOCK_CLOEXEC)) + return -EINVAL; + if (sock->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK)) + return -EINVAL; + return 0; + } + + static int io_socket(struct io_kiocb *req, unsigned int issue_flags) + { + struct io_socket *sock = &req->sock; + bool fixed = !!sock->file_slot; + struct file *file; + int ret, fd; + + if (!fixed) { + fd = __get_unused_fd_flags(sock->flags, sock->nofile); + if (unlikely(fd < 0)) + return fd; + } + file = __sys_socket_file(sock->domain, sock->type, sock->protocol); + if (IS_ERR(file)) { + if (!fixed) + put_unused_fd(fd); + ret = PTR_ERR(file); + if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK)) + return -EAGAIN; + if (ret == -ERESTARTSYS) + ret = -EINTR; + req_set_fail(req); + } else if (!fixed) { + fd_install(fd, file); + ret = fd; + } else { + ret = io_install_fixed_file(req, file, issue_flags, + sock->file_slot - 1); + } + __io_req_complete(req, issue_flags, ret, 0); + return 0; + } + static int io_connect_prep_async(struct io_kiocb *req) { struct io_async_connect *io = req->async_data; diff --cc include/uapi/linux/io_uring.h index 199bbf73a7527,49d1f3994f8d6..804916814e516 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@@ -113,22 -104,8 +113,22 @@@ enum #define IORING_SETUP_ATTACH_WQ (1U << 5) /* attach to existing wq */ #define IORING_SETUP_R_DISABLED (1U << 6) /* start with ring disabled */ #define IORING_SETUP_SUBMIT_ALL (1U << 7) /* continue submit on error */ +/* + * Cooperative task running. When requests complete, they often require + * forcing the submitter to transition to the kernel to complete. If this + * flag is set, work will be done when the task transitions anyway, rather + * than force an inter-processor interrupt reschedule. This avoids interrupting + * a task running in userspace, and saves an IPI. + */ +#define IORING_SETUP_COOP_TASKRUN (1U << 8) +/* + * If COOP_TASKRUN is set, get notified if task work is available for + * running and a kernel transition would be needed to run it. This sets + * IORING_SQ_TASKRUN in the sq ring flags. Not valid with COOP_TASKRUN. + */ +#define IORING_SETUP_TASKRUN_FLAG (1U << 9) - enum { + enum io_uring_op { IORING_OP_NOP, IORING_OP_READV, IORING_OP_WRITEV,