io_uring: add support for IORING_REGISTER_FILES_UPDATE
authorJens Axboe <axboe@kernel.dk>
Thu, 3 Oct 2019 19:59:56 +0000 (13:59 -0600)
committerJens Axboe <axboe@kernel.dk>
Tue, 29 Oct 2019 16:22:44 +0000 (10:22 -0600)
Allows the application to remove/replace/add files to/from a file set.
Passes in a struct:

struct io_uring_files_update {
__u32 offset;
__s32 *fds;
};

that holds an array of fds, size of array passed in through the usual
nr_args part of the io_uring_register() system call. The logic is as
follows:

1) If ->fds[i] is -1, the existing file at i + ->offset is removed from
   the set.
2) If ->fds[i] is a valid fd, the existing file at i + ->offset is
   replaced with ->fds[i].

For case #2, if the existing file is currently empty (fd == -1), the
new fd is simply added to the array.

Reviewed-by: Jeff Moyer <jmoyer@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
fs/io_uring.c
include/uapi/linux/io_uring.h

index b85e5feb774a0925fc141e2f4a423d56ae61ee6e..77774abb1074da6f71018a60e23c20baca4bbdc7 100644 (file)
@@ -3224,6 +3224,178 @@ static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
        return ret;
 }
 
+/*
+ * Drop the registered-file table's reference to ctx->user_files[index].
+ * With CONFIG_UNIX the file was also attached to an SCM_RIGHTS skb on
+ * the ring socket at register time, so we must find that skb, undo the
+ * in-flight accounting, and shrink (or free) its fp list as well.
+ */
+static void io_sqe_file_unregister(struct io_ring_ctx *ctx, int index)
+{
+#if defined(CONFIG_UNIX)
+       struct file *file = ctx->user_files[index];
+       struct sock *sock = ctx->ring_sock->sk;
+       struct sk_buff_head list, *head = &sock->sk_receive_queue;
+       struct sk_buff *skb;
+       int i;
+
+       /* Local holding list for skbs we dequeue but must put back */
+       __skb_queue_head_init(&list);
+
+       /*
+        * Find the skb that holds this file in its SCM_RIGHTS. When found,
+        * remove this entry and rearrange the file array.
+        */
+       skb = skb_dequeue(head);
+       while (skb) {
+               struct scm_fp_list *fp;
+
+               fp = UNIXCB(skb).fp;
+               for (i = 0; i < fp->count; i++) {
+                       int left;
+
+                       if (fp->fp[i] != file)
+                               continue;
+
+                       /* Undo the in-flight accounting taken at register time */
+                       unix_notinflight(fp->user, fp->fp[i]);
+                       left = fp->count - 1 - i;
+                       if (left) {
+                               /* Close the gap left by the removed entry */
+                               memmove(&fp->fp[i], &fp->fp[i + 1],
+                                               left * sizeof(struct file *));
+                       }
+                       fp->count--;
+                       if (!fp->count) {
+                               /* Last file in this skb, free the skb itself */
+                               kfree_skb(skb);
+                               skb = NULL;
+                       } else {
+                               /* Keep the shrunk skb, re-queue it below */
+                               __skb_queue_tail(&list, skb);
+                       }
+                       fput(file);
+                       file = NULL;
+                       break;
+               }
+
+               /* file cleared to NULL means we found and handled it above */
+               if (!file)
+                       break;
+
+               /* Not in this skb; park it locally and keep searching */
+               __skb_queue_tail(&list, skb);
+
+               skb = skb_dequeue(head);
+       }
+
+       /* Restore any skbs we dequeued but did not free */
+       if (skb_peek(&list)) {
+               spin_lock_irq(&head->lock);
+               while ((skb = __skb_dequeue(&list)) != NULL)
+                       __skb_queue_tail(head, skb);
+               spin_unlock_irq(&head->lock);
+       }
+#else
+       /* No SCM_RIGHTS accounting without CONFIG_UNIX; just drop our ref */
+       fput(ctx->user_files[index]);
+#endif
+}
+
+/*
+ * Account @file (being installed at table slot @index) with the ring
+ * socket's SCM_RIGHTS machinery so the unix GC can see it. Returns 0 on
+ * success or a negative error. Without CONFIG_UNIX there is nothing to
+ * do; the table's reference taken by the caller suffices.
+ */
+static int io_sqe_file_register(struct io_ring_ctx *ctx, struct file *file,
+                               int index)
+{
+#if defined(CONFIG_UNIX)
+       struct sock *sock = ctx->ring_sock->sk;
+       struct sk_buff_head *head = &sock->sk_receive_queue;
+       struct sk_buff *skb;
+
+       /*
+        * See if we can merge this file into an existing skb SCM_RIGHTS
+        * file set. If there's no room, fall back to allocating a new skb
+        * and filling it in.
+        */
+       spin_lock_irq(&head->lock);
+       skb = skb_peek(head);
+       if (skb) {
+               struct scm_fp_list *fpl = UNIXCB(skb).fp;
+
+               if (fpl->count < SCM_MAX_FD) {
+                       /*
+                        * Unlink while we modify the fp list; get_file() /
+                        * unix_inflight() are done outside the queue lock.
+                        */
+                       __skb_unlink(skb, head);
+                       spin_unlock_irq(&head->lock);
+                       fpl->fp[fpl->count] = get_file(file);
+                       unix_inflight(fpl->user, fpl->fp[fpl->count]);
+                       fpl->count++;
+                       spin_lock_irq(&head->lock);
+                       __skb_queue_head(head, skb);
+               } else {
+                       /* skb full; signal the fallback path below */
+                       skb = NULL;
+               }
+       }
+       spin_unlock_irq(&head->lock);
+
+       if (skb) {
+               /* Merged: the skb now holds its own ref, drop the extra one */
+               fput(file);
+               return 0;
+       }
+
+       /* No room in an existing skb: build a fresh one for this slot */
+       return __io_sqe_files_scm(ctx, 1, index);
+#else
+       return 0;
+#endif
+}
+
+/*
+ * Handle IORING_REGISTER_FILES_UPDATE: remove/replace/add files in a
+ * previously registered file set. @arg points at a struct
+ * io_uring_files_update whose ->fds array holds @nr_args descriptors.
+ * An fd of -1 clears the slot at offset + i; any other fd is installed
+ * there (replacing whatever was present). Returns the number of slots
+ * processed, or a negative error if nothing was processed.
+ */
+static int io_sqe_files_update(struct io_ring_ctx *ctx, void __user *arg,
+                              unsigned nr_args)
+{
+       struct io_uring_files_update up;
+       __s32 __user *fds;
+       int fd, i, err;
+       __u32 done;
+
+       if (!ctx->user_files)
+               return -ENXIO;
+       if (!nr_args)
+               return -EINVAL;
+       if (copy_from_user(&up, arg, sizeof(up)))
+               return -EFAULT;
+       /* Reject offset + nr_args wrapping or running past the set's end */
+       if (check_add_overflow(up.offset, nr_args, &done))
+               return -EOVERFLOW;
+       if (done > ctx->nr_user_files)
+               return -EINVAL;
+
+       done = 0;
+       fds = (__s32 __user *) up.fds;
+       while (nr_args) {
+               err = 0;
+               if (copy_from_user(&fd, &fds[done], sizeof(fd))) {
+                       err = -EFAULT;
+                       break;
+               }
+               /* up.offset was bounds-checked above; clamp for Spectre */
+               i = array_index_nospec(up.offset, ctx->nr_user_files);
+               /* Drop whatever file currently occupies this slot */
+               if (ctx->user_files[i]) {
+                       io_sqe_file_unregister(ctx, i);
+                       ctx->user_files[i] = NULL;
+               }
+               if (fd != -1) {
+                       struct file *file;
+
+                       file = fget(fd);
+                       if (!file) {
+                               err = -EBADF;
+                               break;
+                       }
+                       /*
+                        * Don't allow io_uring instances to be registered. If
+                        * UNIX isn't enabled, then this causes a reference
+                        * cycle and this instance can never get freed. If UNIX
+                        * is enabled we'll handle it just fine, but there's
+                        * still no point in allowing a ring fd as it doesn't
+                        * support regular read/write anyway.
+                        */
+                       if (file->f_op == &io_uring_fops) {
+                               fput(file);
+                               err = -EBADF;
+                               break;
+                       }
+                       ctx->user_files[i] = file;
+                       err = io_sqe_file_register(ctx, file, i);
+                       /*
+                        * NOTE(review): on register failure the file is left
+                        * installed in user_files[i]; presumably a later
+                        * unregister releases it — confirm this cleanup path.
+                        */
+                       if (err)
+                               break;
+               }
+               nr_args--;
+               done++;
+               up.offset++;
+       }
+
+       /* Partial success reports the count; err only when nothing was done */
+       return done ? done : err;
+}
+
 static int io_sq_offload_start(struct io_ring_ctx *ctx,
                               struct io_uring_params *p)
 {
@@ -4031,6 +4203,9 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
                        break;
                ret = io_sqe_files_unregister(ctx);
                break;
+       case IORING_REGISTER_FILES_UPDATE:
+               ret = io_sqe_files_update(ctx, arg, nr_args);
+               break;
        case IORING_REGISTER_EVENTFD:
                ret = -EINVAL;
                if (nr_args != 1)
index ea57526a5b89bbc33cfd13c15a3acd09056da750..4f532d9c055406b527bdf1c3aab15f4f9b0d4376 100644 (file)
@@ -150,5 +150,11 @@ struct io_uring_params {
 #define IORING_UNREGISTER_FILES                3
 #define IORING_REGISTER_EVENTFD                4
 #define IORING_UNREGISTER_EVENTFD      5
+#define IORING_REGISTER_FILES_UPDATE   6
+
+/* Argument for IORING_REGISTER_FILES_UPDATE */
+struct io_uring_files_update {
+       /* first slot in the registered file set to update */
+       __u32 offset;
+       /* userspace array of fds, one per slot; -1 clears the slot */
+       __s32 *fds;
+};
 
 #endif