io_uring: add support for IORING_REGISTER_FILES_UPDATE
authorJens Axboe <axboe@kernel.dk>
Thu, 3 Oct 2019 19:59:56 +0000 (13:59 -0600)
committerJens Axboe <axboe@kernel.dk>
Tue, 29 Oct 2019 16:22:44 +0000 (10:22 -0600)
Allows the application to remove/replace/add files to/from a file set.
Passes in a struct:

struct io_uring_files_update {
__u32 offset;
__s32 *fds;
};

that holds an array of fds, size of array passed in through the usual
nr_args part of the io_uring_register() system call. The logic is as
follows:

1) If ->fds[i] is -1, the existing file at i + ->offset is removed from
   the set.
2) If ->fds[i] is a valid fd, the existing file at i + ->offset is
   replaced with ->fds[i].

For case #2, if the existing file is currently empty (fd == -1), the
new fd is simply added to the array.

Reviewed-by: Jeff Moyer <jmoyer@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
fs/io_uring.c
include/uapi/linux/io_uring.h

index b85e5feb774a0925fc141e2f4a423d56ae61ee6e..77774abb1074da6f71018a60e23c20baca4bbdc7 100644 (file)
@@ -3224,6 +3224,178 @@ static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
        return ret;
 }
 
+/*
+ * Drop the registered-file table's reference to ctx->user_files[index].
+ * With CONFIG_UNIX the file was also attached to an SCM_RIGHTS skb on
+ * the ring socket at register time, so we must find that skb, undo the
+ * in-flight accounting, and shrink (or free) its fp list as well.
+ */
+static void io_sqe_file_unregister(struct io_ring_ctx *ctx, int index)
+{
+#if defined(CONFIG_UNIX)
+       struct file *file = ctx->user_files[index];
+       struct sock *sock = ctx->ring_sock->sk;
+       struct sk_buff_head list, *head = &sock->sk_receive_queue;
+       struct sk_buff *skb;
+       int i;
+
+       /* Local holding list for skbs we dequeue but must put back */
+       __skb_queue_head_init(&list);
+
+       /*
+        * Find the skb that holds this file in its SCM_RIGHTS. When found,
+        * remove this entry and rearrange the file array.
+        */
+       skb = skb_dequeue(head);
+       while (skb) {
+               struct scm_fp_list *fp;
+
+               fp = UNIXCB(skb).fp;
+               for (i = 0; i < fp->count; i++) {
+                       int left;
+
+                       if (fp->fp[i] != file)
+                               continue;
+
+                       /* Undo the in-flight accounting taken at register time */
+                       unix_notinflight(fp->user, fp->fp[i]);
+                       left = fp->count - 1 - i;
+                       if (left) {
+                               /* Close the gap left by the removed entry */
+                               memmove(&fp->fp[i], &fp->fp[i + 1],
+                                               left * sizeof(struct file *));
+                       }
+                       fp->count--;
+                       if (!fp->count) {
+                               /* Last file in this skb, free the skb itself */
+                               kfree_skb(skb);
+                               skb = NULL;
+                       } else {
+                               /* Keep the shrunk skb, re-queue it below */
+                               __skb_queue_tail(&list, skb);
+                       }
+                       fput(file);
+                       file = NULL;
+                       break;
+               }
+
+               /* file cleared to NULL means we found and handled it above */
+               if (!file)
+                       break;
+
+               /* Not in this skb; park it locally and keep searching */
+               __skb_queue_tail(&list, skb);
+
+               skb = skb_dequeue(head);
+       }
+
+       /* Restore any skbs we dequeued but did not free */
+       if (skb_peek(&list)) {
+               spin_lock_irq(&head->lock);
+               while ((skb = __skb_dequeue(&list)) != NULL)
+                       __skb_queue_tail(head, skb);
+               spin_unlock_irq(&head->lock);
+       }
+#else
+       /* No SCM_RIGHTS accounting without CONFIG_UNIX; just drop our ref */
+       fput(ctx->user_files[index]);
+#endif
+}
+
+/*
+ * Account @file (being installed at table slot @index) with the ring
+ * socket's SCM_RIGHTS machinery so the unix GC can see it. Returns 0 on
+ * success or a negative error. Without CONFIG_UNIX there is nothing to
+ * do; the table's reference taken by the caller suffices.
+ */
+static int io_sqe_file_register(struct io_ring_ctx *ctx, struct file *file,
+                               int index)
+{
+#if defined(CONFIG_UNIX)
+       struct sock *sock = ctx->ring_sock->sk;
+       struct sk_buff_head *head = &sock->sk_receive_queue;
+       struct sk_buff *skb;
+
+       /*
+        * See if we can merge this file into an existing skb SCM_RIGHTS
+        * file set. If there's no room, fall back to allocating a new skb
+        * and filling it in.
+        */
+       spin_lock_irq(&head->lock);
+       skb = skb_peek(head);
+       if (skb) {
+               struct scm_fp_list *fpl = UNIXCB(skb).fp;
+
+               if (fpl->count < SCM_MAX_FD) {
+                       /*
+                        * Unlink while we modify the fp list; get_file() /
+                        * unix_inflight() are done outside the queue lock.
+                        */
+                       __skb_unlink(skb, head);
+                       spin_unlock_irq(&head->lock);
+                       fpl->fp[fpl->count] = get_file(file);
+                       unix_inflight(fpl->user, fpl->fp[fpl->count]);
+                       fpl->count++;
+                       spin_lock_irq(&head->lock);
+                       __skb_queue_head(head, skb);
+               } else {
+                       /* skb full; signal the fallback path below */
+                       skb = NULL;
+               }
+       }
+       spin_unlock_irq(&head->lock);
+
+       if (skb) {
+               /* Merged: the skb now holds its own ref, drop the extra one */
+               fput(file);
+               return 0;
+       }
+
+       /* No room in an existing skb: build a fresh one for this slot */
+       return __io_sqe_files_scm(ctx, 1, index);
+#else
+       return 0;
+#endif
+}
+
+/*
+ * Handle IORING_REGISTER_FILES_UPDATE: remove/replace/add files in a
+ * previously registered file set. @arg points at a struct
+ * io_uring_files_update whose ->fds array holds @nr_args descriptors.
+ * An fd of -1 clears the slot at offset + i; any other fd is installed
+ * there (replacing whatever was present). Returns the number of slots
+ * processed, or a negative error if nothing was processed.
+ */
+static int io_sqe_files_update(struct io_ring_ctx *ctx, void __user *arg,
+                              unsigned nr_args)
+{
+       struct io_uring_files_update up;
+       __s32 __user *fds;
+       int fd, i, err;
+       __u32 done;
+
+       if (!ctx->user_files)
+               return -ENXIO;
+       if (!nr_args)
+               return -EINVAL;
+       if (copy_from_user(&up, arg, sizeof(up)))
+               return -EFAULT;
+       /* Reject offset + nr_args wrapping or running past the set's end */
+       if (check_add_overflow(up.offset, nr_args, &done))
+               return -EOVERFLOW;
+       if (done > ctx->nr_user_files)
+               return -EINVAL;
+
+       done = 0;
+       fds = (__s32 __user *) up.fds;
+       while (nr_args) {
+               err = 0;
+               if (copy_from_user(&fd, &fds[done], sizeof(fd))) {
+                       err = -EFAULT;
+                       break;
+               }
+               /* up.offset was bounds-checked above; clamp for Spectre */
+               i = array_index_nospec(up.offset, ctx->nr_user_files);
+               /* Drop whatever file currently occupies this slot */
+               if (ctx->user_files[i]) {
+                       io_sqe_file_unregister(ctx, i);
+                       ctx->user_files[i] = NULL;
+               }
+               if (fd != -1) {
+                       struct file *file;
+
+                       file = fget(fd);
+                       if (!file) {
+                               err = -EBADF;
+                               break;
+                       }
+                       /*
+                        * Don't allow io_uring instances to be registered. If
+                        * UNIX isn't enabled, then this causes a reference
+                        * cycle and this instance can never get freed. If UNIX
+                        * is enabled we'll handle it just fine, but there's
+                        * still no point in allowing a ring fd as it doesn't
+                        * support regular read/write anyway.
+                        */
+                       if (file->f_op == &io_uring_fops) {
+                               fput(file);
+                               err = -EBADF;
+                               break;
+                       }
+                       ctx->user_files[i] = file;
+                       err = io_sqe_file_register(ctx, file, i);
+                       /*
+                        * NOTE(review): on register failure the file is left
+                        * installed in user_files[i]; presumably a later
+                        * unregister releases it — confirm this cleanup path.
+                        */
+                       if (err)
+                               break;
+               }
+               nr_args--;
+               done++;
+               up.offset++;
+       }
+
+       /* Partial success reports the count; err only when nothing was done */
+       return done ? done : err;
+}
+
 static int io_sq_offload_start(struct io_ring_ctx *ctx,
                               struct io_uring_params *p)
 {
@@ -4031,6 +4203,9 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
                        break;
                ret = io_sqe_files_unregister(ctx);
                break;
+       case IORING_REGISTER_FILES_UPDATE:
+               ret = io_sqe_files_update(ctx, arg, nr_args);
+               break;
        case IORING_REGISTER_EVENTFD:
                ret = -EINVAL;
                if (nr_args != 1)
index ea57526a5b89bbc33cfd13c15a3acd09056da750..4f532d9c055406b527bdf1c3aab15f4f9b0d4376 100644 (file)
@@ -150,5 +150,11 @@ struct io_uring_params {
 #define IORING_UNREGISTER_FILES                3
 #define IORING_REGISTER_EVENTFD                4
 #define IORING_UNREGISTER_EVENTFD      5
+#define IORING_REGISTER_FILES_UPDATE   6
+
+/* Argument for IORING_REGISTER_FILES_UPDATE */
+struct io_uring_files_update {
+       /* first slot in the registered file set to update */
+       __u32 offset;
+       /* userspace array of fds, one per slot; -1 clears the slot */
+       __s32 *fds;
+};
 
 #endif