splice: Add a func to do a splice from a buffered file without ITER_PIPE
author David Howells <dhowells@redhat.com>
Tue, 14 Feb 2023 15:01:42 +0000 (15:01 +0000)
committer Steve French <stfrench@microsoft.com>
Mon, 20 Feb 2023 23:25:43 +0000 (17:25 -0600)
Provide a function to do splice read from a buffered file, pulling the
folios out of the pagecache directly by calling filemap_get_pages() to do
any required reading and then pasting the returned folios into the pipe.

A helper function is provided to do the actual folio pasting and will
handle multipage folios by splicing as many of the relevant subpages as
will fit into the pipe.

The code is loosely based on filemap_read() and might belong in
mm/filemap.c with that as it needs to use filemap_get_pages().

Signed-off-by: David Howells <dhowells@redhat.com>
Reviewed-by: Jens Axboe <axboe@kernel.dk>
cc: Christoph Hellwig <hch@lst.de>
cc: Al Viro <viro@zeniv.linux.org.uk>
cc: David Hildenbrand <david@redhat.com>
cc: John Hubbard <jhubbard@nvidia.com>
cc: linux-mm@kvack.org
cc: linux-block@vger.kernel.org
cc: linux-fsdevel@vger.kernel.org
Signed-off-by: Steve French <stfrench@microsoft.com>
include/linux/fs.h
include/linux/pipe_fs_i.h
lib/iov_iter.c
mm/filemap.c
mm/internal.h

index c1769a2c5d7082858e3fac64b261b5e5244d9b01..28743e38df918e51f762864edc61ae27c268776f 100644 (file)
@@ -3163,6 +3163,9 @@ ssize_t vfs_iocb_iter_write(struct file *file, struct kiocb *iocb,
                            struct iov_iter *iter);
 
 /* fs/splice.c */
+ssize_t filemap_splice_read(struct file *in, loff_t *ppos,
+                           struct pipe_inode_info *pipe,
+                           size_t len, unsigned int flags);
 extern ssize_t generic_file_splice_read(struct file *, loff_t *,
                struct pipe_inode_info *, size_t, unsigned int);
 extern ssize_t iter_file_splice_write(struct pipe_inode_info *,
index 6cb65df3e3ba56576e00c3193e93532db4d68fc0..d2c3f16cf6b18062500f25907bc7b9d3222c8ee4 100644 (file)
@@ -156,6 +156,26 @@ static inline bool pipe_full(unsigned int head, unsigned int tail,
        return pipe_occupancy(head, tail) >= limit;
 }
 
+/**
+ * pipe_buf - Return the pipe buffer for the specified slot in the pipe ring
+ * @pipe: The pipe to access
+ * @slot: The slot of interest
+ *
+ * @slot does not have to be reduced to a ring index by the caller: it is
+ * masked with @pipe->ring_size - 1 before being used to index @pipe->bufs.
+ * NOTE(review): the mask only acts as a modulo if ring_size is a power of
+ * two - confirm against where ring_size is set.
+ *
+ * Return: A pointer to the &struct pipe_buffer in @pipe->bufs for @slot.
+ */
+static inline struct pipe_buffer *pipe_buf(const struct pipe_inode_info *pipe,
+                                          unsigned int slot)
+{
+       return &pipe->bufs[slot & (pipe->ring_size - 1)];
+}
+
+/**
+ * pipe_head_buf - Return the pipe buffer at the head of the pipe ring
+ * @pipe: The pipe to access
+ *
+ * The raw @pipe->head counter is handed to pipe_buf(), which masks it down to
+ * an index into the ring, so the counter itself is not modified here.
+ *
+ * Return: A pointer to the &struct pipe_buffer for the slot @pipe->head.
+ */
+static inline struct pipe_buffer *pipe_head_buf(const struct pipe_inode_info *pipe)
+{
+       return pipe_buf(pipe, pipe->head);
+}
+
 /**
  * pipe_buf_get - get a reference to a pipe_buffer
  * @pipe:      the pipe that the buffer belongs to
index f9a3ff37ecd1aefc656c3b65941ad6c82158dedb..47c484551c59a9b035cf533c56661d3532e68750 100644 (file)
@@ -186,12 +186,6 @@ static int copyin(void *to, const void __user *from, size_t n)
        return res;
 }
 
-static inline struct pipe_buffer *pipe_buf(const struct pipe_inode_info *pipe,
-                                          unsigned int slot)
-{
-       return &pipe->bufs[slot & (pipe->ring_size - 1)];
-}
-
 #ifdef PIPE_PARANOIA
 static bool sanity(const struct iov_iter *i)
 {
index 2d5377012284dbec37d48861fa66a80beb530dba..162b4daaeb9929a3674fbe5435de68570a02d7c1 100644 (file)
@@ -42,6 +42,8 @@
 #include <linux/ramfs.h>
 #include <linux/page_idle.h>
 #include <linux/migrate.h>
+#include <linux/pipe_fs_i.h>
+#include <linux/splice.h>
 #include <asm/pgalloc.h>
 #include <asm/tlbflush.h>
 #include "internal.h"
@@ -2843,6 +2845,133 @@ generic_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
 }
 EXPORT_SYMBOL(generic_file_read_iter);
 
+/*
+ * Splice subpages from a folio into a pipe.
+ *
+ * @pipe:  the pipe whose ring is filled at pipe->head
+ * @folio: the folio supplying the pages
+ * @fpos:  the file position to start from; only its offset within @folio
+ *         (offset_in_folio()) is used
+ * @size:  the most to splice, in bytes; clamped to what is left of @folio
+ *         after that offset
+ *
+ * One pipe buffer is filled per page touched: the first starts at the offset
+ * of @fpos within its page and each later one starts at offset 0, and none
+ * extends past the end of its page.  Each filled buffer uses
+ * page_cache_pipe_buf_ops, takes a reference on @folio with folio_get() and
+ * advances pipe->head by one.  Filling stops once @size bytes are covered or
+ * pipe_full() reports that the ring has reached pipe->max_usage.
+ *
+ * Returns the number of bytes spliced, which may be less than @size and is 0
+ * if nothing could be added.
+ *
+ * NOTE(review): nothing here takes a lock on the pipe - presumably the caller
+ * must hold it; confirm against the callers.
+ */
+size_t splice_folio_into_pipe(struct pipe_inode_info *pipe,
+                             struct folio *folio, loff_t fpos, size_t size)
+{
+       struct page *page;
+       size_t spliced = 0, offset = offset_in_folio(folio, fpos);
+
+       page = folio_page(folio, offset / PAGE_SIZE);
+       size = min(size, folio_size(folio) - offset);
+       offset %= PAGE_SIZE;
+
+       while (spliced < size &&
+              !pipe_full(pipe->head, pipe->tail, pipe->max_usage)) {
+               struct pipe_buffer *buf = pipe_head_buf(pipe);
+               size_t part = min_t(size_t, PAGE_SIZE - offset, size - spliced);
+
+               *buf = (struct pipe_buffer) {
+                       .ops    = &page_cache_pipe_buf_ops,
+                       .page   = page,
+                       .offset = offset,
+                       .len    = part,
+               };
+               folio_get(folio);
+               pipe->head++;
+               page++;
+               spliced += part;
+               offset = 0;
+       }
+
+       return spliced;
+}
+
+/**
+ * filemap_splice_read - Splice folios from a file's pagecache into a pipe
+ * @in: The buffered (ie. non-O_DIRECT) file to read from
+ * @ppos: The file position to start at; advanced by the amount spliced
+ * @pipe: The pipe to splice into
+ * @len: The maximum number of bytes to splice
+ * @flags: Flags from the splice request; not used by this function
+ *
+ * Folios are obtained with filemap_get_pages(), which does any required
+ * reading, and are pasted into the pipe by splice_folio_into_pipe().  The
+ * request is first trimmed to the number of free pipe slots, at one page per
+ * slot, and splicing stops at the end of the file, when the pipe fills or
+ * when filemap_get_pages() fails.
+ *
+ * The file size is taken from in->f_mapping->host, the inode that owns the
+ * pagecache being read, rather than from file_inode(in).  The two can differ
+ * (for a block device, file_inode() is the device special file and not the
+ * backing store), and the size must describe the mapping the folios come from.
+ *
+ * Return: The number of bytes spliced if that is non-zero; otherwise the last
+ * filemap_get_pages() result (negative on failure), or 0 if no such call was
+ * made (eg. *@ppos is already at or beyond the end of the file).
+ */
+ssize_t filemap_splice_read(struct file *in, loff_t *ppos,
+                           struct pipe_inode_info *pipe,
+                           size_t len, unsigned int flags)
+{
+       struct inode *host = in->f_mapping->host;
+       struct folio_batch fbatch;
+       struct kiocb iocb;
+       size_t total_spliced = 0, used, npages;
+       loff_t isize, end_offset;
+       bool writably_mapped;
+       int i, error = 0;
+
+       init_sync_kiocb(&iocb, in);
+       iocb.ki_pos = *ppos;
+
+       /* Work out how much data we can actually add into the pipe */
+       used = pipe_occupancy(pipe->head, pipe->tail);
+       npages = max_t(ssize_t, pipe->max_usage - used, 0);
+       len = min_t(size_t, len, npages * PAGE_SIZE);
+
+       folio_batch_init(&fbatch);
+
+       do {
+               cond_resched();
+
+               /* Nothing to do if we are already at or past EOF */
+               if (*ppos >= i_size_read(host))
+                       break;
+
+               iocb.ki_pos = *ppos;
+               error = filemap_get_pages(&iocb, len, &fbatch, true);
+               if (error < 0)
+                       break;
+
+               /*
+                * i_size must be checked after we know the folios are
+                * uptodate.
+                *
+                * Sampling it at this point lets us work out the correct
+                * end_offset and per-folio length, so that the zero-filled
+                * part of a folio beyond EOF is not spliced into the pipe
+                * (unless another truncate extends the file - this is desired
+                * though).
+                */
+               isize = i_size_read(host);
+               if (unlikely(*ppos >= isize))
+                       break;
+               end_offset = min_t(loff_t, isize, *ppos + len);
+
+               /*
+                * Once we start splicing data, we don't want to be touching
+                * any cachelines that might be contended:
+                */
+               writably_mapped = mapping_writably_mapped(in->f_mapping);
+
+               for (i = 0; i < folio_batch_count(&fbatch); i++) {
+                       struct folio *folio = fbatch.folios[i];
+                       size_t n;
+
+                       /* Folios wholly beyond the wanted range are not used */
+                       if (folio_pos(folio) >= end_offset)
+                               goto out;
+                       folio_mark_accessed(folio);
+
+                       /*
+                        * If users can be writing to this folio using arbitrary
+                        * virtual addresses, take care of potential aliasing
+                        * before reading the folio on the kernel side.
+                        */
+                       if (writably_mapped)
+                               flush_dcache_folio(folio);
+
+                       /* Never splice more than is left before EOF */
+                       n = min_t(loff_t, len, isize - *ppos);
+                       n = splice_folio_into_pipe(pipe, folio, *ppos, n);
+                       if (!n)
+                               goto out;
+                       len -= n;
+                       total_spliced += n;
+                       *ppos += n;
+                       in->f_ra.prev_pos = *ppos;
+                       if (pipe_full(pipe->head, pipe->tail, pipe->max_usage))
+                               goto out;
+               }
+
+               folio_batch_release(&fbatch);
+       } while (len);
+
+out:
+       /* Also drops the batch on the early-exit paths; the batch may be empty */
+       folio_batch_release(&fbatch);
+       file_accessed(in);
+
+       return total_spliced ? total_spliced : error;
+}
+
 static inline loff_t folio_seek_hole_data(struct xa_state *xas,
                struct address_space *mapping, struct folio *folio,
                loff_t start, loff_t end, bool seek_data)
index bcf75a8b032dead6f09b894027e6cf94e08c46e7..6d4ca98f384495ceb844bf9e51dd9a5a28f9b40b 100644 (file)
@@ -794,6 +794,12 @@ struct migration_target_control {
        gfp_t gfp_mask;
 };
 
+/*
+ * mm/filemap.c
+ */
+size_t splice_folio_into_pipe(struct pipe_inode_info *pipe,
+                             struct folio *folio, loff_t fpos, size_t size);
+
 /*
  * mm/vmalloc.c
  */