netfs: Make netfs_read_folio() handle streaming-write pages
author David Howells <dhowells@redhat.com>
Mon, 2 Oct 2023 11:51:19 +0000 (12:51 +0100)
committer David Howells <dhowells@redhat.com>
Thu, 28 Dec 2023 09:45:22 +0000 (09:45 +0000)
netfs_read_folio() needs to handle partially-valid pages that are marked
dirty, but not uptodate, in the event that someone tries to read a page that
was used to cache data by a streaming write.

In such a case, make netfs_read_folio() set up a bvec iterator that points
to the parts of the folio that need filling and to a sink page for the data
that should be discarded, and use that instead of i_pages as the iterator to
be written to.
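
As an illustration of how that buffer is carved up, the small userspace model
below (not kernel code; carve() and struct segment are invented for this
sketch) mirrors the bvec layout built in the netfs_read_folio() hunk below:
any gap before the dirty data and any gap after it are directed at the folio
itself, while the dirty span in the middle is directed, one page-sized chunk
at a time, at the single sink page.

#include <stdio.h>
#include <stddef.h>

#define PAGE_SIZE 4096u

struct segment {
	const char	*target;	/* "folio" or "sink" */
	size_t		offset;		/* offset into the target */
	size_t		len;
};

/* Carve a folio of size flen whose streamed dirty data occupies [from, to):
 * gap before -> folio, dirty span -> sink in page-sized chunks,
 * gap after -> folio.
 */
static unsigned int carve(struct segment *seg, size_t flen,
			  size_t from, size_t to)
{
	unsigned int i = 0;
	size_t off = 0, part;

	if (from > 0) {
		seg[i++] = (struct segment){ "folio", 0, from };
		off = from;
	}
	while (off < to) {
		part = to - off < PAGE_SIZE ? to - off : PAGE_SIZE;
		seg[i++] = (struct segment){ "sink", 0, part };
		off += part;
	}
	if (to < flen)
		seg[i++] = (struct segment){ "folio", to, flen - to };
	return i;
}

int main(void)
{
	/* A 16KiB folio with 300 bytes streamed at offset 100. */
	struct segment seg[4 + 2];	/* flen / PAGE_SIZE + 2, as in the patch */
	unsigned int n = carve(seg, 4 * PAGE_SIZE, 100, 400), i;

	for (i = 0; i < n; i++)
		printf("%-5s off=%zu len=%zu\n",
		       seg[i].target, seg[i].offset, seg[i].len);
	return 0;
}

For that example it prints three segments: folio off=0 len=100, sink off=0
len=300 and folio off=400 len=15984, which is the shape handed to
iov_iter_bvec() in the hunk below.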

This requires netfs_rreq_unlock_folios() to convert the page into a normal
dirty uptodate page, getting rid of the partial write record and bumping
the group pointer over to folio->private.
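
For reference when reading the hunks below, the partial-write record attached
to such a folio looks roughly like the following.  This is only a sketch of
struct netfs_folio from the netfs write-helper series (the authoritative
definition lives in include/linux/netfs.h); it is the structure that gets
freed once the gaps have been filled, with just the netfs_group pointer, if
there is one, surviving in folio->private.

struct netfs_group;				/* Filesystem's write-grouping handle */

struct netfs_folio {
	struct netfs_group	*netfs_group;	/* Grouping marker (or NULL) */
	unsigned int		dirty_offset;	/* Offset of the streamed dirty data */
	unsigned int		dirty_len;	/* Length of the streamed dirty data */
};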

Signed-off-by: David Howells <dhowells@redhat.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
cc: linux-cachefs@redhat.com
cc: linux-fsdevel@vger.kernel.org
cc: linux-mm@kvack.org

fs/netfs/buffered_read.c
include/trace/events/netfs.h

index 73a6e4d61f9de489e15de246da779aec5c101009..950f63fc156a8cde9578e6b0aeabccceea83ef7f 100644
@@ -16,6 +16,7 @@
 void netfs_rreq_unlock_folios(struct netfs_io_request *rreq)
 {
        struct netfs_io_subrequest *subreq;
+       struct netfs_folio *finfo;
        struct folio *folio;
        pgoff_t start_page = rreq->start / PAGE_SIZE;
        pgoff_t last_page = ((rreq->start + rreq->len) / PAGE_SIZE) - 1;
@@ -87,6 +88,15 @@ void netfs_rreq_unlock_folios(struct netfs_io_request *rreq)
 
                if (!pg_failed) {
                        flush_dcache_folio(folio);
+                       finfo = netfs_folio_info(folio);
+                       if (finfo) {
+                               trace_netfs_folio(folio, netfs_folio_trace_filled_gaps);
+                               if (finfo->netfs_group)
+                                       folio_change_private(folio, finfo->netfs_group);
+                               else
+                                       folio_detach_private(folio);
+                               kfree(finfo);
+                       }
                        folio_mark_uptodate(folio);
                }
 
@@ -239,6 +249,7 @@ int netfs_read_folio(struct file *file, struct folio *folio)
        struct address_space *mapping = folio_file_mapping(folio);
        struct netfs_io_request *rreq;
        struct netfs_inode *ctx = netfs_inode(mapping->host);
+       struct folio *sink = NULL;
        int ret;
 
        _enter("%lx", folio_index(folio));
@@ -259,12 +270,56 @@ int netfs_read_folio(struct file *file, struct folio *folio)
        trace_netfs_read(rreq, rreq->start, rreq->len, netfs_read_trace_readpage);
 
        /* Set up the output buffer */
-       iov_iter_xarray(&rreq->iter, ITER_DEST, &mapping->i_pages,
-                       rreq->start, rreq->len);
+       if (folio_test_dirty(folio)) {
+               /* Handle someone trying to read from an unflushed streaming
+                * write.  We fiddle the buffer so that a gap at the beginning
+                * and/or a gap at the end get copied to, but the middle is
+                * discarded.
+                */
+               struct netfs_folio *finfo = netfs_folio_info(folio);
+               struct bio_vec *bvec;
+               unsigned int from = finfo->dirty_offset;
+               unsigned int to = from + finfo->dirty_len;
+               unsigned int off = 0, i = 0;
+               size_t flen = folio_size(folio);
+               size_t nr_bvec = flen / PAGE_SIZE + 2;
+               size_t part;
+
+               ret = -ENOMEM;
+               bvec = kmalloc_array(nr_bvec, sizeof(*bvec), GFP_KERNEL);
+               if (!bvec)
+                       goto discard;
+
+               sink = folio_alloc(GFP_KERNEL, 0);
+               if (!sink)
+                       goto discard;
+
+               trace_netfs_folio(folio, netfs_folio_trace_read_gaps);
+
+               rreq->direct_bv = bvec;
+               rreq->direct_bv_count = nr_bvec;
+               if (from > 0) {
+                       bvec_set_folio(&bvec[i++], folio, from, 0);
+                       off = from;
+               }
+               while (off < to) {
+                       part = min_t(size_t, to - off, PAGE_SIZE);
+                       bvec_set_folio(&bvec[i++], sink, part, 0);
+                       off += part;
+               }
+               if (to < flen)
+                       bvec_set_folio(&bvec[i++], folio, flen - to, to);
+               iov_iter_bvec(&rreq->iter, ITER_DEST, bvec, i, rreq->len);
+       } else {
+               iov_iter_xarray(&rreq->iter, ITER_DEST, &mapping->i_pages,
+                               rreq->start, rreq->len);
+       }
 
        ret = netfs_begin_read(rreq, true);
+       if (sink)
+               folio_put(sink);
        netfs_put_request(rreq, false, netfs_rreq_trace_put_return);
-       return ret;
+       return ret < 0 ? ret : 0;
 
 discard:
        netfs_put_request(rreq, false, netfs_rreq_trace_put_discard);
index 8308b81f36bed2634b5efbea968e906480e97fc7..082a5e717b58ff420c2cdab45fe97ad1809216b6 100644
        EM(netfs_folio_trace_clear_g,           "clear-g")      \
        EM(netfs_folio_trace_copy_to_cache,     "copy")         \
        EM(netfs_folio_trace_end_copy,          "end-copy")     \
+       EM(netfs_folio_trace_filled_gaps,       "filled-gaps")  \
        EM(netfs_folio_trace_kill,              "kill")         \
        EM(netfs_folio_trace_mkwrite,           "mkwrite")      \
        EM(netfs_folio_trace_mkwrite_plus,      "mkwrite+")     \
+       EM(netfs_folio_trace_read_gaps,         "read-gaps")    \
        EM(netfs_folio_trace_redirty,           "redirty")      \
        EM(netfs_folio_trace_redirtied,         "redirtied")    \
        EM(netfs_folio_trace_store,             "store")        \