truncate,shmem: Handle truncates that split large folios
author		Matthew Wilcox (Oracle) <willy@infradead.org>
		Wed, 27 May 2020 21:59:22 +0000 (17:59 -0400)
committer	Matthew Wilcox (Oracle) <willy@infradead.org>
		Sat, 8 Jan 2022 05:28:41 +0000 (00:28 -0500)
Handle folio splitting in the parts of the truncation functions which
already handle partial pages.  Factor all that code out into a new
function called truncate_inode_partial_folio().

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: Jan Kara <jack@suse.cz>
Reviewed-by: William Kucharski <william.kucharski@oracle.com>
mm/internal.h
mm/shmem.c
mm/truncate.c
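
For orientation before the hunks: a hedged userspace model of the
offset/length arithmetic that the new truncate_inode_partial_folio()
(mm/truncate.c hunk below) performs.  The names pos/size/start/end
mirror the kernel code; the program itself is illustrative only, not
kernel code.

	/*
	 * Hedged model of truncate_inode_partial_folio()'s zeroing range:
	 * punch bytes [start, end] out of a large folio at byte "pos".
	 * Build with: cc -o partial partial.c
	 */
	#include <stdio.h>

	int main(void)
	{
		long long pos = 0, size = 2097152;	/* 2MB folio at offset 0 */
		long long start = 4096, end = 16383;	/* bytes to punch out */
		unsigned int offset, length;

		offset = pos < start ? start - pos : 0;
		if (pos + size <= end)			/* the kernel casts end to u64 */
			length = size - offset;		/* because lend == -1 means "to EOF" */
		else
			length = end + 1 - pos - offset;

		printf("zero %u bytes at folio offset %u\n", length, offset);
		/* prints: zero 12288 bytes at folio offset 4096; the folio
		 * is then split so the zeroed pages can be freed */
		return 0;
	}

If length works out to the whole folio, the function truncates it
outright instead, as the mm/truncate.c hunk shows.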

diff --git a/mm/internal.h b/mm/internal.h
index c52c05dc6b1f5c14f59f185b5689425112b6f019..26af8a5a5be349dc3f0a0fc9ea30d5b33bd530eb 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -98,6 +98,8 @@ unsigned find_get_entries(struct address_space *mapping, pgoff_t start,
                pgoff_t end, struct folio_batch *fbatch, pgoff_t *indices);
 void filemap_free_folio(struct address_space *mapping, struct folio *folio);
 int truncate_inode_folio(struct address_space *mapping, struct folio *folio);
+bool truncate_inode_partial_folio(struct folio *folio, loff_t start,
+               loff_t end);
 
 /**
  * folio_evictable - Test whether a folio is evictable.
diff --git a/mm/shmem.c b/mm/shmem.c
index bbfa2d05e787869dfaeb68e81d4ad194594d7624..e4c9e5c7081f6661a1781cdbaa2b9a8fb4c24ab4 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -880,30 +880,26 @@ void shmem_unlock_mapping(struct address_space *mapping)
        }
 }
 
-/*
- * Check whether a hole-punch or truncation needs to split a huge page,
- * returning true if no split was required, or the split has been successful.
- *
- * Eviction (or truncation to 0 size) should never need to split a huge page;
- * but in rare cases might do so, if shmem_undo_range() failed to trylock on
- * head, and then succeeded to trylock on tail.
- *
- * A split can only succeed when there are no additional references on the
- * huge page: so the split below relies upon find_get_entries() having stopped
- * when it found a subpage of the huge page, without getting further references.
- */
-static bool shmem_punch_compound(struct page *page, pgoff_t start, pgoff_t end)
+static struct folio *shmem_get_partial_folio(struct inode *inode, pgoff_t index)
 {
-       if (!PageTransCompound(page))
-               return true;
-
-       /* Just proceed to delete a huge page wholly within the range punched */
-       if (PageHead(page) &&
-           page->index >= start && page->index + HPAGE_PMD_NR <= end)
-               return true;
+       struct folio *folio;
+       struct page *page;
 
-       /* Try to split huge page, so we can truly punch the hole or truncate */
-       return split_huge_page(page) >= 0;
+       /*
+        * At first avoid shmem_getpage(,,,SGP_READ): that fails
+        * beyond i_size, and reports fallocated pages as holes.
+        */
+       folio = __filemap_get_folio(inode->i_mapping, index,
+                                       FGP_ENTRY | FGP_LOCK, 0);
+       if (!xa_is_value(folio))
+               return folio;
+       /*
+        * But read a page back from swap if any of it is within i_size
+        * (although in some cases this is just a waste of time).
+        */
+       page = NULL;
+       shmem_getpage(inode, index, &page, SGP_READ);
+       return page ? page_folio(page) : NULL;
 }
 
 /*
@@ -917,10 +913,10 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
        struct shmem_inode_info *info = SHMEM_I(inode);
        pgoff_t start = (lstart + PAGE_SIZE - 1) >> PAGE_SHIFT;
        pgoff_t end = (lend + 1) >> PAGE_SHIFT;
-       unsigned int partial_start = lstart & (PAGE_SIZE - 1);
-       unsigned int partial_end = (lend + 1) & (PAGE_SIZE - 1);
        struct folio_batch fbatch;
        pgoff_t indices[PAGEVEC_SIZE];
+       struct folio *folio;
+       bool same_folio;
        long nr_swaps_freed = 0;
        pgoff_t index;
        int i;
@@ -936,7 +932,7 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
        while (index < end && find_lock_entries(mapping, index, end - 1,
                        &fbatch, indices)) {
                for (i = 0; i < folio_batch_count(&fbatch); i++) {
-                       struct folio *folio = fbatch.folios[i];
+                       folio = fbatch.folios[i];
 
                        index = indices[i];
 
@@ -959,33 +955,30 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
                index++;
        }
 
-       if (partial_start) {
-               struct page *page = NULL;
-               shmem_getpage(inode, start - 1, &page, SGP_READ);
-               if (page) {
-                       unsigned int top = PAGE_SIZE;
-                       if (start > end) {
-                               top = partial_end;
-                               partial_end = 0;
-                       }
-                       zero_user_segment(page, partial_start, top);
-                       set_page_dirty(page);
-                       unlock_page(page);
-                       put_page(page);
+       same_folio = (lstart >> PAGE_SHIFT) == (lend >> PAGE_SHIFT);
+       folio = shmem_get_partial_folio(inode, lstart >> PAGE_SHIFT);
+       if (folio) {
+               same_folio = lend < folio_pos(folio) + folio_size(folio);
+               folio_mark_dirty(folio);
+               if (!truncate_inode_partial_folio(folio, lstart, lend)) {
+                       start = folio->index + folio_nr_pages(folio);
+                       if (same_folio)
+                               end = folio->index;
                }
+               folio_unlock(folio);
+               folio_put(folio);
+               folio = NULL;
        }
-       if (partial_end) {
-               struct page *page = NULL;
-               shmem_getpage(inode, end, &page, SGP_READ);
-               if (page) {
-                       zero_user_segment(page, 0, partial_end);
-                       set_page_dirty(page);
-                       unlock_page(page);
-                       put_page(page);
-               }
+
+       if (!same_folio)
+               folio = shmem_get_partial_folio(inode, lend >> PAGE_SHIFT);
+       if (folio) {
+               folio_mark_dirty(folio);
+               if (!truncate_inode_partial_folio(folio, lstart, lend))
+                       end = folio->index;
+               folio_unlock(folio);
+               folio_put(folio);
        }
-       if (start >= end)
-               return;
 
        index = start;
        while (index < end) {
@@ -1001,7 +994,7 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
                        continue;
                }
                for (i = 0; i < folio_batch_count(&fbatch); i++) {
-                       struct folio *folio = fbatch.folios[i];
+                       folio = fbatch.folios[i];
 
                        index = indices[i];
                        if (xa_is_value(folio)) {
@@ -1019,8 +1012,6 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
                        folio_lock(folio);
 
                        if (!unfalloc || !folio_test_uptodate(folio)) {
-                               struct page *page = folio_file_page(folio,
-                                                                       index);
                                if (folio_mapping(folio) != mapping) {
                                        /* Page was replaced by swap: retry */
                                        folio_unlock(folio);
@@ -1029,18 +1020,9 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
                                }
                                VM_BUG_ON_FOLIO(folio_test_writeback(folio),
                                                folio);
-                               if (shmem_punch_compound(page, start, end))
-                                       truncate_inode_folio(mapping, folio);
-                               else if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) {
-                                       /* Wipe the page and don't get stuck */
-                                       clear_highpage(page);
-                                       flush_dcache_page(page);
-                                       folio_mark_dirty(folio);
-                                       if (index <
-                                           round_up(start, HPAGE_PMD_NR))
-                                               start = index + 1;
-                               }
+                               truncate_inode_folio(mapping, folio);
                        }
+                       index = folio->index + folio_nr_pages(folio) - 1;
                        folio_unlock(folio);
                }
                folio_batch_remove_exceptionals(&fbatch);
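
The trickiest part of the shmem hunk above is the same_folio
bookkeeping: when a split fails, the [start, end) page range is shrunk
so the main loop cannot discard the still-large folio.  A hedged
userspace trace with concrete numbers (the folio geometry is an
assumption):

	#include <stdio.h>
	#include <stdbool.h>

	#define PAGE_SHIFT	12
	#define PAGE_SIZE	(1ULL << PAGE_SHIFT)

	int main(void)
	{
		unsigned long long lstart = 4096, lend = 12287;	/* bytes to punch */
		unsigned long long fpos = 0, fsize = 2097152;	/* one 2MB folio */
		unsigned long long findex = fpos >> PAGE_SHIFT;
		unsigned long long fpages = fsize >> PAGE_SHIFT;

		/* whole pages to discard, computed as in the kernel */
		unsigned long long start = (lstart + PAGE_SIZE - 1) >> PAGE_SHIFT;
		unsigned long long end = (lend + 1) >> PAGE_SHIFT;

		/* first guess from page numbers... */
		bool same_folio = (lstart >> PAGE_SHIFT) == (lend >> PAGE_SHIFT);
		printf("guess: same_folio=%d\n", same_folio);	/* 0: pages 1 and 2 */

		/* ...refined once the folio is in hand: lend falls inside it */
		same_folio = lend < fpos + fsize;
		printf("refined: same_folio=%d\n", same_folio);	/* 1 */

		/* pretend truncate_inode_partial_folio() returned false */
		start = findex + fpages;
		if (same_folio)
			end = findex;
		printf("discard pages [%llu, %llu)\n", start, end);
		/* [512, 0): an empty range, so the unsplittable folio stays
		 * zeroed in the cache instead of being discarded */
		return 0;
	}
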
diff --git a/mm/truncate.c b/mm/truncate.c
index 2d1dae085acb803a7be99accd1c2c67562073f2e..5c87cdc70e7bf5c60614d445910042ac39a23cc2 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -228,6 +228,58 @@ int truncate_inode_folio(struct address_space *mapping, struct folio *folio)
        return 0;
 }
 
+/*
+ * Handle partial folios.  The folio may be entirely within the
+ * range if a split has raced with us.  If not, we zero the part of the
+ * folio that's within the [start, end] range, and then split the folio if
+ * it's large.  split_huge_page() will discard pages which now lie beyond
+ * i_size, and we rely on the caller to discard pages which lie within a
+ * newly created hole.
+ *
+ * Returns false if splitting failed so the caller can avoid
+ * discarding the entire folio which is stubbornly unsplit.
+ */
+bool truncate_inode_partial_folio(struct folio *folio, loff_t start, loff_t end)
+{
+       loff_t pos = folio_pos(folio);
+       unsigned int offset, length;
+
+       if (pos < start)
+               offset = start - pos;
+       else
+               offset = 0;
+       length = folio_size(folio);
+       if (pos + length <= (u64)end)
+               length = length - offset;
+       else
+               length = end + 1 - pos - offset;
+
+       folio_wait_writeback(folio);
+       if (length == folio_size(folio)) {
+               truncate_inode_folio(folio->mapping, folio);
+               return true;
+       }
+
+       /*
+        * We may be zeroing pages we're about to discard, but it avoids
+        * doing a complex calculation here, and then doing the zeroing
+        * anyway if the page split fails.
+        */
+       folio_zero_range(folio, offset, length);
+
+       cleancache_invalidate_page(folio->mapping, &folio->page);
+       if (folio_has_private(folio))
+               do_invalidatepage(&folio->page, offset, length);
+       if (!folio_test_large(folio))
+               return true;
+       if (split_huge_page(&folio->page) == 0)
+               return true;
+       if (folio_test_dirty(folio))
+               return false;
+       truncate_inode_folio(folio->mapping, folio);
+       return true;
+}
+
 /*
  * Used to get rid of pages on hardware memory corruption.
  */
@@ -294,20 +346,16 @@ void truncate_inode_pages_range(struct address_space *mapping,
 {
        pgoff_t         start;          /* inclusive */
        pgoff_t         end;            /* exclusive */
-       unsigned int    partial_start;  /* inclusive */
-       unsigned int    partial_end;    /* exclusive */
        struct folio_batch fbatch;
        pgoff_t         indices[PAGEVEC_SIZE];
        pgoff_t         index;
        int             i;
+       struct folio    *folio;
+       bool            same_folio;
 
        if (mapping_empty(mapping))
                goto out;
 
-       /* Offsets within partial pages */
-       partial_start = lstart & (PAGE_SIZE - 1);
-       partial_end = (lend + 1) & (PAGE_SIZE - 1);
-
        /*
         * 'start' and 'end' always covers the range of pages to be fully
         * truncated. Partial pages are covered with 'partial_start' at the
@@ -340,47 +388,32 @@ void truncate_inode_pages_range(struct address_space *mapping,
                cond_resched();
        }
 
-       if (partial_start) {
-               struct page *page = find_lock_page(mapping, start - 1);
-               if (page) {
-                       unsigned int top = PAGE_SIZE;
-                       if (start > end) {
-                               /* Truncation within a single page */
-                               top = partial_end;
-                               partial_end = 0;
-                       }
-                       wait_on_page_writeback(page);
-                       zero_user_segment(page, partial_start, top);
-                       cleancache_invalidate_page(mapping, page);
-                       if (page_has_private(page))
-                               do_invalidatepage(page, partial_start,
-                                                 top - partial_start);
-                       unlock_page(page);
-                       put_page(page);
+       same_folio = (lstart >> PAGE_SHIFT) == (lend >> PAGE_SHIFT);
+       folio = __filemap_get_folio(mapping, lstart >> PAGE_SHIFT, FGP_LOCK, 0);
+       if (folio) {
+               same_folio = lend < folio_pos(folio) + folio_size(folio);
+               if (!truncate_inode_partial_folio(folio, lstart, lend)) {
+                       start = folio->index + folio_nr_pages(folio);
+                       if (same_folio)
+                               end = folio->index;
                }
+               folio_unlock(folio);
+               folio_put(folio);
+               folio = NULL;
        }
-       if (partial_end) {
-               struct page *page = find_lock_page(mapping, end);
-               if (page) {
-                       wait_on_page_writeback(page);
-                       zero_user_segment(page, 0, partial_end);
-                       cleancache_invalidate_page(mapping, page);
-                       if (page_has_private(page))
-                               do_invalidatepage(page, 0,
-                                                 partial_end);
-                       unlock_page(page);
-                       put_page(page);
-               }
+
+       if (!same_folio)
+               folio = __filemap_get_folio(mapping, lend >> PAGE_SHIFT,
+                                               FGP_LOCK, 0);
+       if (folio) {
+               if (!truncate_inode_partial_folio(folio, lstart, lend))
+                       end = folio->index;
+               folio_unlock(folio);
+               folio_put(folio);
        }
-       /*
-        * If the truncation happened within a single page no pages
-        * will be released, just zeroed, so we can bail out now.
-        */
-       if (start >= end)
-               goto out;
 
        index = start;
-       for ( ; ; ) {
+       while (index < end) {
                cond_resched();
                if (!find_get_entries(mapping, index, end - 1, &fbatch,
                                indices)) {
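
Finally, a hedged userspace trigger for the shmem path: punching a
small hole inside a huge-page-backed tmpfs extent reaches
shmem_undo_range() via shmem_truncate_range() and exercises exactly
this zero-and-split code.  The file name, and the assumption that
tmpfs hands out a 2MB folio here, are illustrative, not guaranteed;
the punched byte range matches the trace above (bytes 4096..12287).

	#define _GNU_SOURCE
	#include <fcntl.h>
	#include <unistd.h>
	#include <linux/falloc.h>

	int main(void)
	{
		char buf[4096] = { 0x55 };
		int fd = open("/dev/shm/demo", O_RDWR | O_CREAT, 0600);

		if (fd < 0)
			return 1;
		/* populate 2MB so shmem allocates (possibly huge) pages */
		for (off_t off = 0; off < (2 << 20); off += sizeof(buf))
			pwrite(fd, buf, sizeof(buf), off);
		/* an 8KB hole at 4KB: partial from the folio's point of view */
		fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
			  4096, 8192);
		close(fd);
		return 0;
	}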