From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Wed, 23 Mar 2022 00:03:12 +0000 (-0700)
Subject: Merge tag 'folio-5.18c' of git://git.infradead.org/users/willy/pagecache
X-Git-Url: http://git.maquefel.me/?a=commitdiff_plain;h=9030fb0bb9d607908d51f9ee02efdbe01da355ee;p=linux.git

Merge tag 'folio-5.18c' of git://git.infradead.org/users/willy/pagecache

Pull folio updates from Matthew Wilcox:

 - Rewrite how munlock works to massively reduce the contention on
   i_mmap_rwsem (Hugh Dickins):

       https://lore.kernel.org/linux-mm/8e4356d-9622-a7f0-b2c-f116b5f2efea@google.com/

 - Sort out the page refcount mess for ZONE_DEVICE pages (Christoph
   Hellwig):

       https://lore.kernel.org/linux-mm/20220210072828.2930359-1-hch@lst.de/

 - Convert GUP to use folios and make pincount available for order-1
   pages. (Matthew Wilcox)

 - Convert a few more truncation functions to use folios (Matthew
   Wilcox)

 - Convert page_vma_mapped_walk to use PFNs instead of pages (Matthew
   Wilcox)

 - Convert rmap_walk to use folios (Matthew Wilcox)

 - Convert most of shrink_page_list() to use a folio (Matthew Wilcox)

 - Add support for creating large folios in readahead (Matthew Wilcox)

* tag 'folio-5.18c' of git://git.infradead.org/users/willy/pagecache: (114 commits)
  mm/damon: minor cleanup for damon_pa_young
  selftests/vm/transhuge-stress: Support file-backed PMD folios
  mm/filemap: Support VM_HUGEPAGE for file mappings
  mm/readahead: Switch to page_cache_ra_order
  mm/readahead: Align file mappings for non-DAX
  mm/readahead: Add large folio readahead
  mm: Support arbitrary THP sizes
  mm: Make large folios depend on THP
  mm: Fix READ_ONLY_THP warning
  mm/filemap: Allow large folios to be added to the page cache
  mm: Turn can_split_huge_page() into can_split_folio()
  mm/vmscan: Convert pageout() to take a folio
  mm/vmscan: Turn page_check_references() into folio_check_references()
  mm/vmscan: Account large folios correctly
  mm/vmscan: Optimise shrink_page_list for non-PMD-sized folios
  mm/vmscan: Free non-shmem folios without splitting them
  mm/rmap: Constify the rmap_walk_control argument
  mm/rmap: Convert rmap_walk() to take a folio
  mm: Turn page_anon_vma() into folio_anon_vma()
  mm/rmap: Turn page_lock_anon_vma_read() into folio_lock_anon_vma_read()
  ...
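The conflict resolutions below repeatedly show the rmap_walk()/page_vma_mapped_walk()
side of this series: mapping-walk callbacks now take a struct folio instead of a
struct page, and the open-coded struct page_vma_mapped_walk initializer is replaced
by DEFINE_FOLIO_VMA_WALK (see the mm/damon/paddr.c hunk below). The following is a
minimal sketch of the new callback shape, modelled on __damon_pa_mkold(); the
function name and the counter passed through the arg pointer are illustrative
assumptions, not code from this merge:

#include <linux/mm.h>
#include <linux/rmap.h>

/*
 * Hypothetical rmap_one-style callback, modelled on __damon_pa_mkold()
 * in the hunk below: visit every PTE or PMD in @vma that maps @folio.
 */
static bool count_folio_mappings(struct folio *folio, struct vm_area_struct *vma,
		unsigned long addr, void *arg)
{
	/* Replaces the old open-coded struct page_vma_mapped_walk initializer. */
	DEFINE_FOLIO_VMA_WALK(pvmw, folio, vma, addr, 0);
	unsigned long *nr_mapped = arg;

	while (page_vma_mapped_walk(&pvmw))
		(*nr_mapped)++;		/* one hit per PTE or PMD mapping */

	return true;			/* keep walking the remaining VMAs */
}

A callback like this would be plugged into the .rmap_one member of struct
rmap_walk_control and handed to rmap_walk(), which this series converts to take a
folio and a const control argument.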
--- 9030fb0bb9d607908d51f9ee02efdbe01da355ee
diff --cc mm/damon/paddr.c
index 7c263797a9a9c,74c2b6e1ca489..21474ae63bc7a
--- a/mm/damon/paddr.c
+++ b/mm/damon/paddr.c
@@@ -14,16 -14,12 +14,12 @@@
  #include <linux/swap.h>
  
  #include "../internal.h"
 -#include "prmtv-common.h"
 +#include "ops-common.h"
  
- static bool __damon_pa_mkold(struct page *page, struct vm_area_struct *vma,
+ static bool __damon_pa_mkold(struct folio *folio, struct vm_area_struct *vma,
  		unsigned long addr, void *arg)
  {
- 	struct page_vma_mapped_walk pvmw = {
- 		.page = page,
- 		.vma = vma,
- 		.address = addr,
- 	};
+ 	DEFINE_FOLIO_VMA_WALK(pvmw, folio, vma, addr, 0);
  
  	while (page_vma_mapped_walk(&pvmw)) {
  		addr = pvmw.address;
diff --cc mm/huge_memory.c
index 88c83c84325c0,f85b04b31bd12..005fab2f3b73a
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@@ -3210,12 -3148,9 +3158,10 @@@ void remove_migration_pmd(struct page_v
  	if (PageAnon(new))
  		page_add_anon_rmap(new, vma, mmun_start, true);
  	else
 -		page_add_file_rmap(new, true);
 +		page_add_file_rmap(new, vma, true);
  	set_pmd_at(mm, mmun_start, pvmw->pmd, pmde);
- 	if ((vma->vm_flags & VM_LOCKED) && !PageDoubleMap(new))
- 		mlock_vma_page(new);
 +
 +	/* No need to invalidate - it was non-present before */
  	update_mmu_cache_pmd(vma, address, pvmw->pmd);
  }
  #endif
diff --cc mm/internal.h
index 00d6e3e3ec45d,293eca1360dc3..58dc6adc19c5e
--- a/mm/internal.h
+++ b/mm/internal.h
@@@ -713,6 -752,11 +747,13 @@@ void vunmap_range_noflush(unsigned lon
  int numa_migrate_prep(struct page *page, struct vm_area_struct *vma,
  		      unsigned long addr, int page_nid, int *flags);
  
+ void free_zone_device_page(struct page *page);
+ 
+ /*
+  * mm/gup.c
+  */
+ struct folio *try_grab_folio(struct page *page, int refs, unsigned int flags);
+ 
 +DECLARE_PER_CPU(struct per_cpu_nodestat, boot_nodestats);
 +
  #endif	/* __MM_INTERNAL_H */
diff --cc mm/memcontrol.c
index f5ad1a6804949,f7fbd5f91e3d9..d495c2acb9f0e
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@@ -7111,10 -7174,9 +7106,10 @@@ void mem_cgroup_swapout(struct folio *f
  	 * important here to have the interrupts disabled because it is the
  	 * only synchronisation we have for updating the per-CPU variables.
  	 */
 -	VM_BUG_ON(!irqs_disabled());
 +	memcg_stats_lock();
  	mem_cgroup_charge_statistics(memcg, -nr_entries);
 +	memcg_stats_unlock();
- 	memcg_check_events(memcg, page_to_nid(page));
+ 	memcg_check_events(memcg, folio_nid(folio));
  
  	css_put(&memcg->css);
  }
diff --cc mm/memory-failure.c
index 1434e0608d5a6,aa8236848949f..dcb6bb9cf7315
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@@ -1411,22 -1413,26 +1413,22 @@@ static bool hwpoison_user_mappings(stru
  	if (kill)
  		collect_procs(hpage, &tokill, flags & MF_ACTION_REQUIRED);
  
 -	if (!PageHuge(hpage)) {
 -		try_to_unmap(folio, ttu);
 +	if (PageHuge(hpage) && !PageAnon(hpage)) {
 +		/*
 +		 * For hugetlb pages in shared mappings, try_to_unmap
 +		 * could potentially call huge_pmd_unshare. Because of
 +		 * this, take semaphore in write mode here and set
 +		 * TTU_RMAP_LOCKED to indicate we have taken the lock
 +		 * at this higher level.
 +		 */
 +		mapping = hugetlb_page_mapping_lock_write(hpage);
 +		if (mapping) {
- 			try_to_unmap(hpage, ttu|TTU_RMAP_LOCKED);
++			try_to_unmap(folio, ttu|TTU_RMAP_LOCKED);
 +			i_mmap_unlock_write(mapping);
 +		} else
 +			pr_info("Memory failure: %#lx: could not lock mapping for mapped huge page\n", pfn);
  	} else {
- 		try_to_unmap(hpage, ttu);
 -		if (!PageAnon(hpage)) {
 -			/*
 -			 * For hugetlb pages in shared mappings, try_to_unmap
 -			 * could potentially call huge_pmd_unshare. Because of
 -			 * this, take semaphore in write mode here and set
 -			 * TTU_RMAP_LOCKED to indicate we have taken the lock
 -			 * at this higher level.
 -			 */
 -			mapping = hugetlb_page_mapping_lock_write(hpage);
 -			if (mapping) {
 -				try_to_unmap(folio, ttu|TTU_RMAP_LOCKED);
 -				i_mmap_unlock_write(mapping);
 -			} else
 -				pr_info("Memory failure: %#lx: could not lock mapping for mapped huge page\n", pfn);
 -		} else {
 -			try_to_unmap(folio, ttu);
 -		}
++		try_to_unmap(folio, ttu);
  	}
  
  	unmap_success = !page_mapped(hpage);
diff --cc mm/memory.c
index e0f3410fa70cb,53bd9e5f2e33a..7c40850b7124e
--- a/mm/memory.c
+++ b/mm/memory.c
@@@ -1403,32 -1388,32 +1400,32 @@@ again
  		entry = pte_to_swp_entry(ptent);
  		if (is_device_private_entry(entry) ||
  		    is_device_exclusive_entry(entry)) {
 -			struct page *page = pfn_swap_entry_to_page(entry);
 -
 -			if (unlikely(zap_skip_check_mapping(details, page)))
 +			page = pfn_swap_entry_to_page(entry);
 +			if (unlikely(!should_zap_page(details, page)))
  				continue;
 -			pte_clear_not_present_full(mm, addr, pte, tlb->fullmm);
  			rss[mm_counter(page)]--;
  			if (is_device_private_entry(entry))
- 				page_remove_rmap(page, false);
+ 				page_remove_rmap(page, vma, false);
  			put_page(page);
 -			continue;
 -		}
 -
 -		/* If details->check_mapping, we leave swap entries. */
 -		if (unlikely(details))
 -			continue;
 -
 -		if (!non_swap_entry(entry))
 +		} else if (!non_swap_entry(entry)) {
 +			/* Genuine swap entry, hence a private anon page */
 +			if (!should_zap_cows(details))
 +				continue;
  			rss[MM_SWAPENTS]--;
 -		else if (is_migration_entry(entry)) {
 -			struct page *page;
 -
 +			if (unlikely(!free_swap_and_cache(entry)))
 +				print_bad_pte(vma, addr, ptent, NULL);
 +		} else if (is_migration_entry(entry)) {
  			page = pfn_swap_entry_to_page(entry);
 +			if (!should_zap_page(details, page))
 +				continue;
  			rss[mm_counter(page)]--;
 +		} else if (is_hwpoison_entry(entry)) {
 +			if (!should_zap_cows(details))
 +				continue;
 +		} else {
 +			/* We should have covered all the swap entry types */
 +			WARN_ON_ONCE(1);
  		}
 -		if (unlikely(!free_swap_and_cache(entry)))
 -			print_bad_pte(vma, addr, ptent, NULL);
  		pte_clear_not_present_full(mm, addr, pte, tlb->fullmm);
  	} while (pte++, addr += PAGE_SIZE, addr != end);
  
diff --cc mm/vmscan.c
index 499fa86e754a0,7db5d0237333b..1678802e03e78
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@@ -986,12 -985,23 +986,12 @@@ static inline int is_page_cache_freeabl
  	 * that isolated the page, the page cache and optional buffer
  	 * heads at page->private.
  	 */
- 	int page_cache_pins = thp_nr_pages(page);
- 	return page_count(page) - page_has_private(page) == 1 + page_cache_pins;
+ 	return folio_ref_count(folio) - folio_test_private(folio) ==
+ 		1 + folio_nr_pages(folio);
  }
  
 -static int may_write_to_inode(struct inode *inode)
 -{
 -	if (current->flags & PF_SWAPWRITE)
 -		return 1;
 -	if (!inode_write_congested(inode))
 -		return 1;
 -	if (inode_to_bdi(inode) == current->backing_dev_info)
 -		return 1;
 -	return 0;
 -}
 -
  /*
- * We detected a synchronous write error writing a page out. Probably
+ * We detected a synchronous write error writing a folio out. Probably
  * -ENOSPC. We need to propagate that into the address_space for a subsequent
  * fsync(), msync() or close().
  *
@@@ -1191,8 -1201,10 +1191,8 @@@ static pageout_t pageout(struct folio *
  	}
  	if (mapping->a_ops->writepage == NULL)
  		return PAGE_ACTIVATE;
 -	if (!may_write_to_inode(mapping->host))
 -		return PAGE_KEEP;
  
- 	if (clear_page_dirty_for_io(page)) {
+ 	if (folio_clear_dirty_for_io(folio)) {
  		int res;
  		struct writeback_control wbc = {
  			.sync_mode = WB_SYNC_NONE,
@@@ -1384,9 -1402,9 +1390,9 @@@ static enum page_references folio_check
  
  	if (referenced_ptes) {
  		/*
- 		 * All mapped pages start out with page table
+ 		 * All mapped folios start out with page table
  		 * references from the instantiating fault, so we need
- 		 * to look twice if a mapped file/anon page is used more
 -		 * to look twice if a mapped file folio is used more
++		 * to look twice if a mapped file/anon folio is used more
  		 * than once.
  		 *
  		 * Mark it and spare it for another trip around the
@@@ -1566,8 -1586,10 +1574,8 @@@ retry
  		 * end of the LRU a second time.
  		 */
  		mapping = page_mapping(page);
 -		if (((dirty || writeback) && mapping &&
 -		     inode_write_congested(mapping->host)) ||
 -		    (writeback && PageReclaim(page)))
 +		if (writeback && PageReclaim(page))
- 			stat->nr_congested++;
+ 			stat->nr_congested += nr_pages;
  
  		/*
  		 * If a page at the tail of the LRU is under writeback, there