From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Wed, 23 Mar 2022 00:03:12 +0000 (-0700)
Subject: Merge tag 'folio-5.18c' of git://git.infradead.org/users/willy/pagecache
X-Git-Url: http://git.maquefel.me/?a=commitdiff_plain;h=9030fb0bb9d607908d51f9ee02efdbe01da355ee;p=linux.git

Merge tag 'folio-5.18c' of git://git.infradead.org/users/willy/pagecache

Pull folio updates from Matthew Wilcox:

 - Rewrite how munlock works to massively reduce the contention on
   i_mmap_rwsem (Hugh Dickins):

       https://lore.kernel.org/linux-mm/8e4356d-9622-a7f0-b2c-f116b5f2efea@google.com/

 - Sort out the page refcount mess for ZONE_DEVICE pages (Christoph
   Hellwig):

       https://lore.kernel.org/linux-mm/20220210072828.2930359-1-hch@lst.de/

 - Convert GUP to use folios and make pincount available for order-1
   pages. (Matthew Wilcox)

 - Convert a few more truncation functions to use folios (Matthew
   Wilcox)

 - Convert page_vma_mapped_walk to use PFNs instead of pages (Matthew
   Wilcox)

 - Convert rmap_walk to use folios (Matthew Wilcox)

 - Convert most of shrink_page_list() to use a folio (Matthew Wilcox)

 - Add support for creating large folios in readahead (Matthew Wilcox)

* tag 'folio-5.18c' of git://git.infradead.org/users/willy/pagecache: (114 commits)
  mm/damon: minor cleanup for damon_pa_young
  selftests/vm/transhuge-stress: Support file-backed PMD folios
  mm/filemap: Support VM_HUGEPAGE for file mappings
  mm/readahead: Switch to page_cache_ra_order
  mm/readahead: Align file mappings for non-DAX
  mm/readahead: Add large folio readahead
  mm: Support arbitrary THP sizes
  mm: Make large folios depend on THP
  mm: Fix READ_ONLY_THP warning
  mm/filemap: Allow large folios to be added to the page cache
  mm: Turn can_split_huge_page() into can_split_folio()
  mm/vmscan: Convert pageout() to take a folio
  mm/vmscan: Turn page_check_references() into folio_check_references()
  mm/vmscan: Account large folios correctly
  mm/vmscan: Optimise shrink_page_list for non-PMD-sized folios
  mm/vmscan: Free non-shmem folios without splitting them
  mm/rmap: Constify the rmap_walk_control argument
  mm/rmap: Convert rmap_walk() to take a folio
  mm: Turn page_anon_vma() into folio_anon_vma()
  mm/rmap: Turn page_lock_anon_vma_read() into folio_lock_anon_vma_read()
  ...
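The conflict resolutions below repeatedly show the rmap_walk()/page_vma_mapped_walk()
side of this series: mapping-walk callbacks now take a struct folio instead of a
struct page, and the open-coded struct page_vma_mapped_walk initializer is replaced
by DEFINE_FOLIO_VMA_WALK (see the mm/damon/paddr.c hunk below). The following is a
minimal sketch of the new callback shape, modelled on __damon_pa_mkold(); the
function name and the counter passed through the arg pointer are illustrative
assumptions, not code from this merge:

#include <linux/mm.h>
#include <linux/rmap.h>

/*
 * Hypothetical rmap_one-style callback, modelled on __damon_pa_mkold()
 * in the hunk below: visit every PTE or PMD in @vma that maps @folio.
 */
static bool count_folio_mappings(struct folio *folio, struct vm_area_struct *vma,
		unsigned long addr, void *arg)
{
	/* Replaces the old open-coded struct page_vma_mapped_walk initializer. */
	DEFINE_FOLIO_VMA_WALK(pvmw, folio, vma, addr, 0);
	unsigned long *nr_mapped = arg;

	while (page_vma_mapped_walk(&pvmw))
		(*nr_mapped)++;		/* one hit per PTE or PMD mapping */

	return true;			/* keep walking the remaining VMAs */
}

A callback like this would be plugged into the .rmap_one member of struct
rmap_walk_control and handed to rmap_walk(), which this series converts to take a
folio and a const control argument.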
--- 9030fb0bb9d607908d51f9ee02efdbe01da355ee
diff --cc mm/damon/paddr.c
index 7c263797a9a9c,74c2b6e1ca489..21474ae63bc7a
--- a/mm/damon/paddr.c
+++ b/mm/damon/paddr.c
@@@ -14,16 -14,12 +14,12 @@@
  #include <linux/swap.h>
  
  #include "../internal.h"
 -#include "prmtv-common.h"
 +#include "ops-common.h"
  
- static bool __damon_pa_mkold(struct page *page, struct vm_area_struct *vma,
+ static bool __damon_pa_mkold(struct folio *folio, struct vm_area_struct *vma,
  		unsigned long addr, void *arg)
  {
- 	struct page_vma_mapped_walk pvmw = {
- 		.page = page,
- 		.vma = vma,
- 		.address = addr,
- 	};
+ 	DEFINE_FOLIO_VMA_WALK(pvmw, folio, vma, addr, 0);
  
  	while (page_vma_mapped_walk(&pvmw)) {
  		addr = pvmw.address;
diff --cc mm/huge_memory.c
index 88c83c84325c0,f85b04b31bd12..005fab2f3b73a
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@@ -3210,12 -3148,9 +3158,10 @@@ void remove_migration_pmd(struct page_v
  	if (PageAnon(new))
  		page_add_anon_rmap(new, vma, mmun_start, true);
  	else
 -		page_add_file_rmap(new, true);
 +		page_add_file_rmap(new, vma, true);
  	set_pmd_at(mm, mmun_start, pvmw->pmd, pmde);
- 	if ((vma->vm_flags & VM_LOCKED) && !PageDoubleMap(new))
- 		mlock_vma_page(new);
 +
 +	/* No need to invalidate - it was non-present before */
  	update_mmu_cache_pmd(vma, address, pvmw->pmd);
  }
  #endif
diff --cc mm/internal.h
index 00d6e3e3ec45d,293eca1360dc3..58dc6adc19c5e
--- a/mm/internal.h
+++ b/mm/internal.h
@@@ -713,6 -752,11 +747,13 @@@ void vunmap_range_noflush(unsigned lon
  int numa_migrate_prep(struct page *page, struct vm_area_struct *vma,
  		      unsigned long addr, int page_nid, int *flags);
  
+ void free_zone_device_page(struct page *page);
+ 
+ /*
+  * mm/gup.c
+  */
+ struct folio *try_grab_folio(struct page *page, int refs, unsigned int flags);
+ 
 +DECLARE_PER_CPU(struct per_cpu_nodestat, boot_nodestats);
 +
  #endif	/* __MM_INTERNAL_H */
diff --cc mm/memcontrol.c
index f5ad1a6804949,f7fbd5f91e3d9..d495c2acb9f0e
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@@ -7111,10 -7174,9 +7106,10 @@@ void mem_cgroup_swapout(struct folio *f
  	 * important here to have the interrupts disabled because it is the
  	 * only synchronisation we have for updating the per-CPU variables.
  	 */
 -	VM_BUG_ON(!irqs_disabled());
 +	memcg_stats_lock();
  	mem_cgroup_charge_statistics(memcg, -nr_entries);
 +	memcg_stats_unlock();
- 	memcg_check_events(memcg, page_to_nid(page));
+ 	memcg_check_events(memcg, folio_nid(folio));
  
  	css_put(&memcg->css);
  }
diff --cc mm/memory-failure.c
index 1434e0608d5a6,aa8236848949f..dcb6bb9cf7315
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@@ -1411,22 -1413,26 +1413,22 @@@ static bool hwpoison_user_mappings(stru
  	if (kill)
  		collect_procs(hpage, &tokill, flags & MF_ACTION_REQUIRED);
  
 -	if (!PageHuge(hpage)) {
 -		try_to_unmap(folio, ttu);
 +	if (PageHuge(hpage) && !PageAnon(hpage)) {
 +		/*
 +		 * For hugetlb pages in shared mappings, try_to_unmap
 +		 * could potentially call huge_pmd_unshare. Because of
 +		 * this, take semaphore in write mode here and set
 +		 * TTU_RMAP_LOCKED to indicate we have taken the lock
 +		 * at this higher level.
 +		 */
 +		mapping = hugetlb_page_mapping_lock_write(hpage);
 +		if (mapping) {
- 			try_to_unmap(hpage, ttu|TTU_RMAP_LOCKED);
++			try_to_unmap(folio, ttu|TTU_RMAP_LOCKED);
 +			i_mmap_unlock_write(mapping);
 +		} else
 +			pr_info("Memory failure: %#lx: could not lock mapping for mapped huge page\n", pfn);
  	} else {
- 		try_to_unmap(hpage, ttu);
 -		if (!PageAnon(hpage)) {
 -			/*
 -			 * For hugetlb pages in shared mappings, try_to_unmap
 -			 * could potentially call huge_pmd_unshare. Because of
 -			 * this, take semaphore in write mode here and set
 -			 * TTU_RMAP_LOCKED to indicate we have taken the lock
 -			 * at this higher level.
 -			 */
 -			mapping = hugetlb_page_mapping_lock_write(hpage);
 -			if (mapping) {
 -				try_to_unmap(folio, ttu|TTU_RMAP_LOCKED);
 -				i_mmap_unlock_write(mapping);
 -			} else
 -				pr_info("Memory failure: %#lx: could not lock mapping for mapped huge page\n", pfn);
 -		} else {
 -			try_to_unmap(folio, ttu);
 -		}
++		try_to_unmap(folio, ttu);
  	}
  
  	unmap_success = !page_mapped(hpage);
diff --cc mm/memory.c
index e0f3410fa70cb,53bd9e5f2e33a..7c40850b7124e
--- a/mm/memory.c
+++ b/mm/memory.c
@@@ -1403,32 -1388,32 +1400,32 @@@ again
  		entry = pte_to_swp_entry(ptent);
  		if (is_device_private_entry(entry) ||
  		    is_device_exclusive_entry(entry)) {
 -			struct page *page = pfn_swap_entry_to_page(entry);
 -
 -			if (unlikely(zap_skip_check_mapping(details, page)))
 +			page = pfn_swap_entry_to_page(entry);
 +			if (unlikely(!should_zap_page(details, page)))
  				continue;
 -			pte_clear_not_present_full(mm, addr, pte, tlb->fullmm);
  			rss[mm_counter(page)]--;
  			if (is_device_private_entry(entry))
- 				page_remove_rmap(page, false);
+ 				page_remove_rmap(page, vma, false);
  			put_page(page);
 -			continue;
 -		}
 -
 -		/* If details->check_mapping, we leave swap entries. */
 -		if (unlikely(details))
 -			continue;
 -
 -		if (!non_swap_entry(entry))
 +		} else if (!non_swap_entry(entry)) {
 +			/* Genuine swap entry, hence a private anon page */
 +			if (!should_zap_cows(details))
 +				continue;
  			rss[MM_SWAPENTS]--;
 -		else if (is_migration_entry(entry)) {
 -			struct page *page;
 -
 +			if (unlikely(!free_swap_and_cache(entry)))
 +				print_bad_pte(vma, addr, ptent, NULL);
 +		} else if (is_migration_entry(entry)) {
  			page = pfn_swap_entry_to_page(entry);
 +			if (!should_zap_page(details, page))
 +				continue;
  			rss[mm_counter(page)]--;
 +		} else if (is_hwpoison_entry(entry)) {
 +			if (!should_zap_cows(details))
 +				continue;
 +		} else {
 +			/* We should have covered all the swap entry types */
 +			WARN_ON_ONCE(1);
  		}
 -		if (unlikely(!free_swap_and_cache(entry)))
 -			print_bad_pte(vma, addr, ptent, NULL);
  		pte_clear_not_present_full(mm, addr, pte, tlb->fullmm);
  	} while (pte++, addr += PAGE_SIZE, addr != end);
  
diff --cc mm/vmscan.c
index 499fa86e754a0,7db5d0237333b..1678802e03e78
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@@ -986,12 -985,23 +986,12 @@@ static inline int is_page_cache_freeabl
  	 * that isolated the page, the page cache and optional buffer
  	 * heads at page->private.
  	 */
- 	int page_cache_pins = thp_nr_pages(page);
- 	return page_count(page) - page_has_private(page) == 1 + page_cache_pins;
+ 	return folio_ref_count(folio) - folio_test_private(folio) ==
+ 		1 + folio_nr_pages(folio);
  }
  
 -static int may_write_to_inode(struct inode *inode)
 -{
 -	if (current->flags & PF_SWAPWRITE)
 -		return 1;
 -	if (!inode_write_congested(inode))
 -		return 1;
 -	if (inode_to_bdi(inode) == current->backing_dev_info)
 -		return 1;
 -	return 0;
 -}
 -
  /*
- * We detected a synchronous write error writing a page out. Probably
+ * We detected a synchronous write error writing a folio out. Probably
  * -ENOSPC. We need to propagate that into the address_space for a subsequent
  * fsync(), msync() or close().
  *
@@@ -1191,8 -1201,10 +1191,8 @@@ static pageout_t pageout(struct folio *
  	}
  	if (mapping->a_ops->writepage == NULL)
  		return PAGE_ACTIVATE;
 -	if (!may_write_to_inode(mapping->host))
 -		return PAGE_KEEP;
  
- 	if (clear_page_dirty_for_io(page)) {
+ 	if (folio_clear_dirty_for_io(folio)) {
  		int res;
  		struct writeback_control wbc = {
  			.sync_mode = WB_SYNC_NONE,
@@@ -1384,9 -1402,9 +1390,9 @@@ static enum page_references folio_check
  
  	if (referenced_ptes) {
  		/*
- 		 * All mapped pages start out with page table
+ 		 * All mapped folios start out with page table
  		 * references from the instantiating fault, so we need
- 		 * to look twice if a mapped file/anon page is used more
 -		 * to look twice if a mapped file folio is used more
++		 * to look twice if a mapped file/anon folio is used more
  		 * than once.
  		 *
  		 * Mark it and spare it for another trip around the
@@@ -1566,8 -1586,10 +1574,8 @@@ retry
  		 * end of the LRU a second time.
  		 */
  		mapping = page_mapping(page);
 -		if (((dirty || writeback) && mapping &&
 -		     inode_write_congested(mapping->host)) ||
 -		    (writeback && PageReclaim(page)))
 +		if (writeback && PageReclaim(page))
- 			stat->nr_congested++;
+ 			stat->nr_congested += nr_pages;
  
  		/*
  		 * If a page at the tail of the LRU is under writeback, there