From: Linus Torvalds
Date: Sat, 6 Nov 2021 21:08:17 +0000 (-0700)
Subject: Merge branch 'akpm' (patches from Andrew)
X-Git-Url: http://git.maquefel.me/?a=commitdiff_plain;h=512b7931ad0561ffe14265f9ff554a3c081b476b;p=linux.git

Merge branch 'akpm' (patches from Andrew)

Merge misc updates from Andrew Morton:
 "257 patches.

  Subsystems affected by this patch series: scripts, ocfs2, vfs, and
  mm (slab-generic, slab, slub, kconfig, dax, kasan, debug, pagecache,
  gup, swap, memcg, pagemap, mprotect, mremap, iomap, tracing, vmalloc,
  pagealloc, memory-failure, hugetlb, userfaultfd, vmscan, tools,
  memblock, oom-kill, hugetlbfs, migration, thp, readahead, nommu, ksm,
  vmstat, madvise, memory-hotplug, rmap, zsmalloc, highmem, zram,
  cleanups, kfence, and damon)"

* emailed patches from Andrew Morton: (257 commits)
  mm/damon: remove return value from before_terminate callback
  mm/damon: fix a few spelling mistakes in comments and a pr_debug message
  mm/damon: simplify stop mechanism
  Docs/admin-guide/mm/pagemap: wordsmith page flags descriptions
  Docs/admin-guide/mm/damon/start: simplify the content
  Docs/admin-guide/mm/damon/start: fix a wrong link
  Docs/admin-guide/mm/damon/start: fix wrong example commands
  mm/damon/dbgfs: add adaptive_targets list check before enable monitor_on
  mm/damon: remove unnecessary variable initialization
  Documentation/admin-guide/mm/damon: add a document for DAMON_RECLAIM
  mm/damon: introduce DAMON-based Reclamation (DAMON_RECLAIM)
  selftests/damon: support watermarks
  mm/damon/dbgfs: support watermarks
  mm/damon/schemes: activate schemes based on a watermarks mechanism
  tools/selftests/damon: update for regions prioritization of schemes
  mm/damon/dbgfs: support prioritization weights
  mm/damon/vaddr,paddr: support pageout prioritization
  mm/damon/schemes: prioritize regions within the quotas
  mm/damon/selftests: support schemes quotas
  mm/damon/dbgfs: support quotas of schemes
  ...
---

512b7931ad0561ffe14265f9ff554a3c081b476b
diff --cc arch/powerpc/include/asm/machdep.h
index a68311077d320,d8a2ca0070823..9c3c9f04129ff
--- a/arch/powerpc/include/asm/machdep.h
+++ b/arch/powerpc/include/asm/machdep.h
@@@ -29,9 -29,10 +29,9 @@@ struct machdep_calls
          char *name;
  #ifdef CONFIG_PPC64
  #ifdef CONFIG_PM
-         void (*iommu_save)(void);
          void (*iommu_restore)(void);
  #endif
- #ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
+ #ifdef CONFIG_MEMORY_HOTPLUG
          unsigned long (*memory_block_size)(void);
  #endif
  #endif /* CONFIG_PPC64 */
diff --cc arch/x86/Kconfig
index b1d4b481fcdd6,b2fb68da6697a..ed322ac003435
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@@ -61,9 -61,8 +61,9 @@@ config X8
          select ACPI_SYSTEM_POWER_STATES_SUPPORT if ACPI
          select ARCH_32BIT_OFF_T if X86_32
          select ARCH_CLOCKSOURCE_INIT
+         select ARCH_CORRECT_STACKTRACE_ON_KRETPROBE
          select ARCH_ENABLE_HUGEPAGE_MIGRATION if X86_64 && HUGETLB_PAGE && MIGRATION
-         select ARCH_ENABLE_MEMORY_HOTPLUG if X86_64 || (X86_32 && HIGHMEM)
+         select ARCH_ENABLE_MEMORY_HOTPLUG if X86_64
          select ARCH_ENABLE_MEMORY_HOTREMOVE if MEMORY_HOTPLUG
          select ARCH_ENABLE_SPLIT_PMD_PTLOCK if (PGTABLE_LEVELS > 2) && (X86_64 || X86_PAE)
          select ARCH_ENABLE_THP_MIGRATION if X86_64 && TRANSPARENT_HUGEPAGE
diff --cc drivers/hwmon/occ/p9_sbe.c
index e50243580269a,6c540b24b32f6..49b13cc01073a
--- a/drivers/hwmon/occ/p9_sbe.c
+++ b/drivers/hwmon/occ/p9_sbe.c
@@@ -3,13 -3,10 +3,14 @@@
  #include 
  #include 
+ #include 
  #include 
+ #include 
  #include 
+ #include 
  #include 
+ #include 
+ #include 
  
  #include "common.h"
diff --cc include/linux/migrate.h
index 0d2aeb9b0f66a,2d8130e05dc09..eeb818c4fc782
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@@ -57,10 -40,8 +40,12 @@@ extern int migrate_huge_page_move_mappi
                  struct page *newpage, struct page *page);
  extern int migrate_page_move_mapping(struct address_space *mapping,
                  struct page *newpage, struct page *page, int extra_count);
+ void folio_migrate_flags(struct folio *newfolio, struct folio *folio);
+ void folio_migrate_copy(struct folio *newfolio, struct folio *folio);
+ int folio_migrate_mapping(struct address_space *mapping,
+                 struct folio *newfolio, struct folio *folio, int extra_count);
+ 
+ extern bool numa_demotion_enabled;
  #else
  
  static inline void putback_movable_pages(struct list_head *l) {}
diff --cc include/linux/mm.h
index a62b91e769c89,b1720aa637276..a7e4a9e7d807a
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@@ -902,8 -873,10 +868,10 @@@ void __put_page(struct page *page)
  void put_pages_list(struct list_head *pages);
  
  void split_page(struct page *page, unsigned int order);
- void copy_huge_page(struct page *dst, struct page *src);
+ void folio_copy(struct folio *dst, struct folio *src);
  
+ unsigned long nr_free_buffer_pages(void);
+ 
  /*
   * Compound pages have a destructor function. Provide a
   * prototype for that function and accessor functions.
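The include/linux/migrate.h hunk above switches the migration helpers over to folios
(folio_migrate_mapping(), folio_migrate_flags(), folio_migrate_copy()). As a rough
orientation for how the three compose, here is a minimal sketch of a migration callback
built on them; the function name example_migrate_folio and the exact control flow are
illustrative assumptions rather than code taken from this merge (the authoritative
generic helper lives in mm/migrate.c):

static int example_migrate_folio(struct address_space *mapping,
                                 struct page *newpage, struct page *page,
                                 enum migrate_mode mode)
{
        /* Illustrative sketch only; not part of this commit. */
        struct folio *newfolio = page_folio(newpage);
        struct folio *folio = page_folio(page);
        int rc;

        /* Move the mapping entry (page cache or anon) over to the new folio. */
        rc = folio_migrate_mapping(mapping, newfolio, folio, 0);
        if (rc != MIGRATEPAGE_SUCCESS)
                return rc;

        /* Copy data and flags, or flags only when the mode forbids copying. */
        if (mode != MIGRATE_SYNC_NO_COPY)
                folio_migrate_copy(newfolio, folio);
        else
                folio_migrate_flags(newfolio, folio);

        return MIGRATEPAGE_SUCCESS;
}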
diff --cc lib/bootconfig.c
index 70e0d52ffd24c,547558d80e64c..74f3201ab8e59
--- a/lib/bootconfig.c
+++ b/lib/bootconfig.c
@@@ -42,50 -34,6 +42,50 @@@ static int xbc_err_pos __initdata
  static int open_brace[XBC_DEPTH_MAX] __initdata;
  static int brace_index __initdata;
  
+ #ifdef __KERNEL__
+ static inline void * __init xbc_alloc_mem(size_t size)
+ {
+         return memblock_alloc(size, SMP_CACHE_BYTES);
+ }
+ 
+ static inline void __init xbc_free_mem(void *addr, size_t size)
+ {
-         memblock_free_ptr(addr, size);
++        memblock_free(addr, size);
+ }
+ 
+ #else /* !__KERNEL__ */
+ 
+ static inline void *xbc_alloc_mem(size_t size)
+ {
+         return malloc(size);
+ }
+ 
+ static inline void xbc_free_mem(void *addr, size_t size)
+ {
+         free(addr);
+ }
+ #endif
+ /**
+  * xbc_get_info() - Get the information of loaded boot config
+  * @node_size: A pointer to store the number of nodes.
+  * @data_size: A pointer to store the size of bootconfig data.
+  *
+  * Get the number of used nodes in @node_size if it is not NULL,
+  * and the size of bootconfig data in @data_size if it is not NULL.
+  * Return 0 if the boot config is initialized, or return -ENODEV.
+  */
+ int __init xbc_get_info(int *node_size, size_t *data_size)
+ {
+         if (!xbc_data)
+                 return -ENODEV;
+ 
+         if (node_size)
+                 *node_size = xbc_node_num;
+         if (data_size)
+                 *data_size = xbc_data_size;
+         return 0;
+ }
+ 
  static int __init xbc_parse_error(const char *msg, const char *p)
  {
          xbc_err_msg = msg;
diff --cc mm/filemap.c
index bfcef6ff7a275,b6140debc2da3..615512caa0b5d
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@@ -1591,10 -1611,11 +1598,11 @@@ void folio_end_writeback(struct folio *
                  BUG();
  
          smp_mb__after_atomic();
-         wake_up_page(page, PG_writeback);
-         acct_reclaim_writeback(page);
-         put_page(page);
+         folio_wake(folio, PG_writeback);
++        acct_reclaim_writeback(folio);
+         folio_put(folio);
  }
- EXPORT_SYMBOL(end_page_writeback);
+ EXPORT_SYMBOL(folio_end_writeback);
  
  /*
   * After completing I/O on a page, call this routine to update the page
diff --cc mm/internal.h
index b1001ebeb286b,f3de3a2f3e30b..3b79a5c9427a8
--- a/mm/internal.h
+++ b/mm/internal.h
@@@ -34,16 -34,27 +34,36 @@@
  void page_writeback_init(void);
  
- void __acct_reclaim_writeback(pg_data_t *pgdat, struct page *page,
+ static inline void *folio_raw_mapping(struct folio *folio)
+ {
+         unsigned long mapping = (unsigned long)folio->mapping;
+ 
+         return (void *)(mapping & ~PAGE_MAPPING_FLAGS);
+ }
+ 
++void __acct_reclaim_writeback(pg_data_t *pgdat, struct folio *folio,
+                 int nr_throttled);
- static inline void acct_reclaim_writeback(struct page *page)
++static inline void acct_reclaim_writeback(struct folio *folio)
+ {
-         pg_data_t *pgdat = page_pgdat(page);
++        pg_data_t *pgdat = folio_pgdat(folio);
+         int nr_throttled = atomic_read(&pgdat->nr_writeback_throttled);
+ 
+         if (nr_throttled)
-                 __acct_reclaim_writeback(pgdat, page, nr_throttled);
++                __acct_reclaim_writeback(pgdat, folio, nr_throttled);
+ }
+ 
+ static inline void wake_throttle_isolated(pg_data_t *pgdat)
+ {
+         wait_queue_head_t *wqh;
+ 
+         wqh = &pgdat->reclaim_wait[VMSCAN_THROTTLE_ISOLATED];
+         if (waitqueue_active(wqh))
+                 wake_up(wqh);
+ }
+ 
  vm_fault_t do_swap_page(struct vm_fault *vmf);
+ void folio_rotate_reclaimable(struct folio *folio);
+ bool __folio_end_writeback(struct folio *folio);
  
  void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *start_vma,
                  unsigned long floor, unsigned long ceiling);
diff --cc mm/memcontrol.c
index 8dab23a71fc4f,965b3cf7046b3..508bcea7df560
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@@ -2746,11 -2769,10 +2783,10 @@@ static inline void cancel_charge(struc
          if (do_memsw_account())
                  page_counter_uncharge(&memcg->memsw, nr_pages);
  }
- #endif
  
- static void commit_charge(struct page *page, struct mem_cgroup *memcg)
+ static void commit_charge(struct folio *folio, struct mem_cgroup *memcg)
  {
-         VM_BUG_ON_PAGE(page_memcg(page), page);
+         VM_BUG_ON_FOLIO(folio_memcg(folio), folio);
          /*
           * Any of the following ensures page's memcg stability:
           *
diff --cc mm/mempolicy.c
index f4b4be7af4d3a,f1080e0a566a7..10e9c87260ede
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@@ -2196,16 -2196,88 +2196,98 @@@ struct page *alloc_pages(gfp_t gfp, uns
  }
  EXPORT_SYMBOL(alloc_pages);
  
+ struct folio *folio_alloc(gfp_t gfp, unsigned order)
+ {
+         struct page *page = alloc_pages(gfp | __GFP_COMP, order);
+ 
+         if (page && order > 1)
+                 prep_transhuge_page(page);
+         return (struct folio *)page;
+ }
+ EXPORT_SYMBOL(folio_alloc);
+ 
+ static unsigned long alloc_pages_bulk_array_interleave(gfp_t gfp,
+                 struct mempolicy *pol, unsigned long nr_pages,
+                 struct page **page_array)
+ {
+         int nodes;
+         unsigned long nr_pages_per_node;
+         int delta;
+         int i;
+         unsigned long nr_allocated;
+         unsigned long total_allocated = 0;
+ 
+         nodes = nodes_weight(pol->nodes);
+         nr_pages_per_node = nr_pages / nodes;
+         delta = nr_pages - nodes * nr_pages_per_node;
+ 
+         for (i = 0; i < nodes; i++) {
+                 if (delta) {
+                         nr_allocated = __alloc_pages_bulk(gfp,
+                                         interleave_nodes(pol), NULL,
+                                         nr_pages_per_node + 1, NULL,
+                                         page_array);
+                         delta--;
+                 } else {
+                         nr_allocated = __alloc_pages_bulk(gfp,
+                                         interleave_nodes(pol), NULL,
+                                         nr_pages_per_node, NULL, page_array);
+                 }
+ 
+                 page_array += nr_allocated;
+                 total_allocated += nr_allocated;
+         }
+ 
+         return total_allocated;
+ }
+ 
+ static unsigned long alloc_pages_bulk_array_preferred_many(gfp_t gfp, int nid,
+                 struct mempolicy *pol, unsigned long nr_pages,
+                 struct page **page_array)
+ {
+         gfp_t preferred_gfp;
+         unsigned long nr_allocated = 0;
+ 
+         preferred_gfp = gfp | __GFP_NOWARN;
+         preferred_gfp &= ~(__GFP_DIRECT_RECLAIM | __GFP_NOFAIL);
+ 
+         nr_allocated = __alloc_pages_bulk(preferred_gfp, nid, &pol->nodes,
+                         nr_pages, NULL, page_array);
+ 
+         if (nr_allocated < nr_pages)
+                 nr_allocated += __alloc_pages_bulk(gfp, numa_node_id(), NULL,
+                                 nr_pages - nr_allocated, NULL,
+                                 page_array + nr_allocated);
+         return nr_allocated;
+ }
+ 
+ /* alloc pages bulk and mempolicy should be considered at the
+  * same time in some situation such as vmalloc.
+  *
+  * It can accelerate memory allocation especially interleaving
+  * allocate memory.
+  */
+ unsigned long alloc_pages_bulk_array_mempolicy(gfp_t gfp,
+                 unsigned long nr_pages, struct page **page_array)
+ {
+         struct mempolicy *pol = &default_policy;
+ 
+         if (!in_interrupt() && !(gfp & __GFP_THISNODE))
+                 pol = get_task_policy(current);
+ 
+         if (pol->mode == MPOL_INTERLEAVE)
+                 return alloc_pages_bulk_array_interleave(gfp, pol,
+                                 nr_pages, page_array);
+ 
+         if (pol->mode == MPOL_PREFERRED_MANY)
+                 return alloc_pages_bulk_array_preferred_many(gfp,
+                                 numa_node_id(), pol, nr_pages, page_array);
+ 
+         return __alloc_pages_bulk(gfp, policy_node(gfp, pol, numa_node_id()),
+                         policy_nodemask(gfp, pol), nr_pages, NULL,
+                         page_array);
+ }
+ 
  int vma_dup_policy(struct vm_area_struct *src, struct vm_area_struct *dst)
  {
          struct mempolicy *pol = mpol_dup(vma_policy(src));
diff --cc mm/vmscan.c
index 71f178f85f5b0,41f5f6007c30b..ef4a6dc7f0005
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@@ -1021,6 -1006,91 +1021,91 @@@ static void handle_write_error(struct a
          unlock_page(page);
  }
  
+ void reclaim_throttle(pg_data_t *pgdat, enum vmscan_throttle_state reason)
+ {
+         wait_queue_head_t *wqh = &pgdat->reclaim_wait[reason];
+         long timeout, ret;
+         DEFINE_WAIT(wait);
+ 
+         /*
+          * Do not throttle IO workers, kthreads other than kswapd or
+          * workqueues. They may be required for reclaim to make
+          * forward progress (e.g. journalling workqueues or kthreads).
+          */
+         if (!current_is_kswapd() &&
+             current->flags & (PF_IO_WORKER|PF_KTHREAD))
+                 return;
+ 
+         /*
+          * These figures are pulled out of thin air.
+          * VMSCAN_THROTTLE_ISOLATED is a transient condition based on too many
+          * parallel reclaimers which is a short-lived event so the timeout is
+          * short. Failing to make progress or waiting on writeback are
+          * potentially long-lived events so use a longer timeout. This is shaky
+          * logic as a failure to make progress could be due to anything from
+          * writeback to a slow device to excessive references pages at the tail
+          * of the inactive LRU.
+          */
+         switch(reason) {
+         case VMSCAN_THROTTLE_WRITEBACK:
+                 timeout = HZ/10;
+ 
+                 if (atomic_inc_return(&pgdat->nr_writeback_throttled) == 1) {
+                         WRITE_ONCE(pgdat->nr_reclaim_start,
+                                 node_page_state(pgdat, NR_THROTTLED_WRITTEN));
+                 }
+ 
+                 break;
+         case VMSCAN_THROTTLE_NOPROGRESS:
+                 timeout = HZ/2;
+                 break;
+         case VMSCAN_THROTTLE_ISOLATED:
+                 timeout = HZ/50;
+                 break;
+         default:
+                 WARN_ON_ONCE(1);
+                 timeout = HZ;
+                 break;
+         }
+ 
+         prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE);
+         ret = schedule_timeout(timeout);
+         finish_wait(wqh, &wait);
+ 
+         if (reason == VMSCAN_THROTTLE_WRITEBACK)
+                 atomic_dec(&pgdat->nr_writeback_throttled);
+ 
+         trace_mm_vmscan_throttled(pgdat->node_id, jiffies_to_usecs(timeout),
+                         jiffies_to_usecs(timeout - ret),
+                         reason);
+ }
+ 
+ /*
+  * Account for pages written if tasks are throttled waiting on dirty
+  * pages to clean. If enough pages have been cleaned since throttling
+  * started then wakeup the throttled tasks.
+  */
- void __acct_reclaim_writeback(pg_data_t *pgdat, struct page *page,
++void __acct_reclaim_writeback(pg_data_t *pgdat, struct folio *folio,
+                 int nr_throttled)
+ {
+         unsigned long nr_written;
+ 
-         inc_node_page_state(page, NR_THROTTLED_WRITTEN);
++        node_stat_add_folio(folio, NR_THROTTLED_WRITTEN);
+ 
+         /*
+          * This is an inaccurate read as the per-cpu deltas may not
+          * be synchronised. However, given that the system is
+          * writeback throttled, it is not worth taking the penalty
+          * of getting an accurate count. At worst, the throttle
+          * timeout guarantees forward progress.
+          */
+         nr_written = node_page_state(pgdat, NR_THROTTLED_WRITTEN) -
+                 READ_ONCE(pgdat->nr_reclaim_start);
+ 
+         if (nr_written > SWAP_CLUSTER_MAX * nr_throttled)
+                 wake_up(&pgdat->reclaim_wait[VMSCAN_THROTTLE_WRITEBACK]);
+ }
+ 
  /* possible outcome of pageout() */
  typedef enum {
          /* failed to write page out, page is locked */