From f0c0c115fb81940f4dba0644ac2a8a43b39c83f3 Mon Sep 17 00:00:00 2001
From: Shakeel Butt
Date: Mon, 14 Dec 2020 19:07:17 -0800
Subject: [PATCH] mm: memcontrol: account pagetables per node

For many workloads, pagetable consumption is significant and it makes
sense to expose it in the memory.stat for the memory cgroups. However
at the moment, the pagetables are accounted per-zone. Converting them
to per-node and using the right interface will correctly account for
the memory cgroups as well.

[akpm@linux-foundation.org: export __mod_lruvec_page_state to modules for arch/mips/kvm/]

Link: https://lkml.kernel.org/r/20201130212541.2781790-3-shakeelb@google.com
Signed-off-by: Shakeel Butt
Acked-by: Johannes Weiner
Acked-by: Roman Gushchin
Cc: Michal Hocko
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 Documentation/admin-guide/cgroup-v2.rst | 3 +++
 arch/nds32/mm/mm-nds32.c                | 6 +++---
 drivers/base/node.c                     | 2 +-
 fs/proc/meminfo.c                       | 2 +-
 include/linux/mm.h                      | 8 ++++----
 include/linux/mmzone.h                  | 2 +-
 mm/memcontrol.c                         | 2 ++
 mm/page_alloc.c                         | 6 +++---
 mm/vmstat.c                             | 2 +-
 9 files changed, 19 insertions(+), 14 deletions(-)

diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst
index 515bb13084a02..63521cd36ce53 100644
--- a/Documentation/admin-guide/cgroup-v2.rst
+++ b/Documentation/admin-guide/cgroup-v2.rst
@@ -1274,6 +1274,9 @@ PAGE_SIZE multiple when read back.
 	  kernel_stack
 		Amount of memory allocated to kernel stacks.
 
+	  pagetables
+		Amount of memory allocated for page tables.
+
 	  percpu(npn)
 		Amount of memory used for storing per-cpu kernel
 		data structures.
diff --git a/arch/nds32/mm/mm-nds32.c b/arch/nds32/mm/mm-nds32.c
index 55bec50ccc033..f2778f2b39f65 100644
--- a/arch/nds32/mm/mm-nds32.c
+++ b/arch/nds32/mm/mm-nds32.c
@@ -34,8 +34,8 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
 	cpu_dcache_wb_range((unsigned long)new_pgd,
 			    (unsigned long)new_pgd +
 			    PTRS_PER_PGD * sizeof(pgd_t));
-	inc_zone_page_state(virt_to_page((unsigned long *)new_pgd),
-			    NR_PAGETABLE);
+	inc_lruvec_page_state(virt_to_page((unsigned long *)new_pgd),
+			      NR_PAGETABLE);
 
 	return new_pgd;
 }
@@ -59,7 +59,7 @@ void pgd_free(struct mm_struct *mm, pgd_t * pgd)
 
 	pte = pmd_page(*pmd);
 	pmd_clear(pmd);
-	dec_zone_page_state(virt_to_page((unsigned long *)pgd), NR_PAGETABLE);
+	dec_lruvec_page_state(virt_to_page((unsigned long *)pgd), NR_PAGETABLE);
 	pte_free(mm, pte);
 	mm_dec_nr_ptes(mm);
 	pmd_free(mm, pmd);
diff --git a/drivers/base/node.c b/drivers/base/node.c
index 6ffa470e29840..04f71c7bc3f83 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -450,7 +450,7 @@ static ssize_t node_read_meminfo(struct device *dev,
 #ifdef CONFIG_SHADOW_CALL_STACK
 			     nid, node_page_state(pgdat, NR_KERNEL_SCS_KB),
 #endif
-			     nid, K(sum_zone_node_page_state(nid, NR_PAGETABLE)),
+			     nid, K(node_page_state(pgdat, NR_PAGETABLE)),
 			     nid, 0UL,
 			     nid, K(sum_zone_node_page_state(nid, NR_BOUNCE)),
 			     nid, K(node_page_state(pgdat, NR_WRITEBACK_TEMP)),
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index 887a5532e4499..d6fc746196255 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -107,7 +107,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
 		    global_node_page_state(NR_KERNEL_SCS_KB));
 #endif
 	show_val_kb(m, "PageTables:     ",
-		    global_zone_page_state(NR_PAGETABLE));
+		    global_node_page_state(NR_PAGETABLE));
 
 	show_val_kb(m, "NFS_Unstable:   ", 0);
 	show_val_kb(m, "Bounce:         ",
diff --git a/include/linux/mm.h b/include/linux/mm.h
index db6ae4d3fb4ed..5bbbf4aeee940 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2203,7 +2203,7 @@ static inline bool pgtable_pte_page_ctor(struct page *page)
 	if (!ptlock_init(page))
 		return false;
 	__SetPageTable(page);
-	inc_zone_page_state(page, NR_PAGETABLE);
+	inc_lruvec_page_state(page, NR_PAGETABLE);
 	return true;
 }
 
@@ -2211,7 +2211,7 @@ static inline void pgtable_pte_page_dtor(struct page *page)
 {
 	ptlock_free(page);
 	__ClearPageTable(page);
-	dec_zone_page_state(page, NR_PAGETABLE);
+	dec_lruvec_page_state(page, NR_PAGETABLE);
 }
 
 #define pte_offset_map_lock(mm, pmd, address, ptlp)	\
@@ -2298,7 +2298,7 @@ static inline bool pgtable_pmd_page_ctor(struct page *page)
 	if (!pmd_ptlock_init(page))
 		return false;
 	__SetPageTable(page);
-	inc_zone_page_state(page, NR_PAGETABLE);
+	inc_lruvec_page_state(page, NR_PAGETABLE);
 	return true;
 }
 
@@ -2306,7 +2306,7 @@ static inline void pgtable_pmd_page_dtor(struct page *page)
 {
 	pmd_ptlock_free(page);
 	__ClearPageTable(page);
-	dec_zone_page_state(page, NR_PAGETABLE);
+	dec_lruvec_page_state(page, NR_PAGETABLE);
 }
 
 /*
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index fb3bf696c05e8..cca2a4443c9cd 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -152,7 +152,6 @@ enum zone_stat_item {
 	NR_ZONE_UNEVICTABLE,
 	NR_ZONE_WRITE_PENDING,	/* Count of dirty, writeback and unstable pages */
 	NR_MLOCK,		/* mlock()ed pages found and moved off LRU */
-	NR_PAGETABLE,		/* used for pagetables */
 	/* Second 128 byte cacheline */
 	NR_BOUNCE,
 #if IS_ENABLED(CONFIG_ZSMALLOC)
@@ -207,6 +206,7 @@ enum node_stat_item {
 #if IS_ENABLED(CONFIG_SHADOW_CALL_STACK)
 	NR_KERNEL_SCS_KB,	/* measured in KiB */
 #endif
+	NR_PAGETABLE,		/* used for pagetables */
 	NR_VM_NODE_STAT_ITEMS
 };
 
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 52837d68bbec8..b9419a3605eb4 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -869,6 +869,7 @@ void __mod_lruvec_page_state(struct page *page, enum node_stat_item idx,
 	lruvec = mem_cgroup_lruvec(head->mem_cgroup, pgdat);
 	__mod_lruvec_state(lruvec, idx, val);
 }
+EXPORT_SYMBOL(__mod_lruvec_page_state);
 
 void __mod_lruvec_kmem_state(void *p, enum node_stat_item idx, int val)
 {
@@ -1493,6 +1494,7 @@ static struct memory_stat memory_stats[] = {
 	{ "anon", PAGE_SIZE, NR_ANON_MAPPED },
 	{ "file", PAGE_SIZE, NR_FILE_PAGES },
 	{ "kernel_stack", 1024, NR_KERNEL_STACK_KB },
+	{ "pagetables", PAGE_SIZE, NR_PAGETABLE },
 	{ "percpu", 1, MEMCG_PERCPU_B },
 	{ "sock", PAGE_SIZE, MEMCG_SOCK },
 	{ "shmem", PAGE_SIZE, NR_SHMEM },
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index eaa227a479e4a..743fb2bccecc6 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -5465,7 +5465,7 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
 		global_node_page_state_pages(NR_SLAB_UNRECLAIMABLE_B),
 		global_node_page_state(NR_FILE_MAPPED),
 		global_node_page_state(NR_SHMEM),
-		global_zone_page_state(NR_PAGETABLE),
+		global_node_page_state(NR_PAGETABLE),
 		global_zone_page_state(NR_BOUNCE),
 		global_zone_page_state(NR_FREE_PAGES),
 		free_pcp,
@@ -5497,6 +5497,7 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
 #ifdef CONFIG_SHADOW_CALL_STACK
 			" shadow_call_stack:%lukB"
 #endif
+			" pagetables:%lukB"
 			" all_unreclaimable? %s"
 			"\n",
 			pgdat->node_id,
@@ -5522,6 +5523,7 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
 #ifdef CONFIG_SHADOW_CALL_STACK
 			node_page_state(pgdat, NR_KERNEL_SCS_KB),
 #endif
+			K(node_page_state(pgdat, NR_PAGETABLE)),
 			pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES ?
				"yes" : "no");
 }
@@ -5553,7 +5555,6 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
 			" present:%lukB"
 			" managed:%lukB"
 			" mlocked:%lukB"
-			" pagetables:%lukB"
 			" bounce:%lukB"
 			" free_pcp:%lukB"
 			" local_pcp:%ukB"
@@ -5574,7 +5575,6 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
 			K(zone->present_pages),
 			K(zone_managed_pages(zone)),
 			K(zone_page_state(zone, NR_MLOCK)),
-			K(zone_page_state(zone, NR_PAGETABLE)),
 			K(zone_page_state(zone, NR_BOUNCE)),
 			K(free_pcp),
 			K(this_cpu_read(zone->pageset->pcp.count)),
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 698bc0bc18d14..da36e3b0aab24 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1157,7 +1157,6 @@ const char * const vmstat_text[] = {
 	"nr_zone_unevictable",
 	"nr_zone_write_pending",
 	"nr_mlock",
-	"nr_page_table_pages",
 	"nr_bounce",
 #if IS_ENABLED(CONFIG_ZSMALLOC)
 	"nr_zspages",
@@ -1215,6 +1214,7 @@ const char * const vmstat_text[] = {
 #if IS_ENABLED(CONFIG_SHADOW_CALL_STACK)
 	"nr_shadow_call_stack",
 #endif
+	"nr_page_table_pages",
 
 	/* enum writeback_stat_item counters */
 	"nr_dirty_threshold",
-- 
2.30.2