memcg: dynamically allocate lruvec_stats
author		Shakeel Butt <shakeel.butt@linux.dev>
		Wed, 1 May 2024 17:26:11 +0000 (10:26 -0700)
committer	Andrew Morton <akpm@linux-foundation.org>
		Tue, 7 May 2024 17:36:59 +0000 (10:36 -0700)
To decouple lruvec_stats from NR_VM_NODE_STAT_ITEMS, we need to allocate
lruvec_stats dynamically in the mem_cgroup_per_node structure.  Also move
the definitions of lruvec_stats_percpu and lruvec_stats, along with the
related functions, to memcontrol.c to facilitate later patches.  No
functional changes in this patch.

Link: https://lkml.kernel.org/r/20240501172617.678560-3-shakeel.butt@linux.dev
Signed-off-by: Shakeel Butt <shakeel.butt@linux.dev>
Reviewed-by: Yosry Ahmed <yosryahmed@google.com>
Reviewed-by: T.J. Mercier <tjmercier@google.com>
Reviewed-by: Roman Gushchin <roman.gushchin@linux.dev>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Muchun Song <muchun.song@linux.dev>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
include/linux/memcontrol.h
mm/memcontrol.c
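
Why the indirection matters: with lruvec_stats embedded by value, every
mem_cgroup_per_node carries three arrays of NR_VM_NODE_STAT_ITEMS longs, and
include/linux/memcontrol.h must see that constant.  Once the struct is
heap-allocated and private to memcontrol.c, a later patch in the series can
size those arrays by the (smaller) set of items memcg actually tracks.  A
hypothetical sketch of that follow-up direction (the item list and names
below are illustrative, not part of this patch):

    /* Hypothetical: size the arrays by a memcg-private item list. */
    static const unsigned int memcg_node_stat_items[] = {
            NR_INACTIVE_ANON,
            NR_ACTIVE_ANON,
            NR_INACTIVE_FILE,
            NR_ACTIVE_FILE,
            /* ... only the node stats memcg actually reports ... */
    };

    struct lruvec_stats {
            long state[ARRAY_SIZE(memcg_node_stat_items)];
            long state_local[ARRAY_SIZE(memcg_node_stat_items)];
            long state_pending[ARRAY_SIZE(memcg_node_stat_items)];
    };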

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 9aba0d0462ca69102396871300f48ac9d7ad3a38..ab8a6e884375d366e766965b43801f8c7b1a9d0a 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -83,6 +83,8 @@ enum mem_cgroup_events_target {
 
 struct memcg_vmstats_percpu;
 struct memcg_vmstats;
+struct lruvec_stats_percpu;
+struct lruvec_stats;
 
 struct mem_cgroup_reclaim_iter {
        struct mem_cgroup *position;
@@ -90,25 +92,6 @@ struct mem_cgroup_reclaim_iter {
        unsigned int generation;
 };
 
-struct lruvec_stats_percpu {
-       /* Local (CPU and cgroup) state */
-       long state[NR_VM_NODE_STAT_ITEMS];
-
-       /* Delta calculation for lockless upward propagation */
-       long state_prev[NR_VM_NODE_STAT_ITEMS];
-};
-
-struct lruvec_stats {
-       /* Aggregated (CPU and subtree) state */
-       long state[NR_VM_NODE_STAT_ITEMS];
-
-       /* Non-hierarchical (CPU aggregated) state */
-       long state_local[NR_VM_NODE_STAT_ITEMS];
-
-       /* Pending child counts during tree propagation */
-       long state_pending[NR_VM_NODE_STAT_ITEMS];
-};
-
 /*
  * per-node information in memory controller.
  */
@@ -116,7 +99,7 @@ struct mem_cgroup_per_node {
        struct lruvec           lruvec;
 
        struct lruvec_stats_percpu __percpu     *lruvec_stats_percpu;
-       struct lruvec_stats                     lruvec_stats;
+       struct lruvec_stats                     *lruvec_stats;
 
        unsigned long           lru_zone_size[MAX_NR_ZONES][NR_LRU_LISTS];
 
@@ -1037,42 +1020,9 @@ static inline void mod_memcg_page_state(struct page *page,
 }
 
 unsigned long memcg_page_state(struct mem_cgroup *memcg, int idx);
-
-static inline unsigned long lruvec_page_state(struct lruvec *lruvec,
-                                             enum node_stat_item idx)
-{
-       struct mem_cgroup_per_node *pn;
-       long x;
-
-       if (mem_cgroup_disabled())
-               return node_page_state(lruvec_pgdat(lruvec), idx);
-
-       pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
-       x = READ_ONCE(pn->lruvec_stats.state[idx]);
-#ifdef CONFIG_SMP
-       if (x < 0)
-               x = 0;
-#endif
-       return x;
-}
-
-static inline unsigned long lruvec_page_state_local(struct lruvec *lruvec,
-                                                   enum node_stat_item idx)
-{
-       struct mem_cgroup_per_node *pn;
-       long x = 0;
-
-       if (mem_cgroup_disabled())
-               return node_page_state(lruvec_pgdat(lruvec), idx);
-
-       pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
-       x = READ_ONCE(pn->lruvec_stats.state_local[idx]);
-#ifdef CONFIG_SMP
-       if (x < 0)
-               x = 0;
-#endif
-       return x;
-}
+unsigned long lruvec_page_state(struct lruvec *lruvec, enum node_stat_item idx);
+unsigned long lruvec_page_state_local(struct lruvec *lruvec,
+                                     enum node_stat_item idx);
 
 void mem_cgroup_flush_stats(struct mem_cgroup *memcg);
 void mem_cgroup_flush_stats_ratelimited(struct mem_cgroup *memcg);
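
With the definitions moved, the header now exposes only forward declarations,
so the layout of lruvec_stats (and its dependence on NR_VM_NODE_STAT_ITEMS)
stays private to memcontrol.c; the cost is that lruvec_page_state() and
lruvec_page_state_local() become out-of-line calls instead of inlines.  A
minimal sketch of the general opaque-type idiom, with generic names not taken
from the patch:

    /* foo.h: consumers see only an opaque type plus accessors. */
    struct foo;
    long foo_read(struct foo *f, int idx);

    /* foo.c: the layout, and any constant it depends on, stays private. */
    struct foo {
            long state[16];
    };

    long foo_read(struct foo *f, int idx)
    {
            return f->state[idx];
    }
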
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 96b72a6e04bac54d4c99d05be3307bb844bfc4b0..cf6819a3310dd1323eefee7b3a0429e1f95eb8ec 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -575,6 +575,60 @@ mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_node *mctz)
        return mz;
 }
 
+struct lruvec_stats_percpu {
+       /* Local (CPU and cgroup) state */
+       long state[NR_VM_NODE_STAT_ITEMS];
+
+       /* Delta calculation for lockless upward propagation */
+       long state_prev[NR_VM_NODE_STAT_ITEMS];
+};
+
+struct lruvec_stats {
+       /* Aggregated (CPU and subtree) state */
+       long state[NR_VM_NODE_STAT_ITEMS];
+
+       /* Non-hierarchical (CPU aggregated) state */
+       long state_local[NR_VM_NODE_STAT_ITEMS];
+
+       /* Pending child counts during tree propagation */
+       long state_pending[NR_VM_NODE_STAT_ITEMS];
+};
+
+unsigned long lruvec_page_state(struct lruvec *lruvec, enum node_stat_item idx)
+{
+       struct mem_cgroup_per_node *pn;
+       long x;
+
+       if (mem_cgroup_disabled())
+               return node_page_state(lruvec_pgdat(lruvec), idx);
+
+       pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
+       x = READ_ONCE(pn->lruvec_stats->state[idx]);
+#ifdef CONFIG_SMP
+       if (x < 0)
+               x = 0;
+#endif
+       return x;
+}
+
+unsigned long lruvec_page_state_local(struct lruvec *lruvec,
+                                     enum node_stat_item idx)
+{
+       struct mem_cgroup_per_node *pn;
+       long x = 0;
+
+       if (mem_cgroup_disabled())
+               return node_page_state(lruvec_pgdat(lruvec), idx);
+
+       pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
+       x = READ_ONCE(pn->lruvec_stats->state_local[idx]);
+#ifdef CONFIG_SMP
+       if (x < 0)
+               x = 0;
+#endif
+       return x;
+}
+
 /* Subset of vm_event_item to report for memcg event stats */
 static const unsigned int memcg_vm_event_stat[] = {
        PGPGIN,
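
Callers are unaffected by the move from static inline to out-of-line: the
functions keep their names and signatures.  A sketch of a typical
(hypothetical) caller, using NR_FILE_PAGES as one valid node_stat_item index:

    /* Hypothetical caller: how many file pages does this lruvec hold? */
    static unsigned long lruvec_file_pages(struct lruvec *lruvec)
    {
            return lruvec_page_state(lruvec, NR_FILE_PAGES);
    }
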
@@ -5486,18 +5540,25 @@ static int alloc_mem_cgroup_per_node_info(struct mem_cgroup *memcg, int node)
        if (!pn)
                return 1;
 
+       pn->lruvec_stats = kzalloc_node(sizeof(struct lruvec_stats), GFP_KERNEL,
+                                       node);
+       if (!pn->lruvec_stats)
+               goto fail;
+
        pn->lruvec_stats_percpu = alloc_percpu_gfp(struct lruvec_stats_percpu,
                                                   GFP_KERNEL_ACCOUNT);
-       if (!pn->lruvec_stats_percpu) {
-               kfree(pn);
-               return 1;
-       }
+       if (!pn->lruvec_stats_percpu)
+               goto fail;
 
        lruvec_init(&pn->lruvec);
        pn->memcg = memcg;
 
        memcg->nodeinfo[node] = pn;
        return 0;
+fail:
+       kfree(pn->lruvec_stats);
+       kfree(pn);
+       return 1;
 }
 
 static void free_mem_cgroup_per_node_info(struct mem_cgroup *memcg, int node)
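
The reworked error path above is the usual kernel goto-unwind pattern: since
kfree(NULL) is a no-op, the fail label can free pn->lruvec_stats
unconditionally, whether the kzalloc_node() or the later alloc_percpu_gfp()
call is what failed.  A self-contained userspace analogue of the same shape
(hypothetical names):

    #include <stdlib.h>

    struct node_info {
            void *stats;
            void *percpu;
    };

    static struct node_info *node_info_alloc(void)
    {
            struct node_info *ni = calloc(1, sizeof(*ni));

            if (!ni)
                    return NULL;
            ni->stats = calloc(1, 64);
            if (!ni->stats)
                    goto fail;
            ni->percpu = calloc(1, 64);
            if (!ni->percpu)
                    goto fail;
            return ni;
    fail:
            free(ni->stats);  /* free(NULL) is a no-op, like kfree(NULL) */
            free(ni);
            return NULL;
    }
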
@@ -5508,6 +5569,7 @@ static void free_mem_cgroup_per_node_info(struct mem_cgroup *memcg, int node)
                return;
 
        free_percpu(pn->lruvec_stats_percpu);
+       kfree(pn->lruvec_stats);
        kfree(pn);
 }
 
@@ -5860,18 +5922,19 @@ static void mem_cgroup_css_rstat_flush(struct cgroup_subsys_state *css, int cpu)
 
        for_each_node_state(nid, N_MEMORY) {
                struct mem_cgroup_per_node *pn = memcg->nodeinfo[nid];
-               struct mem_cgroup_per_node *ppn = NULL;
+               struct lruvec_stats *lstats = pn->lruvec_stats;
+               struct lruvec_stats *plstats = NULL;
                struct lruvec_stats_percpu *lstatc;
 
                if (parent)
-                       ppn = parent->nodeinfo[nid];
+                       plstats = parent->nodeinfo[nid]->lruvec_stats;
 
                lstatc = per_cpu_ptr(pn->lruvec_stats_percpu, cpu);
 
                for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) {
-                       delta = pn->lruvec_stats.state_pending[i];
+                       delta = lstats->state_pending[i];
                        if (delta)
-                               pn->lruvec_stats.state_pending[i] = 0;
+                               lstats->state_pending[i] = 0;
 
                        delta_cpu = 0;
                        v = READ_ONCE(lstatc->state[i]);
@@ -5882,12 +5945,12 @@ static void mem_cgroup_css_rstat_flush(struct cgroup_subsys_state *css, int cpu)
                        }
 
                        if (delta_cpu)
-                               pn->lruvec_stats.state_local[i] += delta_cpu;
+                               lstats->state_local[i] += delta_cpu;
 
                        if (delta) {
-                               pn->lruvec_stats.state[i] += delta;
-                               if (ppn)
-                                       ppn->lruvec_stats.state_pending[i] += delta;
+                               lstats->state[i] += delta;
+                               if (plstats)
+                                       plstats->state_pending[i] += delta;
                        }
                }
        }
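
The rstat flush rewrite above is a mechanical rename (pn->lruvec_stats.X
becomes lstats->X through the new pointer); the propagation logic itself is
untouched.  Stripped of the per-CPU plumbing and locking, that logic looks
like the following single-item C sketch (simplified, not kernel code):

    struct stat_cpu  { long state, state_prev; };
    struct stat_node { long state, state_local, state_pending; };

    static void flush_one(struct stat_node *node, struct stat_node *parent,
                          struct stat_cpu *cpu)
    {
            long delta = node->state_pending;  /* queued by children */
            long v, delta_cpu = 0;

            node->state_pending = 0;

            v = cpu->state;                    /* READ_ONCE() in the kernel */
            if (v != cpu->state_prev) {
                    delta_cpu = v - cpu->state_prev;
                    delta += delta_cpu;
                    cpu->state_prev = v;
            }

            if (delta_cpu)
                    node->state_local += delta_cpu;  /* non-hierarchical */
            if (delta) {
                    node->state += delta;            /* hierarchical */
                    if (parent)
                            parent->state_pending += delta;  /* defer upward */
            }
    }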