mm, pcp: reduce detecting time of consecutive high order page freeing

author Huang Ying <ying.huang@intel.com>

Mon, 16 Oct 2023 05:30:02 +0000 (13:30 +0800)

committer Andrew Morton <akpm@linux-foundation.org>

Wed, 25 Oct 2023 23:47:11 +0000 (16:47 -0700)
author Huang Ying <ying.huang@intel.com>
Mon, 16 Oct 2023 05:30:02 +0000 (13:30 +0800)
committer Andrew Morton <akpm@linux-foundation.org>
Wed, 25 Oct 2023 23:47:11 +0000 (16:47 -0700)
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h

index b92ab001e146856c1591b4e6cf4910430037820e..3c25226beeed4731616f640a4b9d9d5ae05f0054 100644 (file)
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -698,10 +698,10 @@ struct per_cpu_pages {
         int batch;              /* chunk size for buddy add/remove */
         u8 flags;               /* protected by pcp->lock */
         u8 alloc_factor;        /* batch scaling factor during allocate */
-       u8 free_factor;         /* batch scaling factor during free */
  #ifdef CONFIG_NUMA
         u8 expire;              /* When 0, remote pagesets are drained */
  #endif
+       short free_count;       /* consecutive free count */
  
         /* Lists of pages, one per migrate type stored on the pcp-lists */
         struct list_head lists[NR_PCP_LISTS];
diff --git a/mm/page_alloc.c b/mm/page_alloc.c

index 58ab8389da05a13c046bc818c71f35d900d1e339..d52718284029450371aacec26f0277d4ec301ee0 100644 (file)
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2369,13 +2369,10 @@ static int nr_pcp_free(struct per_cpu_pages *pcp, int batch, int high, bool free
         max_nr_free = high - batch;
  
         /*
-        * Double the number of pages freed each time there is subsequent
-        * freeing of pages without any allocation.
+        * Use the number of consecutively freed pages as the batch size
+        * (clamped below) to reduce zone lock contention.
          */
-       batch <<= pcp->free_factor;
-       if (batch <= max_nr_free && pcp->free_factor < CONFIG_PCP_BATCH_SCALE_MAX)
-               pcp->free_factor++;
-       batch = clamp(batch, min_nr_free, max_nr_free);
+       batch = clamp_t(int, pcp->free_count, min_nr_free, max_nr_free);
  
         return batch;
  }
@@ -2403,7 +2400,9 @@ static int nr_pcp_high(struct per_cpu_pages *pcp, struct zone *zone,
          * stored on pcp lists
          */
         if (test_bit(ZONE_RECLAIM_ACTIVE, &zone->flags)) {
-               pcp->high = max(high - (batch << pcp->free_factor), high_min);
+               int free_count = max_t(int, pcp->free_count, batch);
+
+               pcp->high = max(high - free_count, high_min);
                 return min(batch << 2, pcp->high);
         }
  
@@ -2411,10 +2410,12 @@ static int nr_pcp_high(struct per_cpu_pages *pcp, struct zone *zone,
                 return high;
  
         if (test_bit(ZONE_BELOW_HIGH, &zone->flags)) {
-               pcp->high = max(high - (batch << pcp->free_factor), high_min);
+               int free_count = max_t(int, pcp->free_count, batch);
+
+               pcp->high = max(high - free_count, high_min);
                 high = max(pcp->count, high_min);
         } else if (pcp->count >= high) {
-               int need_high = (batch << pcp->free_factor) + batch;
+               int need_high = pcp->free_count + batch;
  
                 /* pcp->high should be large enough to hold batch freed pages */
                 if (pcp->high < need_high)
@@ -2451,7 +2452,7 @@ static void free_unref_page_commit(struct zone *zone, struct per_cpu_pages *pcp,
          * stops will be drained from vmstat refresh context.
          */
         if (order && order <= PAGE_ALLOC_COSTLY_ORDER) {
-               free_high = (pcp->free_factor &&
+               free_high = (pcp->free_count >= batch &&
                              (pcp->flags & PCPF_PREV_FREE_HIGH_ORDER) &&
                              (!(pcp->flags & PCPF_FREE_HIGH_BATCH) ||
                               pcp->count >= READ_ONCE(batch)));
@@ -2459,6 +2460,8 @@ static void free_unref_page_commit(struct zone *zone, struct per_cpu_pages *pcp,
         } else if (pcp->flags & PCPF_PREV_FREE_HIGH_ORDER) {
                 pcp->flags &= ~PCPF_PREV_FREE_HIGH_ORDER;
         }
+       if (pcp->free_count < (batch << CONFIG_PCP_BATCH_SCALE_MAX))
+               pcp->free_count += (1 << order);
         high = nr_pcp_high(pcp, zone, batch, free_high);
         if (pcp->count >= high) {
                 free_pcppages_bulk(zone, nr_pcp_free(pcp, batch, high, free_high),
@@ -2855,7 +2858,7 @@ static struct page *rmqueue_pcplist(struct zone *preferred_zone,
-        * See nr_pcp_free() where free_factor is increased for subsequent
-        * frees.
+        * See free_unref_page_commit() where free_count is increased for
+        * subsequent frees.
          */
-       pcp->free_factor >>= 1;
+       pcp->free_count >>= 1;
         list = &pcp->lists[order_to_pindex(migratetype, order)];
         page = __rmqueue_pcplist(zone, order, migratetype, alloc_flags, pcp, list);
         pcp_spin_unlock(pcp);
@@ -5488,7 +5491,7 @@ static void per_cpu_pages_init(struct per_cpu_pages *pcp, struct per_cpu_zonesta
         pcp->high_min = BOOT_PAGESET_HIGH;
         pcp->high_max = BOOT_PAGESET_HIGH;
         pcp->batch = BOOT_PAGESET_BATCH;
-       pcp->free_factor = 0;
+       pcp->free_count = 0;
  }
  
  static void __zone_set_pageset_high_and_batch(struct zone *zone, unsigned long high_min,
author	Huang Ying <ying.huang@intel.com>
	Mon, 16 Oct 2023 05:30:02 +0000 (13:30 +0800)
committer	Andrew Morton <akpm@linux-foundation.org>
	Wed, 25 Oct 2023 23:47:11 +0000 (16:47 -0700)
include/linux/mmzone.h		patch \| blob \| history
mm/page_alloc.c		patch \| blob \| history