percpu: optimize locking in pcpu_balance_workfn()
author Roman Gushchin <guro@fb.com>
Thu, 17 Jun 2021 19:03:22 +0000 (12:03 -0700)
committer Dennis Zhou <dennis@kernel.org>
Thu, 17 Jun 2021 23:05:24 +0000 (23:05 +0000)
pcpu_balance_workfn() unconditionally calls pcpu_balance_free(),
pcpu_reclaim_populated(), pcpu_balance_populated() and
pcpu_balance_free() again.

Each call to pcpu_balance_free() and pcpu_reclaim_populated() will
cause at least one acquisition of the pcpu_lock. So even if the
balancing was scheduled because of a failed atomic allocation,
pcpu_lock will be acquired at least 4 times. This obviously
increases the contention on the pcpu_lock.

To optimize the scheme, let's grab the pcpu_lock on the upper level
(in pcpu_balance_workfn()) and keep it generally locked for the whole
duration of the scheduled work, but release it conditionally to perform
any slow operations like chunk (de)population and creation of new
chunks.

Signed-off-by: Roman Gushchin <guro@fb.com>
Signed-off-by: Dennis Zhou <dennis@kernel.org>
mm/percpu.c

index 6667dfa0fcec16c4c181540bbd7136b9430cbd35..f4c83217f2175dfb5cf76a466ee11243fe837850 100644 (file)
@@ -1980,6 +1980,9 @@ void __percpu *__alloc_reserved_percpu(size_t size, size_t align)
  * If empty_only is %false, reclaim all fully free chunks regardless of the
  * number of populated pages.  Otherwise, only reclaim chunks that have no
  * populated pages.
+ *
+ * CONTEXT:
+ * pcpu_lock (can be dropped temporarily)
  */
 static void pcpu_balance_free(bool empty_only)
 {
@@ -1987,12 +1990,12 @@ static void pcpu_balance_free(bool empty_only)
        struct list_head *free_head = &pcpu_chunk_lists[pcpu_free_slot];
        struct pcpu_chunk *chunk, *next;
 
+       lockdep_assert_held(&pcpu_lock);
+
        /*
         * There's no reason to keep around multiple unused chunks and VM
         * areas can be scarce.  Destroy all free chunks except for one.
         */
-       spin_lock_irq(&pcpu_lock);
-
        list_for_each_entry_safe(chunk, next, free_head, list) {
                WARN_ON(chunk->immutable);
 
@@ -2004,8 +2007,10 @@ static void pcpu_balance_free(bool empty_only)
                        list_move(&chunk->list, &to_free);
        }
 
-       spin_unlock_irq(&pcpu_lock);
+       if (list_empty(&to_free))
+               return;
 
+       spin_unlock_irq(&pcpu_lock);
        list_for_each_entry_safe(chunk, next, &to_free, list) {
                unsigned int rs, re;
 
@@ -2019,6 +2024,7 @@ static void pcpu_balance_free(bool empty_only)
                pcpu_destroy_chunk(chunk);
                cond_resched();
        }
+       spin_lock_irq(&pcpu_lock);
 }
 
 /**
@@ -2029,6 +2035,9 @@ static void pcpu_balance_free(bool empty_only)
  * OOM killer to be triggered.  We should avoid doing so until an actual
  * allocation causes the failure as it is possible that requests can be
  * serviced from already backed regions.
+ *
+ * CONTEXT:
+ * pcpu_lock (can be dropped temporarily)
  */
 static void pcpu_balance_populated(void)
 {
@@ -2037,6 +2046,8 @@ static void pcpu_balance_populated(void)
        struct pcpu_chunk *chunk;
        int slot, nr_to_pop, ret;
 
+       lockdep_assert_held(&pcpu_lock);
+
        /*
         * Ensure there are certain number of free populated pages for
         * atomic allocs.  Fill up from the most packed so that atomic
@@ -2064,13 +2075,11 @@ retry_pop:
                if (!nr_to_pop)
                        break;
 
-               spin_lock_irq(&pcpu_lock);
                list_for_each_entry(chunk, &pcpu_chunk_lists[slot], list) {
                        nr_unpop = chunk->nr_pages - chunk->nr_populated;
                        if (nr_unpop)
                                break;
                }
-               spin_unlock_irq(&pcpu_lock);
 
                if (!nr_unpop)
                        continue;
@@ -2080,12 +2089,13 @@ retry_pop:
                                             chunk->nr_pages) {
                        int nr = min_t(int, re - rs, nr_to_pop);
 
+                       spin_unlock_irq(&pcpu_lock);
                        ret = pcpu_populate_chunk(chunk, rs, rs + nr, gfp);
+                       cond_resched();
+                       spin_lock_irq(&pcpu_lock);
                        if (!ret) {
                                nr_to_pop -= nr;
-                               spin_lock_irq(&pcpu_lock);
                                pcpu_chunk_populated(chunk, rs, rs + nr);
-                               spin_unlock_irq(&pcpu_lock);
                        } else {
                                nr_to_pop = 0;
                        }
@@ -2097,11 +2107,12 @@ retry_pop:
 
        if (nr_to_pop) {
                /* ran out of chunks to populate, create a new one and retry */
+               spin_unlock_irq(&pcpu_lock);
                chunk = pcpu_create_chunk(gfp);
+               cond_resched();
+               spin_lock_irq(&pcpu_lock);
                if (chunk) {
-                       spin_lock_irq(&pcpu_lock);
                        pcpu_chunk_relocate(chunk, -1);
-                       spin_unlock_irq(&pcpu_lock);
                        goto retry_pop;
                }
        }
@@ -2117,6 +2128,10 @@ retry_pop:
  * populated pages threshold, reintegrate the chunk if it has empty free pages.
  * Each chunk is scanned in the reverse order to keep populated pages close to
  * the beginning of the chunk.
+ *
+ * CONTEXT:
+ * pcpu_lock (can be dropped temporarily)
+ *
  */
 static void pcpu_reclaim_populated(void)
 {
@@ -2124,7 +2139,7 @@ static void pcpu_reclaim_populated(void)
        struct pcpu_block_md *block;
        int i, end;
 
-       spin_lock_irq(&pcpu_lock);
+       lockdep_assert_held(&pcpu_lock);
 
 restart:
        /*
@@ -2190,8 +2205,6 @@ restart:
                        list_move(&chunk->list,
                                  &pcpu_chunk_lists[pcpu_sidelined_slot]);
        }
-
-       spin_unlock_irq(&pcpu_lock);
 }
 
 /**
@@ -2212,10 +2225,14 @@ static void pcpu_balance_workfn(struct work_struct *work)
         * appropriate.
         */
        mutex_lock(&pcpu_alloc_mutex);
+       spin_lock_irq(&pcpu_lock);
+
        pcpu_balance_free(false);
        pcpu_reclaim_populated();
        pcpu_balance_populated();
        pcpu_balance_free(true);
+
+       spin_unlock_irq(&pcpu_lock);
        mutex_unlock(&pcpu_alloc_mutex);
 }