mm: mlock: update mlock_pte_range to handle large folio

author Yin Fengwei <fengwei.yin@intel.com>

Mon, 18 Sep 2023 07:33:18 +0000 (15:33 +0800)

committer Andrew Morton <akpm@linux-foundation.org>

Wed, 4 Oct 2023 17:32:32 +0000 (10:32 -0700)
author Yin Fengwei <fengwei.yin@intel.com>
Mon, 18 Sep 2023 07:33:18 +0000 (15:33 +0800)
committer Andrew Morton <akpm@linux-foundation.org>
Wed, 4 Oct 2023 17:32:32 +0000 (10:32 -0700)
diff --git a/mm/mlock.c b/mm/mlock.c

index 06bdfab83b58af92f0abd43d068567de52d7f57d..42b6865f8f82848e0b230ed6389ea0c645b48367 100644 (file)
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -305,6 +305,58 @@ void munlock_folio(struct folio *folio)
         local_unlock(&mlock_fbatch.lock);
  }
  
+/*
+ * Count how many consecutive PTEs, starting at @pte, map pages of @folio.
+ *
+ * @folio: folio mapped by the PTE at @pte
+ * @pte:   first PTE to examine
+ * @addr:  virtual address corresponding to @pte
+ * @end:   end of the address range being walked
+ *
+ * Returns 1 for a non-large folio. For a large folio, returns the number
+ * of PTEs scanned before hitting a non-present entry, an entry whose pfn
+ * lies outside the folio, or the limit imposed by the folio size and by
+ * @end, whichever comes first.
+ */
+static inline unsigned int folio_mlock_step(struct folio *folio,
+               pte_t *pte, unsigned long addr, unsigned long end)
+{
+       unsigned int nr_pages = folio_nr_pages(folio);
+       unsigned long first_pfn = folio_pfn(folio);
+       pte_t first = ptep_get(pte);
+       unsigned int limit, batched;
+
+       /* Non-large folio: the step is a single PTE. */
+       if (!folio_test_large(folio))
+               return 1;
+
+       /*
+        * Pages left in the folio counting from the one @pte maps,
+        * capped by the number of pages left in [addr, end).
+        */
+       limit = first_pfn + nr_pages - pte_pfn(first);
+       limit = min_t(unsigned int, limit, (end - addr) >> PAGE_SHIFT);
+
+       batched = 0;
+       while (batched < limit) {
+               pte_t cur = ptep_get(pte);
+
+               /*
+                * Stop at the first entry that is not present or whose
+                * pfn is not inside the folio; the unsigned subtraction
+                * also catches pfns below the folio's first pfn.
+                */
+               if (!pte_present(cur) || pte_pfn(cur) - first_pfn >= nr_pages)
+                       break;
+
+               batched++;
+               pte++;
+       }
+
+       return batched;
+}
+
+/*
+ * Decide whether @folio may be mlocked/munlocked for the PTE range
+ * currently being walked.
+ *
+ * @folio: folio mapped by the current PTE
+ * @vma:   VMA being walked; VM_LOCKED set means mlock, clear means munlock
+ * @start: start of the range checked against the folio
+ * @end:   end of that range
+ * @step:  number of consecutive PTEs found to map @folio
+ *
+ * Returns true if the operation should proceed, false to skip the folio.
+ */
+static inline bool allow_mlock_munlock(struct folio *folio,
+               struct vm_area_struct *vma, unsigned long start,
+               unsigned long end, unsigned int step)
+{
+       /*
+        * For munlock, allow a large folio that is only partially
+        * mapped to the VMA: the folio may have been mlocked before
+        * the VMA was split.
+        *
+        * Under memory pressure such a large folio can be split, and
+        * the pages that are no longer in a VM_LOCKED VMA can then be
+        * reclaimed.
+        */
+       if (!(vma->vm_flags & VM_LOCKED))
+               return true;
+
+       /*
+        * A small folio is mapped by the single PTE being visited, so
+        * the partial-mapping checks below have nothing to reject.
+        * Return early instead of calling folio_within_range() on it.
+        * NOTE(review): folio_within_range() is defined outside this
+        * patch; upstream it must not be given KSM folios (always
+        * small), whose index is not a page offset within the VMA.
+        */
+       if (!folio_test_large(folio))
+               return true;
+
+       /* large folio not within range [start, end), skip mlock */
+       if (!folio_within_range(folio, vma, start, end))
+               return false;
+
+       /* large folio is not fully mapped, skip mlock */
+       if (step != folio_nr_pages(folio))
+               return false;
+
+       return true;
+}
+
  static int mlock_pte_range(pmd_t *pmd, unsigned long addr,
                            unsigned long end, struct mm_walk *walk)
  
@@ -314,6 +366,8 @@ static int mlock_pte_range(pmd_t *pmd, unsigned long addr,
         pte_t *start_pte, *pte;
         pte_t ptent;
         struct folio *folio;
+       unsigned int step = 1;
+       unsigned long start = addr;
  
         ptl = pmd_trans_huge_lock(pmd, vma);
         if (ptl) {
@@ -334,6 +388,7 @@ static int mlock_pte_range(pmd_t *pmd, unsigned long addr,
                 walk->action = ACTION_AGAIN;
                 return 0;
         }
+
         for (pte = start_pte; addr != end; pte++, addr += PAGE_SIZE) {
                 ptent = ptep_get(pte);
                 if (!pte_present(ptent))
@@ -341,12 +396,19 @@ static int mlock_pte_range(pmd_t *pmd, unsigned long addr,
                 folio = vm_normal_folio(vma, addr, ptent);
                 if (!folio || folio_is_zone_device(folio))
                         continue;
-               if (folio_test_large(folio))
-                       continue;
+
+               step = folio_mlock_step(folio, pte, addr, end);
+               if (!allow_mlock_munlock(folio, vma, start, end, step))
+                       goto next_entry;
+
                 if (vma->vm_flags & VM_LOCKED)
                         mlock_folio(folio);
                 else
                         munlock_folio(folio);
+
+next_entry:
+               pte += step - 1;
+               addr += (step - 1) << PAGE_SHIFT;
         }
         pte_unmap(start_pte);
  out:
author	Yin Fengwei <fengwei.yin@intel.com>
	Mon, 18 Sep 2023 07:33:18 +0000 (15:33 +0800)
committer	Andrew Morton <akpm@linux-foundation.org>
	Wed, 4 Oct 2023 17:32:32 +0000 (10:32 -0700)