mm: handling Non-LRU pages returned by vm_normal_pages
authorAlex Sierra <alex.sierra@amd.com>
Fri, 15 Jul 2022 15:05:11 +0000 (10:05 -0500)
committerakpm <akpm@linux-foundation.org>
Mon, 18 Jul 2022 00:14:28 +0000 (17:14 -0700)
With DEVICE_COHERENT, we'll soon have vm_normal_page() return
device-managed anonymous pages that are not LRU pages.  Although they
behave like normal pages for purposes of mapping in CPU page tables and
for COW, they do not support LRU lists, NUMA migration or THP.

Callers to follow_page() currently don't expect ZONE_DEVICE pages,
however, with DEVICE_COHERENT we might now return ZONE_DEVICE.  Check for
ZONE_DEVICE pages in applicable users of follow_page() as well.

Link: https://lkml.kernel.org/r/20220715150521.18165-5-alex.sierra@amd.com
Signed-off-by: Alex Sierra <alex.sierra@amd.com>
Acked-by: Felix Kuehling <Felix.Kuehling@amd.com> [v2]
Reviewed-by: Alistair Popple <apopple@nvidia.com> [v6]
Cc: Christoph Hellwig <hch@lst.de>
Cc: David Hildenbrand <david@redhat.com>
Cc: Jason Gunthorpe <jgg@nvidia.com>
Cc: Jerome Glisse <jglisse@redhat.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Ralph Campbell <rcampbell@nvidia.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
fs/proc/task_mmu.c
mm/huge_memory.c
mm/khugepaged.c
mm/ksm.c
mm/madvise.c
mm/memory.c
mm/mempolicy.c
mm/migrate.c
mm/mlock.c
mm/mprotect.c

index 751c19d5bfdd926cd8624a22660f5140772c5281..1d7fd832123b44c3469b505bdfaa80fb132b6329 100644 (file)
@@ -1795,7 +1795,7 @@ static struct page *can_gather_numa_stats(pte_t pte, struct vm_area_struct *vma,
                return NULL;
 
        page = vm_normal_page(vma, addr, pte);
-       if (!page)
+       if (!page || is_zone_device_page(page))
                return NULL;
 
        if (PageReserved(page))
index 60d742c33de34d72511008bd71ad6ee910b58ab5..a563de8234c140c3f91083d9c6b7ddfd89278fd2 100644 (file)
@@ -2910,7 +2910,7 @@ static int split_huge_pages_pid(int pid, unsigned long vaddr_start,
 
                if (IS_ERR(page))
                        continue;
-               if (!page)
+               if (!page || is_zone_device_page(page))
                        continue;
 
                if (!is_transparent_hugepage(page))
index 01e0d6336754ece59f91a05eea787828f0ecf6a6..dea102170ab36d3676a9e20bc8760da2e636545c 100644 (file)
@@ -611,7 +611,7 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
                        goto out;
                }
                page = vm_normal_page(vma, address, pteval);
-               if (unlikely(!page)) {
+               if (unlikely(!page) || unlikely(is_zone_device_page(page))) {
                        result = SCAN_PAGE_NULL;
                        goto out;
                }
@@ -1261,7 +1261,7 @@ static int khugepaged_scan_pmd(struct mm_struct *mm,
                        writable = true;
 
                page = vm_normal_page(vma, _address, pteval);
-               if (unlikely(!page)) {
+               if (unlikely(!page) || unlikely(is_zone_device_page(page))) {
                        result = SCAN_PAGE_NULL;
                        goto out_unmap;
                }
@@ -1472,7 +1472,8 @@ void collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr)
                        goto abort;
 
                page = vm_normal_page(vma, addr, *pte);
-
+               if (WARN_ON_ONCE(page && is_zone_device_page(page)))
+                       page = NULL;
                /*
                 * Note that uprobe, debugger, or MAP_PRIVATE may change the
                 * page table, but the new page will not be a subpage of hpage.
@@ -1490,6 +1491,8 @@ void collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr)
                if (pte_none(*pte))
                        continue;
                page = vm_normal_page(vma, addr, *pte);
+               if (WARN_ON_ONCE(page && is_zone_device_page(page)))
+                       goto abort;
                page_remove_rmap(page, vma, false);
        }
 
index 8d2dc501c92c6f6e2009ee334745a2b56e576400..55f1d96348690ea55752d1f6d2adc1898d00bc26 100644 (file)
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -475,7 +475,7 @@ static int break_ksm(struct vm_area_struct *vma, unsigned long addr)
                cond_resched();
                page = follow_page(vma, addr,
                                FOLL_GET | FOLL_MIGRATION | FOLL_REMOTE);
-               if (IS_ERR_OR_NULL(page))
+               if (IS_ERR_OR_NULL(page) || is_zone_device_page(page))
                        break;
                if (PageKsm(page))
                        ret = handle_mm_fault(vma, addr,
@@ -560,7 +560,7 @@ static struct page *get_mergeable_page(struct rmap_item *rmap_item)
                goto out;
 
        page = follow_page(vma, addr, FOLL_GET);
-       if (IS_ERR_OR_NULL(page))
+       if (IS_ERR_OR_NULL(page) || is_zone_device_page(page))
                goto out;
        if (PageAnon(page)) {
                flush_anon_page(vma, page, addr);
@@ -2308,7 +2308,7 @@ next_mm:
                        if (ksm_test_exit(mm))
                                break;
                        *page = follow_page(vma, ksm_scan.address, FOLL_GET);
-                       if (IS_ERR_OR_NULL(*page)) {
+                       if (IS_ERR_OR_NULL(*page) || is_zone_device_page(*page)) {
                                ksm_scan.address += PAGE_SIZE;
                                cond_resched();
                                continue;
index e55108d4e4b2c1dd78cf3310a3557276f221068e..5f0f0948a50e4399a3115f6cd12ae8deae25f1bd 100644 (file)
@@ -421,7 +421,7 @@ regular_page:
                        continue;
 
                page = vm_normal_page(vma, addr, ptent);
-               if (!page)
+               if (!page || is_zone_device_page(page))
                        continue;
 
                /*
@@ -639,7 +639,7 @@ static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr,
                }
 
                page = vm_normal_page(vma, addr, ptent);
-               if (!page)
+               if (!page || is_zone_device_page(page))
                        continue;
 
                /*
index 580c62febe42eb8ea06b35b7de27de68c92dec89..dce0b2e686eb327dd57428b17fb536ca01b8f27c 100644 (file)
@@ -624,6 +624,14 @@ struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
                if (is_zero_pfn(pfn))
                        return NULL;
                if (pte_devmap(pte))
+               /*
+                * NOTE: New users of ZONE_DEVICE will not set pte_devmap()
+                * and will have refcounts incremented on their struct pages
+                * when they are inserted into PTEs, thus they are safe to
+                * return here. Legacy ZONE_DEVICE pages that set pte_devmap()
+                * do not have refcounts. Example of legacy ZONE_DEVICE is
+                * MEMORY_DEVICE_FS_DAX type in pmem or virtio_fs drivers.
+                */
                        return NULL;
 
                print_bad_pte(vma, addr, pte, NULL);
@@ -4693,7 +4701,7 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf)
        pte = pte_modify(old_pte, vma->vm_page_prot);
 
        page = vm_normal_page(vma, vmf->address, pte);
-       if (!page)
+       if (!page || is_zone_device_page(page))
                goto out_map;
 
        /* TODO: handle PTE-mapped THP */
index f4cd963550c1c5905b371b7ae76f0cf84fa7e050..88a5173c6ff077b74bad2e95a34a60a5575fbf42 100644 (file)
@@ -523,7 +523,7 @@ static int queue_pages_pte_range(pmd_t *pmd, unsigned long addr,
                if (!pte_present(*pte))
                        continue;
                page = vm_normal_page(vma, addr, *pte);
-               if (!page)
+               if (!page || is_zone_device_page(page))
                        continue;
                /*
                 * vm_normal_page() filters out zero pages, but there might
index 7934eebf168912cc98421b691ee195afb1a96111..1649270bc1a777019e4d647ea81fe3331b556179 100644 (file)
@@ -1630,7 +1630,7 @@ static int add_page_for_migration(struct mm_struct *mm, unsigned long addr,
                goto out;
 
        err = -ENOENT;
-       if (!page)
+       if (!page || is_zone_device_page(page))
                goto out;
 
        err = 0;
@@ -1821,7 +1821,7 @@ static void do_pages_stat_array(struct mm_struct *mm, unsigned long nr_pages,
                if (IS_ERR(page))
                        goto set_status;
 
-               if (page) {
+               if (page && !is_zone_device_page(page)) {
                        err = page_to_nid(page);
                        put_page(page);
                } else {
index 716caf851043121dcc8bd94542b6ada0a2acd7c7..b14e929084ccaa5b86d5c9c199d7054b10e0ccab 100644 (file)
@@ -333,7 +333,7 @@ static int mlock_pte_range(pmd_t *pmd, unsigned long addr,
                if (!pte_present(*pte))
                        continue;
                page = vm_normal_page(vma, addr, *pte);
-               if (!page)
+               if (!page || is_zone_device_page(page))
                        continue;
                if (PageTransCompound(page))
                        continue;
index 996a97e213adcc32fc1ac30d9cfd4648c763ffc0..5ef478b06a7d38d3ee5de8c6851ba8363a4a889a 100644 (file)
@@ -127,7 +127,7 @@ static unsigned long change_pte_range(struct mmu_gather *tlb,
                                        continue;
 
                                page = vm_normal_page(vma, addr, oldpte);
-                               if (!page || PageKsm(page))
+                               if (!page || is_zone_device_page(page) || PageKsm(page))
                                        continue;
 
                                /* Also skip shared copy-on-write pages */