mm: Make pte_mkwrite() take a VMA
author	Rick Edgecombe <rick.p.edgecombe@intel.com>
	Tue, 13 Jun 2023 00:10:29 +0000 (17:10 -0700)
committer	Rick Edgecombe <rick.p.edgecombe@intel.com>
	Tue, 11 Jul 2023 21:12:15 +0000 (14:12 -0700)
The x86 Shadow stack feature includes a new type of memory called shadow
stack. This shadow stack memory has some unusual properties, which require
some core mm changes to function properly.

One of these unusual properties is that shadow stack memory is writable,
but only in limited ways. These limits are applied via a specific PTE
bit combination. Nevertheless, the memory is writable, and core mm code
will need to apply the writable permissions in the typical paths that
call pte_mkwrite(). The goal is to make pte_mkwrite() take a VMA, so
that the x86 implementation of it can know whether to create regular
writable or shadow stack mappings.
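
As a rough illustration only (not part of this patch): with a VMA in
hand, the eventual x86 override can dispatch on the mapping type.
VM_SHADOW_STACK and pte_mkwrite_shstk() come from later patches in this
series and are shown here purely to make the intent concrete:

	pte_t pte_mkwrite(pte_t pte, struct vm_area_struct *vma)
	{
		/* Shadow stack VMAs get the special PTE bit combination */
		if (vma->vm_flags & VM_SHADOW_STACK)
			return pte_mkwrite_shstk(pte);

		/* Everything else gets a conventionally writable PTE */
		return pte_mkwrite_novma(pte);
	}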

But there are a couple of challenges to this. Modifying the signatures of
each arch pte_mkwrite() implementation would be error prone because some
are generated with macros and would need to be re-implemented. Also, some
pte_mkwrite() callers operate on kernel memory without a VMA.

So this can be done in a three-step process. First, pte_mkwrite() can be
renamed to pte_mkwrite_novma() in each arch, with a generic pte_mkwrite()
added that just calls pte_mkwrite_novma(). Next, callers without a VMA can
be moved to pte_mkwrite_novma(). And lastly, pte_mkwrite() and all callers
can be changed to take/pass a VMA.
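
For reference, step one was mostly a mechanical rename in each arch, with
the generic wrapper (see the include/linux/pgtable.h hunk below) falling
back to the renamed helper. A sketch using x86's historical _PAGE_RW-based
implementation, for illustration only:

	/* Before: per-arch helper */
	static inline pte_t pte_mkwrite(pte_t pte)
	{
		return pte_set_flags(pte, _PAGE_RW);
	}

	/* After step one: renamed; the generic pte_mkwrite() calls this */
	static inline pte_t pte_mkwrite_novma(pte_t pte)
	{
		return pte_set_flags(pte, _PAGE_RW);
	}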

In previous patches, pte_mkwrite() was renamed to pte_mkwrite_novma() and
callers that don't have a VMA were converted to use pte_mkwrite_novma().
So now change pte_mkwrite() to take a VMA and change the remaining callers
to pass a VMA. Apply the same changes for pmd_mkwrite().

No functional change.

Suggested-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Rick Edgecombe <rick.p.edgecombe@intel.com>
Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
Reviewed-by: Mike Rapoport (IBM) <rppt@kernel.org>
Acked-by: David Hildenbrand <david@redhat.com>
Link: https://lore.kernel.org/all/20230613001108.3040476-4-rick.p.edgecombe%40intel.com
Documentation/mm/arch_pgtable_helpers.rst
include/linux/mm.h
include/linux/pgtable.h
mm/debug_vm_pgtable.c
mm/huge_memory.c
mm/memory.c
mm/migrate.c
mm/migrate_device.c
mm/mprotect.c
mm/userfaultfd.c

index 69ce1f2aa4d19026738e20d2bf3477bda1b41501..c82e3ee20e51eed961b39eca92899011b18a06f9 100644 (file)
@@ -46,7 +46,8 @@ PTE Page Table Helpers
 +---------------------------+--------------------------------------------------+
 | pte_mkclean               | Creates a clean PTE                              |
 +---------------------------+--------------------------------------------------+
-| pte_mkwrite               | Creates a writable PTE                           |
+| pte_mkwrite               | Creates a writable PTE of the type specified by  |
+|                           | the VMA.                                         |
 +---------------------------+--------------------------------------------------+
 | pte_mkwrite_novma         | Creates a writable PTE, of the conventional type |
 |                           | of writable.                                     |
@@ -121,7 +122,8 @@ PMD Page Table Helpers
 +---------------------------+--------------------------------------------------+
 | pmd_mkclean               | Creates a clean PMD                              |
 +---------------------------+--------------------------------------------------+
-| pmd_mkwrite               | Creates a writable PMD                           |
+| pmd_mkwrite               | Creates a writable PMD of the type specified by  |
+|                           | the VMA.                                         |
 +---------------------------+--------------------------------------------------+
 | pmd_mkwrite_novma         | Creates a writable PMD, of the conventional type |
 |                           | of writable.                                     |
index 2dd73e4f3d8e3accb0c1b64ccc1be058811db21c..d40fa0feb9dccfb8364e3d357acd9f40b47c7c43 100644 (file)
@@ -1277,7 +1277,7 @@ void free_compound_page(struct page *page);
 static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma)
 {
        if (likely(vma->vm_flags & VM_WRITE))
-               pte = pte_mkwrite(pte);
+               pte = pte_mkwrite(pte, vma);
        return pte;
 }
 
index e6ea6e0d7d8d48834573fc594d739d8aade68318..9462f4a87d4287d037e58102e8d147ad17ea2930 100644 (file)
@@ -516,14 +516,14 @@ extern pud_t pudp_huge_clear_flush(struct vm_area_struct *vma,
 #endif
 
 #ifndef pte_mkwrite
-static inline pte_t pte_mkwrite(pte_t pte)
+static inline pte_t pte_mkwrite(pte_t pte, struct vm_area_struct *vma)
 {
        return pte_mkwrite_novma(pte);
 }
 #endif
 
 #if defined(CONFIG_ARCH_WANT_PMD_MKWRITE) && !defined(pmd_mkwrite)
-static inline pmd_t pmd_mkwrite(pmd_t pmd)
+static inline pmd_t pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma)
 {
        return pmd_mkwrite_novma(pmd);
 }
index ee119e33fef133ea49bf2bf0e8468c8fd464885e..b457ca17cef75d194225fbead7834db55ae8baf2 100644 (file)
@@ -109,10 +109,10 @@ static void __init pte_basic_tests(struct pgtable_debug_args *args, int idx)
        WARN_ON(!pte_same(pte, pte));
        WARN_ON(!pte_young(pte_mkyoung(pte_mkold(pte))));
        WARN_ON(!pte_dirty(pte_mkdirty(pte_mkclean(pte))));
-       WARN_ON(!pte_write(pte_mkwrite(pte_wrprotect(pte))));
+       WARN_ON(!pte_write(pte_mkwrite(pte_wrprotect(pte), args->vma)));
        WARN_ON(pte_young(pte_mkold(pte_mkyoung(pte))));
        WARN_ON(pte_dirty(pte_mkclean(pte_mkdirty(pte))));
-       WARN_ON(pte_write(pte_wrprotect(pte_mkwrite(pte))));
+       WARN_ON(pte_write(pte_wrprotect(pte_mkwrite(pte, args->vma))));
        WARN_ON(pte_dirty(pte_wrprotect(pte_mkclean(pte))));
        WARN_ON(!pte_dirty(pte_wrprotect(pte_mkdirty(pte))));
 }
@@ -156,7 +156,7 @@ static void __init pte_advanced_tests(struct pgtable_debug_args *args)
        pte = pte_mkclean(pte);
        set_pte_at(args->mm, args->vaddr, args->ptep, pte);
        flush_dcache_page(page);
-       pte = pte_mkwrite(pte);
+       pte = pte_mkwrite(pte, args->vma);
        pte = pte_mkdirty(pte);
        ptep_set_access_flags(args->vma, args->vaddr, args->ptep, pte, 1);
        pte = ptep_get(args->ptep);
@@ -202,10 +202,10 @@ static void __init pmd_basic_tests(struct pgtable_debug_args *args, int idx)
        WARN_ON(!pmd_same(pmd, pmd));
        WARN_ON(!pmd_young(pmd_mkyoung(pmd_mkold(pmd))));
        WARN_ON(!pmd_dirty(pmd_mkdirty(pmd_mkclean(pmd))));
-       WARN_ON(!pmd_write(pmd_mkwrite(pmd_wrprotect(pmd))));
+       WARN_ON(!pmd_write(pmd_mkwrite(pmd_wrprotect(pmd), args->vma)));
        WARN_ON(pmd_young(pmd_mkold(pmd_mkyoung(pmd))));
        WARN_ON(pmd_dirty(pmd_mkclean(pmd_mkdirty(pmd))));
-       WARN_ON(pmd_write(pmd_wrprotect(pmd_mkwrite(pmd))));
+       WARN_ON(pmd_write(pmd_wrprotect(pmd_mkwrite(pmd, args->vma))));
        WARN_ON(pmd_dirty(pmd_wrprotect(pmd_mkclean(pmd))));
        WARN_ON(!pmd_dirty(pmd_wrprotect(pmd_mkdirty(pmd))));
        /*
@@ -256,7 +256,7 @@ static void __init pmd_advanced_tests(struct pgtable_debug_args *args)
        pmd = pmd_mkclean(pmd);
        set_pmd_at(args->mm, vaddr, args->pmdp, pmd);
        flush_dcache_page(page);
-       pmd = pmd_mkwrite(pmd);
+       pmd = pmd_mkwrite(pmd, args->vma);
        pmd = pmd_mkdirty(pmd);
        pmdp_set_access_flags(args->vma, vaddr, args->pmdp, pmd, 1);
        pmd = READ_ONCE(*args->pmdp);
index eb3678360b97ea7ba7917a58ebe50f49f3952c0e..23c2aa612926c3ba0119dd8f83ac5774c74df2f6 100644 (file)
@@ -551,7 +551,7 @@ __setup("transparent_hugepage=", setup_transparent_hugepage);
 pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma)
 {
        if (likely(vma->vm_flags & VM_WRITE))
-               pmd = pmd_mkwrite(pmd);
+               pmd = pmd_mkwrite(pmd, vma);
        return pmd;
 }
 
@@ -1572,7 +1572,7 @@ out_map:
        pmd = pmd_modify(oldpmd, vma->vm_page_prot);
        pmd = pmd_mkyoung(pmd);
        if (writable)
-               pmd = pmd_mkwrite(pmd);
+               pmd = pmd_mkwrite(pmd, vma);
        set_pmd_at(vma->vm_mm, haddr, vmf->pmd, pmd);
        update_mmu_cache_pmd(vma, vmf->address, vmf->pmd);
        spin_unlock(vmf->ptl);
@@ -1925,7 +1925,7 @@ int change_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
        /* See change_pte_range(). */
        if ((cp_flags & MM_CP_TRY_CHANGE_WRITABLE) && !pmd_write(entry) &&
            can_change_pmd_writable(vma, addr, entry))
-               entry = pmd_mkwrite(entry);
+               entry = pmd_mkwrite(entry, vma);
 
        ret = HPAGE_PMD_NR;
        set_pmd_at(mm, addr, pmd, entry);
@@ -2243,7 +2243,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
                } else {
                        entry = mk_pte(page + i, READ_ONCE(vma->vm_page_prot));
                        if (write)
-                               entry = pte_mkwrite(entry);
+                               entry = pte_mkwrite(entry, vma);
                        if (anon_exclusive)
                                SetPageAnonExclusive(page + i);
                        if (!young)
@@ -3287,7 +3287,7 @@ void remove_migration_pmd(struct page_vma_mapped_walk *pvmw, struct page *new)
        if (pmd_swp_soft_dirty(*pvmw->pmd))
                pmde = pmd_mksoft_dirty(pmde);
        if (is_writable_migration_entry(entry))
-               pmde = pmd_mkwrite(pmde);
+               pmde = pmd_mkwrite(pmde, vma);
        if (pmd_swp_uffd_wp(*pvmw->pmd))
                pmde = pmd_mkuffd_wp(pmde);
        if (!is_migration_entry_young(entry))
index 01f39e8144effd7ae67be7d7239547af45e95118..f093c73512c50eacc616f6a76c98a754319c11c7 100644 (file)
@@ -4119,7 +4119,7 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf)
        entry = mk_pte(&folio->page, vma->vm_page_prot);
        entry = pte_sw_mkyoung(entry);
        if (vma->vm_flags & VM_WRITE)
-               entry = pte_mkwrite(pte_mkdirty(entry));
+               entry = pte_mkwrite(pte_mkdirty(entry), vma);
 
        vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address,
                        &vmf->ptl);
@@ -4808,7 +4808,7 @@ out_map:
        pte = pte_modify(old_pte, vma->vm_page_prot);
        pte = pte_mkyoung(pte);
        if (writable)
-               pte = pte_mkwrite(pte);
+               pte = pte_mkwrite(pte, vma);
        ptep_modify_prot_commit(vma, vmf->address, vmf->pte, old_pte, pte);
        update_mmu_cache(vma, vmf->address, vmf->pte);
        pte_unmap_unlock(vmf->pte, vmf->ptl);
index 24baad2571e314740f3d85f32292e9fac2072648..18f58b7e0affc6f595f8b84d974a62aa82507d72 100644 (file)
@@ -220,7 +220,7 @@ static bool remove_migration_pte(struct folio *folio,
                if (folio_test_dirty(folio) && is_migration_entry_dirty(entry))
                        pte = pte_mkdirty(pte);
                if (is_writable_migration_entry(entry))
-                       pte = pte_mkwrite(pte);
+                       pte = pte_mkwrite(pte, vma);
                else if (pte_swp_uffd_wp(old_pte))
                        pte = pte_mkuffd_wp(pte);
 
index 8365158460ed178277ca7302835b658a43f3ef0e..df280aa461e23e7b9880373315cafdefa721fbca 100644 (file)
@@ -623,7 +623,7 @@ static void migrate_vma_insert_page(struct migrate_vma *migrate,
                }
                entry = mk_pte(page, vma->vm_page_prot);
                if (vma->vm_flags & VM_WRITE)
-                       entry = pte_mkwrite(pte_mkdirty(entry));
+                       entry = pte_mkwrite(pte_mkdirty(entry), vma);
        }
 
        ptep = pte_offset_map_lock(mm, pmdp, addr, &ptl);
index 6f658d4837047836e5a97c7ccf6e373929fc1a74..b342e0196e013067311f1fc11ad0ebd31a68e6fc 100644 (file)
@@ -185,7 +185,7 @@ static long change_pte_range(struct mmu_gather *tlb,
                        if ((cp_flags & MM_CP_TRY_CHANGE_WRITABLE) &&
                            !pte_write(ptent) &&
                            can_change_pte_writable(vma, addr, ptent))
-                               ptent = pte_mkwrite(ptent);
+                               ptent = pte_mkwrite(ptent, vma);
 
                        ptep_modify_prot_commit(vma, addr, pte, oldpte, ptent);
                        if (pte_needs_flush(oldpte, ptent))
index a2bf37ee276d6b365e61c4d017a7e8179b73d2a5..b322ac54ea205e995b398d4b096c15a03511246d 100644 (file)
@@ -72,7 +72,7 @@ int mfill_atomic_install_pte(pmd_t *dst_pmd,
        if (page_in_cache && !vm_shared)
                writable = false;
        if (writable)
-               _dst_pte = pte_mkwrite(_dst_pte);
+               _dst_pte = pte_mkwrite(_dst_pte, dst_vma);
        if (flags & MFILL_ATOMIC_WP)
                _dst_pte = pte_mkuffd_wp(_dst_pte);