mm: Split huge pages on write-notify or COW
author Thomas Hellstrom (VMware) <thomas_os@shipmail.org>
Tue, 24 Mar 2020 17:47:47 +0000 (18:47 +0100)
committer Thomas Hellstrom (VMware) <thomas_os@shipmail.org>
Tue, 24 Mar 2020 17:47:47 +0000 (18:47 +0100)
The functions wp_huge_pmd() and wp_huge_pud() currently rely on the
huge_fault() callback to split huge page table entries if needed.
However, for module users that requires export of the split_huge_xxx()
functionality, which may be undesired. Instead, split pre-existing huge
page-table entries on VM_FAULT_FALLBACK return.

We currently only do COW and write-notify on the PTE level, so if the
huge_fault() handler returns VM_FAULT_FALLBACK on wp faults,
split the huge pages and page-table entries. Also do this for huge PUDs
if there is no huge_fault() handler and the vma is not anonymous, similar
to how it's done for PMDs.

Note that fs/dax.c still does the splitting in the huge_fault() handler.
A follow-up patch can remove the dax.c split_huge_pmd() if needed.

Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Cc: Ralph Campbell <rcampbell@nvidia.com>
Cc: "Jérôme Glisse" <jglisse@redhat.com>
Cc: "Christian König" <christian.koenig@amd.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Thomas Hellstrom (VMware) <thomas_os@shipmail.org>
Acked-by: Christian König <christian.koenig@amd.com>
Acked-by: Andrew Morton <akpm@linux-foundation.org>
mm/memory.c

index e8bfdf0d9d1dd98c0341bdfc9adc11ae4a948cb7..efa59b1b109c2595efaaf1d9585ddc6b9295cd48 100644 (file)
@@ -3951,11 +3951,14 @@ static inline vm_fault_t wp_huge_pmd(struct vm_fault *vmf, pmd_t orig_pmd)
 {
        if (vma_is_anonymous(vmf->vma))
                return do_huge_pmd_wp_page(vmf, orig_pmd);
-       if (vmf->vma->vm_ops->huge_fault)
-               return vmf->vma->vm_ops->huge_fault(vmf, PE_SIZE_PMD);
+       if (vmf->vma->vm_ops->huge_fault) {
+               vm_fault_t ret = vmf->vma->vm_ops->huge_fault(vmf, PE_SIZE_PMD);
 
-       /* COW handled on pte level: split pmd */
-       VM_BUG_ON_VMA(vmf->vma->vm_flags & VM_SHARED, vmf->vma);
+               if (!(ret & VM_FAULT_FALLBACK))
+                       return ret;
+       }
+
+       /* COW or write-notify handled on pte level: split pmd. */
        __split_huge_pmd(vmf->vma, vmf->pmd, vmf->address, false, NULL);
 
        return VM_FAULT_FALLBACK;
@@ -3968,12 +3971,20 @@ static inline bool vma_is_accessible(struct vm_area_struct *vma)
 
 static vm_fault_t create_huge_pud(struct vm_fault *vmf)
 {
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#if defined(CONFIG_TRANSPARENT_HUGEPAGE) &&                    \
+       defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD)
        /* No support for anonymous transparent PUD pages yet */
        if (vma_is_anonymous(vmf->vma))
-               return VM_FAULT_FALLBACK;
-       if (vmf->vma->vm_ops->huge_fault)
-               return vmf->vma->vm_ops->huge_fault(vmf, PE_SIZE_PUD);
+               goto split;
+       if (vmf->vma->vm_ops->huge_fault) {
+               vm_fault_t ret = vmf->vma->vm_ops->huge_fault(vmf, PE_SIZE_PUD);
+
+               if (!(ret & VM_FAULT_FALLBACK))
+                       return ret;
+       }
+split:
+       /* COW or write-notify not handled on PUD level: split pud.*/
+       __split_huge_pud(vmf->vma, vmf->pud, vmf->address);
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
        return VM_FAULT_FALLBACK;
 }