}
 
 static void numa_rebuild_single_mapping(struct vm_fault *vmf, struct vm_area_struct *vma,
+                                       unsigned long fault_addr, pte_t *fault_pte,
                                        bool writable)
 {
        pte_t pte, old_pte;
 
-       old_pte = ptep_modify_prot_start(vma, vmf->address, vmf->pte);
+       old_pte = ptep_modify_prot_start(vma, fault_addr, fault_pte);
        pte = pte_modify(old_pte, vma->vm_page_prot);
        pte = pte_mkyoung(pte);
        if (writable)
                pte = pte_mkwrite(pte, vma);
-       ptep_modify_prot_commit(vma, vmf->address, vmf->pte, old_pte, pte);
-       update_mmu_cache_range(vmf, vma, vmf->address, vmf->pte, 1);
+       ptep_modify_prot_commit(vma, fault_addr, fault_pte, old_pte, pte);
+       update_mmu_cache_range(vmf, vma, fault_addr, fault_pte, 1);
+}
+
+static void numa_rebuild_large_mapping(struct vm_fault *vmf, struct vm_area_struct *vma,
+                                      struct folio *folio, pte_t fault_pte,
+                                      bool ignore_writable, bool pte_write_upgrade)
+{
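+       /*
+        * 'nr' is the index of the faulting page within the folio. From it,
+        * derive the virtual range the folio spans, clamped to the VMA
+        * boundaries, and the PTE entry that maps 'start'.
+        */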
+       int nr = pte_pfn(fault_pte) - folio_pfn(folio);
+       unsigned long start = max(vmf->address - nr * PAGE_SIZE, vma->vm_start);
+       unsigned long end = min(vmf->address + (folio_nr_pages(folio) - nr) * PAGE_SIZE, vma->vm_end);
+       pte_t *start_ptep = vmf->pte - (vmf->address - start) / PAGE_SIZE;
+       unsigned long addr;
+
+       /* Restore all PTEs' mapping of the large folio */
+       for (addr = start; addr != end; start_ptep++, addr += PAGE_SIZE) {
+               pte_t ptent = ptep_get(start_ptep);
+               bool writable = false;
+
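+               /* Only present NUMA-hint (prot_none) entries need restoring */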
+               if (!pte_present(ptent) || !pte_protnone(ptent))
+                       continue;
+
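+               /* The PTE may by now map a page of a different folio; skip it */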
+               if (pfn_folio(pte_pfn(ptent)) != folio)
+                       continue;
+
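+               /*
+                * Recheck whether this PTE may be mapped writable, unless the
+                * caller has already dropped write access for every entry.
+                */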
+               if (!ignore_writable) {
+                       ptent = pte_modify(ptent, vma->vm_page_prot);
+                       writable = pte_write(ptent);
+                       if (!writable && pte_write_upgrade &&
+                           can_change_pte_writable(vma, addr, ptent))
+                               writable = true;
+               }
+
+               numa_rebuild_single_mapping(vmf, vma, addr, start_ptep, writable);
+       }
 }
 
 static vm_fault_t do_numa_page(struct vm_fault *vmf)
 {
        struct vm_area_struct *vma = vmf->vma;
        struct folio *folio = NULL;
        int nid = NUMA_NO_NODE;
-       bool writable = false;
+       bool writable = false, ignore_writable = false;
+       bool pte_write_upgrade = vma_wants_manual_pte_write_upgrade(vma);
        int last_cpupid;
        int target_nid;
        pte_t pte, old_pte;
-       int flags = 0;
+       int flags = 0, nr_pages;
 
        /*
         * The pte cannot be used safely until we verify, while holding the page
         * is only valid while holding the PT lock.
         */
        writable = pte_write(pte);
-       if (!writable && vma_wants_manual_pte_write_upgrade(vma) &&
+       if (!writable && pte_write_upgrade &&
            can_change_pte_writable(vma, vmf->address, pte))
                writable = true;
 
        if (!folio || folio_is_zone_device(folio))
                goto out_map;
 
-       /* TODO: handle PTE-mapped THP */
-       if (folio_test_large(folio))
-               goto out_map;
-
        /*
         * Avoid grouping on RO pages in general. RO pages shouldn't hurt as
         * much anyway since they can be in shared cache state. This misses
                flags |= TNF_SHARED;
 
        nid = folio_nid(folio);
+       nr_pages = folio_nr_pages(folio);
        /*
         * For memory tiering mode, cpupid of slow memory page is used
         * to record page access time.  So use default value.
        }
        pte_unmap_unlock(vmf->pte, vmf->ptl);
        writable = false;
+       ignore_writable = true;
 
        /* Migrate to the requested node */
        if (migrate_misplaced_folio(folio, vma, target_nid)) {
 
 out:
        if (nid != NUMA_NO_NODE)
-               task_numa_fault(last_cpupid, nid, 1, flags);
+               task_numa_fault(last_cpupid, nid, nr_pages, flags);
        return 0;
 out_map:
        /*
         * Make it present again, depending on how arch implements
         * non-accessible ptes, some can allow access by kernel mode.
         */
-       numa_rebuild_single_mapping(vmf, vma, writable);
+       if (folio && folio_test_large(folio))
+               numa_rebuild_large_mapping(vmf, vma, folio, pte, ignore_writable,
+                                          pte_write_upgrade);
+       else
+               numa_rebuild_single_mapping(vmf, vma, vmf->address, vmf->pte,
+                                           writable);
        pte_unmap_unlock(vmf->pte, vmf->ptl);
        goto out;
 }
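
As a rough illustration (not part of the patch), the start/end clamping done in
numa_rebuild_large_mapping() can be replayed in a standalone userspace sketch.
The PAGE_SIZE definition is a simplified stand-in for the kernel's, and all
addresses, PFNs and folio sizes below are hypothetical values chosen so that
the folio's head falls outside the VMA:

#include <stdio.h>

#define PAGE_SIZE 4096UL	/* stand-in for the kernel's PAGE_SIZE */

int main(void)
{
	unsigned long folio_pfn = 0x1000;	/* first PFN of the folio */
	unsigned long folio_nr_pages = 16;	/* a 64KB mTHP folio */
	unsigned long fault_pfn = 0x1005;	/* PFN hit by the NUMA fault */
	unsigned long fault_addr = 0x7f0000005000UL;	/* vmf->address */
	unsigned long vm_start = 0x7f0000002000UL;	/* VMA clips the folio head */
	unsigned long vm_end = 0x7f0000010000UL;

	/* index of the faulting page within the folio: 5 */
	long nr = fault_pfn - folio_pfn;

	/* virtual address of the folio's first page, clamped to vm_start */
	unsigned long start = fault_addr - nr * PAGE_SIZE;
	if (start < vm_start)
		start = vm_start;

	/* one past the folio's last page, clamped to vm_end */
	unsigned long end = fault_addr + (folio_nr_pages - nr) * PAGE_SIZE;
	if (end > vm_end)
		end = vm_end;

	/* start_ptep in the kernel code is vmf->pte walked back by this many entries */
	unsigned long pte_offset = (fault_addr - start) / PAGE_SIZE;

	printf("restore %lu PTEs from %#lx to %#lx (fault PTE is entry %lu)\n",
	       (end - start) / PAGE_SIZE, start, end, pte_offset);
	return 0;
}

With these values the sketch prints that 14 PTEs are restored starting at
0x7f0000002000: the two head pages that lie before vm_start are skipped, and
the faulting PTE is the fourth entry of the rebuilt range.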