 #include <linux/backing-dev.h>
 #include <linux/swap.h>
 #include <linux/swapops.h>
+#include <linux/mmu_notifier.h>
+
+#include <asm/tlb.h>
 
 /*
  * Any behaviour which results in changes to the vma->vm_flags needs to
        case MADV_REMOVE:
        case MADV_WILLNEED:
        case MADV_DONTNEED:
+       case MADV_FREE:
                return 0;
        default:
                /* be safe, default to 1. list exceptions explicitly */
        return 0;
 }
 
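+/*
+ * Walk the ptes of [addr, end).  For each present anonymous page, drop
+ * its swap cache entry and PG_dirty bit (only if no one else maps it)
+ * and mark the pte old and clean.  A page that stays clean can later be
+ * discarded by reclaim instead of being swapped out; writing to it again
+ * makes it dirty and restores normal treatment.
+ */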
+static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr,
+                               unsigned long end, struct mm_walk *walk)
+{
+       struct mmu_gather *tlb = walk->private;
+       struct mm_struct *mm = tlb->mm;
+       struct vm_area_struct *vma = walk->vma;
+       spinlock_t *ptl;
+       pte_t *orig_pte, *pte, ptent;
+       struct page *page;
+
+       split_huge_pmd(vma, pmd, addr);
+       if (pmd_trans_unstable(pmd))
+               return 0;
+
+       orig_pte = pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
+       arch_enter_lazy_mmu_mode();
+       for (; addr != end; pte++, addr += PAGE_SIZE) {
+               ptent = *pte;
+
+               if (!pte_present(ptent))
+                       continue;
+
+               page = vm_normal_page(vma, addr, ptent);
+               if (!page)
+                       continue;
+
+               /*
+                * If the pmd isn't transhuge but the page is a THP
+                * owned by only this process, split it so that the
+                * individual subpages can be handled below.
+                */
+               if (PageTransCompound(page)) {
+                       if (page_mapcount(page) != 1)
+                               goto out;
+                       get_page(page);
+                       if (!trylock_page(page)) {
+                               put_page(page);
+                               goto out;
+                       }
+                       pte_unmap_unlock(orig_pte, ptl);
+                       if (split_huge_page(page)) {
+                               unlock_page(page);
+                               put_page(page);
+                               pte_offset_map_lock(mm, pmd, addr, &ptl);
+                               goto out;
+                       }
+                       put_page(page);
+                       unlock_page(page);
+                       pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
+                       pte--;
+                       addr -= PAGE_SIZE;
+                       continue;
+               }
+
+               VM_BUG_ON_PAGE(PageTransCompound(page), page);
+
+               if (PageSwapCache(page) || PageDirty(page)) {
+                       if (!trylock_page(page))
+                               continue;
+                       /*
+                        * If the page is mapped by other processes, we
+                        * must not clear its PG_dirty bit.
+                        */
+                       if (page_mapcount(page) != 1) {
+                               unlock_page(page);
+                               continue;
+                       }
+
+                       if (PageSwapCache(page) && !try_to_free_swap(page)) {
+                               unlock_page(page);
+                               continue;
+                       }
+
+                       ClearPageDirty(page);
+                       unlock_page(page);
+               }
+
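+               /*
+                * Mark the pte old and clean so that reclaim can see
+                * whether the page gets referenced or redirtied after
+                * this point; an untouched clean page can then be freed
+                * rather than swapped out.
+                */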
+               if (pte_young(ptent) || pte_dirty(ptent)) {
+                       /*
+                        * Some architectures (e.g. PPC) don't update the
+                        * TLB via set_pte_at() and tlb_remove_tlb_entry(),
+                        * so for portability clear the pte first and then
+                        * remap it as old and clean.
+                        */
+                       ptent = ptep_get_and_clear_full(mm, addr, pte,
+                                                       tlb->fullmm);
+
+                       ptent = pte_mkold(ptent);
+                       ptent = pte_mkclean(ptent);
+                       set_pte_at(mm, addr, pte, ptent);
+                       tlb_remove_tlb_entry(tlb, pte, addr);
+               }
+       }
+out:
+       arch_leave_lazy_mmu_mode();
+       pte_unmap_unlock(orig_pte, ptl);
+       cond_resched();
+       return 0;
+}
+
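+/*
+ * Run madvise_free_pte_range() over [addr, end) of @vma inside a
+ * tlb_start_vma()/tlb_end_vma() section so the pte updates it batches
+ * are flushed through the caller's mmu_gather.
+ */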
+static void madvise_free_page_range(struct mmu_gather *tlb,
+                            struct vm_area_struct *vma,
+                            unsigned long addr, unsigned long end)
+{
+       struct mm_walk free_walk = {
+               .pmd_entry = madvise_free_pte_range,
+               .mm = vma->vm_mm,
+               .private = tlb,
+       };
+
+       tlb_start_vma(tlb, vma);
+       walk_page_range(addr, end, &free_walk);
+       tlb_end_vma(tlb, vma);
+}
+
+static int madvise_free_single_vma(struct vm_area_struct *vma,
+                       unsigned long start_addr, unsigned long end_addr)
+{
+       unsigned long start, end;
+       struct mm_struct *mm = vma->vm_mm;
+       struct mmu_gather tlb;
+
+       if (vma->vm_flags & (VM_LOCKED|VM_HUGETLB|VM_PFNMAP))
+               return -EINVAL;
+
+       /* MADV_FREE works only for anonymous vmas at the moment */
+       if (!vma_is_anonymous(vma))
+               return -EINVAL;
+
+       start = max(vma->vm_start, start_addr);
+       if (start >= vma->vm_end)
+               return -EINVAL;
+       end = min(vma->vm_end, end_addr);
+       if (end <= vma->vm_start)
+               return -EINVAL;
+
+       lru_add_drain();
+       tlb_gather_mmu(&tlb, mm, start, end);
+       update_hiwater_rss(mm);
+
+       mmu_notifier_invalidate_range_start(mm, start, end);
+       madvise_free_page_range(&tlb, vma, start, end);
+       mmu_notifier_invalidate_range_end(mm, start, end);
+       tlb_finish_mmu(&tlb, start, end);
+
+       return 0;
+}
+
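+/*
+ * Userspace requests lazy freeing with madvise(start, len, MADV_FREE).
+ * The pages keep their contents until reclaim actually frees them; a
+ * write in the meantime redirties a page and cancels the lazy free for
+ * it.
+ */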
+static long madvise_free(struct vm_area_struct *vma,
+                            struct vm_area_struct **prev,
+                            unsigned long start, unsigned long end)
+{
+       *prev = vma;
+       return madvise_free_single_vma(vma, start, end);
+}
+
 /*
  * Application no longer needs these pages.  If the pages are dirty,
  * it's OK to just throw them away.  The app will be more careful about
                return madvise_remove(vma, prev, start, end);
        case MADV_WILLNEED:
                return madvise_willneed(vma, prev, start, end);
+       case MADV_FREE:
+               /*
+                * XXX: In this implementation, MADV_FREE works like
+                * MADV_DONTNEED on a swapless system or when swap is full.
+                */
+               if (get_nr_swap_pages() > 0)
+                       return madvise_free(vma, prev, start, end);
+               /* fall through */
        case MADV_DONTNEED:
                return madvise_dontneed(vma, prev, start, end);
        default:
        case MADV_REMOVE:
        case MADV_WILLNEED:
        case MADV_DONTNEED:
+       case MADV_FREE:
 #ifdef CONFIG_KSM
        case MADV_MERGEABLE:
        case MADV_UNMERGEABLE:
 
         */
 }
 
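+/*
+ * State shared across one rmap walk: the ttu_flags for the walk plus a
+ * count of the mappings that were discarded as lazily freeable
+ * (MADV_FREE).
+ */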
+struct rmap_private {
+       enum ttu_flags flags;
+       int lazyfreed;
+};
+
 /*
- * @arg: enum ttu_flags will be passed to this argument
+ * @arg: a struct rmap_private carrying the ttu_flags for the walk and a
+ * count of the mappings that were lazily freed (MADV_FREE)
  */
        pte_t pteval;
        spinlock_t *ptl;
        int ret = SWAP_AGAIN;
-       enum ttu_flags flags = (enum ttu_flags)arg;
+       struct rmap_private *rp = arg;
+       enum ttu_flags flags = rp->flags;
 
        /* munlock has nothing to gain from examining un-locked vmas */
        if ((flags & TTU_MUNLOCK) && !(vma->vm_flags & VM_LOCKED))
                 * See handle_pte_fault() ...
                 */
                VM_BUG_ON_PAGE(!PageSwapCache(page), page);
+
+               if (!PageDirty(page) && (flags & TTU_LZFREE)) {
+                       /*
+                        * The page is still clean after MADV_FREE, so
+                        * discard it instead of swapping it out.
+                        */
+                       dec_mm_counter(mm, MM_ANONPAGES);
+                       rp->lazyfreed++;
+                       goto discard;
+               }
+
                if (swap_duplicate(entry) < 0) {
                        set_pte_at(mm, address, pte, pteval);
                        ret = SWAP_FAIL;
        } else
                dec_mm_counter(mm, mm_counter_file(page));
 
+discard:
        page_remove_rmap(page, PageHuge(page));
        page_cache_release(page);
 
 int try_to_unmap(struct page *page, enum ttu_flags flags)
 {
        int ret;
+       struct rmap_private rp = {
+               .flags = flags,
+               .lazyfreed = 0,
+       };
+
        struct rmap_walk_control rwc = {
                .rmap_one = try_to_unmap_one,
-               .arg = (void *)flags,
+               .arg = &rp,
                .done = page_not_mapped,
                .anon_lock = page_lock_anon_vma_read,
        };
 
        ret = rmap_walk(page, &rwc);
 
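+       /*
+        * If the page is fully unmapped, at least one mapping was lazily
+        * freed and the page is still clean, report SWAP_LZFREE so the
+        * caller can free it without swapping it out.
+        */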
-       if (ret != SWAP_MLOCK && !page_mapped(page))
+       if (ret != SWAP_MLOCK && !page_mapped(page)) {
                ret = SWAP_SUCCESS;
+               if (rp.lazyfreed && !PageDirty(page))
+                       ret = SWAP_LZFREE;
+       }
        return ret;
 }
 
 int try_to_munlock(struct page *page)
 {
        int ret;
+       struct rmap_private rp = {
+               .flags = TTU_MUNLOCK,
+               .lazyfreed = 0,
+       };
+
        struct rmap_walk_control rwc = {
                .rmap_one = try_to_unmap_one,
-               .arg = (void *)TTU_MUNLOCK,
+               .arg = &rp,
                .done = page_not_mapped,
                .anon_lock = page_lock_anon_vma_read,