fs/proc/task_mmu: add fast paths to get/clear PAGE_IS_WRITTEN flag
author    Muhammad Usama Anjum <usama.anjum@collabora.com>
          Mon, 21 Aug 2023 14:15:15 +0000 (19:15 +0500)
committer Andrew Morton <akpm@linux-foundation.org>
          Wed, 18 Oct 2023 21:34:13 +0000 (14:34 -0700)
Add fast code paths to handle the cases where only the get and/or clear
operation of PAGE_IS_WRITTEN is requested; this improves performance by
0-35%.  Results from some test cases are given below (times are in
microseconds, "mcs").  A userspace sketch of the requests that exercise
these fast paths follows the measurements.

Test-case-1
t1 = (Get + WP) time
t2 = WP time
                       t1            t2
Without this patch:    140-170mcs    90-115mcs
With this patch:       110mcs        80mcs
Worst case diff:       35% faster    30% faster

Test-case-2
t3 = atomic Get and WP
                      t3
Without this patch:   120-140mcs
With this patch:      100-110mcs
Worst case diff:      21% faster
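
For reference, a minimal userspace sketch of the request shapes that hit
these fast paths is given below.  It is illustrative only and not part of
the kernel change: the uapi names (PAGEMAP_SCAN, struct pm_scan_arg,
struct page_region, PM_SCAN_WP_MATCHING, PAGE_IS_WRITTEN) are assumed
from the PAGEMAP_SCAN ioctl added earlier in this series, and the
userfaultfd async write-protect registration that the ioctl requires is
omitted for brevity.  A NULL vector together with PM_SCAN_WP_MATCHING
exercises the exclusive-WP fast path; requesting only PAGE_IS_WRITTEN in
both category_mask and return_mask exercises the get / get+WP fast path.

  /* Illustrative sketch only; error handling omitted. */
  #include <fcntl.h>
  #include <stdint.h>
  #include <string.h>
  #include <sys/ioctl.h>
  #include <sys/mman.h>
  #include <unistd.h>
  #include <linux/fs.h>

  static long pm_scan(int fd, char *start, size_t len,
                      struct page_region *vec, size_t vec_len,
                      uint64_t flags)
  {
          struct pm_scan_arg arg;

          memset(&arg, 0, sizeof(arg));
          arg.size = sizeof(arg);
          arg.start = (uintptr_t)start;
          arg.end = (uintptr_t)start + len;
          arg.flags = flags;
          arg.vec = (uintptr_t)vec;
          arg.vec_len = vec_len;
          /* Only PAGE_IS_WRITTEN requested: matches the new fast-path check */
          arg.category_mask = PAGE_IS_WRITTEN;
          arg.return_mask = PAGE_IS_WRITTEN;

          return ioctl(fd, PAGEMAP_SCAN, &arg);
  }

  int main(void)
  {
          struct page_region vec[64];
          size_t len = 64 * 4096;
          char *buf = mmap(NULL, len, PROT_READ | PROT_WRITE,
                           MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
          int fd = open("/proc/self/pagemap", O_RDONLY);

          /* buf is assumed to be registered for userfaultfd async WP */

          /* t2: WP only -- NULL vector takes the exclusive-WP fast path */
          pm_scan(fd, buf, len, NULL, 0, PM_SCAN_WP_MATCHING);

          /* Get only: report written pages without write-protecting them */
          pm_scan(fd, buf, len, vec, 64, 0);

          /* t3: atomic Get + WP in a single call */
          pm_scan(fd, buf, len, vec, 64, PM_SCAN_WP_MATCHING);

          close(fd);
          munmap(buf, len);
          return 0;
  }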

Link: https://lkml.kernel.org/r/20230821141518.870589-4-usama.anjum@collabora.com
Signed-off-by: Muhammad Usama Anjum <usama.anjum@collabora.com>
Cc: Alex Sierra <alex.sierra@amd.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Andrei Vagin <avagin@gmail.com>
Cc: Axel Rasmussen <axelrasmussen@google.com>
Cc: Christian Brauner <brauner@kernel.org>
Cc: Cyrill Gorcunov <gorcunov@gmail.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Gustavo A. R. Silva <gustavoars@kernel.org>
Cc: "Liam R. Howlett" <Liam.Howlett@oracle.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Michal Miroslaw <emmir@google.com>
Cc: Michał Mirosław <mirq-linux@rere.qmqm.pl>
Cc: Mike Rapoport (IBM) <rppt@kernel.org>
Cc: Nadav Amit <namit@vmware.com>
Cc: Pasha Tatashin <pasha.tatashin@soleen.com>
Cc: Paul Gofman <pgofman@codeweavers.com>
Cc: Peter Xu <peterx@redhat.com>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Yang Shi <shy828301@gmail.com>
Cc: Yun Zhou <yun.zhou@windriver.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>

diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index d4ef9a2bf95dbb9f1ef04d0262aa488fff42c985..1d994505805bee8ca48f6abb9ed3ec5cab077dbc 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -2145,6 +2145,41 @@ static int pagemap_scan_pmd_entry(pmd_t *pmd, unsigned long start,
                return 0;
        }
 
+       if (!p->vec_out) {
+               /* Fast path for performing exclusive WP */
+               for (addr = start; addr != end; pte++, addr += PAGE_SIZE) {
+                       if (pte_uffd_wp(ptep_get(pte)))
+                               continue;
+                       make_uffd_wp_pte(vma, addr, pte);
+                       if (!flush_end)
+                               start = addr;
+                       flush_end = addr + PAGE_SIZE;
+               }
+               goto flush_and_return;
+       }
+
+       if (!p->arg.category_anyof_mask && !p->arg.category_inverted &&
+           p->arg.category_mask == PAGE_IS_WRITTEN &&
+           p->arg.return_mask == PAGE_IS_WRITTEN) {
+               for (addr = start; addr < end; pte++, addr += PAGE_SIZE) {
+                       unsigned long next = addr + PAGE_SIZE;
+
+                       if (pte_uffd_wp(ptep_get(pte)))
+                               continue;
+                       ret = pagemap_scan_output(p->cur_vma_category | PAGE_IS_WRITTEN,
+                                                 p, addr, &next);
+                       if (next == addr)
+                               break;
+                       if (~p->arg.flags & PM_SCAN_WP_MATCHING)
+                               continue;
+                       make_uffd_wp_pte(vma, addr, pte);
+                       if (!flush_end)
+                               start = addr;
+                       flush_end = next;
+               }
+               goto flush_and_return;
+       }
+
        for (addr = start; addr != end; pte++, addr += PAGE_SIZE) {
                unsigned long categories = p->cur_vma_category |
                                           pagemap_page_category(p, vma, addr, ptep_get(pte));
@@ -2168,6 +2203,7 @@ static int pagemap_scan_pmd_entry(pmd_t *pmd, unsigned long start,
                flush_end = next;
        }
 
+flush_and_return:
        if (flush_end)
                flush_tlb_range(vma, start, addr);