xfs: speed up xfarray sort by sorting xfile page contents directly
authorDarrick J. Wong <djwong@kernel.org>
Thu, 10 Aug 2023 14:48:06 +0000 (07:48 -0700)
committerDarrick J. Wong <djwong@kernel.org>
Thu, 10 Aug 2023 14:48:06 +0000 (07:48 -0700)
If all the records in an xfarray subset live within the same memory
page, we can short-circuit even more quicksort recursion by mapping that
page into the local CPU and using the kernel's heapsort function to sort
the subset.  On the author's computer, this reduces the runtime by
another 15% on a 500,000 element array.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Kent Overstreet <kent.overstreet@linux.dev>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
fs/xfs/scrub/trace.h
fs/xfs/scrub/xfarray.c
fs/xfs/scrub/xfarray.h

index c2e611e2af49333a4b6bacd542503ccb7808cd02..5100745086c82311dfa71f2db4ae5b1bfdddacd6 100644 (file)
@@ -869,6 +869,26 @@ TRACE_EVENT(xfarray_isort,
                  __entry->hi - __entry->lo)
 );
 
+TRACE_EVENT(xfarray_pagesort,
+       TP_PROTO(struct xfarray_sortinfo *si, uint64_t lo, uint64_t hi),
+       TP_ARGS(si, lo, hi),
+       TP_STRUCT__entry(
+               __field(unsigned long, ino)
+               __field(unsigned long long, lo)
+               __field(unsigned long long, hi)
+       ),
+       TP_fast_assign(
+               __entry->ino = file_inode(si->array->xfile->file)->i_ino;
+               __entry->lo = lo;
+               __entry->hi = hi;
+       ),
+       TP_printk("xfino 0x%lx lo %llu hi %llu elts %llu",
+                 __entry->ino,
+                 __entry->lo,
+                 __entry->hi,
+                 __entry->hi - __entry->lo)
+);
+
 TRACE_EVENT(xfarray_qsort,
        TP_PROTO(struct xfarray_sortinfo *si, uint64_t lo, uint64_t hi),
        TP_ARGS(si, lo, hi),
index 2a0599f660d7b89493070611743d1ddaec6f12b5..457e56eac5e155cc131154b9e9e4fd6d9f96b7e4 100644 (file)
@@ -545,6 +545,87 @@ xfarray_isort(
        return xfile_obj_store(si->array->xfile, scratch, len, lo_pos);
 }
 
+/* Grab a page for sorting records. */
+static inline int
+xfarray_sort_get_page(
+       struct xfarray_sortinfo *si,
+       loff_t                  pos,
+       uint64_t                len)
+{
+       int                     error;
+
+       error = xfile_get_page(si->array->xfile, pos, len, &si->xfpage);
+       if (error)
+               return error;
+
+       /*
+        * xfile pages must never be mapped into userspace, so we skip the
+        * dcache flush when mapping the page.
+        */
+       si->page_kaddr = kmap_local_page(si->xfpage.page);
+       return 0;
+}
+
+/* Release a page we grabbed for sorting records. */
+static inline int
+xfarray_sort_put_page(
+       struct xfarray_sortinfo *si)
+{
+       if (!si->page_kaddr)
+               return 0;
+
+       kunmap_local(si->page_kaddr);
+       si->page_kaddr = NULL;
+
+       return xfile_put_page(si->array->xfile, &si->xfpage);
+}
+
+/* Decide if these records are eligible for in-page sorting. */
+static inline bool
+xfarray_want_pagesort(
+       struct xfarray_sortinfo *si,
+       xfarray_idx_t           lo,
+       xfarray_idx_t           hi)
+{
+       pgoff_t                 lo_page;
+       pgoff_t                 hi_page;
+       loff_t                  end_pos;
+
+       /* We can only map one page at a time. */
+       lo_page = xfarray_pos(si->array, lo) >> PAGE_SHIFT;
+       end_pos = xfarray_pos(si->array, hi) + si->array->obj_size - 1;
+       hi_page = end_pos >> PAGE_SHIFT;
+
+       return lo_page == hi_page;
+}
+
+/* Sort a bunch of records that all live in the same memory page. */
+STATIC int
+xfarray_pagesort(
+       struct xfarray_sortinfo *si,
+       xfarray_idx_t           lo,
+       xfarray_idx_t           hi)
+{
+       void                    *startp;
+       loff_t                  lo_pos = xfarray_pos(si->array, lo);
+       uint64_t                len = xfarray_pos(si->array, hi - lo);
+       int                     error = 0;
+
+       trace_xfarray_pagesort(si, lo, hi);
+
+       xfarray_sort_bump_loads(si);
+       error = xfarray_sort_get_page(si, lo_pos, len);
+       if (error)
+               return error;
+
+       xfarray_sort_bump_heapsorts(si);
+       startp = si->page_kaddr + offset_in_page(lo_pos);
+       sort(startp, hi - lo + 1, si->array->obj_size, si->cmp_fn, NULL);
+
+       xfarray_sort_bump_stores(si);
+       return xfarray_sort_put_page(si);
+}
+
 /* Return a pointer to the xfarray pivot record within the sortinfo struct. */
 static inline void *xfarray_sortinfo_pivot(struct xfarray_sortinfo *si)
 {
@@ -699,6 +780,10 @@ xfarray_qsort_push(
  * 4. For small sets, load the records into the scratchpad and run heapsort on
  *    them because that is very fast.  In the author's experience, this yields
  *    a ~10% reduction in runtime.
+ *
+ *    If a small set is contained entirely within a single xfile memory page,
+ *    map the page directly and run heap sort directly on the xfile page
+ *    instead of using the load/store interface.  This halves the runtime.
  */
 
 /*
@@ -744,6 +829,18 @@ xfarray_sort(
                        continue;
                }
 
+               /*
+                * If directly mapping the page and sorting can solve our
+                * problems, we're done.
+                */
+               if (xfarray_want_pagesort(si, lo, hi)) {
+                       error = xfarray_pagesort(si, lo, hi);
+                       if (error)
+                               goto out_free;
+                       si->stack_depth--;
+                       continue;
+               }
+
                /* If insertion sort can solve our problems, we're done. */
                if (xfarray_want_isort(si, lo, hi)) {
                        error = xfarray_isort(si, lo, hi);
index 3661c98272cd50591e5e958525ff4fcef52f3914..091614e7f6836cf3558dfb99c98b5991fe699d29 100644 (file)
@@ -80,6 +80,10 @@ struct xfarray_sortinfo {
        /* XFARRAY_SORT_* flags; see below. */
        unsigned int            flags;
 
+       /* Cache a page here for faster access. */
+       struct xfile_page       xfpage;
+       void                    *page_kaddr;
+
 #ifdef DEBUG
        /* Performance statistics. */
        uint64_t                loads;