lib min_heap: optimize number of comparisons in min_heapify()
author: Kuan-Wei Chiu <visitorckw@gmail.com>
Wed, 10 Jan 2024 08:12:13 +0000 (16:12 +0800)
committer: Andrew Morton <akpm@linux-foundation.org>
Thu, 22 Feb 2024 23:38:52 +0000 (15:38 -0800)
Optimize the min_heapify() function, resulting in a significant reduction
of approximately 50% in the number of comparisons for large random inputs,
while maintaining identical results.

The current implementation performs two comparisons per level to identify
the minimum among three elements.  In contrast, the proposed bottom-up
variation uses only one comparison per level to assess two children until
reaching the leaves.  Then, it sifts up until the correct position is
determined.

Typically, the process of sifting down proceeds to the leaf level,
resulting in O(1) secondary comparisons instead of log2(n).  This
optimization significantly reduces the number of costly indirect function
calls and improves overall performance.

Link: https://lkml.kernel.org/r/20240110081213.2289636-3-visitorckw@gmail.com
Signed-off-by: Kuan-Wei Chiu <visitorckw@gmail.com>
Acked-by: Ian Rogers <irogers@google.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
include/linux/min_heap.h

index 18a581310eb3507778a48a6056c303a641d77b10..d52daf45861b9a51f9dc5e9e29f2700aa3146adb 100644 (file)
@@ -35,31 +35,33 @@ static __always_inline
 void min_heapify(struct min_heap *heap, int pos,
                const struct min_heap_callbacks *func)
 {
-       void *left, *right, *parent, *smallest;
+       void *left, *right;
        void *data = heap->data;
+       void *root = data + pos * func->elem_size;
+       int i = pos, j;
 
+       /* Find the sift-down path all the way to the leaves. */
        for (;;) {
-               if (pos * 2 + 1 >= heap->nr)
+               if (i * 2 + 2 >= heap->nr)
                        break;
+               left = data + (i * 2 + 1) * func->elem_size;
+               right = data + (i * 2 + 2) * func->elem_size;
+               i = func->less(left, right) ? i * 2 + 1 : i * 2 + 2;
+       }
 
-               left = data + ((pos * 2 + 1) * func->elem_size);
-               parent = data + (pos * func->elem_size);
-               smallest = parent;
-               if (func->less(left, smallest))
-                       smallest = left;
-
-               if (pos * 2 + 2 < heap->nr) {
-                       right = data + ((pos * 2 + 2) * func->elem_size);
-                       if (func->less(right, smallest))
-                               smallest = right;
-               }
-               if (smallest == parent)
-                       break;
-               func->swp(smallest, parent);
-               if (smallest == left)
-                       pos = (pos * 2) + 1;
-               else
-                       pos = (pos * 2) + 2;
+       /* Special case for the last leaf with no sibling. */
+       if (i * 2 + 2 == heap->nr)
+               i = i * 2 + 1;
+
+       /* Backtrack to the correct location. */
+       while (i != pos && func->less(root, data + i * func->elem_size))
+               i = (i - 1) / 2;
+
+       /* Shift the element into its correct place. */
+       j = i;
+       while (i != pos) {
+               i = (i - 1) / 2;
+               func->swp(data + i * func->elem_size, data + j * func->elem_size);
        }
 }