KVM: selftests: Use EPOLL in userfaultfd_util reader threads
author Anish Moorthy <amoorthy@google.com>
Thu, 15 Feb 2024 23:54:03 +0000 (23:54 +0000)
committer Sean Christopherson <seanjc@google.com>
Tue, 9 Apr 2024 21:30:14 +0000 (14:30 -0700)
With multiple reader threads POLLing a single UFFD, the demand paging test
suffers from the thundering herd problem: performance degrades as the
number of reader threads is increased. Solve this issue [1] by switching
the polling mechanism to EPOLL + EPOLLEXCLUSIVE.

Also, change the error-handling convention of uffd_handler_thread_fn.
Instead of just printing errors and returning early from the polling
loop, check for them via TEST_ASSERT(). "return NULL" is reserved for a
successful exit from uffd_handler_thread_fn, i.e. one triggered by a
write to the exit pipe.

Performance samples generated by the command in [2] are given below.

Num Reader Threads, Paging Rate (POLL), Paging Rate (EPOLL)
1      249k      185k
2      201k      235k
4      186k      155k
16     150k      217k
32     89k       198k

[1] Single-vCPU performance does suffer somewhat.
[2] ./demand_paging_test -u MINOR -s shmem -v 4 -o -r <num readers>

Signed-off-by: Anish Moorthy <amoorthy@google.com>
Acked-by: James Houghton <jthoughton@google.com>
Link: https://lore.kernel.org/r/20240215235405.368539-13-amoorthy@google.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
tools/testing/selftests/kvm/demand_paging_test.c
tools/testing/selftests/kvm/lib/userfaultfd_util.c

index ca258968f6e115d889e318cbd87817cac5ca37cd..056ff1c873450adb5dc85a85d02398d88c44ee8b 100644 (file)
@@ -13,7 +13,6 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <time.h>
-#include <poll.h>
 #include <pthread.h>
 #include <linux/userfaultfd.h>
 #include <sys/syscall.h>
index 96a831c27c7f1a38bf1abb774ed6a73c18be30ad..0ba866c4af694f25b860d716f1b2176d2e1037c8 100644 (file)
@@ -16,6 +16,7 @@
 #include <poll.h>
 #include <pthread.h>
 #include <linux/userfaultfd.h>
+#include <sys/epoll.h>
 #include <sys/syscall.h>
 
 #include "kvm_util.h"
@@ -32,60 +33,55 @@ static void *uffd_handler_thread_fn(void *arg)
        int64_t pages = 0;
        struct timespec start;
        struct timespec ts_diff;
+       struct epoll_event evt;
+       int epollfd;
+
+       epollfd = epoll_create(1);
+       TEST_ASSERT(epollfd >= 0, "Failed to create epollfd.");
+
+       evt.events = EPOLLIN | EPOLLEXCLUSIVE;
+       evt.data.u32 = 0;
+       TEST_ASSERT(!epoll_ctl(epollfd, EPOLL_CTL_ADD, uffd, &evt),
+                   "Failed to add uffd to epollfd");
+
+       evt.events = EPOLLIN;
+       evt.data.u32 = 1;
+       TEST_ASSERT(!epoll_ctl(epollfd, EPOLL_CTL_ADD, reader_args->pipe, &evt),
+                   "Failed to add pipe to epollfd");
 
        clock_gettime(CLOCK_MONOTONIC, &start);
        while (1) {
                struct uffd_msg msg;
-               struct pollfd pollfd[2];
-               char tmp_chr;
                int r;
 
-               pollfd[0].fd = uffd;
-               pollfd[0].events = POLLIN;
-               pollfd[1].fd = reader_args->pipe;
-               pollfd[1].events = POLLIN;
-
-               r = poll(pollfd, 2, -1);
-               switch (r) {
-               case -1:
-                       pr_info("poll err");
-                       continue;
-               case 0:
-                       continue;
-               case 1:
-                       break;
-               default:
-                       pr_info("Polling uffd returned %d", r);
-                       return NULL;
-               }
+               r = epoll_wait(epollfd, &evt, 1, -1);
+               TEST_ASSERT(r == 1,
+                           "Unexpected number of events (%d) from epoll, errno = %d",
+                           r, errno);
 
-               if (pollfd[0].revents & POLLERR) {
-                       pr_info("uffd revents has POLLERR");
-                       return NULL;
-               }
+               if (evt.data.u32 == 1) {
+                       char tmp_chr;
 
-               if (pollfd[1].revents & POLLIN) {
-                       r = read(pollfd[1].fd, &tmp_chr, 1);
+                       TEST_ASSERT(!(evt.events & (EPOLLERR | EPOLLHUP)),
+                                   "Reader thread received EPOLLERR or EPOLLHUP on pipe.");
+                       r = read(reader_args->pipe, &tmp_chr, 1);
                        TEST_ASSERT(r == 1,
-                                   "Error reading pipefd in UFFD thread");
+                                   "Error reading pipefd in uffd reader thread");
                        break;
                }
 
-               if (!(pollfd[0].revents & POLLIN))
-                       continue;
+               TEST_ASSERT(!(evt.events & (EPOLLERR | EPOLLHUP)),
+                           "Reader thread received EPOLLERR or EPOLLHUP on uffd.");
 
                r = read(uffd, &msg, sizeof(msg));
                if (r == -1) {
-                       if (errno == EAGAIN)
-                               continue;
-                       pr_info("Read of uffd got errno %d\n", errno);
-                       return NULL;
+                       TEST_ASSERT(errno == EAGAIN,
+                                   "Error reading from UFFD: errno = %d", errno);
+                       continue;
                }
 
-               if (r != sizeof(msg)) {
-                       pr_info("Read on uffd returned unexpected size: %d bytes", r);
-                       return NULL;
-               }
+               TEST_ASSERT(r == sizeof(msg),
+                           "Read on uffd returned unexpected number of bytes (%d)", r);
 
                if (!(msg.event & UFFD_EVENT_PAGEFAULT))
                        continue;
@@ -93,8 +89,8 @@ static void *uffd_handler_thread_fn(void *arg)
                if (reader_args->delay)
                        usleep(reader_args->delay);
                r = reader_args->handler(reader_args->uffd_mode, uffd, &msg);
-               if (r < 0)
-                       return NULL;
+               TEST_ASSERT(r >= 0,
+                           "Reader thread handler fn returned negative value %d", r);
                pages++;
        }