From: Stefan Hajnoczi Date: Thu, 5 Mar 2020 17:08:01 +0000 (+0000) Subject: aio-posix: move RCU_READ_LOCK() into run_poll_handlers() X-Git-Url: http://git.maquefel.me/?a=commitdiff_plain;h=3aa221b382c9b36db1750ef5ed340b6566aacb8c;p=qemu.git aio-posix: move RCU_READ_LOCK() into run_poll_handlers() Now that run_poll_handlers_once() is only called by run_poll_handlers(), we can improve the CPU time profile by moving the expensive RCU_READ_LOCK() out of the polling loop. This reduces run_poll_handlers() from 40% CPU to 10% CPU in perf's sampling profiler output. Signed-off-by: Stefan Hajnoczi Link: https://lore.kernel.org/r/20200305170806.1313245-3-stefanha@redhat.com Message-Id: <20200305170806.1313245-3-stefanha@redhat.com> --- diff --git a/util/aio-posix.c b/util/aio-posix.c index 65964a2597..11a4971955 100644 --- a/util/aio-posix.c +++ b/util/aio-posix.c @@ -583,16 +583,6 @@ static bool run_poll_handlers_once(AioContext *ctx, int64_t *timeout) bool progress = false; AioHandler *node; - /* - * Optimization: ->io_poll() handlers often contain RCU read critical - * sections and we therefore see many rcu_read_lock() -> rcu_read_unlock() - * -> rcu_read_lock() -> ... sequences with expensive memory - * synchronization primitives. 
Make the entire polling loop an RCU + * critical section because nested rcu_read_lock()/rcu_read_unlock() calls + * are cheap. + */ + RCU_READ_LOCK_GUARD(); + start_time = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); do { progress = run_poll_handlers_once(ctx, timeout);