io_uring: re-arrange struct io_ring_ctx to reduce padding
author Jens Axboe <axboe@kernel.dk>
Thu, 8 Feb 2024 16:28:52 +0000 (09:28 -0700)
committer Jens Axboe <axboe@kernel.dk>
Thu, 8 Feb 2024 20:27:06 +0000 (13:27 -0700)
Nothing major here, just moving a few things around to reduce the
padding. This shrinks struct io_ring_ctx on a non-debug kernel from
1536 to 1472 bytes, saving a full cacheline.
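
For illustration (none of this is the actual io_ring_ctx layout), a
minimal sketch of the effect being exploited: on an LP64 target, a
bool sandwiched between pointer-sized members forces seven bytes of
alignment padding, while grouping the small members at the tail lets
them share one aligned slot. The struct and member names below are
made up:

	#include <stdbool.h>
	#include <stdio.h>

	/* bool between pointers: 7 pad bytes after 'flag', plus a
	 * 4-byte tail after 'nr' -> 40 bytes on LP64 */
	struct scattered {
		void		*a;
		void		*b;
		bool		flag;
		void		*c;
		unsigned int	nr;
	};

	/* same members, small ones grouped: 'flag' packs into the
	 * bytes that trailed 'nr' anyway -> 32 bytes on LP64 */
	struct grouped {
		void		*a;
		void		*b;
		void		*c;
		unsigned int	nr;
		bool		flag;
	};

	int main(void)
	{
		printf("scattered: %zu\n", sizeof(struct scattered));
		printf("grouped:   %zu\n", sizeof(struct grouped));
		return 0;
	}

Running pahole(1) against a built vmlinux is the usual way to spot
such holes in kernel structs and verify the savings.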

Signed-off-by: Jens Axboe <axboe@kernel.dk>
include/linux/io_uring_types.h

index 4ddc7b3168f3d4481aa4b2476d1de4ddfaf30a13..addfcc74d851716cc2ddf39d26fb250ccc113652 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -240,6 +240,7 @@ struct io_ring_ctx {
                unsigned int            poll_activated: 1;
                unsigned int            drain_disabled: 1;
                unsigned int            compat: 1;
+               unsigned int            iowq_limits_set : 1;
 
                struct task_struct      *submitter_task;
                struct io_rings         *rings;
@@ -274,10 +275,20 @@ struct io_ring_ctx {
                 */
                struct io_rsrc_node     *rsrc_node;
                atomic_t                cancel_seq;
+
+               /*
+                * ->iopoll_list is protected by the ctx->uring_lock for
+                * io_uring instances that don't use IORING_SETUP_SQPOLL.
+                * For SQPOLL, only the single threaded io_sq_thread() will
+                * manipulate the list, hence no extra locking is needed there.
+                */
+               bool                    poll_multi_queue;
+               struct io_wq_work_list  iopoll_list;
+
                struct io_file_table    file_table;
+               struct io_mapped_ubuf   **user_bufs;
                unsigned                nr_user_files;
                unsigned                nr_user_bufs;
-               struct io_mapped_ubuf   **user_bufs;
 
                struct io_submit_state  submit_state;
 
@@ -288,15 +299,6 @@ struct io_ring_ctx {
                struct io_alloc_cache   apoll_cache;
                struct io_alloc_cache   netmsg_cache;
 
-               /*
-                * ->iopoll_list is protected by the ctx->uring_lock for
-                * io_uring instances that don't use IORING_SETUP_SQPOLL.
-                * For SQPOLL, only the single threaded io_sq_thread() will
-                * manipulate the list, hence no extra locking is needed there.
-                */
-               struct io_wq_work_list  iopoll_list;
-               bool                    poll_multi_queue;
-
                /*
                 * Any cancelable uring_cmd is added to this list in
                 * ->uring_cmd() by io_uring_cmd_insert_cancelable()
@@ -343,8 +345,8 @@ struct io_ring_ctx {
        spinlock_t              completion_lock;
 
        /* IRQ completion list, under ->completion_lock */
-       struct io_wq_work_list  locked_free_list;
        unsigned int            locked_free_nr;
+       struct io_wq_work_list  locked_free_list;
 
        struct list_head        io_buffers_comp;
        struct list_head        cq_overflow_list;
@@ -366,9 +368,6 @@ struct io_ring_ctx {
        unsigned int            file_alloc_start;
        unsigned int            file_alloc_end;
 
-       struct xarray           personalities;
-       u32                     pers_next;
-
        struct list_head        io_buffers_cache;
 
        /* deferred free list, protected by ->uring_lock */
@@ -389,6 +388,9 @@ struct io_ring_ctx {
        struct wait_queue_head          rsrc_quiesce_wq;
        unsigned                        rsrc_quiesce;
 
+       u32                     pers_next;
+       struct xarray           personalities;
+
        /* hashed buffered write serialization */
        struct io_wq_hash               *hash_map;
 
@@ -405,7 +407,6 @@ struct io_ring_ctx {
 
        /* io-wq management, e.g. thread count */
        u32                             iowq_limits[2];
-       bool                            iowq_limits_set;
 
        struct callback_head            poll_wq_task_work;
        struct list_head                defer_list;