io_uring: dump sqe contents if issue fails
author Jens Axboe <axboe@kernel.dk>
Sat, 11 Sep 2021 22:04:50 +0000 (16:04 -0600)
committer Jens Axboe <axboe@kernel.dk>
Tue, 19 Oct 2021 11:49:52 +0000 (05:49 -0600)
I recently had to look at a production problem where a request ended
up getting the dreaded -EINVAL error on submit. The most used and
hence useless of error codes, as it just tells you that something
was wrong with your request, but not more than that.

Let's dump the full sqe contents if we run into an issue failure,
that'll allow easier diagnosing of a wide variety of issues.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
fs/io_uring.c
include/trace/events/io_uring.h

index d4631a55a692058fe11748b234aa0a01a6ec0496..5daee36c1f33d1cbc616daf6bbd3c0eaf9a42acb 100644 (file)
@@ -7117,6 +7117,8 @@ static int io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
        ret = io_init_req(ctx, req, sqe);
        if (unlikely(ret)) {
 fail_req:
+               trace_io_uring_req_failed(sqe, ret);
+
                /* fail even hard links since we don't submit */
                if (link->head) {
                        /*
index 0dd30de00e5b44ce0c0d8090299fc1ac8a22fa15..7346f0164cf49d25df22acff1648555240f4e717 100644 (file)
@@ -6,6 +6,7 @@
 #define _TRACE_IO_URING_H
 
 #include <linux/tracepoint.h>
+#include <uapi/linux/io_uring.h>
 
 struct io_wq_work;
 
@@ -497,6 +498,66 @@ TRACE_EVENT(io_uring_task_run,
                  (unsigned long long) __entry->user_data)
 );
 
+/*
+ * io_uring_req_failed - called when an sqe is errored dring submission
+ *
+ * @sqe:               pointer to the io_uring_sqe that failed
+ * @error:             error it failed with
+ *
+ * Allows easier diagnosing of malformed requests in production systems.
+ */
+TRACE_EVENT(io_uring_req_failed,
+
+       TP_PROTO(const struct io_uring_sqe *sqe, int error),
+
+       TP_ARGS(sqe, error),
+
+       TP_STRUCT__entry (
+               __field(  u8,   opcode )
+               __field(  u8,   flags )
+               __field(  u8,   ioprio )
+               __field( u64,   off )
+               __field( u64,   addr )
+               __field( u32,   len )
+               __field( u32,   op_flags )
+               __field( u64,   user_data )
+               __field( u16,   buf_index )
+               __field( u16,   personality )
+               __field( u32,   file_index )
+               __field( u64,   pad1 )
+               __field( u64,   pad2 )
+               __field( int,   error )
+       ),
+
+       TP_fast_assign(
+               __entry->opcode         = sqe->opcode;
+               __entry->flags          = sqe->flags;
+               __entry->ioprio         = sqe->ioprio;
+               __entry->off            = sqe->off;
+               __entry->addr           = sqe->addr;
+               __entry->len            = sqe->len;
+               __entry->op_flags       = sqe->rw_flags;
+               __entry->user_data      = sqe->user_data;
+               __entry->buf_index      = sqe->buf_index;
+               __entry->personality    = sqe->personality;
+               __entry->file_index     = sqe->file_index;
+               __entry->pad1           = sqe->__pad2[0];
+               __entry->pad2           = sqe->__pad2[1];
+               __entry->error          = error;
+       ),
+
+       TP_printk("op %d, flags=0x%x, prio=%d, off=%llu, addr=%llu, "
+                 "len=%u, rw_flags=0x%x, user_data=0x%llx, buf_index=%d, "
+                 "personality=%d, file_index=%d, pad=0x%llx/%llx, error=%d",
+                 __entry->opcode, __entry->flags, __entry->ioprio,
+                 (unsigned long long)__entry->off,
+                 (unsigned long long) __entry->addr, __entry->len,
+                 __entry->op_flags, (unsigned long long) __entry->user_data,
+                 __entry->buf_index, __entry->personality, __entry->file_index,
+                 (unsigned long long) __entry->pad1,
+                 (unsigned long long) __entry->pad2, __entry->error)
+);
+
 #endif /* _TRACE_IO_URING_H */
 
 /* This part must be outside protection */