svcrdma: SQ error tracepoints should report completion IDs
author     Chuck Lever <chuck.lever@oracle.com>
           Mon, 27 Nov 2023 16:33:37 +0000 (11:33 -0500)
committer  Chuck Lever <chuck.lever@oracle.com>
           Sun, 7 Jan 2024 22:54:27 +0000 (17:54 -0500)
Update the Send Queue's error flow tracepoints to report the
completion ID of the waiting or failing context. This ties the
wait/failure to a particular operation or request, which is a little
more useful than knowing only which transport is about to close.
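
With the updated TP_printk format, records for these events would
appear in the trace buffer roughly as follows (an illustrative sketch;
the cq.id, cid, queue-depth, and status values are made up, not taken
from a real capture):

    svcrdma_sq_full: cq.id=4 cid=201 sc_sq_avail=-3/128
    svcrdma_sq_post_err: cq.id=4 cid=201 sc_sq_avail=-3/128 status=-12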

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
include/trace/events/rpcrdma.h
net/sunrpc/xprtrdma/svc_rdma_rw.c
net/sunrpc/xprtrdma/svc_rdma_sendto.c

diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h
index b3445e07c15144a1d22ec2acf6dcd7ee6c8a7982..f1c2022d39ca0e0c7ef5ccb9aa643049409e88e5 100644
--- a/include/trace/events/rpcrdma.h
+++ b/include/trace/events/rpcrdma.h
@@ -2143,65 +2143,74 @@ TRACE_EVENT(svcrdma_qp_error,
        )
 );
 
-DECLARE_EVENT_CLASS(svcrdma_sendqueue_event,
+DECLARE_EVENT_CLASS(svcrdma_sendqueue_class,
        TP_PROTO(
-               const struct svcxprt_rdma *rdma
+               const struct svcxprt_rdma *rdma,
+               const struct rpc_rdma_cid *cid
        ),
 
-       TP_ARGS(rdma),
+       TP_ARGS(rdma, cid),
 
        TP_STRUCT__entry(
+               __field(u32, cq_id)
+               __field(int, completion_id)
                __field(int, avail)
                __field(int, depth)
-               __string(addr, rdma->sc_xprt.xpt_remotebuf)
        ),
 
        TP_fast_assign(
+               __entry->cq_id = cid->ci_queue_id;
+               __entry->completion_id = cid->ci_completion_id;
                __entry->avail = atomic_read(&rdma->sc_sq_avail);
                __entry->depth = rdma->sc_sq_depth;
-               __assign_str(addr, rdma->sc_xprt.xpt_remotebuf);
        ),
 
-       TP_printk("addr=%s sc_sq_avail=%d/%d",
-               __get_str(addr), __entry->avail, __entry->depth
+       TP_printk("cq.id=%u cid=%d sc_sq_avail=%d/%d",
+               __entry->cq_id, __entry->completion_id,
+               __entry->avail, __entry->depth
        )
 );
 
 #define DEFINE_SQ_EVENT(name)                                          \
-               DEFINE_EVENT(svcrdma_sendqueue_event, svcrdma_sq_##name,\
-                               TP_PROTO(                               \
-                                       const struct svcxprt_rdma *rdma \
-                               ),                                      \
-                               TP_ARGS(rdma))
+               DEFINE_EVENT(svcrdma_sendqueue_class, name,             \
+                       TP_PROTO(                                       \
+                               const struct svcxprt_rdma *rdma,        \
+                               const struct rpc_rdma_cid *cid          \
+                       ),                                              \
+                       TP_ARGS(rdma, cid)                              \
+               )
 
-DEFINE_SQ_EVENT(full);
-DEFINE_SQ_EVENT(retry);
+DEFINE_SQ_EVENT(svcrdma_sq_full);
+DEFINE_SQ_EVENT(svcrdma_sq_retry);
 
 TRACE_EVENT(svcrdma_sq_post_err,
        TP_PROTO(
                const struct svcxprt_rdma *rdma,
+               const struct rpc_rdma_cid *cid,
                int status
        ),
 
-       TP_ARGS(rdma, status),
+       TP_ARGS(rdma, cid, status),
 
        TP_STRUCT__entry(
+               __field(u32, cq_id)
+               __field(int, completion_id)
                __field(int, avail)
                __field(int, depth)
                __field(int, status)
-               __string(addr, rdma->sc_xprt.xpt_remotebuf)
        ),
 
        TP_fast_assign(
+               __entry->cq_id = cid->ci_queue_id;
+               __entry->completion_id = cid->ci_completion_id;
                __entry->avail = atomic_read(&rdma->sc_sq_avail);
                __entry->depth = rdma->sc_sq_depth;
                __entry->status = status;
-               __assign_str(addr, rdma->sc_xprt.xpt_remotebuf);
        ),
 
-       TP_printk("addr=%s sc_sq_avail=%d/%d status=%d",
-               __get_str(addr), __entry->avail, __entry->depth,
-               __entry->status
+       TP_printk("cq.id=%u cid=%d sc_sq_avail=%d/%d status=%d",
+               __entry->cq_id, __entry->completion_id,
+               __entry->avail, __entry->depth, __entry->status
        )
 );
 
diff --git a/net/sunrpc/xprtrdma/svc_rdma_rw.c b/net/sunrpc/xprtrdma/svc_rdma_rw.c
index db2a4bd2f7adf1eafbc3f08b49382b54eb18657e..b06e49cc55fbfa833c7572e424fcff9ee5785c61 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_rw.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_rw.c
@@ -406,14 +406,14 @@ static int svc_rdma_post_chunk_ctxt(struct svc_rdma_chunk_ctxt *cc)
                }
 
                percpu_counter_inc(&svcrdma_stat_sq_starve);
-               trace_svcrdma_sq_full(rdma);
+               trace_svcrdma_sq_full(rdma, &cc->cc_cid);
                atomic_add(cc->cc_sqecount, &rdma->sc_sq_avail);
                wait_event(rdma->sc_send_wait,
                           atomic_read(&rdma->sc_sq_avail) > cc->cc_sqecount);
-               trace_svcrdma_sq_retry(rdma);
+               trace_svcrdma_sq_retry(rdma, &cc->cc_cid);
        } while (1);
 
-       trace_svcrdma_sq_post_err(rdma, ret);
+       trace_svcrdma_sq_post_err(rdma, &cc->cc_cid, ret);
        svc_xprt_deferred_close(&rdma->sc_xprt);
 
        /* If even one was posted, there will be a completion. */
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index 31b711deab5e6c52fbf48046936c2d6609aaa72e..2ee691c45b857835082673a9ef9883745846d129 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -343,13 +343,13 @@ int svc_rdma_send(struct svcxprt_rdma *rdma, struct svc_rdma_send_ctxt *ctxt)
        while (1) {
                if ((atomic_dec_return(&rdma->sc_sq_avail) < 0)) {
                        percpu_counter_inc(&svcrdma_stat_sq_starve);
-                       trace_svcrdma_sq_full(rdma);
+                       trace_svcrdma_sq_full(rdma, &ctxt->sc_cid);
                        atomic_inc(&rdma->sc_sq_avail);
                        wait_event(rdma->sc_send_wait,
                                   atomic_read(&rdma->sc_sq_avail) > 1);
                        if (test_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags))
                                return -ENOTCONN;
-                       trace_svcrdma_sq_retry(rdma);
+                       trace_svcrdma_sq_retry(rdma, &ctxt->sc_cid);
                        continue;
                }
 
@@ -360,7 +360,7 @@ int svc_rdma_send(struct svcxprt_rdma *rdma, struct svc_rdma_send_ctxt *ctxt)
                return 0;
        }
 
-       trace_svcrdma_sq_post_err(rdma, ret);
+       trace_svcrdma_sq_post_err(rdma, &ctxt->sc_cid, ret);
        svc_xprt_deferred_close(&rdma->sc_xprt);
        wake_up(&rdma->sc_send_wait);
        return ret;
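
To observe the new fields at runtime, these events can be enabled
through the standard tracefs interface (a usage sketch, not part of
this patch; the events are defined under the rpcrdma trace system):

    echo 1 > /sys/kernel/tracing/events/rpcrdma/svcrdma_sq_full/enable
    echo 1 > /sys/kernel/tracing/events/rpcrdma/svcrdma_sq_retry/enable
    echo 1 > /sys/kernel/tracing/events/rpcrdma/svcrdma_sq_post_err/enable
    cat /sys/kernel/tracing/trace_pipe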