Currently the rxe driver has three work queue tasks per qp. These are the
req.task, comp.task and resp.task, which call rxe_requester(),
rxe_completer() and rxe_responder() respectively, either directly or on
work queues. Each of these subroutines checks whether there is work to be
performed on the send queue, the response packet queue or the request
packet queue respectively. It then either runs until no work remains, or
yields the cpu and reschedules itself until no work remains.
This commit combines the req.task and comp.task into a single send.task
(qp->send_task) and renames the resp.task to the recv.task
(qp->recv_task). The combined send.task calls rxe_requester() and
rxe_completer() serially and continues until all work on both the send
queue and the response packet queue is done.
In various benchmarks the performance is either improved or left the
same. At high scale there is a significant reduction in the load on the
cpu.
This is the first step in combining these two tasks. Once they are
serialized, cross-rescheduling of req.task and comp.task can be handled
more efficiently by just letting the send.task continue to run. This
will be done in the next several patches.
Link: https://lore.kernel.org/r/20240329145513.35381-7-rpearsonhpe@gmail.com
Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
spin_lock_irqsave(&qp->state_lock, flags);
if (qp->valid) {
qp->comp.timeout = 1;
- rxe_sched_task(&qp->comp.task);
+ rxe_sched_task(&qp->send_task);
}
spin_unlock_irqrestore(&qp->state_lock, flags);
}
must_sched = skb_queue_len(&qp->resp_pkts) > 0;
if (must_sched != 0)
- rxe_counter_inc(SKB_TO_PKT(skb)->rxe, RXE_CNT_COMPLETER_SCHED);
+ rxe_counter_inc(SKB_TO_PKT(skb)->rxe, RXE_CNT_SENDER_SCHED);
skb_queue_tail(&qp->resp_pkts, skb);
if (must_sched)
- rxe_sched_task(&qp->comp.task);
+ rxe_sched_task(&qp->send_task);
else
- rxe_run_task(&qp->comp.task);
+ rxe_run_task(&qp->send_task);
}
static inline enum comp_state get_wqe(struct rxe_qp *qp,
qp->comp.psn = pkt->psn;
if (qp->req.wait_psn) {
qp->req.wait_psn = 0;
- rxe_sched_task(&qp->req.task);
+ rxe_sched_task(&qp->send_task);
}
}
return COMPST_ERROR_RETRY;
*/
if (qp->req.wait_fence) {
qp->req.wait_fence = 0;
- rxe_sched_task(&qp->req.task);
+ rxe_sched_task(&qp->send_task);
}
}
if (qp->req.need_rd_atomic) {
qp->comp.timeout_retry = 0;
qp->req.need_rd_atomic = 0;
- rxe_sched_task(&qp->req.task);
+ rxe_sched_task(&qp->send_task);
}
}
if (qp->req.wait_psn) {
qp->req.wait_psn = 0;
- rxe_sched_task(&qp->req.task);
+ rxe_sched_task(&qp->send_task);
}
}
if (qp->req.wait_psn) {
qp->req.wait_psn = 0;
- rxe_sched_task(&qp->req.task);
+ rxe_sched_task(&qp->send_task);
}
state = COMPST_DONE;
RXE_CNT_COMP_RETRY);
qp->req.need_retry = 1;
qp->comp.started_retry = 1;
- rxe_sched_task(&qp->req.task);
+ rxe_sched_task(&qp->send_task);
}
goto done;
[RXE_CNT_RCV_RNR].name = "rcvd_rnr_err",
[RXE_CNT_SND_RNR].name = "send_rnr_err",
[RXE_CNT_RCV_SEQ_ERR].name = "rcvd_seq_err",
- [RXE_CNT_COMPLETER_SCHED].name = "ack_deferred",
+ [RXE_CNT_SENDER_SCHED].name = "ack_deferred",
[RXE_CNT_RETRY_EXCEEDED].name = "retry_exceeded_err",
[RXE_CNT_RNR_RETRY_EXCEEDED].name = "retry_rnr_exceeded_err",
[RXE_CNT_COMP_RETRY].name = "completer_retry_err",
RXE_CNT_RCV_RNR,
RXE_CNT_SND_RNR,
RXE_CNT_RCV_SEQ_ERR,
- RXE_CNT_COMPLETER_SCHED,
+ RXE_CNT_SENDER_SCHED,
RXE_CNT_RETRY_EXCEEDED,
RXE_CNT_RNR_RETRY_EXCEEDED,
RXE_CNT_COMP_RETRY,
int rxe_completer(struct rxe_qp *qp);
int rxe_requester(struct rxe_qp *qp);
-int rxe_responder(struct rxe_qp *qp);
+int rxe_sender(struct rxe_qp *qp);
+int rxe_receiver(struct rxe_qp *qp);
/* rxe_icrc.c */
int rxe_icrc_init(struct rxe_dev *rxe);
if (unlikely(qp->need_req_skb &&
skb_out < RXE_INFLIGHT_SKBS_PER_QP_LOW))
- rxe_sched_task(&qp->req.task);
+ rxe_sched_task(&qp->send_task);
rxe_put(qp);
}
if ((qp_type(qp) != IB_QPT_RC) &&
(pkt->mask & RXE_END_MASK)) {
pkt->wqe->state = wqe_state_done;
- rxe_sched_task(&qp->comp.task);
+ rxe_sched_task(&qp->send_task);
}
rxe_counter_inc(rxe, RXE_CNT_SENT_PKTS);
qp->req.opcode = -1;
qp->comp.opcode = -1;
- rxe_init_task(&qp->req.task, qp, rxe_requester);
- rxe_init_task(&qp->comp.task, qp, rxe_completer);
+ rxe_init_task(&qp->send_task, qp, rxe_sender);
qp->qp_timeout_jiffies = 0; /* Can't be set for UD/UC in modify_qp */
if (init->qp_type == IB_QPT_RC) {
return err;
}
- rxe_init_task(&qp->resp.task, qp, rxe_responder);
+ rxe_init_task(&qp->recv_task, qp, rxe_receiver);
qp->resp.opcode = OPCODE_NONE;
qp->resp.msn = 0;
static void rxe_qp_reset(struct rxe_qp *qp)
{
/* stop tasks from running */
- rxe_disable_task(&qp->resp.task);
- rxe_disable_task(&qp->comp.task);
- rxe_disable_task(&qp->req.task);
+ rxe_disable_task(&qp->recv_task);
+ rxe_disable_task(&qp->send_task);
/* drain work and packet queuesc */
- rxe_requester(qp);
- rxe_completer(qp);
- rxe_responder(qp);
+ rxe_sender(qp);
+ rxe_receiver(qp);
if (qp->rq.queue)
rxe_queue_reset(qp->rq.queue);
cleanup_rd_atomic_resources(qp);
/* reenable tasks */
- rxe_enable_task(&qp->resp.task);
- rxe_enable_task(&qp->comp.task);
- rxe_enable_task(&qp->req.task);
+ rxe_enable_task(&qp->recv_task);
+ rxe_enable_task(&qp->send_task);
}
/* move the qp to the error state */
qp->attr.qp_state = IB_QPS_ERR;
/* drain work and packet queues */
- rxe_sched_task(&qp->resp.task);
- rxe_sched_task(&qp->comp.task);
- rxe_sched_task(&qp->req.task);
+ rxe_sched_task(&qp->recv_task);
+ rxe_sched_task(&qp->send_task);
spin_unlock_irqrestore(&qp->state_lock, flags);
}
spin_lock_irqsave(&qp->state_lock, flags);
qp->attr.sq_draining = 1;
- rxe_sched_task(&qp->comp.task);
- rxe_sched_task(&qp->req.task);
+ rxe_sched_task(&qp->send_task);
spin_unlock_irqrestore(&qp->state_lock, flags);
}
del_timer_sync(&qp->rnr_nak_timer);
}
- if (qp->resp.task.func)
- rxe_cleanup_task(&qp->resp.task);
+ if (qp->recv_task.func)
+ rxe_cleanup_task(&qp->recv_task);
- if (qp->req.task.func)
- rxe_cleanup_task(&qp->req.task);
-
- if (qp->comp.task.func)
- rxe_cleanup_task(&qp->comp.task);
+ if (qp->send_task.func)
+ rxe_cleanup_task(&qp->send_task);
/* flush out any receive wr's or pending requests */
- rxe_requester(qp);
- rxe_completer(qp);
- rxe_responder(qp);
+ rxe_sender(qp);
+ rxe_receiver(qp);
if (qp->sq.queue)
rxe_queue_cleanup(qp->sq.queue);
/* request a send queue retry */
qp->req.need_retry = 1;
qp->req.wait_for_rnr_timer = 0;
- rxe_sched_task(&qp->req.task);
+ rxe_sched_task(&qp->send_task);
}
spin_unlock_irqrestore(&qp->state_lock, flags);
}
* which can lead to a deadlock. So go ahead and complete
* it now.
*/
- rxe_sched_task(&qp->comp.task);
+ rxe_sched_task(&qp->send_task);
return 0;
}
qp->req.wqe_index);
wqe->state = wqe_state_done;
wqe->status = IB_WC_SUCCESS;
- rxe_sched_task(&qp->comp.task);
+ rxe_sched_task(&qp->send_task);
goto done;
}
payload = mtu;
*/
qp->need_req_skb = 1;
- rxe_sched_task(&qp->req.task);
+ rxe_sched_task(&qp->send_task);
goto exit;
}
out:
return ret;
}
+
+int rxe_sender(struct rxe_qp *qp)
+{
+ int req_ret;
+ int comp_ret;
+
+ /* process the send queue; the requester always runs first */
+ req_ret = rxe_requester(qp);
+
+ /* process the response queue; the completer runs regardless
+ * of what the requester returned
+ */
+ comp_ret = rxe_completer(qp);
+
+ /* exit the task loop (return -EAGAIN) only if both the
+ * requester and the completer returned non-zero; if either
+ * of them returned 0, return 0 as well.
+ * NOTE(review): non-zero presumably means that routine has
+ * no more work to do - confirm against the task loop.
+ */
+ return (req_ret && comp_ret) ? -EAGAIN : 0;
+}
(skb_queue_len(&qp->req_pkts) > 1);
if (must_sched)
- rxe_sched_task(&qp->resp.task);
+ rxe_sched_task(&qp->recv_task);
else
- rxe_run_task(&qp->resp.task);
+ rxe_run_task(&qp->recv_task);
}
static inline enum resp_states get_req(struct rxe_qp *qp,
qp->resp.wqe = NULL;
}
-int rxe_responder(struct rxe_qp *qp)
+int rxe_receiver(struct rxe_qp *qp)
{
struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
enum resp_states state;
/* kickoff processing of any posted wqes */
if (good)
- rxe_sched_task(&qp->req.task);
+ rxe_sched_task(&qp->send_task);
return err;
}
if (qp->is_user) {
/* Utilize process context to do protocol processing */
- rxe_run_task(&qp->req.task);
+ rxe_run_task(&qp->send_task);
} else {
err = rxe_post_send_kernel(qp, wr, bad_wr);
if (err)
spin_lock_irqsave(&qp->state_lock, flags);
if (qp_state(qp) == IB_QPS_ERR)
- rxe_sched_task(&qp->resp.task);
+ rxe_sched_task(&qp->recv_task);
spin_unlock_irqrestore(&qp->state_lock, flags);
return err;
int need_retry;
int wait_for_rnr_timer;
int noack_pkts;
- struct rxe_task task;
};
struct rxe_comp_info {
int started_retry;
u32 retry_cnt;
u32 rnr_retry;
- struct rxe_task task;
};
enum rdatm_res_state {
unsigned int res_head;
unsigned int res_tail;
struct resp_res *res;
- struct rxe_task task;
};
struct rxe_qp {
struct sk_buff_head req_pkts;
struct sk_buff_head resp_pkts;
+ struct rxe_task send_task;
+ struct rxe_task recv_task;
+
struct rxe_req_info req;
struct rxe_comp_info comp;
struct rxe_resp_info resp;