RDMA/erdma: Notify the latest PI to FW for reflushing when necessary
author: Cheng Xu <chengyou@linux.alibaba.com>
Wed, 16 Nov 2022 02:31:07 +0000 (10:31 +0800)
committer: Jason Gunthorpe <jgg@nvidia.com>
Thu, 24 Nov 2022 18:58:52 +0000 (14:58 -0400)
Firmware is responsible for flushing WRs in HW, and it's a little
difficult for firmware to get the latest PI of QPs, especially for RQs
after the QP state has been changed to ERROR. So we introduce a new CMDQ
command, by which the driver can notify FW of the latest PI, and then FW
can flush all posted WRs.

Link: https://lore.kernel.org/r/20221116023107.82835-4-chengyou@linux.alibaba.com
Signed-off-by: Cheng Xu <chengyou@linux.alibaba.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
drivers/infiniband/hw/erdma/erdma_qp.c
drivers/infiniband/hw/erdma/erdma_verbs.h

index 521e97258de77dc61586e7c8fd68e3a96c9f1c6d..d088d6bef431afa8c6936219dd2f60a6ce46942e 100644 (file)
@@ -120,6 +120,7 @@ static int erdma_modify_qp_state_to_stop(struct erdma_qp *qp,
 int erdma_modify_qp_internal(struct erdma_qp *qp, struct erdma_qp_attrs *attrs,
                             enum erdma_qp_attr_mask mask)
 {
+       bool need_reflush = false;
        int drop_conn, ret = 0;
 
        if (!mask)
@@ -135,6 +136,7 @@ int erdma_modify_qp_internal(struct erdma_qp *qp, struct erdma_qp_attrs *attrs,
                        ret = erdma_modify_qp_state_to_rts(qp, attrs, mask);
                } else if (attrs->state == ERDMA_QP_STATE_ERROR) {
                        qp->attrs.state = ERDMA_QP_STATE_ERROR;
+                       need_reflush = true;
                        if (qp->cep) {
                                erdma_cep_put(qp->cep);
                                qp->cep = NULL;
@@ -145,17 +147,12 @@ int erdma_modify_qp_internal(struct erdma_qp *qp, struct erdma_qp_attrs *attrs,
        case ERDMA_QP_STATE_RTS:
                drop_conn = 0;
 
-               if (attrs->state == ERDMA_QP_STATE_CLOSING) {
+               if (attrs->state == ERDMA_QP_STATE_CLOSING ||
+                   attrs->state == ERDMA_QP_STATE_TERMINATE ||
+                   attrs->state == ERDMA_QP_STATE_ERROR) {
                        ret = erdma_modify_qp_state_to_stop(qp, attrs, mask);
                        drop_conn = 1;
-               } else if (attrs->state == ERDMA_QP_STATE_TERMINATE) {
-                       qp->attrs.state = ERDMA_QP_STATE_TERMINATE;
-                       ret = erdma_modify_qp_state_to_stop(qp, attrs, mask);
-                       drop_conn = 1;
-               } else if (attrs->state == ERDMA_QP_STATE_ERROR) {
-                       ret = erdma_modify_qp_state_to_stop(qp, attrs, mask);
-                       qp->attrs.state = ERDMA_QP_STATE_ERROR;
-                       drop_conn = 1;
+                       need_reflush = true;
                }
 
                if (drop_conn)
@@ -180,6 +177,12 @@ int erdma_modify_qp_internal(struct erdma_qp *qp, struct erdma_qp_attrs *attrs,
                break;
        }
 
+       if (need_reflush && !ret && rdma_is_kernel_res(&qp->ibqp.res)) {
+               qp->flags |= ERDMA_QP_IN_FLUSHING;
+               mod_delayed_work(qp->dev->reflush_wq, &qp->reflush_dwork,
+                                usecs_to_jiffies(100));
+       }
+
        return ret;
 }
 
@@ -527,6 +530,10 @@ int erdma_post_send(struct ib_qp *ibqp, const struct ib_send_wr *send_wr,
        }
        spin_unlock_irqrestore(&qp->lock, flags);
 
+       if (unlikely(qp->flags & ERDMA_QP_IN_FLUSHING))
+               mod_delayed_work(qp->dev->reflush_wq, &qp->reflush_dwork,
+                                usecs_to_jiffies(100));
+
        return ret;
 }
 
@@ -580,5 +587,10 @@ int erdma_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *recv_wr,
        }
 
        spin_unlock_irqrestore(&qp->lock, flags);
+
+       if (unlikely(qp->flags & ERDMA_QP_IN_FLUSHING))
+               mod_delayed_work(qp->dev->reflush_wq, &qp->reflush_dwork,
+                                usecs_to_jiffies(100));
+
        return ret;
 }
index 9f341d032069bda0258a208c7fff98085763c4a4..e0a993bc032a44aaa76e399ead33ccf8f6f0734e 100644 (file)
@@ -173,6 +173,10 @@ enum erdma_qp_attr_mask {
        ERDMA_QP_ATTR_MPA = (1 << 7)
 };
 
+enum erdma_qp_flags {
+       ERDMA_QP_IN_FLUSHING = (1 << 0),
+};
+
 struct erdma_qp_attrs {
        enum erdma_qp_state state;
        enum erdma_cc_alg cc; /* Congestion control algorithm */
@@ -197,6 +201,7 @@ struct erdma_qp {
        struct erdma_cep *cep;
        struct rw_semaphore state_lock;
 
+       unsigned long flags;
        struct delayed_work reflush_dwork;
 
        union {