net/smc: don't send CDC/LLC message if link not ready
author Dust Li <dust.li@linux.alibaba.com>
Tue, 28 Dec 2021 09:03:24 +0000 (17:03 +0800)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 5 Jan 2022 11:42:36 +0000 (12:42 +0100)
[ Upstream commit 90cee52f2e780345d3629e278291aea5ac74f40f ]

We found that smc_llc_send_link_delete_all() sometimes waits
for the 2s timeout when testing with RDMA link up/down.
This can happen when an smc_link is in the ACTIVATING state
while the underlying QP is still in the RESET or RTR state,
in which it cannot send any messages out.

smc_llc_send_link_delete_all() uses smc_link_usable() to
check whether the link is usable. If the QP is still in the
RESET or RTR state but the smc_link is in ACTIVATING, this
LLC message will always fail without any CQE entering the
CQ, and we will always wait 2s before timing out.

Since we cannot send any messages through the QP before
the QP enters RTS, I add a wrapper smc_link_sendable()
which checks the state of the QP along with the link state,
and replace smc_link_usable() with smc_link_sendable()
in all LLC & CDC message sending routines.

Fixes: 5f08318f617b ("smc: connection data control (CDC)")
Signed-off-by: Dust Li <dust.li@linux.alibaba.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Sasha Levin <sashal@kernel.org>
net/smc/smc_core.c
net/smc/smc_core.h
net/smc/smc_llc.c
net/smc/smc_wr.c
net/smc/smc_wr.h

index 5a9c22ee75fa4bbb22690af2ffdb03239e836209..cb06568cf422fa53dcbccf87d05c48cac0ef0749 100644 (file)
@@ -604,7 +604,7 @@ static void smcr_lgr_link_deactivate_all(struct smc_link_group *lgr)
        for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
                struct smc_link *lnk = &lgr->lnk[i];
 
-               if (smc_link_usable(lnk))
+               if (smc_link_sendable(lnk))
                        lnk->state = SMC_LNK_INACTIVE;
        }
        wake_up_all(&lgr->llc_msg_waiter);
index c043ecdca5c445efe565acabc1ecd69adf90eaf2..51a3e8248ade2c6b1d0a8a1755f38a41a8e5c0e4 100644 (file)
@@ -366,6 +366,12 @@ static inline bool smc_link_usable(struct smc_link *lnk)
        return true;
 }
 
+static inline bool smc_link_sendable(struct smc_link *lnk)
+{
+       return smc_link_usable(lnk) &&
+               lnk->qp_attr.cur_qp_state == IB_QPS_RTS;
+}
+
 static inline bool smc_link_active(struct smc_link *lnk)
 {
        return lnk->state == SMC_LNK_ACTIVE;
index f1d323439a2af3658e7997cc0e8e62319f719100..ee1f0fdba08558690b5f20e8756e9ab83f2138d9 100644 (file)
@@ -1358,7 +1358,7 @@ void smc_llc_send_link_delete_all(struct smc_link_group *lgr, bool ord, u32 rsn)
        delllc.reason = htonl(rsn);
 
        for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
-               if (!smc_link_usable(&lgr->lnk[i]))
+               if (!smc_link_sendable(&lgr->lnk[i]))
                        continue;
                if (!smc_llc_send_message_wait(&lgr->lnk[i], &delllc))
                        break;
index c9cd7a4c5acfc714039eae28a6d51b18dc8dfb9f..fcc19420017600ac9b4b46e026ae33dfe2f10b03 100644 (file)
@@ -169,7 +169,7 @@ void smc_wr_tx_cq_handler(struct ib_cq *ib_cq, void *cq_context)
 static inline int smc_wr_tx_get_free_slot_index(struct smc_link *link, u32 *idx)
 {
        *idx = link->wr_tx_cnt;
-       if (!smc_link_usable(link))
+       if (!smc_link_sendable(link))
                return -ENOLINK;
        for_each_clear_bit(*idx, link->wr_tx_mask, link->wr_tx_cnt) {
                if (!test_and_set_bit(*idx, link->wr_tx_mask))
@@ -212,7 +212,7 @@ int smc_wr_tx_get_free_slot(struct smc_link *link,
        } else {
                rc = wait_event_interruptible_timeout(
                        link->wr_tx_wait,
-                       !smc_link_usable(link) ||
+                       !smc_link_sendable(link) ||
                        lgr->terminating ||
                        (smc_wr_tx_get_free_slot_index(link, &idx) != -EBUSY),
                        SMC_WR_TX_WAIT_FREE_SLOT_TIME);
index 2bc626f230a56dca33fb5617972443c8aecf98ee..102d515757ee2633f9d298a08f5de4268cccd533 100644 (file)
@@ -62,7 +62,7 @@ static inline void smc_wr_tx_set_wr_id(atomic_long_t *wr_tx_id, long val)
 
 static inline bool smc_wr_tx_link_hold(struct smc_link *link)
 {
-       if (!smc_link_usable(link))
+       if (!smc_link_sendable(link))
                return false;
        atomic_inc(&link->wr_tx_refcnt);
        return true;