nvme-fabrics: short-circuit reconnect retries
author Hannes Reinecke <hare@suse.de>
Tue, 30 Apr 2024 13:19:27 +0000 (15:19 +0200)
committer Keith Busch <kbusch@kernel.org>
Wed, 1 May 2024 10:07:20 +0000 (03:07 -0700)
Returning an NVMe status from nvme_tcp_setup_ctrl() indicates that the
association was established and we have received a status from the
controller; consequently we should honour the DNR bit. When DNR is set,
any future reconnect attempt will just return the same error, so we can
short-circuit the reconnect attempts and fail the connection directly.

Signed-off-by: Hannes Reinecke <hare@suse.de>
[dwagner: - extended nvme_should_reconnect]
Signed-off-by: Daniel Wagner <dwagner@suse.de>
Signed-off-by: Keith Busch <kbusch@kernel.org>
drivers/nvme/host/fabrics.c
drivers/nvme/host/fabrics.h
drivers/nvme/host/fc.c
drivers/nvme/host/rdma.c
drivers/nvme/host/tcp.c

index f7eaf9580b4f40aebcd0ad2f989252288c86d683..36d3e2ff27f3f31638b4e832413d771d8e2159f1 100644 (file)
@@ -559,8 +559,20 @@ out_free_data:
 }
 EXPORT_SYMBOL_GPL(nvmf_connect_io_queue);
 
-bool nvmf_should_reconnect(struct nvme_ctrl *ctrl)
+/*
+ * Evaluate the status information returned by the transport in order to decide
+ * if a reconnect attempt should be scheduled.
+ *
+ * Do not retry when:
+ *
+ * - the DNR bit is set and the specification states no further connect
+ *   attempts with the same set of parameters should be made.
+ */
+bool nvmf_should_reconnect(struct nvme_ctrl *ctrl, int status)
 {
+       if (status > 0 && (status & NVME_SC_DNR))
+               return false;
+
        if (ctrl->opts->max_reconnects == -1 ||
            ctrl->nr_reconnects < ctrl->opts->max_reconnects)
                return true;
index 37c974c38dcb077a5018c728c8f33ffc421e53b9..602135910ae9cc40f9ae24fa6204314f978fdbfe 100644 (file)
@@ -223,7 +223,7 @@ int nvmf_register_transport(struct nvmf_transport_ops *ops);
 void nvmf_unregister_transport(struct nvmf_transport_ops *ops);
 void nvmf_free_options(struct nvmf_ctrl_options *opts);
 int nvmf_get_address(struct nvme_ctrl *ctrl, char *buf, int size);
-bool nvmf_should_reconnect(struct nvme_ctrl *ctrl);
+bool nvmf_should_reconnect(struct nvme_ctrl *ctrl, int status);
 bool nvmf_ip_options_match(struct nvme_ctrl *ctrl,
                struct nvmf_ctrl_options *opts);
 void nvmf_set_io_queues(struct nvmf_ctrl_options *opts, u32 nr_io_queues,
index 68a5d971657bb5080f717f5ae1ec5645830aadd5..b330a6a7b63ae263db1dfbcc621edac638f1621d 100644 (file)
@@ -3310,12 +3310,10 @@ nvme_fc_reconnect_or_delete(struct nvme_fc_ctrl *ctrl, int status)
                dev_info(ctrl->ctrl.device,
                        "NVME-FC{%d}: reset: Reconnect attempt failed (%d)\n",
                        ctrl->cnum, status);
-               if (status > 0 && (status & NVME_SC_DNR))
-                       recon = false;
        } else if (time_after_eq(jiffies, rport->dev_loss_end))
                recon = false;
 
-       if (recon && nvmf_should_reconnect(&ctrl->ctrl)) {
+       if (recon && nvmf_should_reconnect(&ctrl->ctrl, status)) {
                if (portptr->port_state == FC_OBJSTATE_ONLINE)
                        dev_info(ctrl->ctrl.device,
                                "NVME-FC{%d}: Reconnect attempt in %ld "
index 366f0bb4ebfc1d9757aad5bdce2287785142403a..821ab3e0fd3b1cc75868a2cb8ef4cd37d8f0441b 100644 (file)
@@ -982,7 +982,8 @@ free_ctrl:
        kfree(ctrl);
 }
 
-static void nvme_rdma_reconnect_or_remove(struct nvme_rdma_ctrl *ctrl)
+static void nvme_rdma_reconnect_or_remove(struct nvme_rdma_ctrl *ctrl,
+                                         int status)
 {
        enum nvme_ctrl_state state = nvme_ctrl_state(&ctrl->ctrl);
 
@@ -992,7 +993,7 @@ static void nvme_rdma_reconnect_or_remove(struct nvme_rdma_ctrl *ctrl)
                return;
        }
 
-       if (nvmf_should_reconnect(&ctrl->ctrl)) {
+       if (nvmf_should_reconnect(&ctrl->ctrl, status)) {
                dev_info(ctrl->ctrl.device, "Reconnecting in %d seconds...\n",
                        ctrl->ctrl.opts->reconnect_delay);
                queue_delayed_work(nvme_wq, &ctrl->reconnect_work,
@@ -1104,10 +1105,12 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
 {
        struct nvme_rdma_ctrl *ctrl = container_of(to_delayed_work(work),
                        struct nvme_rdma_ctrl, reconnect_work);
+       int ret;
 
        ++ctrl->ctrl.nr_reconnects;
 
-       if (nvme_rdma_setup_ctrl(ctrl, false))
+       ret = nvme_rdma_setup_ctrl(ctrl, false);
+       if (ret)
                goto requeue;
 
        dev_info(ctrl->ctrl.device, "Successfully reconnected (%d attempts)\n",
@@ -1120,7 +1123,7 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
 requeue:
        dev_info(ctrl->ctrl.device, "Failed reconnect attempt %d\n",
                        ctrl->ctrl.nr_reconnects);
-       nvme_rdma_reconnect_or_remove(ctrl);
+       nvme_rdma_reconnect_or_remove(ctrl, ret);
 }
 
 static void nvme_rdma_error_recovery_work(struct work_struct *work)
@@ -1145,7 +1148,7 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work)
                return;
        }
 
-       nvme_rdma_reconnect_or_remove(ctrl);
+       nvme_rdma_reconnect_or_remove(ctrl, 0);
 }
 
 static void nvme_rdma_error_recovery(struct nvme_rdma_ctrl *ctrl)
@@ -2169,6 +2172,7 @@ static void nvme_rdma_reset_ctrl_work(struct work_struct *work)
 {
        struct nvme_rdma_ctrl *ctrl =
                container_of(work, struct nvme_rdma_ctrl, ctrl.reset_work);
+       int ret;
 
        nvme_stop_ctrl(&ctrl->ctrl);
        nvme_rdma_shutdown_ctrl(ctrl, false);
@@ -2179,14 +2183,15 @@ static void nvme_rdma_reset_ctrl_work(struct work_struct *work)
                return;
        }
 
-       if (nvme_rdma_setup_ctrl(ctrl, false))
+       ret = nvme_rdma_setup_ctrl(ctrl, false);
+       if (ret)
                goto out_fail;
 
        return;
 
 out_fail:
        ++ctrl->ctrl.nr_reconnects;
-       nvme_rdma_reconnect_or_remove(ctrl);
+       nvme_rdma_reconnect_or_remove(ctrl, ret);
 }
 
 static const struct nvme_ctrl_ops nvme_rdma_ctrl_ops = {
index fdbcdcedcee99f064cc7258d22b7fe737d285eda..3e0c3332332049e18a6148c1c7de4d33b649e751 100644 (file)
@@ -2155,7 +2155,8 @@ static void nvme_tcp_teardown_io_queues(struct nvme_ctrl *ctrl,
        nvme_tcp_destroy_io_queues(ctrl, remove);
 }
 
-static void nvme_tcp_reconnect_or_remove(struct nvme_ctrl *ctrl)
+static void nvme_tcp_reconnect_or_remove(struct nvme_ctrl *ctrl,
+               int status)
 {
        enum nvme_ctrl_state state = nvme_ctrl_state(ctrl);
 
@@ -2165,13 +2166,14 @@ static void nvme_tcp_reconnect_or_remove(struct nvme_ctrl *ctrl)
                return;
        }
 
-       if (nvmf_should_reconnect(ctrl)) {
+       if (nvmf_should_reconnect(ctrl, status)) {
                dev_info(ctrl->device, "Reconnecting in %d seconds...\n",
                        ctrl->opts->reconnect_delay);
                queue_delayed_work(nvme_wq, &to_tcp_ctrl(ctrl)->connect_work,
                                ctrl->opts->reconnect_delay * HZ);
        } else {
-               dev_info(ctrl->device, "Removing controller...\n");
+               dev_info(ctrl->device, "Removing controller (%d)...\n",
+                        status);
                nvme_delete_ctrl(ctrl);
        }
 }
@@ -2252,10 +2254,12 @@ static void nvme_tcp_reconnect_ctrl_work(struct work_struct *work)
        struct nvme_tcp_ctrl *tcp_ctrl = container_of(to_delayed_work(work),
                        struct nvme_tcp_ctrl, connect_work);
        struct nvme_ctrl *ctrl = &tcp_ctrl->ctrl;
+       int ret;
 
        ++ctrl->nr_reconnects;
 
-       if (nvme_tcp_setup_ctrl(ctrl, false))
+       ret = nvme_tcp_setup_ctrl(ctrl, false);
+       if (ret)
                goto requeue;
 
        dev_info(ctrl->device, "Successfully reconnected (%d attempt)\n",
@@ -2268,7 +2272,7 @@ static void nvme_tcp_reconnect_ctrl_work(struct work_struct *work)
 requeue:
        dev_info(ctrl->device, "Failed reconnect attempt %d\n",
                        ctrl->nr_reconnects);
-       nvme_tcp_reconnect_or_remove(ctrl);
+       nvme_tcp_reconnect_or_remove(ctrl, ret);
 }
 
 static void nvme_tcp_error_recovery_work(struct work_struct *work)
@@ -2295,7 +2299,7 @@ static void nvme_tcp_error_recovery_work(struct work_struct *work)
                return;
        }
 
-       nvme_tcp_reconnect_or_remove(ctrl);
+       nvme_tcp_reconnect_or_remove(ctrl, 0);
 }
 
 static void nvme_tcp_teardown_ctrl(struct nvme_ctrl *ctrl, bool shutdown)
@@ -2315,6 +2319,7 @@ static void nvme_reset_ctrl_work(struct work_struct *work)
 {
        struct nvme_ctrl *ctrl =
                container_of(work, struct nvme_ctrl, reset_work);
+       int ret;
 
        nvme_stop_ctrl(ctrl);
        nvme_tcp_teardown_ctrl(ctrl, false);
@@ -2328,14 +2333,15 @@ static void nvme_reset_ctrl_work(struct work_struct *work)
                return;
        }
 
-       if (nvme_tcp_setup_ctrl(ctrl, false))
+       ret = nvme_tcp_setup_ctrl(ctrl, false);
+       if (ret)
                goto out_fail;
 
        return;
 
 out_fail:
        ++ctrl->nr_reconnects;
-       nvme_tcp_reconnect_or_remove(ctrl);
+       nvme_tcp_reconnect_or_remove(ctrl, ret);
 }
 
 static void nvme_tcp_stop_ctrl(struct nvme_ctrl *ctrl)