scsi: ibmvfc: Fix invalid state machine BUG_ON()
author Brian King <brking@linux.vnet.ibm.com>
Tue, 13 Apr 2021 00:10:09 +0000 (18:10 -0600)
committer Martin K. Petersen <martin.petersen@oracle.com>
Tue, 13 Apr 2021 05:39:14 +0000 (01:39 -0400)
This fixes an issue hitting the BUG_ON() in ibmvfc_do_work(). When going
through a host action of IBMVFC_HOST_ACTION_RESET, we change the action to
IBMVFC_HOST_ACTION_TGT_DEL, then drop the host lock, and reset the CRQ,
which changes the host state to IBMVFC_NO_CRQ. If, prior to setting the
host state to IBMVFC_NO_CRQ, ibmvfc_init_host() is called, it can then end
up changing the host action to IBMVFC_HOST_ACTION_INIT. If we then change
the host state to IBMVFC_NO_CRQ, we will hit the BUG_ON().

Make a couple of changes to avoid this. Leave the host action as
IBMVFC_HOST_ACTION_RESET or IBMVFC_HOST_ACTION_REENABLE until after we drop
the host lock and reset or reenable the CRQ. Also harden the host state
machine to ensure we cannot leave the reset / reenable state until we've
finished processing the reset or reenable.

Link: https://lore.kernel.org/r/20210413001009.902400-1-tyreld@linux.ibm.com
Fixes: 73ee5d867287 ("[SCSI] ibmvfc: Fix soft lockup on resume")
Signed-off-by: Brian King <brking@linux.vnet.ibm.com>
[tyreld: added fixes tag]
Signed-off-by: Tyrel Datwyler <tyreld@linux.ibm.com>
[mkp: fix comment checkpatch warnings]
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
drivers/scsi/ibmvscsi/ibmvfc.c

index 067c7c45a654e76a861897bcb18cf9f074745b6d..8f3b783ae08aafe9e976542a08a6f6307871d6d0 100644 (file)
@@ -604,8 +604,17 @@ static void ibmvfc_set_host_action(struct ibmvfc_host *vhost,
                if (vhost->action == IBMVFC_HOST_ACTION_ALLOC_TGTS)
                        vhost->action = action;
                break;
+       case IBMVFC_HOST_ACTION_REENABLE:
+       case IBMVFC_HOST_ACTION_RESET:
+               vhost->action = action;
+               break;
        case IBMVFC_HOST_ACTION_INIT:
        case IBMVFC_HOST_ACTION_TGT_DEL:
+       case IBMVFC_HOST_ACTION_LOGO:
+       case IBMVFC_HOST_ACTION_QUERY_TGTS:
+       case IBMVFC_HOST_ACTION_TGT_DEL_FAILED:
+       case IBMVFC_HOST_ACTION_NONE:
+       default:
                switch (vhost->action) {
                case IBMVFC_HOST_ACTION_RESET:
                case IBMVFC_HOST_ACTION_REENABLE:
@@ -615,15 +624,6 @@ static void ibmvfc_set_host_action(struct ibmvfc_host *vhost,
                        break;
                }
                break;
-       case IBMVFC_HOST_ACTION_LOGO:
-       case IBMVFC_HOST_ACTION_QUERY_TGTS:
-       case IBMVFC_HOST_ACTION_TGT_DEL_FAILED:
-       case IBMVFC_HOST_ACTION_NONE:
-       case IBMVFC_HOST_ACTION_RESET:
-       case IBMVFC_HOST_ACTION_REENABLE:
-       default:
-               vhost->action = action;
-               break;
        }
 }
 
@@ -5380,30 +5380,49 @@ static void ibmvfc_do_work(struct ibmvfc_host *vhost)
        case IBMVFC_HOST_ACTION_INIT_WAIT:
                break;
        case IBMVFC_HOST_ACTION_RESET:
-               vhost->action = IBMVFC_HOST_ACTION_TGT_DEL;
                list_splice_init(&vhost->purge, &purge);
                spin_unlock_irqrestore(vhost->host->host_lock, flags);
                ibmvfc_complete_purge(&purge);
                rc = ibmvfc_reset_crq(vhost);
+
                spin_lock_irqsave(vhost->host->host_lock, flags);
-               if (rc == H_CLOSED)
+               if (!rc || rc == H_CLOSED)
                        vio_enable_interrupts(to_vio_dev(vhost->dev));
-               if (rc || (rc = ibmvfc_send_crq_init(vhost)) ||
-                   (rc = vio_enable_interrupts(to_vio_dev(vhost->dev)))) {
-                       ibmvfc_link_down(vhost, IBMVFC_LINK_DEAD);
-                       dev_err(vhost->dev, "Error after reset (rc=%d)\n", rc);
+               if (vhost->action == IBMVFC_HOST_ACTION_RESET) {
+                       /*
+                        * The only action we could have changed to would have
+                        * been reenable, in which case, we skip the rest of
+                        * this path and wait until we've done the re-enable
+                        * before sending the crq init.
+                        */
+                       vhost->action = IBMVFC_HOST_ACTION_TGT_DEL;
+
+                       if (rc || (rc = ibmvfc_send_crq_init(vhost)) ||
+                           (rc = vio_enable_interrupts(to_vio_dev(vhost->dev)))) {
+                               ibmvfc_link_down(vhost, IBMVFC_LINK_DEAD);
+                               dev_err(vhost->dev, "Error after reset (rc=%d)\n", rc);
+                       }
                }
                break;
        case IBMVFC_HOST_ACTION_REENABLE:
-               vhost->action = IBMVFC_HOST_ACTION_TGT_DEL;
                list_splice_init(&vhost->purge, &purge);
                spin_unlock_irqrestore(vhost->host->host_lock, flags);
                ibmvfc_complete_purge(&purge);
                rc = ibmvfc_reenable_crq_queue(vhost);
+
                spin_lock_irqsave(vhost->host->host_lock, flags);
-               if (rc || (rc = ibmvfc_send_crq_init(vhost))) {
-                       ibmvfc_link_down(vhost, IBMVFC_LINK_DEAD);
-                       dev_err(vhost->dev, "Error after enable (rc=%d)\n", rc);
+               if (vhost->action == IBMVFC_HOST_ACTION_REENABLE) {
+                       /*
+                        * The only action we could have changed to would have
+                        * been reset, in which case, we skip the rest of this
+                        * path and wait until we've done the reset before
+                        * sending the crq init.
+                        */
+                       vhost->action = IBMVFC_HOST_ACTION_TGT_DEL;
+                       if (rc || (rc = ibmvfc_send_crq_init(vhost))) {
+                               ibmvfc_link_down(vhost, IBMVFC_LINK_DEAD);
+                               dev_err(vhost->dev, "Error after enable (rc=%d)\n", rc);
+                       }
                }
                break;
        case IBMVFC_HOST_ACTION_LOGO: