scsi: lpfc: Exit PRLI completion handling early if ndlp not in PRLI_ISSUE state
authorJustin Tee <justin.tee@broadcom.com>
Mon, 9 Jan 2023 23:33:12 +0000 (15:33 -0800)
committerMartin K. Petersen <martin.petersen@oracle.com>
Thu, 12 Jan 2023 05:03:14 +0000 (00:03 -0500)
In a large SAN testing configuration, frequent target port toggle tests
occasionally result in missing LUN path rediscoveries.  An outstanding
PRLI can be in flight when a target RSCN disappearance occurs, causing the
driver to retry PRLIs using invalid rpi contexts.

Fix by verifying that an ndlp's state was not restarted from PRLI_ISSUE
due to an intermediate RSCN.  If not in a valid state, early exit PRLI
completion handling.

The last follow up RSCN indicating target reappearance retriggers
PLOGI/PRLI with a valid rpi context and is expected to succeed in LUN path
rediscovery.

Signed-off-by: Justin Tee <justin.tee@broadcom.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
drivers/scsi/lpfc/lpfc_els.c

index 919741bbe267b3799b898021341ccdbfa17d3438..4d3b8f2036d2f78a1aa2697e8b456c1ba7ce298f 100644 (file)
@@ -2373,15 +2373,30 @@ lpfc_cmpl_els_prli(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
                /* PRLI failed */
                lpfc_printf_vlog(vport, mode, loglevel,
                                 "2754 PRLI failure DID:%06X Status:x%x/x%x, "
-                                "data: x%x\n",
+                                "data: x%x x%x\n",
                                 ndlp->nlp_DID, ulp_status,
-                                ulp_word4, ndlp->fc4_prli_sent);
+                                ulp_word4, ndlp->nlp_state,
+                                ndlp->fc4_prli_sent);
 
                /* Do not call DSM for lpfc_els_abort'ed ELS cmds */
                if (!lpfc_error_lost_link(ulp_status, ulp_word4))
                        lpfc_disc_state_machine(vport, ndlp, cmdiocb,
                                                NLP_EVT_CMPL_PRLI);
 
+               /* The following condition catches an inflight transition
+                * mismatch typically caused by an RSCN. Skip any
+                * processing to allow recovery.
+                */
+               if (ndlp->nlp_state >= NLP_STE_PLOGI_ISSUE &&
+                   ndlp->nlp_state <= NLP_STE_REG_LOGIN_ISSUE) {
+                       lpfc_printf_vlog(vport, KERN_WARNING, LOG_NODE,
+                                        "2784 PRLI cmpl: state mismatch "
+                                        "DID x%06x nstate x%x nflag x%x\n",
+                                        ndlp->nlp_DID, ndlp->nlp_state,
+                                        ndlp->nlp_flag);
+                               goto out;
+               }
+
                /*
                 * For P2P topology, retain the node so that PLOGI can be
                 * attempted on it again.
@@ -4673,6 +4688,15 @@ lpfc_els_retry(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
                                /* the nameserver fails */
                                maxretry = 0;
                                delay = 100;
+                       } else if (cmd == ELS_CMD_PRLI &&
+                                  ndlp->nlp_state != NLP_STE_PRLI_ISSUE) {
+                               /* State-command disagreement.  The PRLI
+                                * failed with an invalid rpi, meaning there
+                                * was an unexpected state change.  Don't retry.
+                                */
+                               maxretry = 0;
+                               retry = 0;
+                               break;
                        }
                        retry = 1;
                        break;