git: de8bb30885a4 - stable/13 - mpr: fix freeze / release mismatch in timeout code

From: Warner Losh <imp_at_FreeBSD.org>
Date: Mon, 06 Dec 2021 15:56:45 UTC
The branch stable/13 has been updated by imp:

URL: https://cgit.FreeBSD.org/src/commit/?id=de8bb30885a4e4f43ef92c8af34557636f9e8216

commit de8bb30885a4e4f43ef92c8af34557636f9e8216
Author:     Warner Losh <imp@FreeBSD.org>
AuthorDate: 2021-11-21 15:50:46 +0000
Commit:     Warner Losh <imp@FreeBSD.org>
CommitDate: 2021-12-06 15:56:00 +0000

    mpr: fix freeze / release mismatch in timeout code
    
    So, if we're processing a timeout, and we've sent an ABORT to the
    firmware for that timeout, but not yet received the response from the
    firmware, AND we get another timeout, we queue the timeout and freeze
    the queue. However, when we've finally processed them all, we only
    release the queue once. This causes all I/O to halt as the devq remains
    frozen forever.
    
    Instead, only freeze the queue when we start the process (eg set INRESET
    on the target). This will allow the release when all the timed out I/Os
    have finished ABORTing.
    
    Sponsored by:           Netflix
    Reviewed by:            mav
    Differential Revision:  https://reviews.freebsd.org/D33054
    
    (cherry picked from commit a8837c77efd0bb9d934657edf87a9a66baac7479)
---
 sys/dev/mpr/mpr_sas.c | 31 +++++++++++++++++++++----------
 1 file changed, 21 insertions(+), 10 deletions(-)

diff --git a/sys/dev/mpr/mpr_sas.c b/sys/dev/mpr/mpr_sas.c
index 09fa6ac4bc92..3a0bf5aca702 100644
--- a/sys/dev/mpr/mpr_sas.c
+++ b/sys/dev/mpr/mpr_sas.c
@@ -248,7 +248,8 @@ mprsas_free_tm(struct mpr_softc *sc, struct mpr_command *tm)
 	 * INRESET flag as well or scsi I/O will not work.
 	 */
 	if (tm->cm_ccb) {
-		mpr_dprint(sc, MPR_XINFO, "Unfreezing devq for target ID %d\n",
+		mpr_dprint(sc, MPR_XINFO | MPR_RECOVERY,
+		    "Unfreezing devq for target ID %d\n",
 		    tm->cm_targ->tid);
 		tm->cm_targ->flags &= ~MPRSAS_TARGET_INRESET;
 		xpt_release_devq(tm->cm_ccb->ccb_h.path, 1, TRUE);
@@ -1924,6 +1925,9 @@ mprsas_action_scsiio(struct mprsas_softc *sassc, union ccb *ccb)
 	 */
 	if (targ->flags & MPRSAS_TARGET_INRESET) {
 		ccb->ccb_h.status = CAM_REQUEUE_REQ | CAM_DEV_QFRZN;
+		mpr_dprint(sc, MPR_XINFO | MPR_RECOVERY,
+		    "%s: Freezing devq for target ID %d\n",
+		    __func__, targ->tid);
 		xpt_freeze_devq(ccb->ccb_h.path, 1);
 		xpt_done(ccb);
 		return;
@@ -2513,8 +2517,8 @@ mprsas_scsiio_complete(struct mpr_softc *sc, struct mpr_command *cm)
 		if ((sassc->flags & MPRSAS_QUEUE_FROZEN) == 0) {
 			xpt_freeze_simq(sassc->sim, 1);
 			sassc->flags |= MPRSAS_QUEUE_FROZEN;
-			mpr_dprint(sc, MPR_XINFO, "Error sending command, "
-			    "freezing SIM queue\n");
+			mpr_dprint(sc, MPR_XINFO | MPR_RECOVERY,
+			    "Error sending command, freezing SIM queue\n");
 		}
 	}
 
@@ -2549,7 +2553,7 @@ mprsas_scsiio_complete(struct mpr_softc *sc, struct mpr_command *cm)
 			if (sassc->flags & MPRSAS_QUEUE_FROZEN) {
 				ccb->ccb_h.status |= CAM_RELEASE_SIMQ;
 				sassc->flags &= ~MPRSAS_QUEUE_FROZEN;
-				mpr_dprint(sc, MPR_XINFO,
+				mpr_dprint(sc, MPR_XINFO | MPR_RECOVERY,
 				    "Unfreezing SIM queue\n");
 			}
 		} 
@@ -2817,7 +2821,7 @@ mprsas_scsiio_complete(struct mpr_softc *sc, struct mpr_command *cm)
 	if (sassc->flags & MPRSAS_QUEUE_FROZEN) {
 		ccb->ccb_h.status |= CAM_RELEASE_SIMQ;
 		sassc->flags &= ~MPRSAS_QUEUE_FROZEN;
-		mpr_dprint(sc, MPR_XINFO, "Command completed, unfreezing SIM "
+		mpr_dprint(sc, MPR_INFO, "Command completed, unfreezing SIM "
 		    "queue\n");
 	}
 
@@ -3424,6 +3428,11 @@ mprsas_async(void *callback_arg, uint32_t code, struct cam_path *path,
  * the target until the reset has completed.  The CCB holds the path which
  * is used to release the devq.  The devq is released and the CCB is freed
  * when the TM completes.
+ * We only need to do this when we're entering reset, not at each time we
+ * need to send an abort (which will happen if multiple commands timeout
+ * while we're sending the abort). We do not release the queue for each
+ * command we complete (just at the end when we free the tm), so freezing
+ * it each time doesn't make sense.
  */
 void
 mprsas_prepare_for_tm(struct mpr_softc *sc, struct mpr_command *tm,
@@ -3439,13 +3448,15 @@ mprsas_prepare_for_tm(struct mpr_softc *sc, struct mpr_command *tm,
 		    target->tid, lun_id) != CAM_REQ_CMP) {
 			xpt_free_ccb(ccb);
 		} else {
-			mpr_dprint(sc, MPR_XINFO,
-			    "%s: Freezing devq for target ID %d\n",
-			    __func__, target->tid);
-			xpt_freeze_devq(ccb->ccb_h.path, 1);
 			tm->cm_ccb = ccb;
 			tm->cm_targ = target;
-			target->flags |= MPRSAS_TARGET_INRESET;
+			if ((target->flags & MPRSAS_TARGET_INRESET) == 0) {
+				mpr_dprint(sc, MPR_XINFO | MPR_RECOVERY,
+				    "%s: Freezing devq for target ID %d\n",
+				    __func__, target->tid);
+				xpt_freeze_devq(ccb->ccb_h.path, 1);
+				target->flags |= MPRSAS_TARGET_INRESET;
+			}
 		}
 	}
 }