svn commit: r345056 - head/sys/dev/mrsas

Kashyap D Desai kadesai at FreeBSD.org
Tue Mar 12 09:24:59 UTC 2019


Author: kadesai
Date: Tue Mar 12 09:24:58 2019
New Revision: 345056
URL: https://svnweb.freebsd.org/changeset/base/345056

Log:
  The "fw_outstanding" (outstanding IOs at firmware level) counter gets corrupted when R1 fastpath
  writes are running. Some of the cases which are not handled properly in the driver are:
  
  1. With R1 fastpath supported, a single write from the CAM layer can consume 2 MPT frames
  at the driver/firmware level for fastpath qualification (if fw_outstanding < controller Queue Depth).
  Because of this, the driver has to throttle both the IOs coming from the CAM layer and the second
  fastpath write (of an R1 write) against the Adapter Queue Depth.
  If "fw_outstanding" reaches the adapter queue depth, the driver should return IOs from the CAM layer
  with device busy status. While allocating the second MPT frame (corresponding to the R1 FP write),
  the driver should also ensure that fw_outstanding does not exceed the adapter QD.
  
  2. On completion of R1 fastpath writes, the driver decrements the "fw_outstanding" counter without
  actually returning the MPT frame to the free pool. With heavy IOs running and consuming the whole
  adapter Queue Depth, this may cause IOs to consume MPT frames reserved for DCMDs (management commands),
  and DCMDs (internal and sent by applications) that cannot get an MPT frame will start failing.
  
  Below is one test case to hit the issue described above:
  1. Run heavy IOs (outstanding IOs should hit adapter Queue Depth).
  2. Run management tool (Broadcom's storcli tool) querying adapter in loop (run command- "storcli64 /c0 show" in loop).
  3. Management tool's requests would start failing due to non-availability of free MPT frames as all frames would be consumed by IOs.
  
  Fix: Increment/decrement of "fw_outstanding" counter should be in sync with MPT frame get/return.
  
  Submitted by: Sumit Saxena <sumit.saxena at broadcom.com>
  Reviewed by:  Kashyap Desai <Kashyap.Desai at broadcom.com>
  Approved by:  Ken
  MFC after:  3 days
  Sponsored by:   Broadcom Inc

Modified:
  head/sys/dev/mrsas/mrsas.c
  head/sys/dev/mrsas/mrsas_cam.c

Modified: head/sys/dev/mrsas/mrsas.c
==============================================================================
--- head/sys/dev/mrsas/mrsas.c	Tue Mar 12 08:31:43 2019	(r345055)
+++ head/sys/dev/mrsas/mrsas.c	Tue Mar 12 09:24:58 2019	(r345056)
@@ -1712,6 +1712,7 @@ mrsas_complete_cmd(struct mrsas_softc *sc, u_int32_t M
 				mrsas_map_mpt_cmd_status(cmd_mpt, cmd_mpt->ccb_ptr, status,
 				    extStatus, data_length, sense);
 				mrsas_cmd_done(sc, cmd_mpt);
+				mrsas_atomic_dec(&sc->fw_outstanding);
 			} else {
 				/*
 				 * If the peer  Raid  1/10 fast path failed,
@@ -1735,12 +1736,13 @@ mrsas_complete_cmd(struct mrsas_softc *sc, u_int32_t M
 						r1_cmd->callout_owner  = false;
 					}
 					mrsas_release_mpt_cmd(r1_cmd);
+					mrsas_atomic_dec(&sc->fw_outstanding);
 					mrsas_map_mpt_cmd_status(cmd_mpt, cmd_mpt->ccb_ptr, status,
 					    extStatus, data_length, sense);
 					mrsas_cmd_done(sc, cmd_mpt);
+					mrsas_atomic_dec(&sc->fw_outstanding);
 				}
 			}
-			mrsas_atomic_dec(&sc->fw_outstanding);
 			break;
 		case MRSAS_MPI2_FUNCTION_PASSTHRU_IO_REQUEST:	/* MFI command */
 			cmd_mfi = sc->mfi_cmd_list[cmd_mpt->sync_cmd_idx];
@@ -2526,6 +2528,9 @@ mrsas_init_fw(struct mrsas_softc *sc)
 		else
 			sc->fast_path_io = 0;
 	}
+		
+	device_printf(sc->mrsas_dev, "max_fw_cmds: %u  max_scsi_cmds: %u\n",
+		sc->max_fw_cmds, sc->max_scsi_cmds);
 	return (0);
 }
 

Modified: head/sys/dev/mrsas/mrsas_cam.c
==============================================================================
--- head/sys/dev/mrsas/mrsas_cam.c	Tue Mar 12 08:31:43 2019	(r345055)
+++ head/sys/dev/mrsas/mrsas_cam.c	Tue Mar 12 09:24:58 2019	(r345056)
@@ -467,11 +467,20 @@ mrsas_startio(struct mrsas_softc *sc, struct cam_sim *
 		return (0);
 	}
 	ccb_h->status |= CAM_SIM_QUEUED;
+
+	if (mrsas_atomic_inc_return(&sc->fw_outstanding) > sc->max_scsi_cmds) {
+		ccb_h->status |= CAM_REQUEUE_REQ;
+		xpt_done(ccb);
+		mrsas_atomic_dec(&sc->fw_outstanding); 
+		return (0);
+	}
+
 	cmd = mrsas_get_mpt_cmd(sc);
 
 	if (!cmd) {
 		ccb_h->status |= CAM_REQUEUE_REQ;
 		xpt_done(ccb);
+		mrsas_atomic_dec(&sc->fw_outstanding); 
 		return (0);
 	}
 
@@ -638,7 +647,7 @@ mrsas_startio(struct mrsas_softc *sc, struct cam_sim *
 	    mrsas_scsiio_timeout, cmd);
 #endif
 
-	if (mrsas_atomic_inc_return(&sc->fw_outstanding) > sc->io_cmds_highwater)
+	if (mrsas_atomic_read(&sc->fw_outstanding) > sc->io_cmds_highwater)
 		sc->io_cmds_highwater++;
 
 	/*
@@ -653,7 +662,6 @@ mrsas_startio(struct mrsas_softc *sc, struct cam_sim *
 	 * new command
 	 */
 	if (cmd->r1_alt_dev_handle != MR_DEVHANDLE_INVALID) {
-		mrsas_atomic_inc(&sc->fw_outstanding);
 		mrsas_prepare_secondRaid1_IO(sc, cmd);
 		mrsas_fire_cmd(sc, req_desc->addr.u.low,
 			req_desc->addr.u.high);
@@ -669,6 +677,7 @@ mrsas_startio(struct mrsas_softc *sc, struct cam_sim *
 
 done:
 	xpt_done(ccb);
+	mrsas_atomic_dec(&sc->fw_outstanding); 
 	return (0);
 }
 
@@ -1092,14 +1101,20 @@ mrsas_setup_io(struct mrsas_softc *sc, struct mrsas_mp
 				(io_info.r1_alt_dev_handle != MR_DEVHANDLE_INVALID) &&
 				(raid->level == 1) && !io_info.isRead) {
 			r1_cmd = mrsas_get_mpt_cmd(sc);
-			if (!r1_cmd) {
+			if (mrsas_atomic_inc_return(&sc->fw_outstanding) > sc->max_scsi_cmds) {
 				fp_possible = FALSE;
-				printf("Avago debug fp disable from %s %d \n",
-					__func__, __LINE__);
+				mrsas_atomic_dec(&sc->fw_outstanding); 
 			} else {
-				cmd->peer_cmd = r1_cmd;
-				r1_cmd->peer_cmd = cmd;
-			}
+				r1_cmd = mrsas_get_mpt_cmd(sc);
+				if (!r1_cmd) {
+					fp_possible = FALSE;
+					mrsas_atomic_dec(&sc->fw_outstanding); 
+				}
+				else {
+					cmd->peer_cmd = r1_cmd;
+					r1_cmd->peer_cmd = cmd;
+				}
+ 			}
 		}
 	}
 


More information about the svn-src-all mailing list