git: b558ebb082ff - main - cam: Add a number of asserts to catch bad transactions

From: Warner Losh <imp_at_FreeBSD.org>
Date: Thu, 06 Nov 2025 18:56:20 UTC
The branch main has been updated by imp:

URL: https://cgit.FreeBSD.org/src/commit/?id=b558ebb082ffa13f3c37aa8f7d7974b64e93fa93

commit b558ebb082ffa13f3c37aa8f7d7974b64e93fa93
Author:     Warner Losh <imp@FreeBSD.org>
AuthorDate: 2025-11-06 18:52:43 +0000
Commit:     Warner Losh <imp@FreeBSD.org>
CommitDate: 2025-11-06 18:53:40 +0000

    cam: Add a number of asserts to catch bad transactions
    
    Ensure that we're in the right state / priority for each of the states
    in the driver. These asserts assured that a prior patch that I committed
    to fix a priority leak worked when a drive departed (and bounced back
    too!). These have been running in our production since I committed the
    change and haven't trigged.
    
    Sponsored by:           Netflix
    Differential Revision:  https://reviews.freebsd.org/D53259
---
 sys/cam/ata/ata_da.c   | 72 ++++++++++++++++++++++++++++++++++++++++++++++++++
 sys/cam/scsi/scsi_da.c | 21 +++++++++++++++
 2 files changed, 93 insertions(+)

diff --git a/sys/cam/ata/ata_da.c b/sys/cam/ata/ata_da.c
index 08747cd59131..9434756b87f9 100644
--- a/sys/cam/ata/ata_da.c
+++ b/sys/cam/ata/ata_da.c
@@ -2328,15 +2328,38 @@ adastart(struct cam_periph *periph, union ccb *start_ccb)
 {
 	struct ada_softc *softc = (struct ada_softc *)periph->softc;
 	struct ccb_ataio *ataio = &start_ccb->ataio;
+	uint32_t priority = start_ccb->ccb_h.pinfo.priority;
 
 	CAM_DEBUG(periph->path, CAM_DEBUG_TRACE, ("adastart\n"));
 
+	/*
+	 * When we're running the state machine, we should only accept DEV CCBs.
+	 * When we're doing normal I/O we should only accept NORMAL CCBs.
+	 *
+	 * While in the state machine, we carefully single step the queue, but
+	 * there's no protection for 'extra' calls to xpt_schedule() at the
+	 * wrong priority. Guard against that so that we filter any CCBs that
+	 * are offered at the wrong priority. This avoids generating requests
+	 * that are at normal priority.
+`        */
+	if ((softc->state != ADA_STATE_NORMAL && priority != CAM_PRIORITY_DEV) ||
+	    (softc->state == ADA_STATE_NORMAL && priority != CAM_PRIORITY_NORMAL)) {
+		xpt_print(periph->path, "Bad priority for state %d prio %d\n",
+		    softc->state, priority);
+		xpt_release_ccb(start_ccb);
+		return;
+	}
+
 	switch (softc->state) {
 	case ADA_STATE_NORMAL:
 	{
 		struct bio *bp;
 		uint8_t tag_code;
 
+		KASSERT(priority == CAM_PRIORITY_NORMAL,
+		    ("Expected priority %d, found %d in state normal",
+			CAM_PRIORITY_NORMAL, priority));
+
 		bp = cam_iosched_next_bio(softc->cam_iosched);
 		if (bp == NULL) {
 			xpt_release_ccb(start_ccb);
@@ -2555,6 +2578,11 @@ out:
 	case ADA_STATE_RAHEAD:
 	case ADA_STATE_WCACHE:
 	{
+		KASSERT(priority == CAM_PRIORITY_DEV,
+		    ("Expected priority %d, found %d in state %s",
+			CAM_PRIORITY_DEV, priority,
+			softc->state == ADA_STATE_RAHEAD ? "rahead" : "wcache"));
+
 		cam_fill_ataio(ataio,
 		    1,
 		    adadone,
@@ -2581,6 +2609,10 @@ out:
 	{
 		struct ata_gp_log_dir *log_dir;
 
+		KASSERT(priority == CAM_PRIORITY_DEV,
+		    ("Expected priority %d, found %d in state logdir",
+			CAM_PRIORITY_DEV, priority));
+
 		if ((softc->flags & ADA_FLAG_CAN_LOG) == 0) {
 			adaprobedone(periph, start_ccb);
 			break;
@@ -2615,6 +2647,10 @@ out:
 	{
 		struct ata_identify_log_pages *id_dir;
 
+		KASSERT(priority == CAM_PRIORITY_DEV,
+		    ("Expected priority %d, found %d in state iddir",
+			CAM_PRIORITY_DEV, priority));
+
 		id_dir = malloc(sizeof(*id_dir), M_ATADA, M_NOWAIT | M_ZERO);
 		if (id_dir == NULL) {
 			xpt_print(periph->path, "Couldn't malloc id_dir "
@@ -2643,6 +2679,10 @@ out:
 	{
 		struct ata_identify_log_sup_cap *sup_cap;
 
+		KASSERT(priority == CAM_PRIORITY_DEV,
+		    ("Expected priority %d, found %d in state sup_cap",
+			CAM_PRIORITY_DEV, priority));
+
 		sup_cap = malloc(sizeof(*sup_cap), M_ATADA, M_NOWAIT|M_ZERO);
 		if (sup_cap == NULL) {
 			xpt_print(periph->path, "Couldn't malloc sup_cap "
@@ -2671,6 +2711,10 @@ out:
 	{
 		struct ata_zoned_info_log *ata_zone;
 
+		KASSERT(priority == CAM_PRIORITY_DEV,
+		    ("Expected priority %d, found %d in state zone",
+			CAM_PRIORITY_DEV, priority));
+
 		ata_zone = malloc(sizeof(*ata_zone), M_ATADA, M_NOWAIT|M_ZERO);
 		if (ata_zone == NULL) {
 			xpt_print(periph->path, "Couldn't malloc ata_zone "
@@ -2896,6 +2940,10 @@ adadone(struct cam_periph *periph, union ccb *done_ccb)
 		struct bio *bp;
 		int error;
 
+		KASSERT(priority == CAM_PRIORITY_NORMAL,
+		    ("Expected priority %d, found %d for normal I/O",
+			CAM_PRIORITY_NORMAL, priority));
+
 		cam_periph_lock(periph);
 		bp = (struct bio *)done_ccb->ccb_h.ccb_bp;
 		if ((done_ccb->ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP) {
@@ -3000,6 +3048,10 @@ adadone(struct cam_periph *periph, union ccb *done_ccb)
 	}
 	case ADA_CCB_RAHEAD:
 	{
+		KASSERT(priority == CAM_PRIORITY_DEV,
+		    ("Expected priority %d, found %d in ccb state rahead",
+			CAM_PRIORITY_DEV, priority));
+
 		if ((done_ccb->ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP) {
 			if (adaerror(done_ccb, 0, 0) == ERESTART) {
 				/* Drop freeze taken due to CAM_DEV_QFREEZE */
@@ -3023,6 +3075,10 @@ adadone(struct cam_periph *periph, union ccb *done_ccb)
 	}
 	case ADA_CCB_WCACHE:
 	{
+		KASSERT(priority == CAM_PRIORITY_DEV,
+		    ("Expected priority %d, found %d in ccb state wcache",
+			CAM_PRIORITY_DEV, priority));
+
 		if ((done_ccb->ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP) {
 			if (adaerror(done_ccb, 0, 0) == ERESTART) {
 				/* Drop freeze taken due to CAM_DEV_QFREEZE */
@@ -3054,6 +3110,10 @@ adadone(struct cam_periph *periph, union ccb *done_ccb)
 	{
 		int error;
 
+		KASSERT(priority == CAM_PRIORITY_DEV,
+		    ("Expected priority %d, found %d in ccb state logdir",
+			CAM_PRIORITY_DEV, priority));
+
 		if ((done_ccb->ccb_h.status & CAM_STATUS_MASK) == CAM_REQ_CMP) {
 			error = 0;
 			softc->valid_logdir_len = 0;
@@ -3123,6 +3183,10 @@ adadone(struct cam_periph *periph, union ccb *done_ccb)
 	case ADA_CCB_IDDIR: {
 		int error;
 
+		KASSERT(priority == CAM_PRIORITY_DEV,
+		    ("Expected priority %d, found %d in ccb state iddir",
+			CAM_PRIORITY_DEV, priority));
+
 		if ((ataio->ccb_h.status & CAM_STATUS_MASK) == CAM_REQ_CMP) {
 			off_t entries_offset, max_entries;
 			error = 0;
@@ -3208,6 +3272,10 @@ adadone(struct cam_periph *periph, union ccb *done_ccb)
 	case ADA_CCB_SUP_CAP: {
 		int error;
 
+		KASSERT(priority == CAM_PRIORITY_DEV,
+		    ("Expected priority %d, found %d in ccb state sup_cap",
+			CAM_PRIORITY_DEV, priority));
+
 		if ((ataio->ccb_h.status & CAM_STATUS_MASK) == CAM_REQ_CMP) {
 			uint32_t valid_len;
 			size_t needed_size;
@@ -3312,6 +3380,10 @@ adadone(struct cam_periph *periph, union ccb *done_ccb)
 	case ADA_CCB_ZONE: {
 		int error;
 
+		KASSERT(priority == CAM_PRIORITY_DEV,
+		    ("Expected priority %d, found %d in ccb state zone",
+			CAM_PRIORITY_DEV, priority));
+
 		if ((ataio->ccb_h.status & CAM_STATUS_MASK) == CAM_REQ_CMP) {
 			struct ata_zoned_info_log *zi_log;
 			uint32_t valid_len;
diff --git a/sys/cam/scsi/scsi_da.c b/sys/cam/scsi/scsi_da.c
index c0c0be12856b..773a786d08f7 100644
--- a/sys/cam/scsi/scsi_da.c
+++ b/sys/cam/scsi/scsi_da.c
@@ -3369,12 +3369,33 @@ static void
 dastart(struct cam_periph *periph, union ccb *start_ccb)
 {
 	struct da_softc *softc;
+	uint32_t priority = start_ccb->ccb_h.pinfo.priority;
 
 	cam_periph_assert(periph, MA_OWNED);
 	softc = (struct da_softc *)periph->softc;
 
 	CAM_DEBUG(periph->path, CAM_DEBUG_TRACE, ("dastart\n"));
 
+	/*
+	 * When we're running the state machine, we should only accept DEV CCBs.
+	 * When we're doing normal I/O we should only accept NORMAL CCBs.
+	 *
+	 * While in the state machine, we carefully single step the queue, but
+	 * there's no protection for 'extra' calls to xpt_schedule() at the
+	 * wrong priority. Guard against that so that we filter any CCBs that
+	 * are offered at the wrong priority. This avoids generating requests
+	 * that are at normal priority. In addition, though we can't easily
+	 * enforce it, one must not transition to the NORMAL state via the
+	 * skipstate mechanism.
+`        */
+	if ((softc->state != DA_STATE_NORMAL && priority != CAM_PRIORITY_DEV) ||
+	    (softc->state == DA_STATE_NORMAL && priority != CAM_PRIORITY_NORMAL)) {
+		xpt_print(periph->path, "Bad priority for state %d prio %d\n",
+		    softc->state, priority);
+		xpt_release_ccb(start_ccb);
+		return;
+	}
+
 skipstate:
 	switch (softc->state) {
 	case DA_STATE_NORMAL: