Possible improvement for ATA resume problems

Ian Dowse iedowse at maths.tcd.ie
Wed Jan 7 01:57:27 PST 2004


I've had some success with the following patch to avoid ATA-related
hangs at resume time and also resume-time errors such as:

  ad0: FAILURE - READ_MUL status=59<READY,DSC,DRQ,ERROR> error=4<ABORTED>
  ad0: FAILURE - WRITE_MUL status=51<READY,DSC,ERROR> error=4<ABORTED>

The patch does two things:

 o Re-issue the ATA_SET_MULTI command to ATA disks when reinitialising
   the channel. This appears to be a simple bug, and explains the
   ABORTED errors above.  It doesn't explain why things used to
   recover for me after one or two of these errors though. Also
   reissue the ATA_SETFEATURES commands relating to caching. Non-disk
   devices might need to do something similar too.

 o Ensure that no normal requests can get queued while a channel
   is being reinitialised. This stops regular I/O operations from
   happening before a channel has been fully set up after a resume.
   It works by putting any such requests on a "deferred" list, and
   moving them back when the reinit is complete. Note that the root
   cause here is probably the fact that the ATA resume method blocks
   while waiting for some operations to complete, giving normal
   processes a chance to run before the resume has fully completed.

The deferred queue approach is fairly gross - I guess a much better
way would be a mechanism to allow a single request to be executed
synchronously while the ATA channel remains locked (optionally using
DELAY() calls and polling when sleeping is not permitted).

I've no idea if this will help much in general, but it has certainly
made suspend/resume work more reliably for me.

Ian


Index: ata-all.c
===================================================================
RCS file: /dump/FreeBSD-CVS/src/sys/dev/ata/ata-all.c,v
retrieving revision 1.198
diff -u -r1.198 ata-all.c
--- ata-all.c	30 Nov 2003 16:27:58 -0000	1.198
+++ ata-all.c	7 Jan 2004 08:55:20 -0000
@@ -122,6 +122,7 @@
     bzero(&ch->queue_mtx, sizeof(struct mtx));
     mtx_init(&ch->queue_mtx, "ATA queue lock", MTX_DEF, 0);
     TAILQ_INIT(&ch->ata_queue);
+    TAILQ_INIT(&ch->ata_deferred_queue);
 
     /* initialise device(s) on this channel */
     ch->locking(ch, ATA_LF_LOCK);
@@ -231,7 +232,7 @@
 ata_reinit(struct ata_channel *ch)
 {
     struct ata_request *request = ch->running;
-    int devices, misdev, newdev;
+    int devices, keptdev, misdev, newdev;
     
     if (!ch->r_irq)
 	return ENXIO;
@@ -239,6 +240,8 @@
     /* reset the HW */
     ata_printf(ch, -1, "resetting devices ..\n");
     ATA_FORCELOCK_CH(ch, ATA_CONTROL);
+    ch->flags |= ATA_REINIT_ACTIVE;
+    KASSERT(TAILQ_EMPTY(&ch->ata_queue), ("ata_reinit: queue not empty!"));
     ch->running = NULL;
     devices = ch->devices;
     ch->hw.reset(ch);
@@ -275,6 +278,16 @@
     /* identify whats present on this channel now */
     ata_identify_devices(ch);
 
+    /* reset/reconfigure devices that are still there */
+    if ((keptdev = devices & ch->devices)) {
+	if ((keptdev & (ATA_ATA_MASTER | ATA_ATAPI_MASTER)) &&
+	    ch->device[MASTER].reset)
+	    ch->device[MASTER].reset(&ch->device[MASTER]);
+	if ((keptdev & (ATA_ATA_SLAVE | ATA_ATAPI_SLAVE)) &&
+	    ch->device[SLAVE].reset)
+	    ch->device[SLAVE].reset(&ch->device[SLAVE]);
+    }
+
     /* attach new devices that appeared during reset */
     if ((newdev = ~devices & ch->devices)) {
 	if ((newdev & (ATA_ATA_MASTER | ATA_ATAPI_MASTER)) &&
@@ -295,6 +308,15 @@
     atapi_cam_reinit_bus(ch);
 #endif
 
+    /* Now it's safe to execute any normal requests that were deferred. */
+    mtx_lock(&ch->queue_mtx);
+    while (!TAILQ_EMPTY(&ch->ata_deferred_queue)) {
+	request = TAILQ_FIRST(&ch->ata_deferred_queue);
+	TAILQ_REMOVE(&ch->ata_deferred_queue, request, chain);
+	TAILQ_INSERT_TAIL(&ch->ata_queue, request, chain);
+    }
+    mtx_unlock(&ch->queue_mtx);
+    ch->flags &= ~ATA_REINIT_ACTIVE;
     printf("done\n");
     return 0;
 }
@@ -556,7 +578,7 @@
 	if (request) {
 	    request->device = atadev;
 	    request->u.ata.command = command;
-	    request->flags = (ATA_R_READ | ATA_R_QUIET);
+	    request->flags = (ATA_R_READ | ATA_R_QUIET | ATA_R_DONTDEFER);
 	    request->data = (caddr_t)atacap;
 	    request->timeout = 2;
 	    request->retries = 3;
Index: ata-all.h
===================================================================
RCS file: /dump/FreeBSD-CVS/src/sys/dev/ata/ata-all.h,v
retrieving revision 1.68
diff -u -r1.68 ata-all.h
--- ata-all.h	28 Nov 2003 19:01:28 -0000	1.68
+++ ata-all.h	7 Jan 2004 02:46:06 -0000
@@ -195,6 +195,7 @@
 #define		ATA_R_AT_HEAD		0x0200
 #define		ATA_R_REQUEUE		0x0400
 #define		ATA_R_SKIPSTART		0x0800
+#define		ATA_R_DONTDEFER		0x1000
 
     void			(*callback)(struct ata_request *request);
     int				retries;	/* retry count */
@@ -218,6 +219,7 @@
     void			*softc;		/* ptr to softc for device */
     void			(*attach)(struct ata_device *atadev);
     void			(*detach)(struct ata_device *atadev);
+    void			(*reset)(struct ata_device *atadev);
     void			(*start)(struct ata_device *atadev);
     int				flags;
 #define		ATA_D_USE_CHS		0x0001
@@ -274,6 +276,8 @@
     int				offset;
 };
 
+TAILQ_HEAD(ata_reqhead, ata_request);
+
 /* structure describing an ATA channel */
 struct ata_channel {
     struct device		*dev;		/* device handle */
@@ -289,6 +293,7 @@
 #define		ATA_USE_PC98GEOM	0x04
 #define		ATA_ATAPI_DMA_RO	0x08
 #define		ATA_48BIT_ACTIVE	0x10
+#define		ATA_REINIT_ACTIVE	0x20
 
     struct ata_device		device[2];	/* devices on this channel */
 #define		MASTER			0x00
@@ -310,7 +315,8 @@
 #define		ATA_LF_UNLOCK		0x0002
 
     struct mtx			queue_mtx;
-    TAILQ_HEAD(, ata_request)	ata_queue;	/* head of ATA queue */
+    struct ata_reqhead		ata_queue;	/* head of ATA queue */
+    struct ata_reqhead		ata_deferred_queue;
     void			*running;	/* currently running request */
 };
 
Index: ata-disk.c
===================================================================
RCS file: /dump/FreeBSD-CVS/src/sys/dev/ata/ata-disk.c,v
retrieving revision 1.164
diff -u -r1.164 ata-disk.c
--- ata-disk.c	11 Nov 2003 14:55:35 -0000	1.164
+++ ata-disk.c	7 Jan 2004 02:05:59 -0000
@@ -55,6 +55,7 @@
 
 /* prototypes */
 static void ad_detach(struct ata_device *);
+static void ad_reset(struct ata_device *);
 static void ad_start(struct ata_device *);
 static void ad_done(struct ata_request *);
 static disk_open_t adopen;
@@ -119,28 +120,13 @@
 	lbasize48 > 268435455)
 	adp->total_secs = lbasize48;
 
-    /* enable read caching */
-    ata_controlcmd(atadev, ATA_SETFEATURES, ATA_SF_ENAB_RCACHE, 0, 0);
-
-    /* enable write caching if enabled */
-    if (ata_wc)
-	ata_controlcmd(atadev, ATA_SETFEATURES, ATA_SF_ENAB_WCACHE, 0, 0);
-    else
-	ata_controlcmd(atadev, ATA_SETFEATURES, ATA_SF_DIS_WCACHE, 0, 0);
-
-    /* use multiple sectors/interrupt if device supports it */
-    adp->max_iosize = DEV_BSIZE;
-    if (ad_version(atadev->param->version_major)) {
-	int secsperint = max(1, min(atadev->param->sectors_intr, 16));
-
-	if (!ata_controlcmd(atadev, ATA_SET_MULTI, 0, 0, secsperint))
-	    adp->max_iosize = secsperint * DEV_BSIZE;
-    }
+    atadev->softc = adp;
+    ad_reset(atadev);
 
     /* setup the function ptrs */
     atadev->detach = ad_detach;
+    atadev->reset = ad_reset;
     atadev->start = ad_start;
-    atadev->softc = adp;
 
     /* lets create the disk device */
     adp->disk.d_open = adopen;
@@ -167,6 +153,30 @@
 }
 
 static void
+ad_reset(struct ata_device *atadev)
+{
+    struct ad_softc *adp = atadev->softc;
+
+    /* enable read caching */
+    ata_controlcmd(atadev, ATA_SETFEATURES, ATA_SF_ENAB_RCACHE, 0, 0);
+
+    /* enable write caching if enabled */
+    if (ata_wc)
+	ata_controlcmd(atadev, ATA_SETFEATURES, ATA_SF_ENAB_WCACHE, 0, 0);
+    else
+	ata_controlcmd(atadev, ATA_SETFEATURES, ATA_SF_DIS_WCACHE, 0, 0);
+
+    /* use multiple sectors/interrupt if device supports it */
+    adp->max_iosize = DEV_BSIZE;
+    if (ad_version(atadev->param->version_major)) {
+	int secsperint = max(1, min(atadev->param->sectors_intr, 16));
+
+	if (!ata_controlcmd(atadev, ATA_SET_MULTI, 0, 0, secsperint))
+	    adp->max_iosize = secsperint * DEV_BSIZE;
+    }
+}
+
+static void
 ad_detach(struct ata_device *atadev)
 {
     struct ad_softc *adp = atadev->softc;
@@ -185,6 +195,7 @@
     ata_free_lun(&adp_lun_map, adp->lun);
     atadev->attach = NULL;
     atadev->detach = NULL;
+    atadev->reset = NULL;
     atadev->start = NULL;
     atadev->softc = NULL;
     atadev->flags = 0;
Index: ata-queue.c
===================================================================
RCS file: /dump/FreeBSD-CVS/src/sys/dev/ata/ata-queue.c,v
retrieving revision 1.13
diff -u -r1.13 ata-queue.c
--- ata-queue.c	16 Dec 2003 19:41:38 -0000	1.13
+++ ata-queue.c	7 Jan 2004 08:55:21 -0000
@@ -76,16 +76,25 @@
 void
 ata_queue_request(struct ata_request *request)
 {
+    struct ata_reqhead *qhead;
+
     /* mark request as virgin (it might be a reused one) */
     request->result = request->status = request->error = 0;
     request->flags &= ~ATA_R_DONE;
 
+    qhead = &request->device->channel->ata_queue;
+    /* Don't let normal requests interfere with reinitialisation. */
+    if ((request->device->channel->flags & ATA_REINIT_ACTIVE) &&
+      (request->flags & ATA_R_DONTDEFER) == 0) {
+	qhead = &request->device->channel->ata_deferred_queue;
+    }
+
     /* put request on the locked queue at the specified location */
     mtx_lock(&request->device->channel->queue_mtx);
     if (request->flags & ATA_R_AT_HEAD)
-	TAILQ_INSERT_HEAD(&request->device->channel->ata_queue, request, chain);
+	TAILQ_INSERT_HEAD(qhead, request, chain);
     else
-	TAILQ_INSERT_TAIL(&request->device->channel->ata_queue, request, chain);
+	TAILQ_INSERT_TAIL(qhead, request, chain);
     mtx_unlock(&request->device->channel->queue_mtx);
 
     /* should we skip start ? */
@@ -116,7 +125,7 @@
 	request->u.ata.lba = lba;
 	request->u.ata.count = count;
 	request->u.ata.feature = feature;
-	request->flags = ATA_R_CONTROL;
+	request->flags = ATA_R_CONTROL | ATA_R_DONTDEFER;
 	request->timeout = 5;
 	ata_queue_request(request);
 	error = request->result;
Index: atapi-cd.c
===================================================================
RCS file: /dump/FreeBSD-CVS/src/sys/dev/ata/atapi-cd.c,v
retrieving revision 1.157
diff -u -r1.157 atapi-cd.c
--- atapi-cd.c	7 Dec 2003 23:15:22 -0000	1.157
+++ atapi-cd.c	7 Jan 2004 08:55:21 -0000
@@ -208,6 +208,7 @@
     ata_free_lun(&acd_lun_map, cdp->lun);
     atadev->attach = NULL;
     atadev->detach = NULL;
+    atadev->reset = NULL;
     atadev->start = NULL;
     atadev->softc = NULL;
     atadev->flags = 0;
Index: atapi-fd.c
===================================================================
RCS file: /dump/FreeBSD-CVS/src/sys/dev/ata/atapi-fd.c,v
retrieving revision 1.89
diff -u -r1.89 atapi-fd.c
--- atapi-fd.c	11 Nov 2003 14:55:35 -0000	1.89
+++ atapi-fd.c	7 Jan 2004 01:46:37 -0000
@@ -126,6 +126,7 @@
     ata_free_lun(&afd_lun_map, fdp->lun);
     atadev->attach = NULL;
     atadev->detach = NULL;
+    atadev->reset = NULL;
     atadev->start = NULL;
     atadev->softc = NULL;
     atadev->flags = 0;
Index: atapi-tape.c
===================================================================
RCS file: /dump/FreeBSD-CVS/src/sys/dev/ata/atapi-tape.c,v
retrieving revision 1.84
diff -u -r1.84 atapi-tape.c
--- atapi-tape.c	11 Nov 2003 14:55:36 -0000	1.84
+++ atapi-tape.c	7 Jan 2004 01:46:55 -0000
@@ -174,6 +174,7 @@
     ata_free_lun(&ast_lun_map, stp->lun);
     atadev->attach = NULL;
     atadev->detach = NULL;
+    atadev->reset = NULL;
     atadev->start = NULL;
     atadev->softc = NULL;
     atadev->flags = 0;


More information about the freebsd-current mailing list