svn commit: r302354 - head/sys/dev/ioat

Conrad E. Meyer <cem at FreeBSD.org>
Tue Jul 5 20:53:34 UTC 2016


Author: cem
Date: Tue Jul  5 20:53:32 2016
New Revision: 302354
URL: https://svnweb.freebsd.org/changeset/base/302354

Log:
  ioat(4): Block asynchronous work during HW reset
  
  Fix the race between ioat_reset_hw and ioat_process_events.
  
  HW reset isn't protected by a lock because it can sleep for a long time
  (40.1 ms).  This resulted in a race where we would process bogus parts
  of the descriptor ring as if they had completed.  This manifested as
  duplicate completions of old events if the ring had looped at least
  once.
  
  Block callout and interrupt work while reset runs so that the
  completion path does not observe indeterminate state and process
  invalid parts of the ring.
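  
  In outline, the handshake is just a flag guarded by the cleanup lock;
  the following is a condensed sketch of the two sides, pulled together
  from the diff below:
  
	/* Reset side: mark the softc indeterminate before touching HW. */
	mtx_lock(&ioat->cleanup_lock);
	ioat->resetting_cleanup = TRUE;
	mtx_unlock(&ioat->cleanup_lock);
	/* ... the long, sleeping hardware reset runs here ... */
	mtx_lock(&ioat->cleanup_lock);
	ioat->resetting_cleanup = FALSE;
	mtx_unlock(&ioat->cleanup_lock);
  
	/* Completion side (ioat_process_events): bail out while set. */
	mtx_lock(&ioat->cleanup_lock);
	if (ioat->resetting_cleanup) {
		mtx_unlock(&ioat->cleanup_lock);
		return;
	}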
  
  Start the channel with a manually implemented ioat_null() to keep other
  submitters quiesced while we wait for the channel to start (100 us).
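  
  The wait is a bounded poll on channel status.  A minimal sketch of the
  loop, using the driver's ioat_get_chansts() and is_ioat_idle() helpers
  (the in-tree version also logs the CHANERR bits on failure):
  
	for (i = 0; i < 100; i++) {
		DELAY(1);
		status = ioat_get_chansts(ioat);
		if (is_ioat_idle(status))
			return (0);
	}
	/* Channel did not start within ~100 us; surface an error. */
	chanerr = ioat_read_4(ioat, IOAT_CHANERR_OFFSET);
	return (ENXIO);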
  
  r295605 may have made the race between ioat_reset_hw and
  ioat_process_events wider, but I believe it already existed before that
  revision.  ioat_process_events can be invoked by two asynchronous
  sources: callout (softclock) and device interrupt.  Those could race
  each other, to the same effect.
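  
  For reference, both asynchronous sources funnel into the same
  function; schematically (caller names and signatures assumed, not the
  exact driver code):
  
	static void
	timer_callback(void *arg)	/* callout (softclock) path */
	{
		ioat_process_events(arg);
	}
  
	static void
	intr_handler(void *arg)		/* device interrupt path */
	{
		ioat_process_events(arg);
	}
  
  Either entry can land while ioat_reset_hw() has the ring in an
  inconsistent state, hence the flag gate above.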
  
  Reviewed by:	markj
  Approved by:	re
  Sponsored by:	EMC / Isilon Storage Division
  Differential Revision:	https://reviews.freebsd.org/D7097

Modified:
  head/sys/dev/ioat/ioat.c
  head/sys/dev/ioat/ioat_internal.h

Modified: head/sys/dev/ioat/ioat.c
==============================================================================
--- head/sys/dev/ioat/ioat.c	Tue Jul  5 20:52:35 2016	(r302353)
+++ head/sys/dev/ioat/ioat.c	Tue Jul  5 20:53:32 2016	(r302354)
@@ -376,12 +376,32 @@ ioat_teardown_intr(struct ioat_softc *io
 static int
 ioat_start_channel(struct ioat_softc *ioat)
 {
+	struct ioat_dma_hw_descriptor *hw_desc;
+	struct ioat_descriptor *desc;
+	struct bus_dmadesc *dmadesc;
 	uint64_t status;
 	uint32_t chanerr;
 	int i;
 
 	ioat_acquire(&ioat->dmaengine);
-	ioat_null(&ioat->dmaengine, NULL, NULL, 0);
+
+	/* Submit 'NULL' operation manually to avoid quiescing flag */
+	desc = ioat_get_ring_entry(ioat, ioat->head);
+	dmadesc = &desc->bus_dmadesc;
+	hw_desc = desc->u.dma;
+
+	dmadesc->callback_fn = NULL;
+	dmadesc->callback_arg = NULL;
+
+	hw_desc->u.control_raw = 0;
+	hw_desc->u.control_generic.op = IOAT_OP_COPY;
+	hw_desc->u.control_generic.completion_update = 1;
+	hw_desc->size = 8;
+	hw_desc->src_addr = 0;
+	hw_desc->dest_addr = 0;
+	hw_desc->u.control.null = 1;
+
+	ioat_submit_single(ioat);
 	ioat_release(&ioat->dmaengine);
 
 	for (i = 0; i < 100; i++) {
@@ -496,6 +516,7 @@ ioat3_attach(device_t device)
 	ioat->head = ioat->hw_head = 0;
 	ioat->tail = 0;
 	ioat->last_seen = 0;
+	*ioat->comp_update = 0;
 	return (0);
 }
 
@@ -641,14 +662,24 @@ ioat_process_events(struct ioat_softc *i
 	boolean_t pending;
 	int error;
 
+	CTR0(KTR_IOAT, __func__);
+
 	mtx_lock(&ioat->cleanup_lock);
 
+	/*
+	 * Don't run while the hardware is being reset.  Reset is responsible
+	 * for blocking new work and draining & completing existing work, so
+	 * there is nothing to do until new work is queued after reset anyway.
+	 */
+	if (ioat->resetting_cleanup) {
+		mtx_unlock(&ioat->cleanup_lock);
+		return;
+	}
+
 	completed = 0;
 	comp_update = *ioat->comp_update;
 	status = comp_update & IOAT_CHANSTS_COMPLETED_DESCRIPTOR_MASK;
 
-	CTR0(KTR_IOAT, __func__);
-
 	if (status == ioat->last_seen) {
 		/*
 		 * If we landed in process_events and nothing has been
@@ -1643,6 +1674,13 @@ ioat_shrink_timer_callback(void *arg)
 
 	/* Slowly scale the ring down if idle. */
 	mtx_lock(&ioat->submit_lock);
+
+	/* Don't run while the hardware is being reset. */
+	if (ioat->resetting) {
+		mtx_unlock(&ioat->submit_lock);
+		return;
+	}
+
 	order = ioat->ring_size_order;
 	if (ioat->is_resize_pending || order == IOAT_MIN_ORDER) {
 		mtx_unlock(&ioat->submit_lock);
@@ -1712,6 +1750,14 @@ ioat_reset_hw(struct ioat_softc *ioat)
 	ioat_drain_locked(ioat);
 	mtx_unlock(IOAT_REFLK);
 
+	/*
+	 * Suspend ioat_process_events while the hardware and softc are in an
+	 * indeterminate state.
+	 */
+	mtx_lock(&ioat->cleanup_lock);
+	ioat->resetting_cleanup = TRUE;
+	mtx_unlock(&ioat->cleanup_lock);
+
 	status = ioat_get_chansts(ioat);
 	if (is_ioat_active(status) || is_ioat_idle(status))
 		ioat_suspend(ioat);
@@ -1793,6 +1839,7 @@ ioat_reset_hw(struct ioat_softc *ioat)
 	 */
 	ioat->tail = ioat->head = ioat->hw_head = 0;
 	ioat->last_seen = 0;
+	*ioat->comp_update = 0;
 
 	ioat_write_chanctrl(ioat, IOAT_CHANCTRL_RUN);
 	ioat_write_chancmp(ioat, ioat->comp_update_bus_addr);
@@ -1800,16 +1847,27 @@ ioat_reset_hw(struct ioat_softc *ioat)
 	error = 0;
 
 out:
-	mtx_lock(IOAT_REFLK);
-	ioat->resetting = FALSE;
-	wakeup(&ioat->resetting);
+	/*
+	 * Resume completions now that ring state is consistent.
+	 * ioat_start_channel will add a pending completion and if we are still
+	 * blocking completions, we may livelock.
+	 */
+	mtx_lock(&ioat->cleanup_lock);
+	ioat->resetting_cleanup = FALSE;
+	mtx_unlock(&ioat->cleanup_lock);
+
+	/* Enqueues a null operation and ensures it completes. */
+	if (error == 0)
+		error = ioat_start_channel(ioat);
 
+	/* Unblock submission of new work */
+	mtx_lock(IOAT_REFLK);
 	ioat->quiescing = FALSE;
 	wakeup(&ioat->quiescing);
-	mtx_unlock(IOAT_REFLK);
 
-	if (error == 0)
-		error = ioat_start_channel(ioat);
+	ioat->resetting = FALSE;
+	wakeup(&ioat->resetting);
+	mtx_unlock(IOAT_REFLK);
 
 	return (error);
 }

Modified: head/sys/dev/ioat/ioat_internal.h
==============================================================================
--- head/sys/dev/ioat/ioat_internal.h	Tue Jul  5 20:52:35 2016	(r302353)
+++ head/sys/dev/ioat/ioat_internal.h	Tue Jul  5 20:53:32 2016	(r302354)
@@ -491,7 +491,8 @@ struct ioat_softc {
 	boolean_t		is_reset_pending;
 	boolean_t		is_channel_running;
 	boolean_t		intrdelay_supported;
-	boolean_t		resetting;
+	boolean_t		resetting;		/* submit_lock */
+	boolean_t		resetting_cleanup;	/* cleanup_lock */
 
 	uint32_t		head;
 	uint32_t		tail;

