svn commit: r344650 - stable/11/sys/dev/ioat

Alexander Motin mav at FreeBSD.org
Thu Feb 28 00:29:16 UTC 2019


Author: mav
Date: Thu Feb 28 00:29:15 2019
New Revision: 344650
URL: https://svnweb.freebsd.org/changeset/base/344650

Log:
  MFC r344441: Fix a few issues in the ioat(4) driver.
  
   - Do not explicitly count active descriptors.  This allows a hardware
  reset to happen while the device is still referenced, and it simplifies
  locking.
   - Do not stop/start the callout each time the queue becomes empty.
  Letting it run to completion and rearming it only when needed is much
  cheaper than touching it every time, and it also simplifies locking
  (a minimal sketch of the pattern follows this log message).
   - Decouple the submit and cleanup locks, making the driver reentrant.
   - Avoid a memory-mapped status register read on every interrupt.
   - Improve locking during device attach/detach.
   - Remove some no longer used variables.
  
  Sponsored by:	iXsystems, Inc.

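As an illustration of the callout change above: instead of stopping the
callout whenever the queue drains and restarting it on the next submit,
the timer is simply left to fire, and its handler rearms itself only
while work is still outstanding.  Below is a minimal sketch of that
pattern with callout(9); the names (my_softc, my_init, my_poll,
my_submit) are illustrative and not from the driver.

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/callout.h>

struct my_softc {
	struct mtx	lock;
	struct callout	timer;
	u_int		active;		/* outstanding work items */
};

static void	my_poll(void *arg);

static void
my_init(struct my_softc *sc)
{

	mtx_init(&sc->lock, "my lock", NULL, MTX_DEF);
	callout_init(&sc->timer, 1);	/* MPSAFE callout */
}

static void
my_poll(void *arg)
{
	struct my_softc *sc = arg;

	/* ... process completions, decrementing sc->active ... */

	mtx_lock(&sc->lock);
	/* Rearm only while work remains; no callout_stop() needed. */
	if (sc->active > 0)
		callout_schedule(&sc->timer, 1);
	mtx_unlock(&sc->lock);
}

static void
my_submit(struct my_softc *sc)
{

	mtx_assert(&sc->lock, MA_OWNED);
	sc->active++;
	/* Arm the timer only if it is not already pending. */
	if (!callout_pending(&sc->timer))
		callout_reset(&sc->timer, 1, my_poll, sc);
}
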
Modified:
  stable/11/sys/dev/ioat/ioat.c
  stable/11/sys/dev/ioat/ioat_internal.h
Directory Properties:
  stable/11/   (props changed)

Modified: stable/11/sys/dev/ioat/ioat.c
==============================================================================
--- stable/11/sys/dev/ioat/ioat.c	Thu Feb 28 00:28:44 2019	(r344649)
+++ stable/11/sys/dev/ioat/ioat.c	Thu Feb 28 00:29:15 2019	(r344650)
@@ -1,6 +1,7 @@
 /*-
  * Copyright (C) 2012 Intel Corporation
  * All rights reserved.
+ * Copyright (C) 2018 Alexander Motin <mav at FreeBSD.org>
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -62,7 +63,6 @@ __FBSDID("$FreeBSD$");
 #ifndef	BUS_SPACE_MAXADDR_40BIT
 #define	BUS_SPACE_MAXADDR_40BIT	0xFFFFFFFFFFULL
 #endif
-#define	IOAT_REFLK	(&ioat->submit_lock)
 
 static int ioat_probe(device_t device);
 static int ioat_attach(device_t device);
@@ -77,7 +77,7 @@ static void ioat_dmamap_cb(void *arg, bus_dma_segment_
 static void ioat_interrupt_handler(void *arg);
 static boolean_t ioat_model_resets_msix(struct ioat_softc *ioat);
 static int chanerr_to_errno(uint32_t);
-static void ioat_process_events(struct ioat_softc *ioat);
+static void ioat_process_events(struct ioat_softc *ioat, boolean_t intr);
 static inline uint32_t ioat_get_active(struct ioat_softc *ioat);
 static inline uint32_t ioat_get_ring_space(struct ioat_softc *ioat);
 static void ioat_free_ring(struct ioat_softc *, uint32_t size,
@@ -97,15 +97,8 @@ static int ioat_reset_hw(struct ioat_softc *ioat);
 static void ioat_reset_hw_task(void *, int);
 static void ioat_setup_sysctl(device_t device);
 static int sysctl_handle_reset(SYSCTL_HANDLER_ARGS);
-static inline struct ioat_softc *ioat_get(struct ioat_softc *,
-    enum ioat_ref_kind);
-static inline void ioat_put(struct ioat_softc *, enum ioat_ref_kind);
-static inline void _ioat_putn(struct ioat_softc *, uint32_t,
-    enum ioat_ref_kind, boolean_t);
-static inline void ioat_putn(struct ioat_softc *, uint32_t,
-    enum ioat_ref_kind);
-static inline void ioat_putn_locked(struct ioat_softc *, uint32_t,
-    enum ioat_ref_kind);
+static void ioat_get(struct ioat_softc *);
+static void ioat_put(struct ioat_softc *);
 static void ioat_drain_locked(struct ioat_softc *);
 
 #define	ioat_log_message(v, ...) do {					\
@@ -157,6 +150,8 @@ static struct ioat_softc *ioat_channel[IOAT_MAX_CHANNE
 static unsigned ioat_channel_index = 0;
 SYSCTL_UINT(_hw_ioat, OID_AUTO, channels, CTLFLAG_RD, &ioat_channel_index, 0,
     "Number of IOAT channels attached");
+static struct mtx ioat_list_mtx;
+MTX_SYSINIT(ioat_list_mtx, &ioat_list_mtx, "ioat list mtx", MTX_DEF);
 
 static struct _pcsid
 {
@@ -263,7 +258,7 @@ static int
 ioat_attach(device_t device)
 {
 	struct ioat_softc *ioat;
-	int error;
+	int error, i;
 
 	ioat = DEVICE2SOFTC(device);
 	ioat->device = device;
@@ -294,11 +289,26 @@ ioat_attach(device_t device)
 	if (error != 0)
 		goto err;
 
-	ioat_process_events(ioat);
+	ioat_process_events(ioat, FALSE);
 	ioat_setup_sysctl(device);
 
-	ioat->chan_idx = ioat_channel_index;
-	ioat_channel[ioat_channel_index++] = ioat;
+	mtx_lock(&ioat_list_mtx);
+	for (i = 0; i < IOAT_MAX_CHANNELS; i++) {
+		if (ioat_channel[i] == NULL)
+			break;
+	}
+	if (i >= IOAT_MAX_CHANNELS) {
+		mtx_unlock(&ioat_list_mtx);
+		device_printf(device, "Too many I/OAT devices in system\n");
+		error = ENXIO;
+		goto err;
+	}
+	ioat->chan_idx = i;
+	ioat_channel[i] = ioat;
+	if (i >= ioat_channel_index)
+		ioat_channel_index = i + 1;
+	mtx_unlock(&ioat_list_mtx);
+
 	ioat_test_attach();
 
 err:
@@ -314,19 +324,28 @@ ioat_detach(device_t device)
 
 	ioat = DEVICE2SOFTC(device);
 
+	mtx_lock(&ioat_list_mtx);
+	ioat_channel[ioat->chan_idx] = NULL;
+	while (ioat_channel_index > 0 &&
+	    ioat_channel[ioat_channel_index - 1] == NULL)
+		ioat_channel_index--;
+	mtx_unlock(&ioat_list_mtx);
+
 	ioat_test_detach();
 	taskqueue_drain(taskqueue_thread, &ioat->reset_task);
 
-	mtx_lock(IOAT_REFLK);
+	mtx_lock(&ioat->submit_lock);
 	ioat->quiescing = TRUE;
 	ioat->destroying = TRUE;
 	wakeup(&ioat->quiescing);
 	wakeup(&ioat->resetting);
 
-	ioat_channel[ioat->chan_idx] = NULL;
-
 	ioat_drain_locked(ioat);
-	mtx_unlock(IOAT_REFLK);
+	mtx_unlock(&ioat->submit_lock);
+	mtx_lock(&ioat->cleanup_lock);
+	while (ioat_get_active(ioat) > 0)
+		msleep(&ioat->tail, &ioat->cleanup_lock, 0, "ioat_drain", 1);
+	mtx_unlock(&ioat->cleanup_lock);
 
 	ioat_teardown_intr(ioat);
 	callout_drain(&ioat->poll_timer);
@@ -455,15 +474,12 @@ ioat3_attach(device_t device)
 	TASK_INIT(&ioat->reset_task, 0, ioat_reset_hw_task, ioat);
 
 	/* Establish lock order for Witness */
-	mtx_lock(&ioat->submit_lock);
 	mtx_lock(&ioat->cleanup_lock);
-	mtx_unlock(&ioat->cleanup_lock);
+	mtx_lock(&ioat->submit_lock);
 	mtx_unlock(&ioat->submit_lock);
+	mtx_unlock(&ioat->cleanup_lock);
 
 	ioat->is_submitter_processing = FALSE;
-	ioat->is_completion_pending = FALSE;
-	ioat->is_reset_pending = FALSE;
-	ioat->is_channel_running = FALSE;
 
 	bus_dma_tag_create(bus_get_dma_tag(ioat->device), sizeof(uint64_t), 0x0,
 	    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL,
@@ -518,7 +534,7 @@ ioat3_attach(device_t device)
 		dma_hw_desc->next = RING_PHYS_ADDR(ioat, i + 1);
 	}
 
-	ioat->head = ioat->hw_head = 0;
+	ioat->head = 0;
 	ioat->tail = 0;
 	ioat->last_seen = 0;
 	*ioat->comp_update = 0;
@@ -638,7 +654,7 @@ ioat_interrupt_handler(void *arg)
 	struct ioat_softc *ioat = arg;
 
 	ioat->stats.interrupts++;
-	ioat_process_events(ioat);
+	ioat_process_events(ioat, TRUE);
 }
 
 static int
@@ -658,13 +674,12 @@ chanerr_to_errno(uint32_t chanerr)
 }
 
 static void
-ioat_process_events(struct ioat_softc *ioat)
+ioat_process_events(struct ioat_softc *ioat, boolean_t intr)
 {
 	struct ioat_descriptor *desc;
 	struct bus_dmadesc *dmadesc;
 	uint64_t comp_update, status;
 	uint32_t completed, chanerr;
-	boolean_t pending;
 	int error;
 
 	mtx_lock(&ioat->cleanup_lock);
@@ -718,42 +733,21 @@ ioat_process_events(struct ioat_softc *ioat)
 	if (completed != 0) {
 		ioat->last_seen = RING_PHYS_ADDR(ioat, ioat->tail - 1);
 		ioat->stats.descriptors_processed += completed;
+		wakeup(&ioat->tail);
 	}
 
 out:
 	ioat_write_chanctrl(ioat, IOAT_CHANCTRL_RUN);
-
-	/* Perform a racy check first; only take the locks if it passes. */
-	pending = (ioat_get_active(ioat) != 0);
-	if (!pending && ioat->is_completion_pending) {
-		mtx_unlock(&ioat->cleanup_lock);
-		mtx_lock(&ioat->submit_lock);
-		mtx_lock(&ioat->cleanup_lock);
-
-		pending = (ioat_get_active(ioat) != 0);
-		if (!pending && ioat->is_completion_pending) {
-			ioat->is_completion_pending = FALSE;
-			callout_stop(&ioat->poll_timer);
-		}
-		mtx_unlock(&ioat->submit_lock);
-	}
 	mtx_unlock(&ioat->cleanup_lock);
 
-	if (pending)
-		callout_reset(&ioat->poll_timer, 1, ioat_poll_timer_callback,
-		    ioat);
-
-	if (completed != 0) {
-		ioat_putn(ioat, completed, IOAT_ACTIVE_DESCR_REF);
-		wakeup(&ioat->tail);
-	}
-
 	/*
 	 * The device doesn't seem to reliably push suspend/halt statuses to
 	 * the channel completion memory address, so poll the device register
-	 * here.
+	 * here.  For performance reasons skip it on interrupts, do it only
+	 * on much more rare polling events.
 	 */
-	comp_update = ioat_get_chansts(ioat) & IOAT_CHANSTS_STATUS;
+	if (!intr)
+		comp_update = ioat_get_chansts(ioat) & IOAT_CHANSTS_STATUS;
 	if (!is_ioat_halted(comp_update) && !is_ioat_suspended(comp_update))
 		return;
 
@@ -764,16 +758,17 @@ out:
 	 * work with error status and restart the engine.
 	 */
 	mtx_lock(&ioat->submit_lock);
-	mtx_lock(&ioat->cleanup_lock);
 	ioat->quiescing = TRUE;
+	mtx_unlock(&ioat->submit_lock);
+
 	/*
-	 * This is safe to do here because we have both locks and the submit
-	 * queue is quiesced.  We know that we will drain all outstanding
-	 * events, so ioat_reset_hw can't deadlock.  It is necessary to
-	 * protect other ioat_process_event threads from racing ioat_reset_hw,
-	 * reading an indeterminate hw state, and attempting to continue
-	 * issuing completions.
+	 * This is safe to do here because the submit queue is quiesced.  We
+	 * know that we will drain all outstanding events, so ioat_reset_hw
+	 * can't deadlock. It is necessary to protect other ioat_process_event
+	 * threads from racing ioat_reset_hw, reading an indeterminate hw
+	 * state, and attempting to continue issuing completions.
 	 */
+	mtx_lock(&ioat->cleanup_lock);
 	ioat->resetting_cleanup = TRUE;
 
 	chanerr = ioat_read_4(ioat, IOAT_CHANERR_OFFSET);
@@ -792,7 +787,6 @@ out:
 			dmadesc->callback_fn(dmadesc->callback_arg,
 			    chanerr_to_errno(chanerr));
 
-		ioat_putn_locked(ioat, 1, IOAT_ACTIVE_DESCR_REF);
 		ioat->tail++;
 		ioat->stats.descriptors_processed++;
 		ioat->stats.descriptors_error++;
@@ -800,16 +794,10 @@ out:
 	CTR5(KTR_IOAT, "%s channel=%u head=%u tail=%u active=%u", __func__,
 	    ioat->chan_idx, ioat->head, ioat->tail, ioat_get_active(ioat));
 
-	if (ioat->is_completion_pending) {
-		ioat->is_completion_pending = FALSE;
-		callout_stop(&ioat->poll_timer);
-	}
-
 	/* Clear error status */
 	ioat_write_4(ioat, IOAT_CHANERR_OFFSET, chanerr);
 
 	mtx_unlock(&ioat->cleanup_lock);
-	mtx_unlock(&ioat->submit_lock);
 
 	ioat_log_message(0, "Resetting channel to recover from error\n");
 	error = taskqueue_enqueue(taskqueue_thread, &ioat->reset_task);
@@ -851,31 +839,39 @@ ioat_get_dmaengine(uint32_t index, int flags)
 	KASSERT((flags & (M_NOWAIT | M_WAITOK)) != (M_NOWAIT | M_WAITOK),
 	    ("invalid wait | nowait"));
 
-	if (index >= ioat_channel_index)
+	mtx_lock(&ioat_list_mtx);
+	if (index >= ioat_channel_index ||
+	    (ioat = ioat_channel[index]) == NULL) {
+		mtx_unlock(&ioat_list_mtx);
 		return (NULL);
+	}
+	mtx_lock(&ioat->submit_lock);
+	mtx_unlock(&ioat_list_mtx);
 
-	ioat = ioat_channel[index];
-	if (ioat == NULL || ioat->destroying)
+	if (ioat->destroying) {
+		mtx_unlock(&ioat->submit_lock);
 		return (NULL);
+	}
 
+	ioat_get(ioat);
 	if (ioat->quiescing) {
-		if ((flags & M_NOWAIT) != 0)
+		if ((flags & M_NOWAIT) != 0) {
+			ioat_put(ioat);
+			mtx_unlock(&ioat->submit_lock);
 			return (NULL);
+		}
 
-		mtx_lock(IOAT_REFLK);
 		while (ioat->quiescing && !ioat->destroying)
-			msleep(&ioat->quiescing, IOAT_REFLK, 0, "getdma", 0);
-		mtx_unlock(IOAT_REFLK);
+			msleep(&ioat->quiescing, &ioat->submit_lock, 0, "getdma", 0);
 
-		if (ioat->destroying)
+		if (ioat->destroying) {
+			ioat_put(ioat);
+			mtx_unlock(&ioat->submit_lock);
 			return (NULL);
+		}
 	}
-
-	/*
-	 * There's a race here between the quiescing check and HW reset or
-	 * module destroy.
-	 */
-	return (&ioat_get(ioat, IOAT_DMAENGINE_REF)->dmaengine);
+	mtx_unlock(&ioat->submit_lock);
+	return (&ioat->dmaengine);
 }
 
 void
@@ -884,7 +880,9 @@ ioat_put_dmaengine(bus_dmaengine_t dmaengine)
 	struct ioat_softc *ioat;
 
 	ioat = to_ioat_softc(dmaengine);
-	ioat_put(ioat, IOAT_DMAENGINE_REF);
+	mtx_lock(&ioat->submit_lock);
+	ioat_put(ioat);
+	mtx_unlock(&ioat->submit_lock);
 }
 
 int
@@ -972,18 +970,17 @@ ioat_release(bus_dmaengine_t dmaengine)
 	struct ioat_softc *ioat;
 
 	ioat = to_ioat_softc(dmaengine);
-	CTR4(KTR_IOAT, "%s channel=%u dispatch1 hw_head=%u head=%u", __func__,
-	    ioat->chan_idx, ioat->hw_head & UINT16_MAX, ioat->head);
+	CTR3(KTR_IOAT, "%s channel=%u dispatch1 head=%u", __func__,
+	    ioat->chan_idx, ioat->head);
 	KFAIL_POINT_CODE(DEBUG_FP, ioat_release, /* do nothing */);
-	CTR4(KTR_IOAT, "%s channel=%u dispatch2 hw_head=%u head=%u", __func__,
-	    ioat->chan_idx, ioat->hw_head & UINT16_MAX, ioat->head);
+	CTR3(KTR_IOAT, "%s channel=%u dispatch2 head=%u", __func__,
+	    ioat->chan_idx, ioat->head);
 
 	if (ioat->acq_head != ioat->head) {
 		ioat_write_2(ioat, IOAT_DMACOUNT_OFFSET,
-		    (uint16_t)ioat->hw_head);
+		    (uint16_t)ioat->head);
 
-		if (!ioat->is_completion_pending) {
-			ioat->is_completion_pending = TRUE;
+		if (!callout_pending(&ioat->poll_timer)) {
 			callout_reset(&ioat->poll_timer, 1,
 			    ioat_poll_timer_callback, ioat);
 		}
@@ -1400,7 +1397,7 @@ ioat_reserve_space(struct ioat_softc *ioat, uint32_t n
 
 			CTR2(KTR_IOAT, "%s channel=%u attempting to process events",
 			    __func__, ioat->chan_idx);
-			ioat_process_events(ioat);
+			ioat_process_events(ioat, FALSE);
 
 			mtx_lock(&ioat->submit_lock);
 			dug = TRUE;
@@ -1478,7 +1475,12 @@ ioat_poll_timer_callback(void *arg)
 	ioat = arg;
 	ioat_log_message(3, "%s\n", __func__);
 
-	ioat_process_events(ioat);
+	ioat_process_events(ioat, FALSE);
+
+	mtx_lock(&ioat->submit_lock);
+	if (ioat_get_active(ioat) > 0)
+		callout_schedule(&ioat->poll_timer, 1);
+	mtx_unlock(&ioat->submit_lock);
 }
 
 /*
@@ -1490,12 +1492,9 @@ ioat_submit_single(struct ioat_softc *ioat)
 
 	mtx_assert(&ioat->submit_lock, MA_OWNED);
 
-	ioat_get(ioat, IOAT_ACTIVE_DESCR_REF);
-	atomic_add_rel_int(&ioat->head, 1);
-	atomic_add_rel_int(&ioat->hw_head, 1);
-	CTR5(KTR_IOAT, "%s channel=%u head=%u hw_head=%u tail=%u", __func__,
-	    ioat->chan_idx, ioat->head, ioat->hw_head & UINT16_MAX,
-	    ioat->tail);
+	ioat->head++;
+	CTR4(KTR_IOAT, "%s channel=%u head=%u tail=%u", __func__,
+	    ioat->chan_idx, ioat->head, ioat->tail);
 
 	ioat->stats.descriptors_submitted++;
 }
@@ -1510,24 +1509,24 @@ ioat_reset_hw(struct ioat_softc *ioat)
 
 	CTR2(KTR_IOAT, "%s channel=%u", __func__, ioat->chan_idx);
 
-	mtx_lock(IOAT_REFLK);
+	mtx_lock(&ioat->submit_lock);
 	while (ioat->resetting && !ioat->destroying)
-		msleep(&ioat->resetting, IOAT_REFLK, 0, "IRH_drain", 0);
+		msleep(&ioat->resetting, &ioat->submit_lock, 0, "IRH_drain", 0);
 	if (ioat->destroying) {
-		mtx_unlock(IOAT_REFLK);
+		mtx_unlock(&ioat->submit_lock);
 		return (ENXIO);
 	}
 	ioat->resetting = TRUE;
-
 	ioat->quiescing = TRUE;
-	ioat_drain_locked(ioat);
-	mtx_unlock(IOAT_REFLK);
+	mtx_unlock(&ioat->submit_lock);
+	mtx_lock(&ioat->cleanup_lock);
+	while (ioat_get_active(ioat) > 0)
+		msleep(&ioat->tail, &ioat->cleanup_lock, 0, "ioat_drain", 1);
 
 	/*
 	 * Suspend ioat_process_events while the hardware and softc are in an
 	 * indeterminate state.
 	 */
-	mtx_lock(&ioat->cleanup_lock);
 	ioat->resetting_cleanup = TRUE;
 	mtx_unlock(&ioat->cleanup_lock);
 
@@ -1618,10 +1617,9 @@ ioat_reset_hw(struct ioat_softc *ioat)
 	 * The internal ring counter resets to zero, so we have to start over
 	 * at zero as well.
 	 */
-	ioat->tail = ioat->head = ioat->hw_head = 0;
+	ioat->tail = ioat->head = 0;
 	ioat->last_seen = 0;
 	*ioat->comp_update = 0;
-	KASSERT(!ioat->is_completion_pending, ("bogus completion_pending"));
 
 	ioat_write_chanctrl(ioat, IOAT_CHANCTRL_RUN);
 	ioat_write_chancmp(ioat, ioat->comp_update_bus_addr);
@@ -1646,18 +1644,15 @@ out:
 	mtx_unlock(&ioat->cleanup_lock);
 
 	/* Unblock submission of new work */
-	mtx_lock(IOAT_REFLK);
+	mtx_lock(&ioat->submit_lock);
 	ioat->quiescing = FALSE;
 	wakeup(&ioat->quiescing);
 
 	ioat->resetting = FALSE;
 	wakeup(&ioat->resetting);
 
-	if (ioat->is_completion_pending)
-		callout_reset(&ioat->poll_timer, 1, ioat_poll_timer_callback,
-		    ioat);
 	CTR2(KTR_IOAT, "%s channel=%u reset done", __func__, ioat->chan_idx);
-	mtx_unlock(IOAT_REFLK);
+	mtx_unlock(&ioat->submit_lock);
 
 	return (error);
 }
@@ -1801,8 +1796,6 @@ ioat_setup_sysctl(device_t device)
 	    0, "SW descriptor head pointer index");
 	SYSCTL_ADD_UINT(ctx, state, OID_AUTO, "tail", CTLFLAG_RD, &ioat->tail,
 	    0, "SW descriptor tail pointer index");
-	SYSCTL_ADD_UINT(ctx, state, OID_AUTO, "hw_head", CTLFLAG_RD,
-	    &ioat->hw_head, 0, "HW DMACOUNT");
 
 	SYSCTL_ADD_UQUAD(ctx, state, OID_AUTO, "last_completion", CTLFLAG_RD,
 	    ioat->comp_update, "HW addr of last completion");
@@ -1810,12 +1803,6 @@ ioat_setup_sysctl(device_t device)
 	SYSCTL_ADD_INT(ctx, state, OID_AUTO, "is_submitter_processing",
 	    CTLFLAG_RD, &ioat->is_submitter_processing, 0,
 	    "submitter processing");
-	SYSCTL_ADD_INT(ctx, state, OID_AUTO, "is_completion_pending",
-	    CTLFLAG_RD, &ioat->is_completion_pending, 0, "completion pending");
-	SYSCTL_ADD_INT(ctx, state, OID_AUTO, "is_reset_pending", CTLFLAG_RD,
-	    &ioat->is_reset_pending, 0, "reset pending");
-	SYSCTL_ADD_INT(ctx, state, OID_AUTO, "is_channel_running", CTLFLAG_RD,
-	    &ioat->is_channel_running, 0, "channel running");
 
 	SYSCTL_ADD_PROC(ctx, state, OID_AUTO, "chansts",
 	    CTLTYPE_STRING | CTLFLAG_RD, ioat, 0, sysctl_handle_chansts, "A",
@@ -1861,91 +1848,35 @@ ioat_setup_sysctl(device_t device)
 	    "Descriptors per interrupt");
 }
 
-static inline struct ioat_softc *
-ioat_get(struct ioat_softc *ioat, enum ioat_ref_kind kind)
+static void
+ioat_get(struct ioat_softc *ioat)
 {
-	uint32_t old;
 
-	KASSERT(kind < IOAT_NUM_REF_KINDS, ("bogus"));
+	mtx_assert(&ioat->submit_lock, MA_OWNED);
+	KASSERT(ioat->refcnt < UINT32_MAX, ("refcnt overflow"));
 
-	old = atomic_fetchadd_32(&ioat->refcnt, 1);
-	KASSERT(old < UINT32_MAX, ("refcnt overflow"));
-
-#ifdef INVARIANTS
-	old = atomic_fetchadd_32(&ioat->refkinds[kind], 1);
-	KASSERT(old < UINT32_MAX, ("refcnt kind overflow"));
-#endif
-
-	return (ioat);
+	ioat->refcnt++;
 }
 
-static inline void
-ioat_putn(struct ioat_softc *ioat, uint32_t n, enum ioat_ref_kind kind)
+static void
+ioat_put(struct ioat_softc *ioat)
 {
 
-	_ioat_putn(ioat, n, kind, FALSE);
-}
+	mtx_assert(&ioat->submit_lock, MA_OWNED);
+	KASSERT(ioat->refcnt >= 1, ("refcnt error"));
 
-static inline void
-ioat_putn_locked(struct ioat_softc *ioat, uint32_t n, enum ioat_ref_kind kind)
-{
-
-	_ioat_putn(ioat, n, kind, TRUE);
+	if (--ioat->refcnt == 0)
+		wakeup(&ioat->refcnt);
 }
 
-static inline void
-_ioat_putn(struct ioat_softc *ioat, uint32_t n, enum ioat_ref_kind kind,
-    boolean_t locked)
-{
-	uint32_t old;
-
-	KASSERT(kind < IOAT_NUM_REF_KINDS, ("bogus"));
-
-	if (n == 0)
-		return;
-
-#ifdef INVARIANTS
-	old = atomic_fetchadd_32(&ioat->refkinds[kind], -n);
-	KASSERT(old >= n, ("refcnt kind underflow"));
-#endif
-
-	/* Skip acquiring the lock if resulting refcnt > 0. */
-	for (;;) {
-		old = ioat->refcnt;
-		if (old <= n)
-			break;
-		if (atomic_cmpset_32(&ioat->refcnt, old, old - n))
-			return;
-	}
-
-	if (locked)
-		mtx_assert(IOAT_REFLK, MA_OWNED);
-	else
-		mtx_lock(IOAT_REFLK);
-
-	old = atomic_fetchadd_32(&ioat->refcnt, -n);
-	KASSERT(old >= n, ("refcnt error"));
-
-	if (old == n)
-		wakeup(IOAT_REFLK);
-	if (!locked)
-		mtx_unlock(IOAT_REFLK);
-}
-
-static inline void
-ioat_put(struct ioat_softc *ioat, enum ioat_ref_kind kind)
-{
-
-	ioat_putn(ioat, 1, kind);
-}
-
 static void
 ioat_drain_locked(struct ioat_softc *ioat)
 {
 
-	mtx_assert(IOAT_REFLK, MA_OWNED);
+	mtx_assert(&ioat->submit_lock, MA_OWNED);
+
 	while (ioat->refcnt > 0)
-		msleep(IOAT_REFLK, IOAT_REFLK, 0, "ioat_drain", 0);
+		msleep(&ioat->refcnt, &ioat->submit_lock, 0, "ioat_drain", 0);
 }
 
 #ifdef DDB
@@ -1988,15 +1919,11 @@ DB_SHOW_COMMAND(ioat, db_show_ioat)
 	db_printf(" destroying: %d\n", (int)sc->destroying);
 	db_printf(" is_submitter_processing: %d\n",
 	    (int)sc->is_submitter_processing);
-	db_printf(" is_completion_pending: %d\n", (int)sc->is_completion_pending);
-	db_printf(" is_reset_pending: %d\n", (int)sc->is_reset_pending);
-	db_printf(" is_channel_running: %d\n", (int)sc->is_channel_running);
 	db_printf(" intrdelay_supported: %d\n", (int)sc->intrdelay_supported);
 	db_printf(" resetting: %d\n", (int)sc->resetting);
 
 	db_printf(" head: %u\n", sc->head);
 	db_printf(" tail: %u\n", sc->tail);
-	db_printf(" hw_head: %u\n", sc->hw_head);
 	db_printf(" ring_size_order: %u\n", sc->ring_size_order);
 	db_printf(" last_seen: 0x%lx\n", sc->last_seen);
 	db_printf(" ring: %p\n", sc->ring);
@@ -2037,11 +1964,6 @@ DB_SHOW_COMMAND(ioat, db_show_ioat)
 	db_show_lock(&sc->cleanup_lock);
 
 	db_printf(" refcnt: %u\n", sc->refcnt);
-#ifdef INVARIANTS
-	CTASSERT(IOAT_NUM_REF_KINDS == 2);
-	db_printf(" refkinds: [ENG=%u, DESCR=%u]\n", sc->refkinds[0],
-	    sc->refkinds[1]);
-#endif
 	db_printf(" stats:\n");
 	db_printf("  interrupts: %lu\n", sc->stats.interrupts);
 	db_printf("  descriptors_processed: %lu\n", sc->stats.descriptors_processed);

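A note on the lock-order hunk in ioat3_attach() above: the driver takes
both mutexes once, in the intended nesting order, right after creating
them, so that WITNESS records the order and flags any later acquisition
in reverse.  This commit swaps the recorded order so the cleanup lock is
the outer one.  A minimal sketch of the idiom follows (lock names are
illustrative):

#include <sys/param.h>
#include <sys/lock.h>
#include <sys/mutex.h>

static struct mtx cleanup_lock, submit_lock;

static void
establish_lock_order(void)
{

	mtx_init(&cleanup_lock, "cleanup", NULL, MTX_DEF);
	mtx_init(&submit_lock, "submit", NULL, MTX_DEF);

	/*
	 * Acquire both locks once in the intended order so WITNESS
	 * learns it: cleanup before submit, the reverse of the old
	 * order in this driver.
	 */
	mtx_lock(&cleanup_lock);
	mtx_lock(&submit_lock);
	mtx_unlock(&submit_lock);
	mtx_unlock(&cleanup_lock);
}
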
Modified: stable/11/sys/dev/ioat/ioat_internal.h
==============================================================================
--- stable/11/sys/dev/ioat/ioat_internal.h	Thu Feb 28 00:28:44 2019	(r344649)
+++ stable/11/sys/dev/ioat/ioat_internal.h	Thu Feb 28 00:29:15 2019	(r344650)
@@ -426,12 +426,6 @@ struct ioat_descriptor {
 #define	IOAT_OP_OLD_XOR		0x85
 #define	IOAT_OP_OLD_XOR_VAL	0x86
 
-enum ioat_ref_kind {
-	IOAT_DMAENGINE_REF = 0,
-	IOAT_ACTIVE_DESCR_REF,
-	IOAT_NUM_REF_KINDS
-};
-
 /* One of these per allocated PCI device. */
 struct ioat_softc {
 	bus_dmaengine_t		dmaengine;
@@ -442,22 +436,22 @@ struct ioat_softc {
 	    offsetof(struct ioat_softc, dmaengine));			\
 })
 
+	device_t		device;
 	int			version;
 	unsigned		chan_idx;
 
-	struct mtx		submit_lock;
-	device_t		device;
 	bus_space_tag_t		pci_bus_tag;
 	bus_space_handle_t	pci_bus_handle;
-	int			pci_resource_id;
 	struct resource		*pci_resource;
+	int			pci_resource_id;
 	uint32_t		max_xfer_size;
 	uint32_t		capabilities;
+	uint32_t		ring_size_order;
 	uint16_t		intrdelay_max;
 	uint16_t		cached_intrdelay;
 
-	struct resource		*res;
 	int			rid;
+	struct resource		*res;
 	void			*tag;
 
 	bus_dma_tag_t		hw_desc_tag;
@@ -468,27 +462,13 @@ struct ioat_softc {
 	uint64_t		*comp_update;
 	bus_addr_t		comp_update_bus_addr;
 
-	struct callout		poll_timer;
-	struct callout		shrink_timer;
-	struct task		reset_task;
-
 	boolean_t		quiescing;
 	boolean_t		destroying;
 	boolean_t		is_submitter_processing;
-	boolean_t		is_completion_pending;	/* submit_lock */
-	boolean_t		is_reset_pending;
-	boolean_t		is_channel_running;
 	boolean_t		intrdelay_supported;
 	boolean_t		resetting;		/* submit_lock */
 	boolean_t		resetting_cleanup;	/* cleanup_lock */
 
-	uint32_t		head;
-	uint32_t		acq_head;
-	uint32_t		tail;
-	uint32_t		hw_head;
-	uint32_t		ring_size_order;
-	bus_addr_t		last_seen;
-
 	struct ioat_descriptor	*ring;
 
 	union ioat_hw_descriptor {
@@ -506,11 +486,16 @@ struct ioat_softc {
 #define	RING_PHYS_ADDR(sc, i)	(sc)->hw_desc_bus_addr + \
     (((i) % (1 << (sc)->ring_size_order)) * sizeof(struct ioat_dma_hw_descriptor))
 
-	struct mtx		cleanup_lock;
-	volatile uint32_t	refcnt;
-#ifdef INVARIANTS
-	volatile uint32_t	refkinds[IOAT_NUM_REF_KINDS];
-#endif
+	struct mtx_padalign	submit_lock;
+	struct callout		poll_timer;
+	struct task		reset_task;
+	struct mtx_padalign	cleanup_lock;
+
+	uint32_t		refcnt;
+	uint32_t		head;
+	uint32_t		acq_head;
+	uint32_t		tail;
+	bus_addr_t		last_seen;
 
 	struct {
 		uint64_t	interrupts;

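With the refkind machinery removed, reference counting reduces to a
plain counter protected by the submit lock, with a wakeup/msleep
handshake for draining.  Below is a minimal sketch of the resulting
get/put/drain idiom; the struct and function names are illustrative,
not from the driver.

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
#include <sys/mutex.h>

struct obj {
	struct mtx	lock;
	uint32_t	refcnt;
};

static void
obj_get(struct obj *o)
{

	mtx_assert(&o->lock, MA_OWNED);
	o->refcnt++;
}

static void
obj_put(struct obj *o)
{

	mtx_assert(&o->lock, MA_OWNED);
	KASSERT(o->refcnt >= 1, ("refcnt error"));
	/* Wake a drainer once the last reference is dropped. */
	if (--o->refcnt == 0)
		wakeup(&o->refcnt);
}

static void
obj_drain(struct obj *o)
{

	mtx_assert(&o->lock, MA_OWNED);
	while (o->refcnt > 0)
		msleep(&o->refcnt, &o->lock, 0, "objdrn", 0);
}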
