svn commit: r216254 - in head/sys/cddl: compat/opensolaris/sys dev/cyclic

Andriy Gapon avg at FreeBSD.org
Tue Dec 7 12:25:27 UTC 2010


Author: avg
Date: Tue Dec  7 12:25:26 2010
New Revision: 216254
URL: http://svn.freebsd.org/changeset/base/216254

Log:
  opensolaris cyclic: fix a deadlock and move the code a bit closer to upstream
  
  The deadlock was caused in the following way (see the sketch after this
  list):
  - thread T1 on CPU C1 holds a spin mutex, IPIs CPU C2 and waits for the
    IPI to be handled
  - C2 executes the timer interrupt filter, so it has interrupts
    disabled, and it blocks on the spin mutex held by T1
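  
  For illustration, a minimal sketch of that lock-order inversion in
  kernel-style C follows.  This is not code from this commit; the mutex,
  thread and filter names are hypothetical stand-ins for the cyclic
  subsystem's real ones, and it assumes the usual FreeBSD kernel headers.
  
      #include <sys/param.h>
      #include <sys/systm.h>
      #include <sys/bus.h>
      #include <sys/lock.h>
      #include <sys/mutex.h>
      #include <sys/smp.h>
      
      /* Initialized elsewhere with mtx_init(..., NULL, MTX_SPIN). */
      static struct mtx example_mtx;
      
      static void
      example_rendezvous_cb(void *arg)
      {
              /* Runs on every CPU; T1 spins until all CPUs get here. */
      }
      
      /* Thread T1, running on CPU C1. */
      static void
      example_t1(void)
      {
              mtx_lock_spin(&example_mtx);
              /*
               * smp_rendezvous() IPIs the other CPUs and busy-waits
               * until each of them has run the callback.  If C2 never
               * services the IPI, T1 spins here with the mutex held.
               */
              smp_rendezvous(NULL, example_rendezvous_cb, NULL, NULL);
              mtx_unlock_spin(&example_mtx);
      }
      
      /* Timer interrupt filter on CPU C2, entered with interrupts off. */
      static int
      example_filter(void *arg)
      {
              /*
               * Spins on the mutex held by T1.  With interrupts
               * disabled, C2 can never handle T1's IPI, so neither
               * side makes progress: deadlock.
               */
              mtx_lock_spin(&example_mtx);
              mtx_unlock_spin(&example_mtx);
              return (FILTER_HANDLED);
      }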
  The problem seems to have been introduced by simplifications made to the
  OpenSolaris code during porting.
  The problem is fixed by reorganizing the code to more closely resemble
  the upstream version.  The interrupt filter (cyclic_fire) no longer
  acquires any locks; all per-CPU data accesses are performed on the
  target CPU with preemption and interrupts disabled, thus precluding
  concurrent access to the data.
  The cyp_mtx spin mutex is used to disable preemption and interrupts; it
  is not used for classical mutual exclusion, because xcall already
  serializes calls to a CPU.  It emulates the OpenSolaris
  cyb_set_level(CY_HIGH_LEVEL) call; the spin mutexes could probably be
  reduced to just a spinlock_enter()/spinlock_exit() pair.
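  
  To make the last point concrete, a sketch of that possible
  simplification follows.  It is an untested alternative suggested above,
  not part of this commit; example_xcall is a hypothetical handler, and
  spinlock_enter()/spinlock_exit() are assumed to be visible through the
  usual kernel headers.
  
      #include <sys/param.h>
      #include <sys/proc.h>
      
      /* Runs on the target CPU via the backend's xcall. */
      static void
      example_xcall(void *v)
      {
              /*
               * Disable interrupts and preemption on this CPU for the
               * duration of the critical section, much as
               * cyb_set_level(CY_HIGH_LEVEL) does on OpenSolaris.
               * xcall already guarantees that we are the only caller
               * operating on this CPU's cyclic state.
               */
              spinlock_enter();
              /* ... manipulate this CPU's cyclic heap here ... */
              spinlock_exit();
      }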
  
  The diff with the upstream version is now reduced by ~500 lines;
  however, it still remains quite large - many things that are not needed
  (at the moment) or are irrelevant on FreeBSD were simply ripped out
  during porting.
  Examples of such things:
  - support for CPU onlining/offlining
  - support for suspend/resume
  - support for running callouts at soft interrupt levels
  - support for callout rebinding from CPU to CPU
  - support for CPU partitions
  
  Tested by:	Artem Belevich <fbsdlist at src.cx>
  MFC after:	3 weeks
  X-MFC with:	r216252

Modified:
  head/sys/cddl/compat/opensolaris/sys/cyclic_impl.h
  head/sys/cddl/dev/cyclic/cyclic.c

Modified: head/sys/cddl/compat/opensolaris/sys/cyclic_impl.h
==============================================================================
--- head/sys/cddl/compat/opensolaris/sys/cyclic_impl.h	Tue Dec  7 12:17:43 2010	(r216253)
+++ head/sys/cddl/compat/opensolaris/sys/cyclic_impl.h	Tue Dec  7 12:25:26 2010	(r216254)
@@ -288,7 +288,14 @@ typedef struct cyc_id {
 
 typedef struct cyc_xcallarg {
 	cyc_cpu_t *cyx_cpu;
-	hrtime_t cyx_exp;
+	cyc_handler_t *cyx_hdlr;
+	cyc_time_t *cyx_when;
+	cyc_index_t cyx_ndx;
+	cyc_index_t *cyx_heap;
+	cyclic_t *cyx_cyclics;
+	cyc_index_t cyx_size;
+	uint16_t cyx_flags;
+	int cyx_wait;
 } cyc_xcallarg_t;
 
 #define	CY_DEFAULT_PERCPU	1

Modified: head/sys/cddl/dev/cyclic/cyclic.c
==============================================================================
--- head/sys/cddl/dev/cyclic/cyclic.c	Tue Dec  7 12:17:43 2010	(r216253)
+++ head/sys/cddl/dev/cyclic/cyclic.c	Tue Dec  7 12:25:26 2010	(r216254)
@@ -473,73 +473,6 @@ cyclic_expire(cyc_cpu_t *cpu, cyc_index_
 	(*handler)(arg);
 }
 
-static void
-cyclic_enable_xcall(void *v)
-{
-	cyc_xcallarg_t *argp = v;
-	cyc_cpu_t *cpu = argp->cyx_cpu;
-	cyc_backend_t *be = cpu->cyp_backend;
-
-	be->cyb_enable(be->cyb_arg);
-}
-
-static void
-cyclic_enable(cyc_cpu_t *cpu)
-{
-	cyc_backend_t *be = cpu->cyp_backend;
-	cyc_xcallarg_t arg;
-
-	arg.cyx_cpu = cpu;
-
-	/* Cross call to the target CPU */
-	be->cyb_xcall(be->cyb_arg, cpu->cyp_cpu, cyclic_enable_xcall, &arg);
-}
-
-static void
-cyclic_disable_xcall(void *v)
-{
-	cyc_xcallarg_t *argp = v;
-	cyc_cpu_t *cpu = argp->cyx_cpu;
-	cyc_backend_t *be = cpu->cyp_backend;
-
-	be->cyb_disable(be->cyb_arg);
-}
-
-static void
-cyclic_disable(cyc_cpu_t *cpu)
-{
-	cyc_backend_t *be = cpu->cyp_backend;
-	cyc_xcallarg_t arg;
-
-	arg.cyx_cpu = cpu;
-
-	/* Cross call to the target CPU */
-	be->cyb_xcall(be->cyb_arg, cpu->cyp_cpu, cyclic_disable_xcall, &arg);
-}
-
-static void
-cyclic_reprogram_xcall(void *v)
-{
-	cyc_xcallarg_t *argp = v;
-	cyc_cpu_t *cpu = argp->cyx_cpu;
-	cyc_backend_t *be = cpu->cyp_backend;
-
-	be->cyb_reprogram(be->cyb_arg, argp->cyx_exp);
-}
-
-static void
-cyclic_reprogram(cyc_cpu_t *cpu, hrtime_t exp)
-{
-	cyc_backend_t *be = cpu->cyp_backend;
-	cyc_xcallarg_t arg;
-
-	arg.cyx_cpu = cpu;
-	arg.cyx_exp = exp;
-
-	/* Cross call to the target CPU */
-	be->cyb_xcall(be->cyb_arg, cpu->cyp_cpu, cyclic_reprogram_xcall, &arg);
-}
-
 /*
  *  cyclic_fire(cpu_t *)
  *
@@ -570,17 +503,15 @@ static void
 cyclic_fire(cpu_t *c)
 {
 	cyc_cpu_t *cpu = c->cpu_cyclic;
-
-	mtx_lock_spin(&cpu->cyp_mtx);
-
+	cyc_backend_t *be = cpu->cyp_backend;
 	cyc_index_t *heap = cpu->cyp_heap;
 	cyclic_t *cyclic, *cyclics = cpu->cyp_cyclics;
+	void *arg = be->cyb_arg;
 	hrtime_t now = gethrtime();
 	hrtime_t exp;
 
 	if (cpu->cyp_nelems == 0) {
 		/* This is a spurious fire. */
-		mtx_unlock_spin(&cpu->cyp_mtx);
 		return;
 	}
 
@@ -631,8 +562,45 @@ cyclic_fire(cpu_t *c)
 	 * Now we have a cyclic in the root slot which isn't in the past;
 	 * reprogram the interrupt source.
 	 */
-	cyclic_reprogram(cpu, exp);
+	be->cyb_reprogram(arg, exp);
+}
+
+static void
+cyclic_expand_xcall(cyc_xcallarg_t *arg)
+{
+	cyc_cpu_t *cpu = arg->cyx_cpu;
+	cyc_index_t new_size = arg->cyx_size, size = cpu->cyp_size, i;
+	cyc_index_t *new_heap = arg->cyx_heap;
+	cyclic_t *cyclics = cpu->cyp_cyclics, *new_cyclics = arg->cyx_cyclics;
+
+	/* Disable preemption and interrupts. */
+	mtx_lock_spin(&cpu->cyp_mtx);
+
+	/*
+	 * Assert that the new size is a power of 2.
+	 */
+	ASSERT((new_size & (new_size - 1)) == 0);
+	ASSERT(new_size == (size << 1));
+	ASSERT(cpu->cyp_heap != NULL && cpu->cyp_cyclics != NULL);
+
+	bcopy(cpu->cyp_heap, new_heap, sizeof (cyc_index_t) * size);
+	bcopy(cyclics, new_cyclics, sizeof (cyclic_t) * size);
+
+	/*
+	 * Set up the free list, and set all of the new cyclics to be CYF_FREE.
+	 */
+	for (i = size; i < new_size; i++) {
+		new_heap[i] = i;
+		new_cyclics[i].cy_flags = CYF_FREE;
+	}
 
+	/*
+	 * We can go ahead and plow the value of cyp_heap and cyp_cyclics;
+	 * cyclic_expand() has kept a copy.
+	 */
+	cpu->cyp_heap = new_heap;
+	cpu->cyp_cyclics = new_cyclics;
+	cpu->cyp_size = new_size;
 	mtx_unlock_spin(&cpu->cyp_mtx);
 }
 
@@ -643,102 +611,70 @@ cyclic_fire(cpu_t *c)
 static void
 cyclic_expand(cyc_cpu_t *cpu)
 {
-	cyc_index_t new_size, old_size, i;
+	cyc_index_t new_size, old_size;
 	cyc_index_t *new_heap, *old_heap;
 	cyclic_t *new_cyclics, *old_cyclics;
+	cyc_xcallarg_t arg;
+	cyc_backend_t *be = cpu->cyp_backend;
 
 	ASSERT(MUTEX_HELD(&cpu_lock));
 
-	if ((new_size = ((old_size = cpu->cyp_size) << 1)) == 0)
+	old_heap = cpu->cyp_heap;
+	old_cyclics = cpu->cyp_cyclics;
+
+	if ((new_size = ((old_size = cpu->cyp_size) << 1)) == 0) {
 		new_size = CY_DEFAULT_PERCPU;
+		ASSERT(old_heap == NULL && old_cyclics == NULL);
+	}
 
 	/*
 	 * Check that the new_size is a power of 2.
 	 */
 	ASSERT(((new_size - 1) & new_size) == 0);
 
-	/* Unlock the mutex while allocating memory so we can wait... */
-	mtx_unlock_spin(&cpu->cyp_mtx);
-
 	new_heap = malloc(sizeof(cyc_index_t) * new_size, M_CYCLIC, M_WAITOK);
 	new_cyclics = malloc(sizeof(cyclic_t) * new_size, M_CYCLIC, M_ZERO | M_WAITOK);
 
-	/* Grab the lock again now we've got the memory... */
-	mtx_lock_spin(&cpu->cyp_mtx);
-
-	/* Check if another thread beat us while the mutex was unlocked. */
-	if (old_size != cpu->cyp_size) {
-		/* Oh well, he won. */
-		mtx_unlock_spin(&cpu->cyp_mtx);
-
-		free(new_heap, M_CYCLIC);
-		free(new_cyclics, M_CYCLIC);
-
-		mtx_lock_spin(&cpu->cyp_mtx);
-		return;
-	}
-
-	old_heap = cpu->cyp_heap;
-	old_cyclics = cpu->cyp_cyclics;
-
-	bcopy(cpu->cyp_heap, new_heap, sizeof (cyc_index_t) * old_size);
-	bcopy(old_cyclics, new_cyclics, sizeof (cyclic_t) * old_size);
-
-	/*
-	 * Set up the free list, and set all of the new cyclics to be CYF_FREE.
-	 */
-	for (i = old_size; i < new_size; i++) {
-		new_heap[i] = i;
-		new_cyclics[i].cy_flags = CYF_FREE;
-	}
+	arg.cyx_cpu = cpu;
+	arg.cyx_heap = new_heap;
+	arg.cyx_cyclics = new_cyclics;
+	arg.cyx_size = new_size;
 
-	/*
-	 * We can go ahead and plow the value of cyp_heap and cyp_cyclics;
-	 * cyclic_expand() has kept a copy.
-	 */
-	cpu->cyp_heap = new_heap;
-	cpu->cyp_cyclics = new_cyclics;
-	cpu->cyp_size = new_size;
+	be->cyb_xcall(be->cyb_arg, cpu->cyp_cpu,
+	    (cyc_func_t)cyclic_expand_xcall, &arg);
 
 	if (old_cyclics != NULL) {
 		ASSERT(old_heap != NULL);
 		ASSERT(old_size != 0);
-		mtx_unlock_spin(&cpu->cyp_mtx);
-
 		free(old_cyclics, M_CYCLIC);
 		free(old_heap, M_CYCLIC);
-
-		mtx_lock_spin(&cpu->cyp_mtx);
 	}
 }
 
-static cyc_index_t
-cyclic_add_here(cyc_cpu_t *cpu, cyc_handler_t *hdlr,
-    cyc_time_t *when, uint16_t flags)
+static void
+cyclic_add_xcall(cyc_xcallarg_t *arg)
 {
+	cyc_cpu_t *cpu = arg->cyx_cpu;
+	cyc_handler_t *hdlr = arg->cyx_hdlr;
+	cyc_time_t *when = arg->cyx_when;
+	cyc_backend_t *be = cpu->cyp_backend;
 	cyc_index_t ndx, nelems;
+	cyb_arg_t bar = be->cyb_arg;
 	cyclic_t *cyclic;
 
-	ASSERT(MUTEX_HELD(&cpu_lock));
-
-	mtx_lock_spin(&cpu->cyp_mtx);
-
-	ASSERT(!(cpu->cyp_cpu->cpu_flags & CPU_OFFLINE));
-	ASSERT(when->cyt_when >= 0 && when->cyt_interval > 0);
-
-	while (cpu->cyp_nelems == cpu->cyp_size)
-		cyclic_expand(cpu);
-
 	ASSERT(cpu->cyp_nelems < cpu->cyp_size);
 
+	/* Disable preemption and interrupts. */
+	mtx_lock_spin(&cpu->cyp_mtx);
 	nelems = cpu->cyp_nelems++;
 
-	if (nelems == 0)
+	if (nelems == 0) {
 		/*
 		 * If this is the first element, we need to enable the
 		 * backend on this CPU.
 		 */
-		cyclic_enable(cpu);
+		be->cyb_enable(bar);
+	}
 
 	ndx = cpu->cyp_heap[nelems];
 	cyclic = &cpu->cyp_cyclics[ndx];
@@ -746,14 +682,20 @@ cyclic_add_here(cyc_cpu_t *cpu, cyc_hand
 	ASSERT(cyclic->cy_flags == CYF_FREE);
 	cyclic->cy_interval = when->cyt_interval;
 
-	if (when->cyt_when == 0)
-		cyclic->cy_expire = gethrtime() + cyclic->cy_interval;
-	else
+	if (when->cyt_when == 0) {
+		/*
+		 * If a start time hasn't been explicitly specified, we'll
+		 * start on the next interval boundary.
+		 */
+		cyclic->cy_expire = (gethrtime() / cyclic->cy_interval + 1) *
+		    cyclic->cy_interval;
+	} else {
 		cyclic->cy_expire = when->cyt_when;
+	}
 
 	cyclic->cy_handler = hdlr->cyh_func;
 	cyclic->cy_arg = hdlr->cyh_arg;
-	cyclic->cy_flags = flags;
+	cyclic->cy_flags = arg->cyx_flags;
 
 	if (cyclic_upheap(cpu, nelems)) {
 		hrtime_t exp = cyclic->cy_expire;
@@ -762,31 +704,63 @@ cyclic_add_here(cyc_cpu_t *cpu, cyc_hand
 		 * If our upheap propagated to the root, we need to
 		 * reprogram the interrupt source.
 		 */
-		cyclic_reprogram(cpu, exp);
+		be->cyb_reprogram(bar, exp);
 	}
-
 	mtx_unlock_spin(&cpu->cyp_mtx);
 
-	return (ndx);
+	arg->cyx_ndx = ndx;
 }
 
-
-static int
-cyclic_remove_here(cyc_cpu_t *cpu, cyc_index_t ndx, cyc_time_t *when, int wait)
+static cyc_index_t
+cyclic_add_here(cyc_cpu_t *cpu, cyc_handler_t *hdlr,
+    cyc_time_t *when, uint16_t flags)
 {
-	cyc_index_t nelems, i;
-	cyclic_t *cyclic;
-	cyc_index_t *heap, last;
+	cyc_backend_t *be = cpu->cyp_backend;
+	cyb_arg_t bar = be->cyb_arg;
+	cyc_xcallarg_t arg;
 
 	ASSERT(MUTEX_HELD(&cpu_lock));
-	ASSERT(wait == CY_WAIT || wait == CY_NOWAIT);
+	ASSERT(!(cpu->cyp_cpu->cpu_flags & CPU_OFFLINE));
+	ASSERT(when->cyt_when >= 0 && when->cyt_interval > 0);
 
-	mtx_lock_spin(&cpu->cyp_mtx);
+	if (cpu->cyp_nelems == cpu->cyp_size) {
+		/*
+		 * This is expensive; it will cross call onto the other
+		 * CPU to perform the expansion.
+		 */
+		cyclic_expand(cpu);
+		ASSERT(cpu->cyp_nelems < cpu->cyp_size);
+	}
+
+	/*
+	 * By now, we know that we're going to be able to successfully
+	 * perform the add.  Now cross call over to the CPU of interest to
+	 * actually add our cyclic.
+	 */
+	arg.cyx_cpu = cpu;
+	arg.cyx_hdlr = hdlr;
+	arg.cyx_when = when;
+	arg.cyx_flags = flags;
+
+	be->cyb_xcall(bar, cpu->cyp_cpu, (cyc_func_t)cyclic_add_xcall, &arg);
+
+	return (arg.cyx_ndx);
+}
 
-	heap = cpu->cyp_heap;
+static void
+cyclic_remove_xcall(cyc_xcallarg_t *arg)
+{
+	cyc_cpu_t *cpu = arg->cyx_cpu;
+	cyc_backend_t *be = cpu->cyp_backend;
+	cyb_arg_t bar = be->cyb_arg;
+	cyc_index_t ndx = arg->cyx_ndx, nelems = cpu->cyp_nelems, i;
+	cyc_index_t *heap = cpu->cyp_heap, last;
+	cyclic_t *cyclic;
 
-	nelems = cpu->cyp_nelems;
+	ASSERT(nelems > 0);
 
+	/* Disable preemption and interrupts. */
+	mtx_lock_spin(&cpu->cyp_mtx);
 	cyclic = &cpu->cyp_cyclics[ndx];
 
 	/*
@@ -794,11 +768,17 @@ cyclic_remove_here(cyc_cpu_t *cpu, cyc_i
 	 * removed as part of a juggling operation, the expiration time
 	 * will be used when the cyclic is added to the new CPU.
 	 */
-	if (when != NULL) {
-		when->cyt_when = cyclic->cy_expire;
-		when->cyt_interval = cyclic->cy_interval;
+	if (arg->cyx_when != NULL) {
+		arg->cyx_when->cyt_when = cyclic->cy_expire;
+		arg->cyx_when->cyt_interval = cyclic->cy_interval;
 	}
 
+	/*
+	 * Now set the flags to CYF_FREE.  We don't need a membar_enter()
+	 * between zeroing pend and setting the flags because we're at
+	 * CY_HIGH_LEVEL (that is, the zeroing of pend and the setting
+	 * of cy_flags appear atomic to softints).
+	 */
 	cyclic->cy_flags = CYF_FREE;
 
 	for (i = 0; i < nelems; i++) {
@@ -811,19 +791,21 @@ cyclic_remove_here(cyc_cpu_t *cpu, cyc_i
 
 	cpu->cyp_nelems = --nelems;
 
-	if (nelems == 0)
+	if (nelems == 0) {
 		/*
 		 * If we just removed the last element, then we need to
 		 * disable the backend on this CPU.
 		 */
-		cyclic_disable(cpu);
+		be->cyb_disable(bar);
+	}
 
-	if (i == nelems)
+	if (i == nelems) {
 		/*
 		 * If we just removed the last element of the heap, then
 		 * we don't have to downheap.
 		 */
-		goto done;
+		goto out;
+	}
 
 	/*
 	 * Swap the last element of the heap with the one we want to
@@ -833,17 +815,18 @@ cyclic_remove_here(cyc_cpu_t *cpu, cyc_i
 	heap[i] = (last = heap[nelems]);
 	heap[nelems] = ndx;
 
-	if (i == 0)
+	if (i == 0) {
 		cyclic_downheap(cpu, 0);
-	else {
+	} else {
 		if (cyclic_upheap(cpu, i) == 0) {
 			/*
 			 * The upheap didn't propagate to the root; if it
 			 * didn't propagate at all, we need to downheap.
 			 */
-			if (heap[i] == last)
+			if (heap[i] == last) {
 				cyclic_downheap(cpu, i);
-			goto done;
+			}
+			goto out;
 		}
 	}
 
@@ -854,10 +837,27 @@ cyclic_remove_here(cyc_cpu_t *cpu, cyc_i
 	cyclic = &cpu->cyp_cyclics[heap[0]];
 
 	ASSERT(nelems != 0);
-	cyclic_reprogram(cpu, cyclic->cy_expire);
-
-done:
+	be->cyb_reprogram(bar, cyclic->cy_expire);
+out:
 	mtx_unlock_spin(&cpu->cyp_mtx);
+}
+
+static int
+cyclic_remove_here(cyc_cpu_t *cpu, cyc_index_t ndx, cyc_time_t *when, int wait)
+{
+	cyc_backend_t *be = cpu->cyp_backend;
+	cyc_xcallarg_t arg;
+
+	ASSERT(MUTEX_HELD(&cpu_lock));
+	ASSERT(wait == CY_WAIT || wait == CY_NOWAIT);
+
+	arg.cyx_ndx = ndx;
+	arg.cyx_cpu = cpu;
+	arg.cyx_when = when;
+	arg.cyx_wait = wait;
+
+	be->cyb_xcall(be->cyb_arg, cpu->cyp_cpu,
+	    (cyc_func_t)cyclic_remove_xcall, &arg);
 
 	return (1);
 }
@@ -1214,15 +1214,10 @@ cyclic_add_omni(cyc_omni_handler_t *omni
 
 	idp->cyi_omni_hdlr = *omni;
 
-	for (i = 0; i < MAXCPU; i++) {
-		if (pcpu_find(i) == NULL)
-			continue;
-
+	CPU_FOREACH(i) {
 		c = &solaris_cpu[i];
-
 		if ((cpu = c->cpu_cyclic) == NULL)
 			continue;
-
 		cyclic_omni_start(idp, cpu);
 	}
 
@@ -1325,12 +1320,8 @@ cyclic_mp_init(void)
 
 	mutex_enter(&cpu_lock);
 
-	for (i = 0; i <= mp_maxid; i++) {
-		if (pcpu_find(i) == NULL)
-			continue;
-
+	CPU_FOREACH(i) {
 		c = &solaris_cpu[i];
-
 		if (c->cpu_cyclic == NULL)
 			cyclic_configure(c);
 	}
@@ -1346,10 +1337,8 @@ cyclic_uninit(void)
 
 	CPU_FOREACH(id) {
 		c = &solaris_cpu[id];
-
 		if (c->cpu_cyclic == NULL)
 			continue;
-
 		cyclic_unconfigure(c);
 	}
 

