svn commit: r221688 - in user/avg/xcpu/sys: amd64/amd64 kern
Andriy Gapon
avg at FreeBSD.org
Mon May 9 07:14:16 UTC 2011
Author: avg
Date: Mon May 9 07:14:16 2011
New Revision: 221688
URL: http://svn.freebsd.org/changeset/base/221688
Log:
re-implement smp rendezvous code
- create one outgoing rendezvous mailbox per cpu, in which the cpu
places its rendezvous request directed at other cpus
- create a cpu mask for each cpu, in which other cpus can set a bit to
indicate that they have sent a rendezvous request to the cpu in question
- send an ipi only for the first rendezvous request; subsequent requests
are piggybacked while the target cpu is still processing previous
incoming requests
- many-to-many rendezvous requests can now be sent without any locking;
the only limitation is that a cpu can have only a single outgoing
request at a time
- to avoid deadlocks, while a cpu waits for its request to be completed
by the target cpus, it also checks for and processes incoming requests
- to avoid deadlock with cpu stopping logic, cpus also check for stop
requests while waiting
- there can be only one cpu asking other cpus to stop; this is implemented
via a hand-rolled spin mutex analogue; similarly to the above, to avoid
deadlocks a cpu spinning on this lock also checks for an incoming stop
request
- implement tlb shootdowns via the smp rendezvous mechanism; no special
ipis are needed now; amd64 only (see if the code can be further simplified)
- thus smp_ipi_mtx is no longer needed
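In outline, a sender bumps the target cpu's pending-request counter and
pays for an IPI only when that counter was zero; the receiving cpu drains
its sender mask until the counter reaches zero.  Below is a minimal
user-level sketch of that handshake, assuming C11 atomics in place of the
kernel's atomic(9) and DPCPU primitives; send_request() and handle_ipi()
are illustrative names, not code from this commit.

	#include <stdatomic.h>

	#define MAXCPU	32

	struct rv_mailbox {
		atomic_uint	senders;	/* cpus with a posted request */
		atomic_int	count;		/* requests not yet processed */
	};

	static struct rv_mailbox mbox[MAXCPU];

	static void
	send_request(int curcpu, int target)
	{
		/* Only the first pending request pays for an IPI. */
		if (atomic_fetch_add(&mbox[target].count, 1) == 0)
			;	/* ipi_cpu(target, IPI_RENDEZVOUS) in the kernel */
		/* Publish our sender bit after the count bump. */
		atomic_fetch_or_explicit(&mbox[target].senders,
		    1u << curcpu, memory_order_release);
	}

	static void
	handle_ipi(int curcpu)
	{
		unsigned int mask;
		int done;

		/* Drain until every counted request has been processed. */
		while (atomic_load(&mbox[curcpu].count) > 0) {
			mask = atomic_exchange_explicit(&mbox[curcpu].senders,
			    0, memory_order_acquire);
			if (mask == 0)
				continue;	/* count bumped, bit not yet set */
			done = __builtin_popcount(mask);
			/* ... run one rendezvous per bit set in mask ... */
			atomic_fetch_sub(&mbox[curcpu].count, done);
		}
	}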
Modified:
user/avg/xcpu/sys/amd64/amd64/mp_machdep.c
user/avg/xcpu/sys/kern/kern_shutdown.c
user/avg/xcpu/sys/kern/subr_smp.c
Modified: user/avg/xcpu/sys/amd64/amd64/mp_machdep.c
==============================================================================
--- user/avg/xcpu/sys/amd64/amd64/mp_machdep.c Mon May 9 07:13:08 2011 (r221687)
+++ user/avg/xcpu/sys/amd64/amd64/mp_machdep.c Mon May 9 07:14:16 2011 (r221688)
@@ -1087,67 +1087,66 @@ SYSCTL_UINT(_debug_xhits, OID_AUTO, ipi_
&ipi_masked_range_size, 0, "");
#endif /* COUNT_XINVLTLB_HITS */
-/*
- * Flush the TLB on all other CPU's
- */
+struct tlb_shootdown_params {
+ u_int type;
+ vm_offset_t addr1;
+ vm_offset_t addr2;
+};
+
static void
-smp_tlb_shootdown(u_int vector, vm_offset_t addr1, vm_offset_t addr2)
+tlb_shootdown_action(void *arg)
{
- u_int ncpu;
+ struct tlb_shootdown_params *params;
+ vm_offset_t addr;
- ncpu = mp_ncpus - 1; /* does not shootdown self */
- if (ncpu < 1)
- return; /* no other cpus */
- if (!(read_rflags() & PSL_I))
- panic("%s: interrupts disabled", __func__);
- mtx_lock_spin(&smp_ipi_mtx);
- smp_tlb_addr1 = addr1;
- smp_tlb_addr2 = addr2;
- atomic_store_rel_int(&smp_tlb_wait, 0);
- ipi_all_but_self(vector);
- while (smp_tlb_wait < ncpu)
- ia32_pause();
- mtx_unlock_spin(&smp_ipi_mtx);
+ params = (struct tlb_shootdown_params *)arg;
+ switch (params->type) {
+ case IPI_INVLCACHE:
+ wbinvd();
+ break;
+ case IPI_INVLTLB:
+ invltlb();
+ break;
+ case IPI_INVLPG:
+ invlpg(params->addr1);
+ break;
+ case IPI_INVLRNG:
+ for (addr = params->addr1; addr < params->addr2;
+ addr += PAGE_SIZE)
+ invlpg(addr);
+ break;
+ default:
+ panic("Unknown TLB shootdown type %u", params->type);
+ }
}
static void
-smp_targeted_tlb_shootdown(cpumask_t mask, u_int vector, vm_offset_t addr1, vm_offset_t addr2)
+smp_targeted_tlb_shootdown(cpumask_t mask, u_int vector,
+ vm_offset_t addr1, vm_offset_t addr2)
{
- int ncpu, othercpus;
+ struct tlb_shootdown_params params;
- othercpus = mp_ncpus - 1;
- if (mask == (cpumask_t)-1) {
- ncpu = othercpus;
- if (ncpu < 1)
- return;
- } else {
- mask &= ~PCPU_GET(cpumask);
- if (mask == 0)
- return;
- ncpu = bitcount32(mask);
- if (ncpu > othercpus) {
- /* XXX this should be a panic offence */
- printf("SMP: tlb shootdown to %d other cpus (only have %d)\n",
- ncpu, othercpus);
- ncpu = othercpus;
- }
- /* XXX should be a panic, implied by mask == 0 above */
- if (ncpu < 1)
- return;
- }
+#if 0
if (!(read_rflags() & PSL_I))
panic("%s: interrupts disabled", __func__);
- mtx_lock_spin(&smp_ipi_mtx);
- smp_tlb_addr1 = addr1;
- smp_tlb_addr2 = addr2;
- atomic_store_rel_int(&smp_tlb_wait, 0);
- if (mask == (cpumask_t)-1)
- ipi_all_but_self(vector);
- else
- ipi_selected(mask, vector);
- while (smp_tlb_wait < ncpu)
- ia32_pause();
- mtx_unlock_spin(&smp_ipi_mtx);
+#endif
+ params.type = vector;
+ params.addr1 = addr1;
+ params.addr2 = addr2;
+ smp_rendezvous_cpus(mask & all_cpus & ~(1 << curcpu),
+ smp_no_rendevous_barrier, tlb_shootdown_action,
+ smp_no_rendevous_barrier, &params);
+}
+
+/*
+ * Flush the TLB on all other CPUs
+ */
+static void
+smp_tlb_shootdown(u_int vector, vm_offset_t addr1, vm_offset_t addr2)
+{
+
+ smp_targeted_tlb_shootdown(all_cpus & ~(1 << curcpu),
+ vector, addr1, addr2);
}
/*
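For reference, the MI-facing wrappers elsewhere in mp_machdep.c funnel
into these two routines.  The following sketch is reconstructed from
memory of FreeBSD's amd64 code and is not part of this diff:

	void
	smp_invltlb(void)
	{

		if (smp_started)
			smp_tlb_shootdown(IPI_INVLTLB, 0, 0);
	}

	void
	smp_invlpg(vm_offset_t addr)
	{

		if (smp_started)
			smp_tlb_shootdown(IPI_INVLPG, addr, 0);
	}

	void
	smp_invlpg_range(vm_offset_t addr1, vm_offset_t addr2)
	{

		if (smp_started)
			smp_tlb_shootdown(IPI_INVLRNG, addr1, addr2);
	}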
Modified: user/avg/xcpu/sys/kern/kern_shutdown.c
==============================================================================
--- user/avg/xcpu/sys/kern/kern_shutdown.c Mon May 9 07:13:08 2011 (r221687)
+++ user/avg/xcpu/sys/kern/kern_shutdown.c Mon May 9 07:14:16 2011 (r221688)
@@ -509,26 +509,9 @@ shutdown_reset(void *junk, int howto)
printf("Rebooting...\n");
DELAY(1000000); /* wait 1 sec for printf's to complete and be read */
- /*
- * Acquiring smp_ipi_mtx here has a double effect:
- * - it disables interrupts avoiding CPU0 preemption
- * by fast handlers (thus deadlocking against other CPUs)
- * - it avoids deadlocks against smp_rendezvous() or, more
- * generally, threads busy-waiting, with this spinlock held,
- * and waiting for responses by threads on other CPUs
- * (ie. smp_tlb_shootdown()).
- *
- * For the !SMP case it just needs to handle the former problem.
- */
-#ifdef SMP
- mtx_lock_spin(&smp_ipi_mtx);
-#else
spinlock_enter();
-#endif
-
- /* cpu_boot(howto); */ /* doesn't do anything at the moment */
cpu_reset();
- /* NOTREACHED */ /* assuming reset worked */
+ /* NOTREACHED */
}
/*
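spinlock_enter() is enough here because it disables interrupts on the
local cpu, and the rendezvous rework above removes the busy-wait deadlock
that smp_ipi_mtx used to guard against.  A simplified sketch of what
spinlock_enter() does on amd64, from memory and not part of this diff:

	void
	spinlock_enter(void)
	{
		struct thread *td;
		register_t flags;

		td = curthread;
		if (td->td_md.md_spinlock_count == 0) {
			/* first level: disable interrupts, save old rflags */
			flags = intr_disable();
			td->td_md.md_spinlock_count = 1;
			td->td_md.md_saved_flags = flags;
		} else
			td->td_md.md_spinlock_count++;
		critical_enter();
	}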
Modified: user/avg/xcpu/sys/kern/subr_smp.c
==============================================================================
--- user/avg/xcpu/sys/kern/subr_smp.c Mon May 9 07:13:08 2011 (r221687)
+++ user/avg/xcpu/sys/kern/subr_smp.c Mon May 9 07:14:16 2011 (r221688)
@@ -101,6 +101,10 @@ SYSCTL_INT(_kern_smp, OID_AUTO, topology
"Topology override setting; 0 is default provided by hardware.");
TUNABLE_INT("kern.smp.topology", &smp_topology);
+unsigned int coalesced_ipi_count;
+SYSCTL_INT(_kern_smp, OID_AUTO, coalesced_ipi_count, CTLFLAG_RD,
+ &coalesced_ipi_count, 0, "Count of coalesced SMP rendezvous IPIs");
+
#ifdef SMP
/* Enable forwarding of a signal to a process running on a different CPU */
static int forward_signal_enabled = 1;
@@ -109,14 +113,20 @@ SYSCTL_INT(_kern_smp, OID_AUTO, forward_
"Forwarding of a signal to a process on a different CPU");
/* Variables needed for SMP rendezvous. */
-static volatile int smp_rv_ncpus;
-static void (*volatile smp_rv_setup_func)(void *arg);
-static void (*volatile smp_rv_action_func)(void *arg);
-static void (*volatile smp_rv_teardown_func)(void *arg);
-static void *volatile smp_rv_func_arg;
-static volatile int smp_rv_waiters[3];
+struct smp_rendezvous_data {
+ void (*smp_rv_setup_func)(void *arg);
+ void (*smp_rv_action_func)(void *arg);
+ void (*smp_rv_teardown_func)(void *arg);
+ void *smp_rv_func_arg;
+ volatile int smp_rv_waiters[2];
+ int smp_rv_ncpus;
+};
+
+static DPCPU_DEFINE(struct smp_rendezvous_data, smp_rv_data);
+static volatile DPCPU_DEFINE(cpumask_t, smp_rv_senders);
+static volatile DPCPU_DEFINE(cpumask_t, smp_rv_count);
-/*
+/*
* Shared mutex to restrict busywaits between smp_rendezvous() and
* smp(_targeted)_tlb_shootdown(). A deadlock occurs if both of these
* functions trigger at once and cause multiple CPUs to busywait with
@@ -397,39 +407,44 @@ unstop_cpus_hard(void)
* Note that the supplied external functions _must_ be reentrant and aware
* that they are running in parallel and in an unknown lock context.
*/
-void
-smp_rendezvous_action(void)
+static void
+smp_rendezvous_action_body(int cpu)
{
- void* local_func_arg = smp_rv_func_arg;
- void (*local_setup_func)(void*) = smp_rv_setup_func;
- void (*local_action_func)(void*) = smp_rv_action_func;
- void (*local_teardown_func)(void*) = smp_rv_teardown_func;
-
- /* Ensure we have up-to-date values. */
- atomic_add_acq_int(&smp_rv_waiters[0], 1);
- while (smp_rv_waiters[0] < smp_rv_ncpus)
- cpu_spinwait();
+ volatile struct smp_rendezvous_data *rv;
+ void *local_func_arg;
+ void (*local_setup_func)(void*);
+ void (*local_action_func)(void*);
+ void (*local_teardown_func)(void*);
+ int ncpus;
+
+ rv = DPCPU_ID_PTR(cpu, smp_rv_data);
+ local_func_arg = rv->smp_rv_func_arg;
+ local_setup_func = rv->smp_rv_setup_func;
+ local_action_func = rv->smp_rv_action_func;
+ local_teardown_func = rv->smp_rv_teardown_func;
+ ncpus = rv->smp_rv_ncpus;
/* setup function */
if (local_setup_func != smp_no_rendevous_barrier) {
- if (smp_rv_setup_func != NULL)
- smp_rv_setup_func(smp_rv_func_arg);
+ if (local_setup_func != NULL)
+ local_setup_func(local_func_arg);
/* spin on entry rendezvous */
- atomic_add_int(&smp_rv_waiters[1], 1);
- while (smp_rv_waiters[1] < smp_rv_ncpus)
- cpu_spinwait();
+ atomic_add_int(&rv->smp_rv_waiters[0], 1);
+ while (rv->smp_rv_waiters[0] < ncpus)
+ cpu_spinwait();
}
/* action function */
if (local_action_func != NULL)
local_action_func(local_func_arg);
- /* spin on exit rendezvous */
- atomic_add_int(&smp_rv_waiters[2], 1);
+ atomic_add_int(&rv->smp_rv_waiters[1], 1);
if (local_teardown_func == smp_no_rendevous_barrier)
return;
- while (smp_rv_waiters[2] < smp_rv_ncpus)
+
+ /* spin on exit rendezvous */
+ while (rv->smp_rv_waiters[1] < ncpus)
cpu_spinwait();
/* teardown function */
@@ -438,13 +453,95 @@ smp_rendezvous_action(void)
}
void
+smp_rendezvous_action(void)
+{
+ cpumask_t mask;
+ int pending;
+ int count;
+ int cpu;
+
+ pending = DPCPU_GET(smp_rv_count);
+ while (pending != 0) {
+ KASSERT(pending > 0, ("negative pending rendezvous count"));
+ mask = DPCPU_GET(smp_rv_senders);
+ if (mask == 0) {
+ cpu_spinwait();
+ continue;
+ }
+
+ atomic_clear_acq_int(DPCPU_PTR(smp_rv_senders), mask);
+ count = 0;
+ do {
+ count++;
+ cpu = ffs(mask) - 1;
+ mask &= ~(1 << cpu);
+ smp_rendezvous_action_body(cpu);
+ } while (mask != 0);
+
+ pending = atomic_fetchadd_int(DPCPU_PTR(smp_rv_count), -count);
+ pending -= count;
+ }
+}
+
+static void
+smp_rendezvous_wait(void)
+{
+ volatile struct smp_rendezvous_data *rv;
+ int ncpus;
+
+ rv = DPCPU_PTR(smp_rv_data);
+ ncpus = rv->smp_rv_ncpus;
+
+ while (atomic_load_acq_int(&rv->smp_rv_waiters[1]) < ncpus) {
+ /* check for incoming events */
+ if ((stopping_cpus & (1 << curcpu)) != 0)
+ cpustop_handler();
+ else if (DPCPU_GET(smp_rv_senders) != 0)
+ smp_rendezvous_action();
+ else
+ cpu_spinwait();
+ }
+}
+
+/*
+ * Execute the action_func on the targeted CPUs.
+ *
+ * setup_func:
+ * - if a function pointer is given, then first execute the function;
+ * only after the function has executed on all targeted CPUs do they proceed
+ * to the next step;
+ * - if NULL is given, this is equivalent to specifying a pointer to an
+ * empty function; as such there is no actual setup function, but all
+ * targeted CPUs proceed to the next step at about the same time;
+ * - smp_no_rendevous_barrier is a special value that signifies that there
+ * is no setup function and that the targeted CPUs should not wait for
+ * anything before proceeding to the next step.
+ *
+ * action_func:
+ * - a function to be executed on the targeted CPUs;
+ * NULL is equivalent to specifying a pointer to an empty function.
+ *
+ * teardown_func:
+ * - if a function pointer is given, then first wait for all targeted CPUs
+ * to complete execution of action_func, then execute this function;
+ * - if NULL is given, this is equivalent to specifying a pointer to an
+ * empty function; as such there is no actual teardown action, but all
+ * targeted CPUs wait for each other to complete execution of action_func;
+ * - smp_no_rendevous_barrier is a special value that signifies that there
+ * is no teardown function and that the targeted CPUs should not wait
+ * for anything after completing action_func.
+ */
+void
smp_rendezvous_cpus(cpumask_t map,
void (* setup_func)(void *),
void (* action_func)(void *),
void (* teardown_func)(void *),
void *arg)
{
- int i, ncpus = 0;
+ volatile struct smp_rendezvous_data *rv;
+ cpumask_t tmp;
+ int ncpus;
+ int cpu;
if (!smp_started) {
if (setup_func != NULL)
@@ -456,39 +553,66 @@ smp_rendezvous_cpus(cpumask_t map,
return;
}
- CPU_FOREACH(i) {
- if (((1 << i) & map) != 0)
- ncpus++;
+ map &= all_cpus;
+ tmp = map;
+ ncpus = 0;
+ while (tmp != 0) {
+ cpu = ffs(tmp) - 1;
+ tmp &= ~(1 << cpu);
+ ncpus++;
}
- if (ncpus == 0)
- panic("ncpus is 0 with map=0x%x", map);
- /* obtain rendezvous lock */
- mtx_lock_spin(&smp_ipi_mtx);
+ spinlock_enter();
+
+ /*
+ * First wait for any event previously posted by us to complete;
+ * this is needed in case the event was asynchronous.
+ * In the future we could have a queue of outgoing events instead
+ * of a single item.
+ */
+ smp_rendezvous_wait();
/* set static function pointers */
- smp_rv_ncpus = ncpus;
- smp_rv_setup_func = setup_func;
- smp_rv_action_func = action_func;
- smp_rv_teardown_func = teardown_func;
- smp_rv_func_arg = arg;
- smp_rv_waiters[1] = 0;
- smp_rv_waiters[2] = 0;
- atomic_store_rel_int(&smp_rv_waiters[0], 0);
+ rv = DPCPU_PTR(smp_rv_data);
+ rv->smp_rv_ncpus = ncpus;
+ rv->smp_rv_setup_func = setup_func;
+ rv->smp_rv_action_func = action_func;
+ rv->smp_rv_teardown_func = teardown_func;
+ rv->smp_rv_func_arg = arg;
+ rv->smp_rv_waiters[1] = 0;
+ atomic_store_rel_int(&rv->smp_rv_waiters[0], 0);
+
+ /* signal other CPUs, which will enter the IPI with interrupts off */
+ tmp = map;
+ while (tmp != 0) {
+ cpu = ffs(tmp) - 1;
+ tmp &= ~(1 << cpu);
+
+ if (cpu == curcpu)
+ continue;
+
+ KASSERT(
+ (DPCPU_ID_GET(cpu, smp_rv_senders) & (1 << curcpu)) == 0,
+ ("curcpu bit is set in target cpu's senders map"));
+
+ /* if we are the first to send an event, then send an ipi */
+ if (atomic_fetchadd_int(DPCPU_ID_PTR(cpu, smp_rv_count), 1)
+ == 0)
+ ipi_cpu(cpu, IPI_RENDEZVOUS);
+ else
+ coalesced_ipi_count++;
- /* signal other processors, which will enter the IPI with interrupts off */
- ipi_selected(map & ~(1 << curcpu), IPI_RENDEZVOUS);
+ atomic_set_rel_int(DPCPU_ID_PTR(cpu, smp_rv_senders),
+ 1 << curcpu);
+ }
/* Check if the current CPU is in the map */
if ((map & (1 << curcpu)) != 0)
- smp_rendezvous_action();
-
+ smp_rendezvous_action_body(curcpu);
if (teardown_func == smp_no_rendevous_barrier)
- while (atomic_load_acq_int(&smp_rv_waiters[2]) < ncpus)
- cpu_spinwait();
+ smp_rendezvous_wait();
- /* release lock */
- mtx_unlock_spin(&smp_ipi_mtx);
+ spinlock_exit();
}
void
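To illustrate the setup/action/teardown contract documented above, here is
a hypothetical caller; count_action() and the 'hits' counter are invented
for this example and do not appear in the tree.  All CPUs in map rendezvous
at entry (a NULL setup still implies a barrier), run the action, and skip
the exit barrier; the initiator nevertheless waits in smp_rendezvous_wait()
for the actions to finish before returning:

	static int hits;

	static void
	count_action(void *arg)
	{

		atomic_add_int((int *)arg, 1);
	}

	static void
	count_cpus_in(cpumask_t map)
	{

		hits = 0;
		smp_rendezvous_cpus(map,
		    NULL,			/* no setup work, but entry barrier */
		    count_action,		/* runs on every cpu in map */
		    smp_no_rendevous_barrier,	/* targets skip the exit barrier */
		    &hits);
		/* All actions are complete here; hits == number of cpus in map. */
	}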