svn commit: r336021 - in head/sys: kern sys

Matt Macy mmacy at FreeBSD.org
Fri Jul 6 06:20:04 UTC 2018


Author: mmacy
Date: Fri Jul  6 06:20:03 2018
New Revision: 336021
URL: https://svnweb.freebsd.org/changeset/base/336021

Log:
  epoch(9): simplify initialization
  
  Replace manual NUMA-aware allocation with a pcpu zone.

Modified:
  head/sys/kern/subr_epoch.c
  head/sys/sys/epoch_private.h

Modified: head/sys/kern/subr_epoch.c
==============================================================================
--- head/sys/kern/subr_epoch.c	Fri Jul  6 02:06:03 2018	(r336020)
+++ head/sys/kern/subr_epoch.c	Fri Jul  6 06:20:03 2018	(r336021)
@@ -49,6 +49,7 @@ __FBSDID("$FreeBSD$");
 #include <vm/vm.h>
 #include <vm/vm_extern.h>
 #include <vm/vm_kern.h>
+#include <vm/uma.h>
 
 #include <ck_epoch.h>
 
@@ -93,29 +94,23 @@ TAILQ_HEAD (threadlist, thread);
 CK_STACK_CONTAINER(struct ck_epoch_entry, stack_entry,
     ck_epoch_entry_container)
 
-	epoch_t	allepochs[MAX_EPOCHS];
+epoch_t	allepochs[MAX_EPOCHS];
 
 DPCPU_DEFINE(struct grouptask, epoch_cb_task);
 DPCPU_DEFINE(int, epoch_cb_count);
 
-static __read_mostly int domcount[MAXMEMDOM];
-static __read_mostly int domoffsets[MAXMEMDOM];
 static __read_mostly int inited;
 static __read_mostly int epoch_count;
 __read_mostly epoch_t global_epoch;
 __read_mostly epoch_t global_epoch_preempt;
 
 static void epoch_call_task(void *context __unused);
+static 	uma_zone_t pcpu_zone_record;
 
-#if defined(__powerpc64__) || defined(__powerpc__) || !defined(NUMA)
-static bool usedomains = false;
-#else
-static bool usedomains = true;
-#endif
 static void
 epoch_init(void *arg __unused)
 {
-	int domain, cpu;
+	int cpu;
 
 	block_count = counter_u64_alloc(M_WAITOK);
 	migrate_count = counter_u64_alloc(M_WAITOK);
@@ -123,25 +118,9 @@ epoch_init(void *arg __unused)
 	switch_count = counter_u64_alloc(M_WAITOK);
 	epoch_call_count = counter_u64_alloc(M_WAITOK);
 	epoch_call_task_count = counter_u64_alloc(M_WAITOK);
-	if (usedomains == false)
-		goto done;
-	domain = 0;
-	domoffsets[0] = 0;
-	for (domain = 0; domain < vm_ndomains; domain++) {
-		domcount[domain] = CPU_COUNT(&cpuset_domain[domain]);
-		if (bootverbose)
-			printf("domcount[%d] %d\n", domain, domcount[domain]);
-	}
-	for (domain = 1; domain < vm_ndomains; domain++)
-		domoffsets[domain] = domoffsets[domain - 1] + domcount[domain - 1];
 
-	for (domain = 0; domain < vm_ndomains; domain++) {
-		if (domcount[domain] == 0) {
-			usedomains = false;
-			break;
-		}
-	}
-done:
+	pcpu_zone_record = uma_zcreate("epoch_record pcpu", sizeof(struct epoch_record),
+	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_PCPU);
 	CPU_FOREACH(cpu) {
 		GROUPTASK_INIT(DPCPU_ID_PTR(cpu, epoch_cb_task), 0, epoch_call_task, NULL);
 		taskqgroup_attach_cpu(qgroup_softirq, DPCPU_ID_PTR(cpu, epoch_cb_task), NULL, cpu, -1, "epoch call task");
@@ -161,39 +140,19 @@ epoch_init_smp(void *dummy __unused)
 SYSINIT(epoch_smp, SI_SUB_SMP + 1, SI_ORDER_FIRST, epoch_init_smp, NULL);
 #endif
 
-
 static void
-epoch_init_numa(epoch_t epoch)
+epoch_ctor(epoch_t epoch)
 {
-	int domain, cpu_offset;
 	epoch_record_t er;
+	int cpu;
 
-	for (domain = 0; domain < vm_ndomains; domain++) {
-		er = malloc_domain(sizeof(*er) * domcount[domain], M_EPOCH,
-		    domain, M_ZERO | M_WAITOK);
-		epoch->e_pcpu_dom[domain] = er;
-		cpu_offset = domoffsets[domain];
-		for (int i = 0; i < domcount[domain]; i++, er++) {
-			epoch->e_pcpu[cpu_offset + i] = er;
-			ck_epoch_register(&epoch->e_epoch, &er->er_record, NULL);
-			TAILQ_INIT((struct threadlist *)(uintptr_t)&er->er_tdlist);
-			er->er_cpuid = cpu_offset + i;
-		}
-	}
-}
-
-static void
-epoch_init_legacy(epoch_t epoch)
-{
-	epoch_record_t er;
-
-	er = malloc(sizeof(*er) * mp_ncpus, M_EPOCH, M_ZERO | M_WAITOK);
-	epoch->e_pcpu_dom[0] = er;
-	for (int i = 0; i < mp_ncpus; i++, er++) {
-		epoch->e_pcpu[i] = er;
+	epoch->e_pcpu_record = uma_zalloc_pcpu(pcpu_zone_record, M_WAITOK);
+	CPU_FOREACH(cpu) {
+		er = zpcpu_get_cpu(epoch->e_pcpu_record, cpu);
+		bzero(er, sizeof(*er));
 		ck_epoch_register(&epoch->e_epoch, &er->er_record, NULL);
 		TAILQ_INIT((struct threadlist *)(uintptr_t)&er->er_tdlist);
-		er->er_cpuid = i;
+		er->er_cpuid = cpu;
 	}
 }
 
@@ -204,13 +163,9 @@ epoch_alloc(int flags)
 
 	if (__predict_false(!inited))
 		panic("%s called too early in boot", __func__);
-	epoch = malloc(sizeof(struct epoch) + mp_ncpus * sizeof(void *),
-	    M_EPOCH, M_ZERO | M_WAITOK);
+	epoch = malloc(sizeof(struct epoch), M_EPOCH, M_ZERO | M_WAITOK);
 	ck_epoch_init(&epoch->e_epoch);
-	if (usedomains)
-		epoch_init_numa(epoch);
-	else
-		epoch_init_legacy(epoch);
+	epoch_ctor(epoch);
 	MPASS(epoch_count < MAX_EPOCHS - 2);
 	epoch->e_flags = flags;
 	epoch->e_idx = epoch_count;
@@ -221,23 +176,18 @@ epoch_alloc(int flags)
 void
 epoch_free(epoch_t epoch)
 {
-	int domain;
 #ifdef INVARIANTS
 	struct epoch_record *er;
 	int cpu;
 
 	CPU_FOREACH(cpu) {
-		er = epoch->e_pcpu[cpu];
+		er = zpcpu_get_cpu(epoch->e_pcpu_record, cpu);
 		MPASS(TAILQ_EMPTY(&er->er_tdlist));
 	}
 #endif
 	allepochs[epoch->e_idx] = NULL;
 	epoch_wait(global_epoch);
-	if (usedomains)
-		for (domain = 0; domain < vm_ndomains; domain++)
-			free_domain(epoch->e_pcpu_dom[domain], M_EPOCH);
-	else
-		free(epoch->e_pcpu_dom[0], M_EPOCH);
+	uma_zfree_pcpu(pcpu_zone_record, epoch->e_pcpu_record);
 	free(epoch, M_EPOCH);
 }
 
@@ -496,7 +446,7 @@ epoch_call(epoch_t epoch, epoch_context_t ctx, void (*
 
 	critical_enter();
 	*DPCPU_PTR(epoch_cb_count) += 1;
-	er = epoch->e_pcpu[curcpu];
+	er = epoch_currecord(epoch);
 	ck_epoch_call(&er->er_record, cb, (ck_epoch_cb_t *)callback);
 	critical_exit();
 	return;
@@ -509,6 +459,7 @@ epoch_call_task(void *arg __unused)
 {
 	ck_stack_entry_t *cursor, *head, *next;
 	ck_epoch_record_t *record;
+	epoch_record_t er;
 	epoch_t epoch;
 	ck_stack_t cb_stack;
 	int i, npending, total;
@@ -519,7 +470,8 @@ epoch_call_task(void *arg __unused)
 	for (total = i = 0; i < epoch_count; i++) {
 		if (__predict_false((epoch = allepochs[i]) == NULL))
 			continue;
-		record = &epoch->e_pcpu[curcpu]->er_record;
+		er = epoch_currecord(epoch);
+		record = &er->er_record;
 		if ((npending = record->n_pending) == 0)
 			continue;
 		ck_epoch_poll_deferred(record, &cb_stack);
@@ -555,7 +507,7 @@ in_epoch_verbose(epoch_t epoch, int dump_onfail)
 	if (__predict_false((epoch) == NULL))
 		return (0);
 	critical_enter();
-	er = epoch->e_pcpu[curcpu];
+	er = epoch_currecord(epoch);
 	TAILQ_FOREACH(tdwait, &er->er_tdlist, et_link)
 		if (tdwait->et_td == td) {
 			critical_exit();

Modified: head/sys/sys/epoch_private.h
==============================================================================
--- head/sys/sys/epoch_private.h	Fri Jul  6 02:06:03 2018	(r336020)
+++ head/sys/sys/epoch_private.h	Fri Jul  6 06:20:03 2018	(r336021)
@@ -97,12 +97,17 @@ typedef struct epoch_record {
 
 struct epoch {
 	struct ck_epoch e_epoch __aligned(EPOCH_ALIGN);
-	struct epoch_record *e_pcpu_dom[MAXMEMDOM] __aligned(EPOCH_ALIGN);
+	epoch_record_t e_pcpu_record;
 	int	e_idx;
 	int	e_flags;
-	struct epoch_record *e_pcpu[0];
 };
 
+static epoch_record_t
+epoch_currecord(epoch_t epoch)
+{
+	return zpcpu_get_cpu(epoch->e_pcpu_record, curcpu);
+}
+
 #define INIT_CHECK(epoch)							\
 	do {											\
 		if (__predict_false((epoch) == NULL))		\
@@ -115,6 +120,7 @@ epoch_enter_preempt(epoch_t epoch, epoch_tracker_t et)
 	struct epoch_record *er;
 	struct epoch_thread *etd;
 	struct thread_lite *td;
+
 	MPASS(cold || epoch != NULL);
 	INIT_CHECK(epoch);
 	etd = (void *)et;
@@ -130,7 +136,7 @@ epoch_enter_preempt(epoch_t epoch, epoch_tracker_t et)
 	sched_pin_lite(td);
 
 	td->td_pre_epoch_prio = td->td_priority;
-	er = epoch->e_pcpu[curcpu];
+	er = epoch_currecord(epoch);
 	TAILQ_INSERT_TAIL(&er->er_tdlist, etd, et_link);
 	ck_epoch_begin(&er->er_record, (ck_epoch_section_t *)&etd->et_section);
 	critical_exit_sa(td);
@@ -139,16 +145,17 @@ epoch_enter_preempt(epoch_t epoch, epoch_tracker_t et)
 static __inline void
 epoch_enter(epoch_t epoch)
 {
-	ck_epoch_record_t *record;
 	struct thread_lite *td;
+	epoch_record_t er;
+
 	MPASS(cold || epoch != NULL);
 	INIT_CHECK(epoch);
 	td = (struct thread_lite *)curthread;
 
 	td->td_epochnest++;
 	critical_enter_sa(td);
-	record = &epoch->e_pcpu[curcpu]->er_record;
-	ck_epoch_begin(record, NULL);
+	er = epoch_currecord(epoch);
+	ck_epoch_begin(&er->er_record, NULL);
 }
 
 static __inline void
@@ -164,7 +171,7 @@ epoch_exit_preempt(epoch_t epoch, epoch_tracker_t et)
 	sched_unpin_lite(td);
 	MPASS(td->td_epochnest);
 	td->td_epochnest--;
-	er = epoch->e_pcpu[curcpu];
+	er = epoch_currecord(epoch);
 	MPASS(epoch->e_flags & EPOCH_PREEMPT);
 	etd = (void *)et;
 #ifdef INVARIANTS
@@ -188,15 +195,15 @@ epoch_exit_preempt(epoch_t epoch, epoch_tracker_t et)
 static __inline void
 epoch_exit(epoch_t epoch)
 {
-	ck_epoch_record_t *record;
 	struct thread_lite *td;
+	epoch_record_t er;
 
 	INIT_CHECK(epoch);
 	td = (struct thread_lite *)curthread;
 	MPASS(td->td_epochnest);
 	td->td_epochnest--;
-	record = &epoch->e_pcpu[curcpu]->er_record;
-	ck_epoch_end(record, NULL);
+	er = epoch_currecord(epoch);
+	ck_epoch_end(&er->er_record, NULL);
 	critical_exit_sa(td);
 }
 #endif /* _KERNEL */


More information about the svn-src-head mailing list