svn commit: r302372 - in head/sys: amd64/include cddl/compat/opensolaris/sys dev/cpuctl i386/include kern net netinet powerpc/include powerpc/powerpc vm

Nathan Whitehorn nwhitehorn at FreeBSD.org
Wed Jul 6 14:09:52 UTC 2016


Author: nwhitehorn
Date: Wed Jul  6 14:09:49 2016
New Revision: 302372
URL: https://svnweb.freebsd.org/changeset/base/302372

Log:
  Replace a number of conflations of mp_ncpus and mp_maxid with either
  mp_maxid or CPU_FOREACH() as appropriate. This fixes a number of places in
  the kernel that assumed CPU IDs are dense in [0, mp_ncpus) and would try,
  for example, to run tasks on CPUs that did not exist or to allocate too
  few buffers on systems with sparse CPU IDs in which there are holes in the
  range and mp_maxid > mp_ncpus. Such circumstances generally occur on
  systems with SMT, but on which SMT is disabled. This patch restores system
  operation at least on POWER8 systems configured in this way.
  
  There are a number of other places in the kernel with potential problems
  in these situations, but where sparse CPU IDs are not currently known
  to occur, mostly in the ARM machine-dependent code. These will be fixed
  in a follow-up commit after the stable/11 branch.
  
  PR:		kern/210106
  Reviewed by:	jhb
  Approved by:	re (glebius)

Modified:
  head/sys/amd64/include/counter.h
  head/sys/cddl/compat/opensolaris/sys/proc.h
  head/sys/dev/cpuctl/cpuctl.c
  head/sys/i386/include/counter.h
  head/sys/kern/subr_pcpu.c
  head/sys/kern/subr_taskqueue.c
  head/sys/net/flowtable.c
  head/sys/net/iflib.c
  head/sys/netinet/ip_id.c
  head/sys/powerpc/include/counter.h
  head/sys/powerpc/powerpc/mp_machdep.c
  head/sys/vm/uma.h
  head/sys/vm/uma_core.c

Modified: head/sys/amd64/include/counter.h
==============================================================================
--- head/sys/amd64/include/counter.h	Wed Jul  6 10:57:04 2016	(r302371)
+++ head/sys/amd64/include/counter.h	Wed Jul  6 14:09:49 2016	(r302372)
@@ -51,7 +51,7 @@ counter_u64_fetch_inline(uint64_t *p)
 	int i;
 
 	r = 0;
-	for (i = 0; i < mp_ncpus; i++)
+	CPU_FOREACH(i)
 		r += counter_u64_read_one((uint64_t *)p, i);
 
 	return (r);

Modified: head/sys/cddl/compat/opensolaris/sys/proc.h
==============================================================================
--- head/sys/cddl/compat/opensolaris/sys/proc.h	Wed Jul  6 10:57:04 2016	(r302371)
+++ head/sys/cddl/compat/opensolaris/sys/proc.h	Wed Jul  6 14:09:49 2016	(r302372)
@@ -45,8 +45,8 @@
 #define	CPU		curcpu
 #define	minclsyspri	PRIBIO
 #define	maxclsyspri	PVM
-#define	max_ncpus	mp_ncpus
-#define	boot_max_ncpus	mp_ncpus
+#define	max_ncpus	(mp_maxid + 1)
+#define	boot_max_ncpus	(mp_maxid + 1)
 
 #define	TS_RUN	0
 

Modified: head/sys/dev/cpuctl/cpuctl.c
==============================================================================
--- head/sys/dev/cpuctl/cpuctl.c	Wed Jul  6 10:57:04 2016	(r302371)
+++ head/sys/dev/cpuctl/cpuctl.c	Wed Jul  6 14:09:49 2016	(r302372)
@@ -120,7 +120,7 @@ static void
 set_cpu(int cpu, struct thread *td)
 {
 
-	KASSERT(cpu >= 0 && cpu < mp_ncpus && cpu_enabled(cpu),
+	KASSERT(cpu >= 0 && cpu <= mp_maxid && cpu_enabled(cpu),
 	    ("[cpuctl,%d]: bad cpu number %d", __LINE__, cpu));
 	thread_lock(td);
 	sched_bind(td, cpu);
@@ -133,7 +133,7 @@ static void
 restore_cpu(int oldcpu, int is_bound, struct thread *td)
 {
 
-	KASSERT(oldcpu >= 0 && oldcpu < mp_ncpus && cpu_enabled(oldcpu),
+	KASSERT(oldcpu >= 0 && oldcpu <= mp_maxid && cpu_enabled(oldcpu),
 	    ("[cpuctl,%d]: bad cpu number %d", __LINE__, oldcpu));
 	thread_lock(td);
 	if (is_bound == 0)
@@ -150,7 +150,7 @@ cpuctl_ioctl(struct cdev *dev, u_long cm
 	int ret;
 	int cpu = dev2unit(dev);
 
-	if (cpu >= mp_ncpus || !cpu_enabled(cpu)) {
+	if (cpu > mp_maxid || !cpu_enabled(cpu)) {
 		DPRINTF("[cpuctl,%d]: bad cpu number %d\n", __LINE__, cpu);
 		return (ENXIO);
 	}
@@ -201,7 +201,7 @@ cpuctl_do_cpuid_count(int cpu, cpuctl_cp
 	int is_bound = 0;
 	int oldcpu;
 
-	KASSERT(cpu >= 0 && cpu < mp_ncpus,
+	KASSERT(cpu >= 0 && cpu <= mp_maxid,
 	    ("[cpuctl,%d]: bad cpu number %d", __LINE__, cpu));
 
 	/* Explicitly clear cpuid data to avoid returning stale info. */
@@ -245,7 +245,7 @@ cpuctl_do_msr(int cpu, cpuctl_msr_args_t
 	int oldcpu;
 	int ret;
 
-	KASSERT(cpu >= 0 && cpu < mp_ncpus,
+	KASSERT(cpu >= 0 && cpu <= mp_maxid,
 	    ("[cpuctl,%d]: bad cpu number %d", __LINE__, cpu));
 
 	/*
@@ -296,7 +296,7 @@ cpuctl_do_update(int cpu, cpuctl_update_
 	char vendor[13];
 	int ret;
 
-	KASSERT(cpu >= 0 && cpu < mp_ncpus,
+	KASSERT(cpu >= 0 && cpu <= mp_maxid,
 	    ("[cpuctl,%d]: bad cpu number %d", __LINE__, cpu));
 	DPRINTF("[cpuctl,%d]: XXX %d", __LINE__, cpu);
 
@@ -512,7 +512,7 @@ cpuctl_open(struct cdev *dev, int flags,
 	int cpu;
 
 	cpu = dev2unit(dev);
-	if (cpu >= mp_ncpus || !cpu_enabled(cpu)) {
+	if (cpu > mp_maxid || !cpu_enabled(cpu)) {
 		DPRINTF("[cpuctl,%d]: incorrect cpu number %d\n", __LINE__,
 		    cpu);
 		return (ENXIO);
@@ -531,15 +531,15 @@ cpuctl_modevent(module_t mod __unused, i
 	case MOD_LOAD:
 		if (bootverbose)
 			printf("cpuctl: access to MSR registers/cpuid info.\n");
-		cpuctl_devs = malloc(sizeof(*cpuctl_devs) * mp_ncpus, M_CPUCTL,
+		cpuctl_devs = malloc(sizeof(*cpuctl_devs) * (mp_maxid + 1), M_CPUCTL,
 		    M_WAITOK | M_ZERO);
-		for (cpu = 0; cpu < mp_ncpus; cpu++)
+		CPU_FOREACH(cpu)
 			if (cpu_enabled(cpu))
 				cpuctl_devs[cpu] = make_dev(&cpuctl_cdevsw, cpu,
 				    UID_ROOT, GID_KMEM, 0640, "cpuctl%d", cpu);
 		break;
 	case MOD_UNLOAD:
-		for (cpu = 0; cpu < mp_ncpus; cpu++) {
+		CPU_FOREACH(cpu) {
 			if (cpuctl_devs[cpu] != NULL)
 				destroy_dev(cpuctl_devs[cpu]);
 		}

Modified: head/sys/i386/include/counter.h
==============================================================================
--- head/sys/i386/include/counter.h	Wed Jul  6 10:57:04 2016	(r302371)
+++ head/sys/i386/include/counter.h	Wed Jul  6 14:09:49 2016	(r302372)
@@ -98,13 +98,13 @@ counter_u64_fetch_inline(uint64_t *p)
 		 * critical section as well.
 		 */
 		critical_enter();
-		for (i = 0; i < mp_ncpus; i++) {
+		CPU_FOREACH(i) {
 			res += *(uint64_t *)((char *)p +
 			    sizeof(struct pcpu) * i);
 		}
 		critical_exit();
 	} else {
-		for (i = 0; i < mp_ncpus; i++)
+		CPU_FOREACH(i)
 			res += counter_u64_read_one_8b((uint64_t *)((char *)p +
 			    sizeof(struct pcpu) * i));
 	}
@@ -144,7 +144,7 @@ counter_u64_zero_inline(counter_u64_t c)
 
 	if ((cpu_feature & CPUID_CX8) == 0) {
 		critical_enter();
-		for (i = 0; i < mp_ncpus; i++)
+		CPU_FOREACH(i)
 			*(uint64_t *)((char *)c + sizeof(struct pcpu) * i) = 0;
 		critical_exit();
 	} else {

Modified: head/sys/kern/subr_pcpu.c
==============================================================================
--- head/sys/kern/subr_pcpu.c	Wed Jul  6 10:57:04 2016	(r302371)
+++ head/sys/kern/subr_pcpu.c	Wed Jul  6 14:09:49 2016	(r302372)
@@ -248,7 +248,7 @@ dpcpu_copy(void *s, int size)
 	uintptr_t dpcpu;
 	int i;
 
-	for (i = 0; i < mp_ncpus; ++i) {
+	CPU_FOREACH(i) {
 		dpcpu = dpcpu_off[i];
 		if (dpcpu == 0)
 			continue;
@@ -289,7 +289,7 @@ sysctl_dpcpu_quad(SYSCTL_HANDLER_ARGS)
 	int i;
 
 	count = 0;
-	for (i = 0; i < mp_ncpus; ++i) {
+	CPU_FOREACH(i) {
 		dpcpu = dpcpu_off[i];
 		if (dpcpu == 0)
 			continue;
@@ -306,7 +306,7 @@ sysctl_dpcpu_long(SYSCTL_HANDLER_ARGS)
 	int i;
 
 	count = 0;
-	for (i = 0; i < mp_ncpus; ++i) {
+	CPU_FOREACH(i) {
 		dpcpu = dpcpu_off[i];
 		if (dpcpu == 0)
 			continue;
@@ -323,7 +323,7 @@ sysctl_dpcpu_int(SYSCTL_HANDLER_ARGS)
 	int i;
 
 	count = 0;
-	for (i = 0; i < mp_ncpus; ++i) {
+	CPU_FOREACH(i) {
 		dpcpu = dpcpu_off[i];
 		if (dpcpu == 0)
 			continue;

Modified: head/sys/kern/subr_taskqueue.c
==============================================================================
--- head/sys/kern/subr_taskqueue.c	Wed Jul  6 10:57:04 2016	(r302371)
+++ head/sys/kern/subr_taskqueue.c	Wed Jul  6 14:09:49 2016	(r302372)
@@ -832,6 +832,7 @@ static void
 taskqgroup_cpu_create(struct taskqgroup *qgroup, int idx)
 {
 	struct taskqgroup_cpu *qcpu;
+	int i, j;
 
 	qcpu = &qgroup->tqg_queue[idx];
 	LIST_INIT(&qcpu->tgc_tasks);
@@ -839,7 +840,15 @@ taskqgroup_cpu_create(struct taskqgroup 
 	    taskqueue_thread_enqueue, &qcpu->tgc_taskq);
 	taskqueue_start_threads(&qcpu->tgc_taskq, 1, PI_SOFT,
 	    "%s_%d", qgroup->tqg_name, idx);
-	qcpu->tgc_cpu = idx * qgroup->tqg_stride;
+
+	for (i = CPU_FIRST(), j = 0; j < idx * qgroup->tqg_stride;
+	    j++, i = CPU_NEXT(i)) {
+		/*
+		 * Wait: evaluate the idx * qgroup->tqg_stride'th CPU,
+		 * potentially wrapping the actual count
+		 */
+	}
+	qcpu->tgc_cpu = i;
 }
 
 static void
@@ -1017,13 +1026,14 @@ _taskqgroup_adjust(struct taskqgroup *qg
 	LIST_HEAD(, grouptask) gtask_head = LIST_HEAD_INITIALIZER(NULL);
 	cpuset_t mask;
 	struct grouptask *gtask;
-	int i, old_cnt, qid;
+	int i, k, old_cnt, qid, cpu;
 
 	mtx_assert(&qgroup->tqg_lock, MA_OWNED);
 
 	if (cnt < 1 || cnt * stride > mp_ncpus || !smp_started) {
-		printf("taskqgroup_adjust failed cnt: %d stride: %d mp_ncpus: %d smp_started: %d\n",
-			   cnt, stride, mp_ncpus, smp_started);
+		printf("taskqgroup_adjust failed cnt: %d stride: %d "
+		    "mp_ncpus: %d smp_started: %d\n", cnt, stride, mp_ncpus,
+		    smp_started);
 		return (EINVAL);
 	}
 	if (qgroup->tqg_adjusting) {
@@ -1081,8 +1091,11 @@ _taskqgroup_adjust(struct taskqgroup *qg
 	/*
 	 * Set new CPU and IRQ affinity
 	 */
+	cpu = CPU_FIRST();
 	for (i = 0; i < cnt; i++) {
-		qgroup->tqg_queue[i].tgc_cpu = i * qgroup->tqg_stride;
+		qgroup->tqg_queue[i].tgc_cpu = cpu;
+		for (k = 0; k < qgroup->tqg_stride; k++)
+			cpu = CPU_NEXT(cpu);
 		CPU_ZERO(&mask);
 		CPU_SET(qgroup->tqg_queue[i].tgc_cpu, &mask);
 		LIST_FOREACH(gtask, &qgroup->tqg_queue[i].tgc_tasks, gt_list) {

Modified: head/sys/net/flowtable.c
==============================================================================
--- head/sys/net/flowtable.c	Wed Jul  6 10:57:04 2016	(r302371)
+++ head/sys/net/flowtable.c	Wed Jul  6 14:09:49 2016	(r302372)
@@ -746,7 +746,7 @@ flowtable_alloc(struct flowtable *ft)
 		ft->ft_table[i] = uma_zalloc(pcpu_zone_ptr, M_WAITOK | M_ZERO);
 
 	ft->ft_masks = uma_zalloc(pcpu_zone_ptr, M_WAITOK);
-	for (int i = 0; i < mp_ncpus; i++) {
+	CPU_FOREACH(i) {
 		bitstr_t **b;
 
 		b = zpcpu_get_cpu(ft->ft_masks, i);

Modified: head/sys/net/iflib.c
==============================================================================
--- head/sys/net/iflib.c	Wed Jul  6 10:57:04 2016	(r302371)
+++ head/sys/net/iflib.c	Wed Jul  6 14:09:49 2016	(r302372)
@@ -3848,7 +3848,7 @@ iflib_queues_alloc(if_ctx_t ctx)
 	iflib_txq_t txq;
 	iflib_rxq_t rxq;
 	iflib_fl_t fl = NULL;
-	int i, j, err, txconf, rxconf, fl_ifdi_offset;
+	int i, j, cpu, err, txconf, rxconf, fl_ifdi_offset;
 	iflib_dma_info_t ifdip;
 	uint32_t *rxqsizes = sctx->isc_rxqsizes;
 	uint32_t *txqsizes = sctx->isc_txqsizes;
@@ -3897,7 +3897,7 @@ iflib_queues_alloc(if_ctx_t ctx)
 	/*
 	 * XXX handle allocation failure
 	 */
-	for (txconf = i = 0; i < ntxqsets; i++, txconf++, txq++) {
+	for (txconf = i = 0, cpu = CPU_FIRST(); i < ntxqsets; i++, txconf++, txq++, cpu = CPU_NEXT(cpu)) {
 		/* Set up some basics */
 
 		if ((ifdip = malloc(sizeof(struct iflib_dma_info) * ntxqs, M_IFLIB, M_WAITOK|M_ZERO)) == NULL) {
@@ -3917,8 +3917,8 @@ iflib_queues_alloc(if_ctx_t ctx)
 		txq->ift_ctx = ctx;
 		txq->ift_id = i;
 		/* XXX fix this */
-		txq->ift_timer.c_cpu = i % mp_ncpus;
-		txq->ift_db_check.c_cpu = i % mp_ncpus;
+		txq->ift_timer.c_cpu = cpu;
+		txq->ift_db_check.c_cpu = cpu;
 		txq->ift_nbr = nbuf_rings;
 
 		if (iflib_txsd_alloc(txq)) {

Modified: head/sys/netinet/ip_id.c
==============================================================================
--- head/sys/netinet/ip_id.c	Wed Jul  6 10:57:04 2016	(r302371)
+++ head/sys/netinet/ip_id.c	Wed Jul  6 14:09:49 2016	(r302372)
@@ -275,10 +275,12 @@ ip_fillid(struct ip *ip)
 static void
 ipid_sysinit(void)
 {
+	int i;
 
 	mtx_init(&V_ip_id_mtx, "ip_id_mtx", NULL, MTX_DEF);
 	V_ip_id = counter_u64_alloc(M_WAITOK);
-	for (int i = 0; i < mp_ncpus; i++)
+	
+	CPU_FOREACH(i)
 		arc4rand(zpcpu_get_cpu(V_ip_id, i), sizeof(uint64_t), 0);
 }
 VNET_SYSINIT(ip_id, SI_SUB_PROTO_DOMAIN, SI_ORDER_ANY, ipid_sysinit, NULL);

Modified: head/sys/powerpc/include/counter.h
==============================================================================
--- head/sys/powerpc/include/counter.h	Wed Jul  6 10:57:04 2016	(r302371)
+++ head/sys/powerpc/include/counter.h	Wed Jul  6 14:09:49 2016	(r302372)
@@ -54,7 +54,7 @@ counter_u64_fetch_inline(uint64_t *p)
 	int i;
 
 	r = 0;
-	for (i = 0; i < mp_ncpus; i++)
+	CPU_FOREACH(i)
 		r += counter_u64_read_one((uint64_t *)p, i);
 
 	return (r);

Modified: head/sys/powerpc/powerpc/mp_machdep.c
==============================================================================
--- head/sys/powerpc/powerpc/mp_machdep.c	Wed Jul  6 10:57:04 2016	(r302371)
+++ head/sys/powerpc/powerpc/mp_machdep.c	Wed Jul  6 14:09:49 2016	(r302372)
@@ -113,20 +113,16 @@ cpu_mp_setmaxid(void)
 	int error;
 
 	mp_ncpus = 0;
+	mp_maxid = 0;
 	error = platform_smp_first_cpu(&cpuref);
 	while (!error) {
 		mp_ncpus++;
+		mp_maxid = max(cpuref.cr_cpuid, mp_maxid);
 		error = platform_smp_next_cpu(&cpuref);
 	}
 	/* Sanity. */
 	if (mp_ncpus == 0)
 		mp_ncpus = 1;
-
-	/*
-	 * Set the largest cpuid we're going to use. This is necessary
-	 * for VM initialization.
-	 */
-	mp_maxid = min(mp_ncpus, MAXCPU) - 1;
 }
 
 int

Modified: head/sys/vm/uma.h
==============================================================================
--- head/sys/vm/uma.h	Wed Jul  6 10:57:04 2016	(r302371)
+++ head/sys/vm/uma.h	Wed Jul  6 14:09:49 2016	(r302372)
@@ -276,7 +276,7 @@ uma_zone_t uma_zcache_create(char *name,
 					 * mini-dumps.
 					 */
 #define	UMA_ZONE_PCPU		0x8000	/*
-					 * Allocates mp_ncpus slabs sized to
+					 * Allocates mp_maxid + 1 slabs sized to
 					 * sizeof(struct pcpu).
 					 */
 

Modified: head/sys/vm/uma_core.c
==============================================================================
--- head/sys/vm/uma_core.c	Wed Jul  6 10:57:04 2016	(r302371)
+++ head/sys/vm/uma_core.c	Wed Jul  6 14:09:49 2016	(r302372)
@@ -1227,7 +1227,7 @@ keg_small_init(uma_keg_t keg)
 	u_int shsize;
 
 	if (keg->uk_flags & UMA_ZONE_PCPU) {
-		u_int ncpus = mp_ncpus ? mp_ncpus : MAXCPU;
+		u_int ncpus = (mp_maxid + 1) ? (mp_maxid + 1) : MAXCPU;
 
 		keg->uk_slabsize = sizeof(struct pcpu);
 		keg->uk_ppera = howmany(ncpus * sizeof(struct pcpu),
@@ -3265,9 +3265,10 @@ uma_large_free(uma_slab_t slab)
 static void
 uma_zero_item(void *item, uma_zone_t zone)
 {
+	int i;
 
 	if (zone->uz_flags & UMA_ZONE_PCPU) {
-		for (int i = 0; i < mp_ncpus; i++)
+		CPU_FOREACH(i)
 			bzero(zpcpu_get_cpu(item, i), zone->uz_size);
 	} else
 		bzero(item, zone->uz_size);


More information about the svn-src-all mailing list