svn commit: r299746 - in head/sys: cddl/dev/dtrace cddl/dev/dtrace/amd64 cddl/dev/dtrace/i386 cddl/dev/dtrace/powerpc conf dev/acpica dev/hwpmc dev/hyperv/vmbus dev/xen/control geom/eli kern net sy...

Julian Elischer julian at freebsd.org
Mon May 16 05:26:15 UTC 2016


On 15/05/2016 2:22 AM, John Baldwin wrote:
> Author: jhb
> Date: Sat May 14 18:22:52 2016
> New Revision: 299746
> URL: https://svnweb.freebsd.org/changeset/base/299746
>
> Log:
>    Add an EARLY_AP_STARTUP option to start APs earlier during boot.
>    
>    Currently, Application Processors (non-boot CPUs) are started by
>    MD code at SI_SUB_CPU, but they are kept waiting in a "pen" until
>    SI_SUB_SMP at which point they are released to run kernel threads.
>    SI_SUB_SMP is one of the last SYSINIT levels, so APs don't enter
>    the scheduler and start running threads until fairly late in the
>    boot.
>    
>    This change moves SI_SUB_SMP up to just before software interrupt
>    threads are created allowing the APs to start executing kernel
>    threads much sooner (before any devices are probed).  This allows
>    several initialization routines that need to perform initialization
>    on all CPUs to now perform that initialization in one step rather
>    than having to defer the AP initialization to a second SYSINIT run
>    at SI_SUB_SMP.  It also permits all CPUs to be available for
>    handling interrupts before any devices are probed.
>    
>    This last feature fixes a problem with interrupt vector exhaustion.
>    Specifically, in the old model all device interrupts were routed
>    onto the boot CPU during boot.  Later after the APs were released at
>    SI_SUB_SMP, interrupts were redistributed across all CPUs.
>    
>    However, several drivers for multiqueue hardware allocate N interrupts
>    per CPU in the system.  In a system with many CPUs, just a few drivers
>    doing this could exhaust the available pool of interrupt vectors on
>    the boot CPU as each driver was allocating N * mp_ncpu vectors on the
>    boot CPU.  Now, drivers will allocate interrupts on their desired CPUs
>    during boot meaning that only N interrupts are allocated from the boot
>    CPU instead of N * mp_ncpu.
>    
>    Some other bits of code can also be simplified as smp_started is
>    now true much earlier and will now always be true for these bits of
>    code.  This removes the need to treat the single-CPU boot environment
>    as a special case.
>    
>    As a transition aid, the new behavior is available under a new kernel
>    option (EARLY_AP_STARTUP).  This will allow the option to be turned off
>    if need be during initial testing.  I plan to enable this on x86 by
>    default in a followup commit in the next few days and to have all
>    platforms moved over before 11.0.  Once the transition is complete,
>    the option will be removed along with the !EARLY_AP_STARTUP code.
>    
>    These changes have only been tested on x86.  Other platform maintainers
>    are encouraged to port their architectures over as well.  The main
>    things to check for are any uses of smp_started in MD code that can be
>    simplified and SI_SUB_SMP SYSINITs in MD code that can be removed in
>    the EARLY_AP_STARTUP case (e.g. the interrupt shuffling).
>    
>    PR:		kern/199321
>    Reviewed by:	markj, gnn, kib
>    Sponsored by:	Netflix
>
> Modified:
>    head/sys/cddl/dev/dtrace/amd64/dtrace_subr.c
>    head/sys/cddl/dev/dtrace/dtrace_load.c
>    head/sys/cddl/dev/dtrace/i386/dtrace_subr.c
>    head/sys/cddl/dev/dtrace/powerpc/dtrace_subr.c
>    head/sys/conf/NOTES
>    head/sys/conf/options
>    head/sys/dev/acpica/acpi.c
>    head/sys/dev/acpica/acpi_cpu.c
>    head/sys/dev/hwpmc/hwpmc_mod.c
>    head/sys/dev/hyperv/vmbus/hv_vmbus_drv_freebsd.c
>    head/sys/dev/xen/control/control.c
>    head/sys/geom/eli/g_eli.c
>    head/sys/kern/kern_clock.c
>    head/sys/kern/kern_clocksource.c
>    head/sys/kern/kern_cpu.c
>    head/sys/net/netisr.c
>    head/sys/sys/kernel.h
>    head/sys/x86/isa/clock.c
>    head/sys/x86/x86/intr_machdep.c
>    head/sys/x86/x86/local_apic.c
>    head/sys/x86/x86/mca.c
>    head/sys/x86/x86/mp_x86.c
>
> Modified: head/sys/cddl/dev/dtrace/amd64/dtrace_subr.c
> ==============================================================================
> --- head/sys/cddl/dev/dtrace/amd64/dtrace_subr.c	Sat May 14 18:02:47 2016	(r299745)
> +++ head/sys/cddl/dev/dtrace/amd64/dtrace_subr.c	Sat May 14 18:22:52 2016	(r299746)
> @@ -246,6 +246,26 @@ static uint64_t	nsec_scale;
>   /* See below for the explanation of this macro. */
>   #define SCALE_SHIFT	28
>   
> +static void
> +dtrace_gethrtime_init_cpu(void *arg)
> +{
> +	uintptr_t cpu = (uintptr_t) arg;
> +
> +	if (cpu == curcpu)
> +		tgt_cpu_tsc = rdtsc();
> +	else
> +		hst_cpu_tsc = rdtsc();
> +}
> +
> +#ifdef EARLY_AP_STARTUP
> +static void
> +dtrace_gethrtime_init(void *arg)
> +{
> +	struct pcpu *pc;
> +	uint64_t tsc_f;
> +	cpuset_t map;
> +	int i;
> +#else
>   /*
>    * Get the frequency and scale factor as early as possible so that they can be
>    * used for boot-time tracing.
> @@ -254,6 +274,7 @@ static void
>   dtrace_gethrtime_init_early(void *arg)
>   {
>   	uint64_t tsc_f;
> +#endif
>   
>   	/*
>   	 * Get TSC frequency known at this moment.
> @@ -282,27 +303,18 @@ dtrace_gethrtime_init_early(void *arg)
>   	 *   (terahertz) values;
>   	 */
>   	nsec_scale = ((uint64_t)NANOSEC << SCALE_SHIFT) / tsc_f;
> +#ifndef EARLY_AP_STARTUP
>   }
>   SYSINIT(dtrace_gethrtime_init_early, SI_SUB_CPU, SI_ORDER_ANY,
>       dtrace_gethrtime_init_early, NULL);
>   
>   static void
> -dtrace_gethrtime_init_cpu(void *arg)
> -{
> -	uintptr_t cpu = (uintptr_t) arg;
> -
> -	if (cpu == curcpu)
> -		tgt_cpu_tsc = rdtsc();
> -	else
> -		hst_cpu_tsc = rdtsc();
> -}
> -
> -static void
>   dtrace_gethrtime_init(void *arg)
>   {
>   	struct pcpu *pc;
>   	cpuset_t map;
>   	int i;
> +#endif
>   
>   	/* The current CPU is the reference one. */
>   	sched_pin();
> @@ -323,8 +335,13 @@ dtrace_gethrtime_init(void *arg)
>   	}
>   	sched_unpin();
>   }
> +#ifdef EARLY_AP_STARTUP
> +SYSINIT(dtrace_gethrtime_init, SI_SUB_DTRACE, SI_ORDER_ANY,
> +    dtrace_gethrtime_init, NULL);
> +#else
>   SYSINIT(dtrace_gethrtime_init, SI_SUB_SMP, SI_ORDER_ANY, dtrace_gethrtime_init,
>       NULL);
> +#endif
>   
>   /*
>    * DTrace needs a high resolution time function which can
>
> Modified: head/sys/cddl/dev/dtrace/dtrace_load.c
> ==============================================================================
> --- head/sys/cddl/dev/dtrace/dtrace_load.c	Sat May 14 18:02:47 2016	(r299745)
> +++ head/sys/cddl/dev/dtrace/dtrace_load.c	Sat May 14 18:22:52 2016	(r299746)
> @@ -22,6 +22,7 @@
>    *
>    */
>   
> +#ifndef EARLY_AP_STARTUP
>   static void
>   dtrace_ap_start(void *dummy)
>   {
> @@ -41,11 +42,15 @@ dtrace_ap_start(void *dummy)
>   }
>   
>   SYSINIT(dtrace_ap_start, SI_SUB_SMP, SI_ORDER_ANY, dtrace_ap_start, NULL);
> +#endif
>   
>   static void
>   dtrace_load(void *dummy)
>   {
>   	dtrace_provider_id_t id;
> +#ifdef EARLY_AP_STARTUP
> +	int i;
> +#endif
>   
>   	/* Hook into the trap handler. */
>   	dtrace_trap_func = dtrace_trap;
> @@ -142,8 +147,14 @@ dtrace_load(void *dummy)
>   
>   	mutex_enter(&cpu_lock);
>   
> +#ifdef EARLY_AP_STARTUP
> +	CPU_FOREACH(i) {
> +		(void) dtrace_cpu_setup(CPU_CONFIG, i);
> +	}
> +#else
>   	/* Setup the boot CPU */
>   	(void) dtrace_cpu_setup(CPU_CONFIG, 0);
> +#endif
>   
>   	mutex_exit(&cpu_lock);
>   
>
> Modified: head/sys/cddl/dev/dtrace/i386/dtrace_subr.c
> ==============================================================================
> --- head/sys/cddl/dev/dtrace/i386/dtrace_subr.c	Sat May 14 18:02:47 2016	(r299745)
> +++ head/sys/cddl/dev/dtrace/i386/dtrace_subr.c	Sat May 14 18:22:52 2016	(r299746)
> @@ -248,6 +248,26 @@ static uint64_t	nsec_scale;
>   /* See below for the explanation of this macro. */
>   #define SCALE_SHIFT	28
>   
> +static void
> +dtrace_gethrtime_init_cpu(void *arg)
> +{
> +	uintptr_t cpu = (uintptr_t) arg;
> +
> +	if (cpu == curcpu)
> +		tgt_cpu_tsc = rdtsc();
> +	else
> +		hst_cpu_tsc = rdtsc();
> +}
> +
> +#ifdef EARLY_AP_STARTUP
> +static void
> +dtrace_gethrtime_init(void *arg)
> +{
> +	struct pcpu *pc;
> +	uint64_t tsc_f;
> +	cpuset_t map;
> +	int i;
> +#else
>   /*
>    * Get the frequency and scale factor as early as possible so that they can be
>    * used for boot-time tracing.
> @@ -256,6 +276,7 @@ static void
>   dtrace_gethrtime_init_early(void *arg)
>   {
>   	uint64_t tsc_f;
> +#endif
>   
>   	/*
>   	 * Get TSC frequency known at this moment.
> @@ -284,27 +305,18 @@ dtrace_gethrtime_init_early(void *arg)
>   	 *   (terahertz) values;
>   	 */
>   	nsec_scale = ((uint64_t)NANOSEC << SCALE_SHIFT) / tsc_f;
> +#ifndef EARLY_AP_STARTUP
>   }
>   SYSINIT(dtrace_gethrtime_init_early, SI_SUB_CPU, SI_ORDER_ANY,
>       dtrace_gethrtime_init_early, NULL);
>   
>   static void
> -dtrace_gethrtime_init_cpu(void *arg)
> -{
> -	uintptr_t cpu = (uintptr_t) arg;
> -
> -	if (cpu == curcpu)
> -		tgt_cpu_tsc = rdtsc();
> -	else
> -		hst_cpu_tsc = rdtsc();
> -}
> -
> -static void
>   dtrace_gethrtime_init(void *arg)
>   {
>   	cpuset_t map;
>   	struct pcpu *pc;
>   	int i;
> +#endif
>   
>   	/* The current CPU is the reference one. */
>   	sched_pin();
> @@ -325,8 +337,13 @@ dtrace_gethrtime_init(void *arg)
>   	}
>   	sched_unpin();
>   }
> +#ifdef EARLY_AP_STARTUP
> +SYSINIT(dtrace_gethrtime_init, SI_SUB_DTRACE, SI_ORDER_ANY,
> +    dtrace_gethrtime_init, NULL);
> +#else
>   SYSINIT(dtrace_gethrtime_init, SI_SUB_SMP, SI_ORDER_ANY, dtrace_gethrtime_init,
>       NULL);
> +#endif
>   
>   /*
>    * DTrace needs a high resolution time function which can
>
> Modified: head/sys/cddl/dev/dtrace/powerpc/dtrace_subr.c
> ==============================================================================
> --- head/sys/cddl/dev/dtrace/powerpc/dtrace_subr.c	Sat May 14 18:02:47 2016	(r299745)
> +++ head/sys/cddl/dev/dtrace/powerpc/dtrace_subr.c	Sat May 14 18:22:52 2016	(r299746)
> @@ -218,8 +218,13 @@ dtrace_gethrtime_init(void *arg)
>   	}
>   	sched_unpin();
>   }
> -
> -SYSINIT(dtrace_gethrtime_init, SI_SUB_SMP, SI_ORDER_ANY, dtrace_gethrtime_init, NULL);
> +#ifdef EARLY_AP_STARTUP
> +SYSINIT(dtrace_gethrtime_init, SI_SUB_DTRACE, SI_ORDER_ANY,
> +    dtrace_gethrtime_init, NULL);
> +#else
> +SYSINIT(dtrace_gethrtime_init, SI_SUB_SMP, SI_ORDER_ANY, dtrace_gethrtime_init,
> +    NULL);
> +#endif
>   
>   /*
>    * DTrace needs a high resolution time function which can
>
> Modified: head/sys/conf/NOTES
> ==============================================================================
> --- head/sys/conf/NOTES	Sat May 14 18:02:47 2016	(r299745)
> +++ head/sys/conf/NOTES	Sat May 14 18:22:52 2016	(r299746)
> @@ -223,6 +223,12 @@ options 	SCHED_STATS
>   # Mandatory:
>   options 	SMP			# Symmetric MultiProcessor Kernel
>   
> +# EARLY_AP_STARTUP releases the Application Processors earlier in the
> +# kernel startup process (before devices are probed) rather than at the
> +# end.  This is a temporary option for use during the transition from
> +# late to early AP startup.
> +options		EARLY_AP_STARTUP
> +
>   # MAXCPU defines the maximum number of CPUs that can boot in the system.
>   # A default value should be already present, for every architecture.
>   options 	MAXCPU=32
>
> Modified: head/sys/conf/options
> ==============================================================================
> --- head/sys/conf/options	Sat May 14 18:02:47 2016	(r299745)
> +++ head/sys/conf/options	Sat May 14 18:22:52 2016	(r299746)
> @@ -620,6 +620,7 @@ DEBUG_MEMGUARD		opt_vm.h
>   DEBUG_REDZONE		opt_vm.h
>   
>   # Standard SMP options
> +EARLY_AP_STARTUP	opt_global.h
>   SMP			opt_global.h
>   
>   # Size of the kernel message buffer
>
> Modified: head/sys/dev/acpica/acpi.c
> ==============================================================================
> --- head/sys/dev/acpica/acpi.c	Sat May 14 18:02:47 2016	(r299745)
> +++ head/sys/dev/acpica/acpi.c	Sat May 14 18:22:52 2016	(r299746)
> @@ -2856,11 +2856,18 @@ acpi_EnterSleepState(struct acpi_softc *
>       stop_all_proc();
>       EVENTHANDLER_INVOKE(power_suspend);
>   
> +#ifdef EARLY_AP_STARTUP
> +    MPASS(mp_ncpus == 1 || smp_started);
> +    thread_lock(curthread);
> +    sched_bind(curthread, 0);
> +    thread_unlock(curthread);
> +#else
>       if (smp_started) {
>   	thread_lock(curthread);
>   	sched_bind(curthread, 0);
>   	thread_unlock(curthread);
>       }
> +#endif
>   
>       /*
>        * Be sure to hold Giant across DEVICE_SUSPEND/RESUME since non-MPSAFE
> @@ -2991,11 +2998,17 @@ backout:
>   
>       mtx_unlock(&Giant);
>   
> +#ifdef EARLY_AP_STARTUP
> +    thread_lock(curthread);
> +    sched_unbind(curthread);
> +    thread_unlock(curthread);
> +#else
>       if (smp_started) {
>   	thread_lock(curthread);
>   	sched_unbind(curthread);
>   	thread_unlock(curthread);
>       }
> +#endif
>   
>       resume_all_proc();
>   
>
> Modified: head/sys/dev/acpica/acpi_cpu.c
> ==============================================================================
> --- head/sys/dev/acpica/acpi_cpu.c	Sat May 14 18:02:47 2016	(r299745)
> +++ head/sys/dev/acpica/acpi_cpu.c	Sat May 14 18:22:52 2016	(r299746)
> @@ -439,8 +439,12 @@ acpi_cpu_postattach(void *unused __unuse
>       free(devices, M_TEMP);
>   
>       if (attached) {
> +#ifdef EARLY_AP_STARTUP
> +	acpi_cpu_startup(NULL);
> +#else
>   	/* Queue post cpu-probing task handler */
>   	AcpiOsExecute(OSL_NOTIFY_HANDLER, acpi_cpu_startup, NULL);
> +#endif
>       }
>   }
>   
>
> Modified: head/sys/dev/hwpmc/hwpmc_mod.c
> ==============================================================================
> --- head/sys/dev/hwpmc/hwpmc_mod.c	Sat May 14 18:02:47 2016	(r299745)
> +++ head/sys/dev/hwpmc/hwpmc_mod.c	Sat May 14 18:22:52 2016	(r299746)
> @@ -334,7 +334,11 @@ static moduledata_t pmc_mod = {
>   	&pmc_syscall_mod
>   };
>   
> +#ifdef EARLY_AP_STARTUP
> +DECLARE_MODULE(pmc, pmc_mod, SI_SUB_SYSCALLS, SI_ORDER_ANY);
> +#else
>   DECLARE_MODULE(pmc, pmc_mod, SI_SUB_SMP, SI_ORDER_ANY);
> +#endif
>   MODULE_VERSION(pmc, PMC_VERSION);
>   
>   #ifdef	HWPMC_DEBUG
>
> Modified: head/sys/dev/hyperv/vmbus/hv_vmbus_drv_freebsd.c
> ==============================================================================
> --- head/sys/dev/hyperv/vmbus/hv_vmbus_drv_freebsd.c	Sat May 14 18:02:47 2016	(r299745)
> +++ head/sys/dev/hyperv/vmbus/hv_vmbus_drv_freebsd.c	Sat May 14 18:22:52 2016	(r299746)
> @@ -519,6 +519,7 @@ vmbus_attach(device_t dev)
>   		device_printf(dev, "VMBUS: attach dev: %p\n", dev);
>   	vmbus_devp = dev;
>   
> +#ifndef EARLY_AP_STARTUP
>   	/*
>   	 * If the system has already booted and thread
>   	 * scheduling is possible indicated by the global
> @@ -526,6 +527,7 @@ vmbus_attach(device_t dev)
>   	 * initialization directly.
>   	 */
>   	if (!cold)
> +#endif
>   		vmbus_bus_init();
>   
>   	bus_generic_probe(dev);
> @@ -538,6 +540,7 @@ vmbus_init(void)
>   	if (vm_guest != VM_GUEST_HV)
>   		return;
>   
> +#ifndef EARLY_AP_STARTUP
>   	/*
>   	 * If the system has already booted and thread
>   	 * scheduling is possible, as indicated by the
> @@ -545,6 +548,7 @@ vmbus_init(void)
>   	 * initialization directly.
>   	 */
>   	if (!cold)
> +#endif
>   		vmbus_bus_init();
>   }
>   
> @@ -611,6 +615,9 @@ vmbus_modevent(module_t mod, int what, v
>   	switch (what) {
>   
>   	case MOD_LOAD:
> +#ifdef EARLY_AP_STARTUP
> +		vmbus_init();
> +#endif
>   		vmbus_mod_load();
>   		break;
>   	case MOD_UNLOAD:
> @@ -649,6 +656,7 @@ DRIVER_MODULE(vmbus, acpi, vmbus_driver,
>   MODULE_DEPEND(vmbus, acpi, 1, 1, 1);
>   MODULE_VERSION(vmbus, 1);
>   
> +#ifndef EARLY_AP_STARTUP
>   /* We want to be started after SMP is initialized */
>   SYSINIT(vmb_init, SI_SUB_SMP + 1, SI_ORDER_FIRST, vmbus_init, NULL);
> -
> +#endif
>
> Modified: head/sys/dev/xen/control/control.c
> ==============================================================================
> --- head/sys/dev/xen/control/control.c	Sat May 14 18:02:47 2016	(r299745)
> +++ head/sys/dev/xen/control/control.c	Sat May 14 18:22:52 2016	(r299746)
> @@ -202,11 +202,18 @@ xctrl_suspend()
>   	stop_all_proc();
>   	EVENTHANDLER_INVOKE(power_suspend);
>   
> +#ifdef EARLY_AP_STARTUP
> +	MPASS(mp_ncpus == 1 || smp_started);
> +	thread_lock(curthread);
> +	sched_bind(curthread, 0);
> +	thread_unlock(curthread);
> +#else
>   	if (smp_started) {
>   		thread_lock(curthread);
>   		sched_bind(curthread, 0);
>   		thread_unlock(curthread);
>   	}
> +#endif
>   	KASSERT((PCPU_GET(cpuid) == 0), ("Not running on CPU#0"));
>   
>   	/*
> @@ -227,6 +234,17 @@ xctrl_suspend()
>   	}
>   
>   #ifdef SMP
> +#ifdef EARLY_AP_STARTUP
> +	/*
> +	 * Suspend other CPUs. This prevents IPIs while we
> +	 * are resuming, and will allow us to reset per-cpu
> +	 * vcpu_info on resume.
> +	 */
> +	cpu_suspend_map = all_cpus;
> +	CPU_CLR(PCPU_GET(cpuid), &cpu_suspend_map);
> +	if (!CPU_EMPTY(&cpu_suspend_map))
> +		suspend_cpus(cpu_suspend_map);
> +#else
>   	CPU_ZERO(&cpu_suspend_map);	/* silence gcc */
>   	if (smp_started) {
>   		/*
> @@ -240,6 +258,7 @@ xctrl_suspend()
>   			suspend_cpus(cpu_suspend_map);
>   	}
>   #endif
> +#endif
>   
>   	/*
>   	 * Prevent any races with evtchn_interrupt() handler.
> @@ -285,11 +304,17 @@ xctrl_suspend()
>   	timecounter->tc_get_timecount(timecounter);
>   	inittodr(time_second);
>   
> +#ifdef EARLY_AP_STARTUP
> +	thread_lock(curthread);
> +	sched_unbind(curthread);
> +	thread_unlock(curthread);
> +#else
>   	if (smp_started) {
>   		thread_lock(curthread);
>   		sched_unbind(curthread);
>   		thread_unlock(curthread);
>   	}
> +#endif
>   
>   	resume_all_proc();
>   
>
> Modified: head/sys/geom/eli/g_eli.c
> ==============================================================================
> --- head/sys/geom/eli/g_eli.c	Sat May 14 18:02:47 2016	(r299745)
> +++ head/sys/geom/eli/g_eli.c	Sat May 14 18:22:52 2016	(r299746)
> @@ -479,7 +479,9 @@ g_eli_worker(void *arg)
>   
>   	wr = arg;
>   	sc = wr->w_softc;
> -#ifdef SMP
> +#ifdef EARLY_AP_STARTUP
> +	MPASS(!sc->sc_cpubind || smp_started);
> +#elif defined(SMP)
>   	/* Before sched_bind() to a CPU, wait for all CPUs to go on-line. */
>   	if (sc->sc_cpubind) {
>   		while (!smp_started)
>
> Modified: head/sys/kern/kern_clock.c
> ==============================================================================
> --- head/sys/kern/kern_clock.c	Sat May 14 18:02:47 2016	(r299745)
> +++ head/sys/kern/kern_clock.c	Sat May 14 18:22:52 2016	(r299746)
> @@ -391,6 +391,10 @@ static void
>   initclocks(dummy)
>   	void *dummy;
>   {
> +#ifdef EARLY_AP_STARTUP
> +	struct proc *p;
> +	struct thread *td;
> +#endif
>   	register int i;
>   
>   	/*
> @@ -415,6 +419,35 @@ initclocks(dummy)
>   	 * sign problems sooner.
>   	 */
>   	ticks = INT_MAX - (hz * 10 * 60);
> +
> +#ifdef EARLY_AP_STARTUP
> +	/*
> +	 * Fixup the tick counts in any blocked or sleeping threads to
> +	 * account for the jump above.
> +	 */
> +	sx_slock(&allproc_lock);
> +	FOREACH_PROC_IN_SYSTEM(p) {
> +		PROC_LOCK(p);
> +		if (p->p_state == PRS_NEW) {
> +			PROC_UNLOCK(p);
> +			continue;
> +		}
> +		FOREACH_THREAD_IN_PROC(p, td) {
> +			thread_lock(td);
> +			if (TD_ON_LOCK(td)) {
> +				MPASS(td->td_blktick == 0);
> +				td->td_blktick = ticks;
> +			}
> +			if (TD_ON_SLEEPQ(td)) {
> +				MPASS(td->td_slptick == 0);
> +				td->td_slptick = ticks;
> +			}
> +			thread_unlock(td);
> +		}
> +		PROC_UNLOCK(p);
> +	}
> +	sx_sunlock(&allproc_lock);
> +#endif
>   }
>   
>   /*
>
> Modified: head/sys/kern/kern_clocksource.c
> ==============================================================================
> --- head/sys/kern/kern_clocksource.c	Sat May 14 18:02:47 2016	(r299745)
> +++ head/sys/kern/kern_clocksource.c	Sat May 14 18:22:52 2016	(r299746)
> @@ -322,9 +322,16 @@ timercb(struct eventtimer *et, void *arg
>   	    curcpu, (int)(now >> 32), (u_int)(now & 0xffffffff));
>   
>   #ifdef SMP
> +#ifdef EARLY_AP_STARTUP
> +	MPASS(mp_ncpus == 1 || smp_started);
> +#endif
>   	/* Prepare broadcasting to other CPUs for non-per-CPU timers. */
>   	bcast = 0;
> +#ifdef EARLY_AP_STARTUP
> +	if ((et->et_flags & ET_FLAGS_PERCPU) == 0) {
> +#else
>   	if ((et->et_flags & ET_FLAGS_PERCPU) == 0 && smp_started) {
> +#endif
>   		CPU_FOREACH(cpu) {
>   			state = DPCPU_ID_PTR(cpu, timerstate);
>   			ET_HW_LOCK(state);
> @@ -485,12 +492,17 @@ configtimer(int start)
>   			nexttick = next;
>   		else
>   			nexttick = -1;
> +#ifdef EARLY_AP_STARTUP
> +		MPASS(mp_ncpus == 1 || smp_started);
> +#endif
>   		CPU_FOREACH(cpu) {
>   			state = DPCPU_ID_PTR(cpu, timerstate);
>   			state->now = now;
> +#ifndef EARLY_AP_STARTUP
>   			if (!smp_started && cpu != CPU_FIRST())
>   				state->nextevent = SBT_MAX;
>   			else
> +#endif
>   				state->nextevent = next;
>   			if (periodic)
>   				state->nexttick = next;
> @@ -513,8 +525,13 @@ configtimer(int start)
>   	}
>   	ET_HW_UNLOCK(DPCPU_PTR(timerstate));
>   #ifdef SMP
> +#ifdef EARLY_AP_STARTUP
> +	/* If timer is global we are done. */
> +	if ((timer->et_flags & ET_FLAGS_PERCPU) == 0) {
> +#else
>   	/* If timer is global or there is no other CPUs yet - we are done. */
>   	if ((timer->et_flags & ET_FLAGS_PERCPU) == 0 || !smp_started) {
> +#endif
>   		critical_exit();
>   		return;
>   	}
>
> Modified: head/sys/kern/kern_cpu.c
> ==============================================================================
> --- head/sys/kern/kern_cpu.c	Sat May 14 18:02:47 2016	(r299745)
> +++ head/sys/kern/kern_cpu.c	Sat May 14 18:22:52 2016	(r299746)
> @@ -259,6 +259,9 @@ cf_set_method(device_t dev, const struct
>   	CF_MTX_LOCK(&sc->lock);
>   
>   #ifdef SMP
> +#ifdef EARLY_AP_STARTUP
> +	MPASS(mp_ncpus == 1 || smp_started);
> +#else
>   	/*
>   	 * If still booting and secondary CPUs not started yet, don't allow
>   	 * changing the frequency until they're online.  This is because we
> @@ -271,6 +274,7 @@ cf_set_method(device_t dev, const struct
>   		error = ENXIO;
>   		goto out;
>   	}
> +#endif
>   #endif /* SMP */
>   
>   	/*
>
> Modified: head/sys/net/netisr.c
> ==============================================================================
> --- head/sys/net/netisr.c	Sat May 14 18:02:47 2016	(r299745)
> +++ head/sys/net/netisr.c	Sat May 14 18:22:52 2016	(r299746)
> @@ -1119,6 +1119,10 @@ netisr_start_swi(u_int cpuid, struct pcp
>   static void
>   netisr_init(void *arg)
>   {
> +#ifdef EARLY_AP_STARTUP
> +	struct pcpu *pc;
> +#endif
> +
>   	KASSERT(curcpu == 0, ("%s: not on CPU 0", __func__));
>   
>   	NETISR_LOCK_INIT();
> @@ -1149,10 +1153,20 @@ netisr_init(void *arg)
>   		netisr_bindthreads = 0;
>   	}
>   #endif
> +
> +#ifdef EARLY_AP_STARTUP
> +	STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) {
> +		if (nws_count >= netisr_maxthreads)
> +			break;
> +		netisr_start_swi(pc->pc_cpuid, pc);
> +	}
> +#else
>   	netisr_start_swi(curcpu, pcpu_find(curcpu));
> +#endif
>   }
>   SYSINIT(netisr_init, SI_SUB_SOFTINTR, SI_ORDER_FIRST, netisr_init, NULL);
>   
> +#ifndef EARLY_AP_STARTUP
>   /*
>    * Start worker threads for additional CPUs.  No attempt to gracefully handle
>    * work reassignment, we don't yet support dynamic reconfiguration.
> @@ -1172,6 +1186,7 @@ netisr_start(void *arg)
>   	}
>   }
>   SYSINIT(netisr_start, SI_SUB_SMP, SI_ORDER_MIDDLE, netisr_start, NULL);
> +#endif
>   
>   /*
>    * Sysctl monitoring for netisr: query a list of registered protocols.
>
> Modified: head/sys/sys/kernel.h
> ==============================================================================
> --- head/sys/sys/kernel.h	Sat May 14 18:02:47 2016	(r299745)
> +++ head/sys/sys/kernel.h	Sat May 14 18:22:52 2016	(r299746)
> @@ -118,7 +118,10 @@ enum sysinit_sub_id {
>   	SI_SUB_SCHED_IDLE	= 0x2600000,	/* required idle procs */
>   	SI_SUB_MBUF		= 0x2700000,	/* mbuf subsystem */
>   	SI_SUB_INTR		= 0x2800000,	/* interrupt threads */
> -	SI_SUB_SOFTINTR		= 0x2800001,	/* start soft interrupt thread */
> +#ifdef EARLY_AP_STARTUP
> +	SI_SUB_SMP		= 0x2900000,	/* start the APs*/
> +#endif
> +	SI_SUB_SOFTINTR		= 0x2A00000,	/* start soft interrupt thread */
>   	SI_SUB_DEVFS		= 0x2F00000,	/* devfs ready for devices */
>   	SI_SUB_INIT_IF		= 0x3000000,	/* prep for net interfaces */
>   	SI_SUB_NETGRAPH		= 0x3010000,	/* Let Netgraph initialize */
> @@ -154,7 +157,9 @@ enum sysinit_sub_id {
>   	SI_SUB_KTHREAD_BUF	= 0xea00000,	/* buffer daemon*/
>   	SI_SUB_KTHREAD_UPDATE	= 0xec00000,	/* update daemon*/
>   	SI_SUB_KTHREAD_IDLE	= 0xee00000,	/* idle procs*/
> +#ifndef EARLY_AP_STARTUP
>   	SI_SUB_SMP		= 0xf000000,	/* start the APs*/
> +#endif	
>   	SI_SUB_RACCTD		= 0xf100000,	/* start racctd*/
>   	SI_SUB_LAST		= 0xfffffff	/* final initialization */
>   };
>
> Modified: head/sys/x86/isa/clock.c
> ==============================================================================
> --- head/sys/x86/isa/clock.c	Sat May 14 18:02:47 2016	(r299745)
> +++ head/sys/x86/isa/clock.c	Sat May 14 18:22:52 2016	(r299746)
> @@ -475,8 +475,27 @@ startrtclock()
>   void
>   cpu_initclocks(void)
>   {
> +#ifdef EARLY_AP_STARTUP
> +	struct thread *td;
> +	int i;
>   
> +	td = curthread;
>   	cpu_initclocks_bsp();
> +	CPU_FOREACH(i) {
> +		if (i == 0)
> +			continue;
> +		thread_lock(td);
> +		sched_bind(td, i);
> +		thread_unlock(td);
> +		cpu_initclocks_ap();
> +	}
> +	thread_lock(td);
> +	if (sched_is_bound(td))
> +		sched_unbind(td);
> +	thread_unlock(td);
> +#else
> +	cpu_initclocks_bsp();
> +#endif
>   }
>   
>   static int
>
> Modified: head/sys/x86/x86/intr_machdep.c
> ==============================================================================
> --- head/sys/x86/x86/intr_machdep.c	Sat May 14 18:02:47 2016	(r299745)
> +++ head/sys/x86/x86/intr_machdep.c	Sat May 14 18:22:52 2016	(r299746)
> @@ -77,7 +77,7 @@ static struct mtx intr_table_lock;
>   static struct mtx intrcnt_lock;
>   static TAILQ_HEAD(pics_head, pic) pics;
>   
> -#ifdef SMP
> +#if defined(SMP) && !defined(EARLY_AP_STARTUP)
>   static int assign_cpu;
>   #endif
>   
> @@ -320,11 +320,16 @@ intr_assign_cpu(void *arg, int cpu)
>   	struct intsrc *isrc;
>   	int error;
>   
> +#ifdef EARLY_AP_STARTUP
> +	MPASS(mp_ncpus == 1 || smp_started);
> +	if (cpu != NOCPU) {
> +#else
>   	/*
>   	 * Don't do anything during early boot.  We will pick up the
>   	 * assignment once the APs are started.
>   	 */
>   	if (assign_cpu && cpu != NOCPU) {
> +#endif
>   		isrc = arg;
>   		mtx_lock(&intr_table_lock);
>   		error = isrc->is_pic->pic_assign_cpu(isrc, cpu_apic_ids[cpu]);
> @@ -502,9 +507,13 @@ intr_next_cpu(void)
>   {
>   	u_int apic_id;
>   
> +#ifdef EARLY_AP_STARTUP
> +	MPASS(mp_ncpus == 1 || smp_started);
> +#else
>   	/* Leave all interrupts on the BSP during boot. */
>   	if (!assign_cpu)
>   		return (PCPU_GET(apic_id));
> +#endif
>   
>   	mtx_lock_spin(&icu_lock);
>   	apic_id = cpu_apic_ids[current_cpu];
> @@ -546,6 +555,7 @@ intr_add_cpu(u_int cpu)
>   	CPU_SET(cpu, &intr_cpus);
>   }
>   
> +#ifndef EARLY_AP_STARTUP
>   /*
>    * Distribute all the interrupt sources among the available CPUs once the
>    * AP's have been launched.
> @@ -586,6 +596,7 @@ intr_shuffle_irqs(void *arg __unused)
>   }
>   SYSINIT(intr_shuffle_irqs, SI_SUB_SMP, SI_ORDER_SECOND, intr_shuffle_irqs,
>       NULL);
> +#endif
>   #else
>   /*
>    * Always route interrupts to the current processor in the UP case.
>
> Modified: head/sys/x86/x86/local_apic.c
> ==============================================================================
> --- head/sys/x86/x86/local_apic.c	Sat May 14 18:02:47 2016	(r299745)
> +++ head/sys/x86/x86/local_apic.c	Sat May 14 18:22:52 2016	(r299746)
> @@ -749,6 +749,10 @@ native_lapic_enable_pmc(void)
>   
>   	lvts[APIC_LVT_PMC].lvt_masked = 0;
>   
> +#ifdef EARLY_AP_STARTUP
> +	MPASS(mp_ncpus == 1 || smp_started);
> +	smp_rendezvous(NULL, lapic_update_pmc, NULL, NULL);
> +#else
>   #ifdef SMP
>   	/*
>   	 * If hwpmc was loaded at boot time then the APs may not be
> @@ -760,6 +764,7 @@ native_lapic_enable_pmc(void)
>   	else
>   #endif
>   		lapic_update_pmc(NULL);
> +#endif
>   	return (1);
>   #else
>   	return (0);
>
> Modified: head/sys/x86/x86/mca.c
> ==============================================================================
> --- head/sys/x86/x86/mca.c	Sat May 14 18:02:47 2016	(r299745)
> +++ head/sys/x86/x86/mca.c	Sat May 14 18:22:52 2016	(r299746)
> @@ -726,7 +726,11 @@ mca_startup(void *dummy)
>   
>   	callout_reset(&mca_timer, mca_ticks * hz, mca_periodic_scan, NULL);
>   }
> +#ifdef EARLY_AP_STARTUP
> +SYSINIT(mca_startup, SI_SUB_KICK_SCHEDULER, SI_ORDER_ANY, mca_startup, NULL);
> +#else
>   SYSINIT(mca_startup, SI_SUB_SMP, SI_ORDER_ANY, mca_startup, NULL);
> +#endif
>   
>   #ifdef DEV_APIC
>   static void
>
> Modified: head/sys/x86/x86/mp_x86.c
> ==============================================================================
> --- head/sys/x86/x86/mp_x86.c	Sat May 14 18:02:47 2016	(r299745)
> +++ head/sys/x86/x86/mp_x86.c	Sat May 14 18:22:52 2016	(r299746)
> @@ -933,8 +933,10 @@ init_secondary_tail(void)
>   	while (atomic_load_acq_int(&smp_started) == 0)
>   		ia32_pause();
>   
> +#ifndef EARLY_AP_STARTUP
>   	/* Start per-CPU event timers. */
>   	cpu_initclocks_ap();
> +#endif
>   
>   	sched_throw(NULL);
>   
>
John, this feels as though it should be settable with a tunable variable.
Can you think of a good way to do this other than having two SYSINIT
entries and making the tunable "enable" the right one? There is no
tunable/SYSINIT interaction otherwise.






More information about the svn-src-all mailing list