git: 6a8ea6d17498 - main - sched: split sched_ap_entry() out of sched_throw()

From: Kyle Evans <kevans_at_FreeBSD.org>
Date: Fri, 05 Nov 2021 20:46:07 UTC
The branch main has been updated by kevans:

URL: https://cgit.FreeBSD.org/src/commit/?id=6a8ea6d17498f07c702e2acf4072cf4436037a6a

commit 6a8ea6d17498f07c702e2acf4072cf4436037a6a
Author:     Kyle Evans <kevans@FreeBSD.org>
AuthorDate: 2021-11-04 00:09:56 +0000
Commit:     Kyle Evans <kevans@FreeBSD.org>
CommitDate: 2021-11-05 20:45:51 +0000

    sched: split sched_ap_entry() out of sched_throw()
    
    sched_throw() can no longer take a NULL thread; APs enter through
    sched_ap_entry() instead.  This completely removes branching in the
    common case and cleans up both paths.  No functional change intended.
    
    Reviewed by:    kib, markj
    Differential Revision:  https://reviews.freebsd.org/D32829
---
 sys/arm/arm/mp_machdep.c         |  2 +-
 sys/arm64/arm64/mp_machdep.c     |  2 +-
 sys/kern/sched_4bsd.c            | 51 +++++++++++++++++--------
 sys/kern/sched_ule.c             | 81 ++++++++++++++++++++++++++++------------
 sys/mips/mips/mp_machdep.c       |  2 +-
 sys/powerpc/powerpc/mp_machdep.c |  2 +-
 sys/riscv/riscv/mp_machdep.c     |  2 +-
 sys/sys/sched.h                  |  1 +
 sys/x86/x86/mp_x86.c             |  2 +-
 9 files changed, 100 insertions(+), 45 deletions(-)

diff --git a/sys/arm/arm/mp_machdep.c b/sys/arm/arm/mp_machdep.c
index 4089af5929eb..6f772deee2d4 100644
--- a/sys/arm/arm/mp_machdep.c
+++ b/sys/arm/arm/mp_machdep.c
@@ -217,7 +217,7 @@ init_secondary(int cpu)
 	CTR0(KTR_SMP, "go into scheduler");
 
 	/* Enter the scheduler */
-	sched_throw(NULL);
+	sched_ap_entry();
 
 	panic("scheduler returned us to %s", __func__);
 	/* NOTREACHED */
diff --git a/sys/arm64/arm64/mp_machdep.c b/sys/arm64/arm64/mp_machdep.c
index b42f65b9e399..4eebfe219934 100644
--- a/sys/arm64/arm64/mp_machdep.c
+++ b/sys/arm64/arm64/mp_machdep.c
@@ -293,7 +293,7 @@ init_secondary(uint64_t cpu)
 	MPASS(PCPU_GET(curpcb) == NULL);
 
 	/* Enter the scheduler */
-	sched_throw(NULL);
+	sched_ap_entry();
 
 	panic("scheduler returned us to init_secondary");
 	/* NOTREACHED */
diff --git a/sys/kern/sched_4bsd.c b/sys/kern/sched_4bsd.c
index 6ba41eb80dcc..25f8bfc04d48 100644
--- a/sys/kern/sched_4bsd.c
+++ b/sys/kern/sched_4bsd.c
@@ -1662,12 +1662,22 @@ sched_idletd(void *dummy)
 	}
 }
 
+static void
+sched_throw_tail(struct thread *td)
+{
+
+	mtx_assert(&sched_lock, MA_OWNED);
+	KASSERT(curthread->td_md.md_spinlock_count == 1, ("invalid count"));
+	cpu_throw(td, choosethread());	/* doesn't return */
+}
+
 /*
- * A CPU is entering for the first time or a thread is exiting.
+ * A CPU is entering for the first time.
  */
 void
-sched_throw(struct thread *td)
+sched_ap_entry(void)
 {
+
 	/*
 	 * Correct spinlock nesting.  The idle thread context that we are
 	 * borrowing was created so that it would start out with a single
@@ -1677,20 +1687,29 @@ sched_throw(struct thread *td)
 	 * spinlock_exit() will simply adjust the counts without allowing
 	 * spin lock using code to interrupt us.
 	 */
-	if (td == NULL) {
-		mtx_lock_spin(&sched_lock);
-		spinlock_exit();
-		PCPU_SET(switchtime, cpu_ticks());
-		PCPU_SET(switchticks, ticks);
-	} else {
-		lock_profile_release_lock(&sched_lock.lock_object, true);
-		MPASS(td->td_lock == &sched_lock);
-		td->td_lastcpu = td->td_oncpu;
-		td->td_oncpu = NOCPU;
-	}
-	mtx_assert(&sched_lock, MA_OWNED);
-	KASSERT(curthread->td_md.md_spinlock_count == 1, ("invalid count"));
-	cpu_throw(td, choosethread());	/* doesn't return */
+	mtx_lock_spin(&sched_lock);
+	spinlock_exit();
+	PCPU_SET(switchtime, cpu_ticks());
+	PCPU_SET(switchticks, ticks);
+
+	sched_throw_tail(NULL);
+}
+
+/*
+ * A thread is exiting.
+ */
+void
+sched_throw(struct thread *td)
+{
+
+	MPASS(td != NULL);
+	MPASS(td->td_lock == &sched_lock);
+
+	lock_profile_release_lock(&sched_lock.lock_object, true);
+	td->td_lastcpu = td->td_oncpu;
+	td->td_oncpu = NOCPU;
+
+	sched_throw_tail(td);
 }
 
 void
diff --git a/sys/kern/sched_ule.c b/sys/kern/sched_ule.c
index ce7ce4cd2bd8..e311c8da8bac 100644
--- a/sys/kern/sched_ule.c
+++ b/sys/kern/sched_ule.c
@@ -2985,39 +2985,74 @@ sched_idletd(void *dummy)
 }
 
 /*
- * A CPU is entering for the first time or a thread is exiting.
+ * sched_throw_grab() chooses a thread from the queue to switch to
+ * next.  It returns with the tdq lock dropped in a spinlock section to
+ * keep interrupts disabled until the CPU is running in a proper threaded
+ * context.
  */
-void
-sched_throw(struct thread *td)
+static struct thread *
+sched_throw_grab(struct tdq *tdq)
 {
 	struct thread *newtd;
-	struct tdq *tdq;
 
-	tdq = TDQ_SELF();
-	if (__predict_false(td == NULL)) {
-		TDQ_LOCK(tdq);
-		/* Correct spinlock nesting. */
-		spinlock_exit();
-		PCPU_SET(switchtime, cpu_ticks());
-		PCPU_SET(switchticks, ticks);
-	} else {
-		THREAD_LOCK_ASSERT(td, MA_OWNED);
-		THREAD_LOCKPTR_ASSERT(td, TDQ_LOCKPTR(tdq));
-		tdq_load_rem(tdq, td);
-		td->td_lastcpu = td->td_oncpu;
-		td->td_oncpu = NOCPU;
-		thread_lock_block(td);
-	}
 	newtd = choosethread();
 	spinlock_enter();
 	TDQ_UNLOCK(tdq);
 	KASSERT(curthread->td_md.md_spinlock_count == 1,
 	    ("invalid count %d", curthread->td_md.md_spinlock_count));
+	return (newtd);
+}
+
+/*
+ * A CPU is entering for the first time.
+ */
+void
+sched_ap_entry(void)
+{
+	struct thread *newtd;
+	struct tdq *tdq;
+
+	tdq = TDQ_SELF();
+
+	/* This should have been set up in schedinit_ap(). */
+	THREAD_LOCKPTR_ASSERT(curthread, TDQ_LOCKPTR(tdq));
+
+	TDQ_LOCK(tdq);
+	/* Correct spinlock nesting. */
+	spinlock_exit();
+	PCPU_SET(switchtime, cpu_ticks());
+	PCPU_SET(switchticks, ticks);
+
+	newtd = sched_throw_grab(tdq);
+
 	/* doesn't return */
-	if (__predict_false(td == NULL))
-		cpu_throw(td, newtd);		/* doesn't return */
-	else
-		cpu_switch(td, newtd, TDQ_LOCKPTR(tdq));
+	cpu_throw(NULL, newtd);
+}
+
+/*
+ * A thread is exiting.
+ */
+void
+sched_throw(struct thread *td)
+{
+	struct thread *newtd;
+	struct tdq *tdq;
+
+	tdq = TDQ_SELF();
+
+	MPASS(td != NULL);
+	THREAD_LOCK_ASSERT(td, MA_OWNED);
+	THREAD_LOCKPTR_ASSERT(td, TDQ_LOCKPTR(tdq));
+
+	tdq_load_rem(tdq, td);
+	td->td_lastcpu = td->td_oncpu;
+	td->td_oncpu = NOCPU;
+	thread_lock_block(td);
+
+	newtd = sched_throw_grab(tdq);
+
+	/* doesn't return */
+	cpu_switch(td, newtd, TDQ_LOCKPTR(tdq));
 }
 
 /*
diff --git a/sys/mips/mips/mp_machdep.c b/sys/mips/mips/mp_machdep.c
index dc089db1d189..2582c2b65e78 100644
--- a/sys/mips/mips/mp_machdep.c
+++ b/sys/mips/mips/mp_machdep.c
@@ -335,7 +335,7 @@ smp_init_secondary(u_int32_t cpuid)
 	cpu_initclocks_ap();
 
 	/* enter the scheduler */
-	sched_throw(NULL);
+	sched_ap_entry();
 
 	panic("scheduler returned us to %s", __func__);
 	/* NOTREACHED */
diff --git a/sys/powerpc/powerpc/mp_machdep.c b/sys/powerpc/powerpc/mp_machdep.c
index 627cde77adbf..33ef870b8180 100644
--- a/sys/powerpc/powerpc/mp_machdep.c
+++ b/sys/powerpc/powerpc/mp_machdep.c
@@ -112,7 +112,7 @@ machdep_ap_bootstrap(void)
 	cpu_initclocks_ap();
 
 	/* Announce ourselves awake, and enter the scheduler */
-	sched_throw(NULL);
+	sched_ap_entry();
 }
 
 void
diff --git a/sys/riscv/riscv/mp_machdep.c b/sys/riscv/riscv/mp_machdep.c
index 57d5606a3b88..74647b8fcba4 100644
--- a/sys/riscv/riscv/mp_machdep.c
+++ b/sys/riscv/riscv/mp_machdep.c
@@ -291,7 +291,7 @@ init_secondary(uint64_t hart)
 	MPASS(PCPU_GET(curpcb) == NULL);
 
 	/* Enter the scheduler */
-	sched_throw(NULL);
+	sched_ap_entry();
 
 	panic("scheduler returned us to init_secondary");
 	/* NOTREACHED */
diff --git a/sys/sys/sched.h b/sys/sys/sched.h
index 8041a2bc12d4..a9598767e4cb 100644
--- a/sys/sys/sched.h
+++ b/sys/sys/sched.h
@@ -91,6 +91,7 @@ void	sched_nice(struct proc *p, int nice);
  * Threads are switched in and out, block on resources, have temporary
  * priorities inherited from their procs, and use up cpu time.
  */
+void	sched_ap_entry(void);
 void	sched_exit_thread(struct thread *td, struct thread *child);
 u_int	sched_estcpu(struct thread *td);
 void	sched_fork_thread(struct thread *td, struct thread *child);
diff --git a/sys/x86/x86/mp_x86.c b/sys/x86/x86/mp_x86.c
index 1fac244cbed7..7a72c501ff25 100644
--- a/sys/x86/x86/mp_x86.c
+++ b/sys/x86/x86/mp_x86.c
@@ -1099,7 +1099,7 @@ init_secondary_tail(void)
 	 */
 	MPASS(PCPU_GET(curpcb) == NULL);
 
-	sched_throw(NULL);
+	sched_ap_entry();
 
 	panic("scheduler returned us to %s", __func__);
 	/* NOTREACHED */