svn commit: r358168 - in head/sys: kern sys

Konstantin Belousov kib at FreeBSD.org
Thu Feb 20 15:34:05 UTC 2020


Author: kib
Date: Thu Feb 20 15:34:02 2020
New Revision: 358168
URL: https://svnweb.freebsd.org/changeset/base/358168

Log:
  Do not read sigfastblock word on syscall entry.
  
  On machines with SMAP, fueword executes two serializing instructions
  which can be seen in microbenchmarks.
  
  As a measure to restore microbenchmark numbers, only read the word on
  the attempt to deliver signal in ast().  If the word is set, signal is
  not delivered and word is kept, preventing interruption of
  interruptible sleeps by signals until userspace calls
  sigfastblock(UNBLOCK) which clears the word.
  
  This way, the spurious EINTR that userspace can see while in critical
  section is on first interruptible sleep, if a signal is pending, and
  on signal posting.  It is believed that it is not important for rtld
  and lbithr critical sections.  It might be visible for the application
  code e.g. for the callback of dl_iterate_phdr(3), but again the belief
  is that the non-compliance is acceptable.  Most important is that the
  retry of the sleeping syscall does not interrupt unless additional
  signal is posted.
  
  For now I added the knob kern.sigfastblock_fetch_always to enable the
  word read on syscall entry to be able to diagnose possible issues due
  to spurious EINTR.
  
  While there, do some code restructuting to have all sigfastblock()
  handling located in kern_sig.c.
  
  Reviewed by:	jeff
  Discussed with:	mjg
  Tested by:	pho
  Sponsored by:	The FreeBSD Foundation
  Differential revision:	https://reviews.freebsd.org/D23622

Modified:
  head/sys/kern/kern_exec.c
  head/sys/kern/kern_sig.c
  head/sys/kern/subr_syscall.c
  head/sys/kern/subr_trap.c
  head/sys/sys/signalvar.h

Modified: head/sys/kern/kern_exec.c
==============================================================================
--- head/sys/kern/kern_exec.c	Thu Feb 20 10:56:12 2020	(r358167)
+++ head/sys/kern/kern_exec.c	Thu Feb 20 15:34:02 2020	(r358168)
@@ -1035,9 +1035,7 @@ exec_new_vmspace(struct image_params *imgp, struct sys
 	imgp->vmspace_destroyed = 1;
 	imgp->sysent = sv;
 
-	td->td_pflags &= ~TDP_SIGFASTBLOCK;
-	td->td_sigblock_ptr = NULL;
-	td->td_sigblock_val = 0;
+	sigfastblock_clear(td);
 
 	/* May be called with Giant held */
 	EVENTHANDLER_DIRECT_INVOKE(process_exec, p, imgp);

Modified: head/sys/kern/kern_sig.c
==============================================================================
--- head/sys/kern/kern_sig.c	Thu Feb 20 10:56:12 2020	(r358167)
+++ head/sys/kern/kern_sig.c	Thu Feb 20 15:34:02 2020	(r358168)
@@ -157,6 +157,12 @@ static int	kern_lognosys = 0;
 SYSCTL_INT(_kern, OID_AUTO, lognosys, CTLFLAG_RWTUN, &kern_lognosys, 0,
     "Log invalid syscalls");
 
+__read_frequently bool sigfastblock_fetch_always = false;
+SYSCTL_BOOL(_kern, OID_AUTO, sigfastblock_fetch_always, CTLFLAG_RWTUN,
+    &sigfastblock_fetch_always, 0,
+    "Fetch sigfastblock word on each syscall entry for proper "
+    "blocking semantic");
+
 SYSINIT(signal, SI_SUB_P1003_1B, SI_ORDER_FIRST+3, sigqueue_start, NULL);
 
 /*
@@ -2005,6 +2011,7 @@ trapsignal(struct thread *td, ksiginfo_t *ksi)
 	code = ksi->ksi_code;
 	KASSERT(_SIG_VALID(sig), ("invalid signal"));
 
+	sigfastblock_fetch(td);
 	PROC_LOCK(p);
 	ps = p->p_sigacts;
 	mtx_lock(&ps->ps_mtx);
@@ -3952,6 +3959,42 @@ sig_drop_caught(struct proc *p)
 	}
 }
 
+static void
+sigfastblock_failed(struct thread *td, bool sendsig, bool write)
+{
+	ksiginfo_t ksi;
+
+	/*
+	 * Prevent further fetches and SIGSEGVs, allowing thread to
+	 * issue syscalls despite corruption.
+	 */
+	sigfastblock_clear(td);
+
+	if (!sendsig)
+		return;
+	ksiginfo_init_trap(&ksi);
+	ksi.ksi_signo = SIGSEGV;
+	ksi.ksi_code = write ? SEGV_ACCERR : SEGV_MAPERR;
+	ksi.ksi_addr = td->td_sigblock_ptr;
+	trapsignal(td, &ksi);
+}
+
+static bool
+sigfastblock_fetch_sig(struct thread *td, bool sendsig, uint32_t *valp)
+{
+	uint32_t res;
+
+	if ((td->td_pflags & TDP_SIGFASTBLOCK) == 0)
+		return (true);
+	if (fueword32((void *)td->td_sigblock_ptr, &res) == -1) {
+		sigfastblock_failed(td, sendsig, false);
+		return (false);
+	}
+	*valp = res;
+	td->td_sigblock_val = res & ~SIGFASTBLOCK_FLAGS;
+	return (true);
+}
+
 int
 sys_sigfastblock(struct thread *td, struct sigfastblock_args *uap)
 {
@@ -3960,6 +4003,7 @@ sys_sigfastblock(struct thread *td, struct sigfastbloc
 	uint32_t oldval;
 
 	error = 0;
+	p = td->td_proc;
 	switch (uap->cmd) {
 	case SIGFASTBLOCK_SETPTR:
 		if ((td->td_pflags & TDP_SIGFASTBLOCK) != 0) {
@@ -3975,18 +4019,22 @@ sys_sigfastblock(struct thread *td, struct sigfastbloc
 		break;
 
 	case SIGFASTBLOCK_UNBLOCK:
-		if ((td->td_pflags & TDP_SIGFASTBLOCK) != 0) {
+		if ((td->td_pflags & TDP_SIGFASTBLOCK) == 0) {
 			error = EINVAL;
 			break;
 		}
-again:
-		res = casueword32(td->td_sigblock_ptr, SIGFASTBLOCK_PEND,
-		    &oldval, 0);
-		if (res == -1) {
-			error = EFAULT;
-			break;
-		}
-		if (res == 1) {
+
+		for (;;) {
+			res = casueword32(td->td_sigblock_ptr,
+			    SIGFASTBLOCK_PEND, &oldval, 0);
+			if (res == -1) {
+				error = EFAULT;
+				sigfastblock_failed(td, false, true);
+				break;
+			}
+			if (res == 0)
+				break;
+			MPASS(res == 1);
 			if (oldval != SIGFASTBLOCK_PEND) {
 				error = EBUSY;
 				break;
@@ -3994,8 +4042,22 @@ again:
 			error = thread_check_susp(td, false);
 			if (error != 0)
 				break;
-			goto again;
 		}
+		if (error != 0)
+			break;
+
+		/*
+		 * td_sigblock_val is cleared there, but not on a
+		 * syscall exit.  The end effect is that a single
+		 * interruptible sleep, while user sigblock word is
+		 * set, might return EINTR or ERESTART to usermode
+		 * without delivering signal.  All further sleeps,
+		 * until userspace clears the word and does
+		 * sigfastblock(UNBLOCK), observe current word and no
+		 * longer get interrupted.  It is slight
+		 * non-conformance, with alternative to have read the
+		 * sigblock word on each syscall entry.
+		 */
 		td->td_sigblock_val = 0;
 
 		/*
@@ -4003,7 +4065,6 @@ again:
 		 * signals to current thread.  But notify others about
 		 * fake unblock.
 		 */
-		p = td->td_proc;
 		if (error == 0 && p->p_numthreads != 1) {
 			PROC_LOCK(p);
 			reschedule_signals(p, td->td_sigmask, 0);
@@ -4016,8 +4077,7 @@ again:
 			error = EINVAL;
 			break;
 		}
-		res = fueword32(td->td_sigblock_ptr, &oldval);
-		if (res == -1) {
+		if (!sigfastblock_fetch_sig(td, false, &oldval)) {
 			error = EFAULT;
 			break;
 		}
@@ -4025,8 +4085,7 @@ again:
 			error = EBUSY;
 			break;
 		}
-		td->td_pflags &= ~TDP_SIGFASTBLOCK;
-		td->td_sigblock_val = 0;
+		sigfastblock_clear(td);
 		break;
 
 	default:
@@ -4037,32 +4096,59 @@ again:
 }
 
 void
-fetch_sigfastblock(struct thread *td)
+sigfastblock_clear(struct thread *td)
 {
+	struct proc *p;
+	bool resched;
 
 	if ((td->td_pflags & TDP_SIGFASTBLOCK) == 0)
 		return;
-	if (fueword32(td->td_sigblock_ptr, &td->td_sigblock_val) == -1) {
-		fetch_sigfastblock_failed(td, false);
-		return;
+	td->td_sigblock_val = 0;
+	resched = (td->td_pflags & TDP_SIGFASTPENDING) != 0;
+	td->td_pflags &= ~(TDP_SIGFASTBLOCK | TDP_SIGFASTPENDING);
+	if (resched) {
+		p = td->td_proc;
+		PROC_LOCK(p);
+		reschedule_signals(p, td->td_sigmask, 0);
+		PROC_UNLOCK(p);
 	}
-	td->td_sigblock_val &= ~SIGFASTBLOCK_FLAGS;
 }
 
 void
-fetch_sigfastblock_failed(struct thread *td, bool write)
+sigfastblock_fetch(struct thread *td)
 {
-	ksiginfo_t ksi;
+	uint32_t val;
 
-	/*
-	 * Prevent further fetches and SIGSEGVs, allowing thread to
-	 * issue syscalls despite corruption.
-	 */
-	td->td_pflags &= ~TDP_SIGFASTBLOCK;
+	(void)sigfastblock_fetch_sig(td, true, &val);
+}
 
-	ksiginfo_init_trap(&ksi);
-	ksi.ksi_signo = SIGSEGV;
-	ksi.ksi_code = write ? SEGV_ACCERR : SEGV_MAPERR;
-	ksi.ksi_addr = td->td_sigblock_ptr;
-	trapsignal(td, &ksi);
+void
+sigfastblock_setpend(struct thread *td)
+{
+	int res;
+	uint32_t oldval;
+
+	if ((td->td_pflags & TDP_SIGFASTBLOCK) == 0)
+		return;
+	res = fueword32((void *)td->td_sigblock_ptr, &oldval);
+	if (res == -1) {
+		sigfastblock_failed(td, true, false);
+		return;
+	}
+	for (;;) {
+		res = casueword32(td->td_sigblock_ptr, oldval, &oldval,
+		    oldval | SIGFASTBLOCK_PEND);
+		if (res == -1) {
+			sigfastblock_failed(td, true, true);
+			return;
+		}
+		if (res == 0) {
+			td->td_sigblock_val = oldval & ~SIGFASTBLOCK_FLAGS;
+			td->td_pflags &= ~TDP_SIGFASTPENDING;
+			break;
+		}
+		MPASS(res == 1);
+		if (thread_check_susp(td, false) != 0)
+			break;
+	}
 }

Modified: head/sys/kern/subr_syscall.c
==============================================================================
--- head/sys/kern/subr_syscall.c	Thu Feb 20 10:56:12 2020	(r358167)
+++ head/sys/kern/subr_syscall.c	Thu Feb 20 15:34:02 2020	(r358168)
@@ -132,11 +132,11 @@ syscallenter(struct thread *td)
 	}
 
 	/*
-	 * Fetch fast sigblock value at the time of syscall
-	 * entry because sleepqueue primitives might call
-	 * cursig().
+	 * Fetch fast sigblock value at the time of syscall entry to
+	 * handle sleepqueue primitives which might call cursig().
 	 */
-	fetch_sigfastblock(td);
+	if (__predict_false(sigfastblock_fetch_always))
+		(void)sigfastblock_fetch(td);
 
 	/* Let system calls set td_errno directly. */
 	td->td_pflags &= ~TDP_NERRNO;

Modified: head/sys/kern/subr_trap.c
==============================================================================
--- head/sys/kern/subr_trap.c	Thu Feb 20 10:56:12 2020	(r358167)
+++ head/sys/kern/subr_trap.c	Thu Feb 20 15:34:02 2020	(r358168)
@@ -134,6 +134,7 @@ userret(struct thread *td, struct trapframe *frame)
 #ifdef KTRACE
 	KTRUSERRET(td);
 #endif
+
 	td_softdep_cleanup(td);
 	MPASS(td->td_su == NULL);
 
@@ -222,8 +223,7 @@ ast(struct trapframe *framep)
 {
 	struct thread *td;
 	struct proc *p;
-	uint32_t oldval;
-	int flags, sig, res;
+	int flags, sig;
 
 	td = curthread;
 	p = td->td_proc;
@@ -325,11 +325,12 @@ ast(struct trapframe *framep)
 	 */
 	if (flags & TDF_NEEDSIGCHK || p->p_pendingcnt > 0 ||
 	    !SIGISEMPTY(p->p_siglist)) {
-		fetch_sigfastblock(td);
+		sigfastblock_fetch(td);
 		PROC_LOCK(p);
 		mtx_lock(&p->p_sigacts->ps_mtx);
 		if ((td->td_pflags & TDP_SIGFASTBLOCK) != 0 &&
 		    td->td_sigblock_val != 0) {
+			sigfastblock_setpend(td);
 			reschedule_signals(p, fastblock_mask,
 			    SIGPROCMASK_PS_LOCKED | SIGPROCMASK_FASTBLK);
 		} else {
@@ -346,32 +347,8 @@ ast(struct trapframe *framep)
 	 * Handle deferred update of the fast sigblock value, after
 	 * the postsig() loop was performed.
 	 */
-	if (td->td_pflags & TDP_SIGFASTPENDING) {
-		td->td_pflags &= ~TDP_SIGFASTPENDING;
-		res = fueword32(td->td_sigblock_ptr, &oldval);
-		if (res == -1) {
-			fetch_sigfastblock_failed(td, false);
-		} else {
-			for (;;) {
-				oldval |= SIGFASTBLOCK_PEND;
-				res = casueword32(td->td_sigblock_ptr, oldval,
-				    &oldval, oldval | SIGFASTBLOCK_PEND);
-				if (res == -1) {
-					fetch_sigfastblock_failed(td, true);
-					break;
-				}
-				if (res == 0) {
-					td->td_sigblock_val = oldval &
-					    ~SIGFASTBLOCK_FLAGS;
-					break;
-				}
-				MPASS(res == 1);
-				res = thread_check_susp(td, false);
-				if (res != 0)
-					break;
-			}
-		}
-	}
+	if (td->td_pflags & TDP_SIGFASTPENDING)
+		sigfastblock_setpend(td);
 
 	/*
 	 * We need to check to see if we have to exit or wait due to a

Modified: head/sys/sys/signalvar.h
==============================================================================
--- head/sys/sys/signalvar.h	Thu Feb 20 10:56:12 2020	(r358167)
+++ head/sys/sys/signalvar.h	Thu Feb 20 15:34:02 2020	(r358168)
@@ -273,6 +273,7 @@ int __sys_sigfastblock(int cmd, void *ptr);
 
 #ifdef _KERNEL
 extern sigset_t fastblock_mask;
+extern bool sigfastblock_fetch_always;
 
 /* Return nonzero if process p has an unmasked pending signal. */
 #define	SIGPENDING(td)							\
@@ -382,8 +383,6 @@ sigallowstop(int prev)
 
 int	cursig(struct thread *td);
 void	execsigs(struct proc *p);
-void	fetch_sigfastblock(struct thread *td);
-void	fetch_sigfastblock_failed(struct thread *td, bool write);
 void	gsignal(int pgid, int sig, ksiginfo_t *ksi);
 void	killproc(struct proc *p, char *why);
 ksiginfo_t * ksiginfo_alloc(int wait);
@@ -405,6 +404,9 @@ void	sig_drop_caught(struct proc *p);
 void	sigexit(struct thread *td, int sig) __dead2;
 int	sigev_findtd(struct proc *p, struct sigevent *sigev, struct thread **);
 int	sig_ffs(sigset_t *set);
+void	sigfastblock_clear(struct thread *td);
+void	sigfastblock_fetch(struct thread *td);
+void	sigfastblock_setpend(struct thread *td);
 void	siginit(struct proc *p);
 void	signotify(struct thread *td);
 void	sigqueue_delete(struct sigqueue *queue, int sig);


More information about the svn-src-all mailing list