svn commit: r316120 - in stable/11: share/man/man9 sys/kern sys/sys

Eric van Gyzen vangyzen at FreeBSD.org
Wed Mar 29 01:21:50 UTC 2017


Author: vangyzen
Date: Wed Mar 29 01:21:48 2017
New Revision: 316120
URL: https://svnweb.freebsd.org/changeset/base/316120

Log:
  MFC r315280 r315287
  
  When the RTC is adjusted, reevaluate absolute sleep times based on the RTC
  
  POSIX 2008 says this about clock_settime(2):
  
      If the value of the CLOCK_REALTIME clock is set via clock_settime(),
      the new value of the clock shall be used to determine the time
      of expiration for absolute time services based upon the
      CLOCK_REALTIME clock.  This applies to the time at which armed
      absolute timers expire.  If the absolute time requested at the
      invocation of such a time service is before the new value of
      the clock, the time service shall expire immediately as if the
      clock had reached the requested time normally.
  
      Setting the value of the CLOCK_REALTIME clock via clock_settime()
      shall have no effect on threads that are blocked waiting for
      a relative time service based upon this clock, including the
      nanosleep() function; nor on the expiration of relative timers
      based upon this clock.  Consequently, these time services shall
      expire when the requested relative interval elapses, independently
      of the new or old value of the clock.
  
  When the real-time clock is adjusted, such as by clock_settime(2),
  wake any threads sleeping until an absolute real-clock time.
  Such a sleep is indicated by a non-zero td_rtcgen.  The sleep functions
  will set that field to zero and return zero to tell the caller
  to reevaluate its sleep duration based on the new value of the clock.
  
  At present, this affects the following functions:
  
      pthread_cond_timedwait(3)
      pthread_mutex_timedlock(3)
      pthread_rwlock_timedrdlock(3)
      pthread_rwlock_timedwrlock(3)
      sem_timedwait(3)
      sem_clockwait_np(3)
  
  I'm working on adding clock_nanosleep(2), which will also be affected.
  
  Reported by:	Sebastian Huber <sebastian.huber at embedded-brains.de>
  Relnotes:	yes
  Sponsored by:	Dell EMC

Modified:
  stable/11/share/man/man9/sleep.9
  stable/11/sys/kern/kern_tc.c
  stable/11/sys/kern/kern_umtx.c
  stable/11/sys/kern/subr_sleepqueue.c
  stable/11/sys/sys/proc.h
  stable/11/sys/sys/sleepqueue.h
  stable/11/sys/sys/time.h
Directory Properties:
  stable/11/   (props changed)

Modified: stable/11/share/man/man9/sleep.9
==============================================================================
--- stable/11/share/man/man9/sleep.9	Tue Mar 28 23:56:02 2017	(r316119)
+++ stable/11/share/man/man9/sleep.9	Wed Mar 29 01:21:48 2017	(r316120)
@@ -280,6 +280,21 @@ to
 pay particular attention to ensure that no other threads wait on the
 same
 .Fa chan .
+.Pp
+If the timeout given by
+.Fa timo
+or
+.Fa sbt
+is based on an absolute real-time clock value,
+then the thread should copy the global
+.Va rtc_generation
+into its
+.Va td_rtcgen
+member before reading the RTC.
+If the real-time clock is adjusted, these functions will set
+.Va td_rtcgen
+to zero and return zero.
+The caller should reconsider its orientation with the new RTC value.
 .Sh RETURN VALUES
 When awakened by a call to
 .Fn wakeup
@@ -298,6 +313,9 @@ the
 .Fn msleep_spin ,
 .Fn tsleep ,
 and locking primitive sleep functions return 0.
+Zero can also be returned when the real-time clock is adjusted;
+see above regarding
+.Va td_rtcgen .
 Otherwise, a non-zero error code is returned.
 .Sh ERRORS
 .Fn msleep ,

Modified: stable/11/sys/kern/kern_tc.c
==============================================================================
--- stable/11/sys/kern/kern_tc.c	Tue Mar 28 23:56:02 2017	(r316119)
+++ stable/11/sys/kern/kern_tc.c	Wed Mar 29 01:21:48 2017	(r316120)
@@ -28,7 +28,9 @@ __FBSDID("$FreeBSD$");
 #include <sys/limits.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
+#include <sys/proc.h>
 #include <sys/sbuf.h>
+#include <sys/sleepqueue.h>
 #include <sys/sysctl.h>
 #include <sys/syslog.h>
 #include <sys/systm.h>
@@ -128,6 +130,8 @@ SYSCTL_PROC(_kern_timecounter, OID_AUTO,
     sysctl_kern_timecounter_adjprecision, "I",
     "Allowed time interval deviation in percents");
 
+volatile int rtc_generation = 1;
+
 static int tc_chosen;	/* Non-zero if a specific tc was chosen via sysctl. */
 
 static void tc_windup(struct bintime *new_boottimebin);
@@ -1263,6 +1267,26 @@ tc_getfrequency(void)
 	return (timehands->th_counter->tc_frequency);
 }
 
+static bool
+sleeping_on_old_rtc(struct thread *td)
+{
+
+	/*
+	 * td_rtcgen is modified by curthread when it is running,
+	 * and by other threads in this function.  By finding the thread
+	 * on a sleepqueue and holding the lock on the sleepqueue
+	 * chain, we guarantee that the thread is not running and that
+	 * modifying td_rtcgen is safe.  Setting td_rtcgen to zero informs
+	 * the thread that it was woken due to a real-time clock adjustment.
+	 * (The declaration of td_rtcgen refers to this comment.)
+	 */
+	if (td->td_rtcgen != 0 && td->td_rtcgen != rtc_generation) {
+		td->td_rtcgen = 0;
+		return (true);
+	}
+	return (false);
+}
+
 static struct mtx tc_setclock_mtx;
 MTX_SYSINIT(tc_setclock_init, &tc_setclock_mtx, "tcsetc", MTX_SPIN);
 
@@ -1288,6 +1312,10 @@ tc_setclock(struct timespec *ts)
 	mtx_unlock_spin(&tc_setclock_mtx);
 	getboottimebin(&boottimebin);
 	bintime2timeval(&boottimebin, &boottime);
+
+	/* Avoid rtc_generation == 0, since td_rtcgen == 0 is special. */
+	atomic_add_rel_int(&rtc_generation, 2);
+	sleepq_chains_remove_matching(sleeping_on_old_rtc);
 	if (timestepwarnings) {
 		nanotime(&taft);
 		log(LOG_INFO,

Modified: stable/11/sys/kern/kern_umtx.c
==============================================================================
--- stable/11/sys/kern/kern_umtx.c	Tue Mar 28 23:56:02 2017	(r316119)
+++ stable/11/sys/kern/kern_umtx.c	Wed Mar 29 01:21:48 2017	(r316120)
@@ -59,6 +59,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/sysproto.h>
 #include <sys/syscallsubr.h>
 #include <sys/taskqueue.h>
+#include <sys/time.h>
 #include <sys/eventhandler.h>
 #include <sys/umtx.h>
 
@@ -70,6 +71,7 @@ __FBSDID("$FreeBSD$");
 #include <vm/vm_map.h>
 #include <vm/vm_object.h>
 
+#include <machine/atomic.h>
 #include <machine/cpu.h>
 
 #ifdef COMPAT_FREEBSD32
@@ -208,6 +210,7 @@ struct umtxq_chain {
 
 struct abs_timeout {
 	int clockid;
+	bool is_abs_real;	/* TIMER_ABSTIME && CLOCK_REALTIME* */
 	struct timespec cur;
 	struct timespec end;
 };
@@ -255,6 +258,8 @@ SYSCTL_LONG(_debug_umtx, OID_AUTO, max_l
 static SYSCTL_NODE(_debug_umtx, OID_AUTO, chains, CTLFLAG_RD, 0, "umtx chain stats");
 #endif
 
+static void abs_timeout_update(struct abs_timeout *timo);
+
 static void umtx_shm_init(void);
 static void umtxq_sysinit(void *);
 static void umtxq_hash(struct umtx_key *key);
@@ -770,12 +775,22 @@ abs_timeout_init(struct abs_timeout *tim
 
 	timo->clockid = clockid;
 	if (!absolute) {
-		kern_clock_gettime(curthread, clockid, &timo->end);
-		timo->cur = timo->end;
+		timo->is_abs_real = false;
+		abs_timeout_update(timo);
+		timo->end = timo->cur;
 		timespecadd(&timo->end, timeout);
 	} else {
 		timo->end = *timeout;
-		kern_clock_gettime(curthread, clockid, &timo->cur);
+		timo->is_abs_real = clockid == CLOCK_REALTIME ||
+		    clockid == CLOCK_REALTIME_FAST ||
+		    clockid == CLOCK_REALTIME_PRECISE;
+		/*
+		 * If is_abs_real, umtxq_sleep will read the clock
+		 * after setting td_rtcgen; otherwise, read it here.
+		 */
+		if (!timo->is_abs_real) {
+			abs_timeout_update(timo);
+		}
 	}
 }
 
@@ -829,26 +844,41 @@ umtxq_sleep(struct umtx_q *uq, const cha
 	struct umtxq_chain *uc;
 	int error, timo;
 
+	if (abstime != NULL && abstime->is_abs_real) {
+		curthread->td_rtcgen = atomic_load_acq_int(&rtc_generation);
+		abs_timeout_update(abstime);
+	}
+
 	uc = umtxq_getchain(&uq->uq_key);
 	UMTXQ_LOCKED_ASSERT(uc);
 	for (;;) {
-		if (!(uq->uq_flags & UQF_UMTXQ))
-			return (0);
+		if (!(uq->uq_flags & UQF_UMTXQ)) {
+			error = 0;
+			break;
+		}
 		if (abstime != NULL) {
 			timo = abs_timeout_gethz(abstime);
-			if (timo < 0)
-				return (ETIMEDOUT);
+			if (timo < 0) {
+				error = ETIMEDOUT;
+				break;
+			}
 		} else
 			timo = 0;
 		error = msleep(uq, &uc->uc_lock, PCATCH | PDROP, wmesg, timo);
-		if (error != EWOULDBLOCK) {
+		if (error == EINTR || error == ERESTART) {
 			umtxq_lock(&uq->uq_key);
 			break;
 		}
-		if (abstime != NULL)
+		if (abstime != NULL) {
+			if (abstime->is_abs_real)
+				curthread->td_rtcgen =
+				    atomic_load_acq_int(&rtc_generation);
 			abs_timeout_update(abstime);
+		}
 		umtxq_lock(&uq->uq_key);
 	}
+
+	curthread->td_rtcgen = 0;
 	return (error);
 }
 

Modified: stable/11/sys/kern/subr_sleepqueue.c
==============================================================================
--- stable/11/sys/kern/subr_sleepqueue.c	Tue Mar 28 23:56:02 2017	(r316119)
+++ stable/11/sys/kern/subr_sleepqueue.c	Wed Mar 29 01:21:48 2017	(r316120)
@@ -78,6 +78,9 @@ __FBSDID("$FreeBSD$");
 #include <sys/sleepqueue.h>
 #include <sys/stack.h>
 #include <sys/sysctl.h>
+#include <sys/time.h>
+
+#include <machine/atomic.h>
 
 #include <vm/uma.h>
 
@@ -539,6 +542,7 @@ sleepq_switch(void *wchan, int pri)
 	struct sleepqueue_chain *sc;
 	struct sleepqueue *sq;
 	struct thread *td;
+	bool rtc_changed;
 
 	td = curthread;
 	sc = SC_LOOKUP(wchan);
@@ -558,8 +562,26 @@ sleepq_switch(void *wchan, int pri)
 	 * If TDF_TIMEOUT is set, then our sleep has been timed out
 	 * already but we are still on the sleep queue, so dequeue the
 	 * thread and return.
+	 *
+	 * Do the same if the real-time clock has been adjusted since this
+	 * thread calculated its timeout based on that clock.  This handles
+	 * the following race:
+	 * - The Ts thread needs to sleep until an absolute real-clock time.
+	 *   It copies the global rtc_generation into curthread->td_rtcgen,
+	 *   reads the RTC, and calculates a sleep duration based on that time.
+	 *   See umtxq_sleep() for an example.
+	 * - The Tc thread adjusts the RTC, bumps rtc_generation, and wakes
+	 *   threads that are sleeping until an absolute real-clock time.
+	 *   See tc_setclock() and the POSIX specification of clock_settime().
+	 * - Ts reaches the code below.  It holds the sleepqueue chain lock,
+	 *   so Tc has finished waking, so this thread must test td_rtcgen.
+	 * (The declaration of td_rtcgen refers to this comment.)
 	 */
-	if (td->td_flags & TDF_TIMEOUT) {
+	rtc_changed = td->td_rtcgen != 0 && td->td_rtcgen != rtc_generation;
+	if ((td->td_flags & TDF_TIMEOUT) || rtc_changed) {
+		if (rtc_changed) {
+			td->td_rtcgen = 0;
+		}
 		MPASS(TD_ON_SLEEPQ(td));
 		sq = sleepq_lookup(wchan);
 		if (sleepq_resume_thread(sq, td, 0)) {
@@ -886,6 +908,13 @@ sleepq_signal(void *wchan, int flags, in
 	return (wakeup_swapper);
 }
 
+static bool
+match_any(struct thread *td __unused)
+{
+
+	return (true);
+}
+
 /*
  * Resume all threads sleeping on a specified wait channel.
  */
@@ -893,8 +922,6 @@ int
 sleepq_broadcast(void *wchan, int flags, int pri, int queue)
 {
 	struct sleepqueue *sq;
-	struct thread *td, *tdn;
-	int wakeup_swapper;
 
 	CTR2(KTR_PROC, "sleepq_broadcast(%p, %d)", wchan, flags);
 	KASSERT(wchan != NULL, ("%s: invalid NULL wait channel", __func__));
@@ -905,18 +932,33 @@ sleepq_broadcast(void *wchan, int flags,
 	KASSERT(sq->sq_type == (flags & SLEEPQ_TYPE),
 	    ("%s: mismatch between sleep/wakeup and cv_*", __func__));
 
+	return (sleepq_remove_matching(sq, queue, match_any, pri));
+}
+
+/*
+ * Resume threads on the sleep queue that match the given predicate.
+ */
+int
+sleepq_remove_matching(struct sleepqueue *sq, int queue,
+    bool (*matches)(struct thread *), int pri)
+{
+	struct thread *td, *tdn;
+	int wakeup_swapper;
+
 	/*
-	 * Resume all blocked threads on the sleep queue.  The last thread will
-	 * be given ownership of sq and may re-enqueue itself before
-	 * sleepq_resume_thread() returns, so we must cache the "next" queue
-	 * item at the beginning of the final iteration.
+	 * The last thread will be given ownership of sq and may
+	 * re-enqueue itself before sleepq_resume_thread() returns,
+	 * so we must cache the "next" queue item at the beginning
+	 * of the final iteration.
 	 */
 	wakeup_swapper = 0;
 	TAILQ_FOREACH_SAFE(td, &sq->sq_blocked[queue], td_slpq, tdn) {
 		thread_lock(td);
-		wakeup_swapper |= sleepq_resume_thread(sq, td, pri);
+		if (matches(td))
+			wakeup_swapper |= sleepq_resume_thread(sq, td, pri);
 		thread_unlock(td);
 	}
+
 	return (wakeup_swapper);
 }
 
@@ -1052,6 +1094,32 @@ sleepq_abort(struct thread *td, int intr
 	return (sleepq_resume_thread(sq, td, 0));
 }
 
+void
+sleepq_chains_remove_matching(bool (*matches)(struct thread *))
+{
+	struct sleepqueue_chain *sc;
+	struct sleepqueue *sq;
+	int i, wakeup_swapper;
+
+	wakeup_swapper = 0;
+	for (sc = &sleepq_chains[0]; sc < sleepq_chains + SC_TABLESIZE; ++sc) {
+		if (LIST_EMPTY(&sc->sc_queues)) {
+			continue;
+		}
+		mtx_lock_spin(&sc->sc_lock);
+		LIST_FOREACH(sq, &sc->sc_queues, sq_hash) {
+			for (i = 0; i < NR_SLEEPQS; ++i) {
+				wakeup_swapper |= sleepq_remove_matching(sq, i,
+				    matches, 0);
+			}
+		}
+		mtx_unlock_spin(&sc->sc_lock);
+	}
+	if (wakeup_swapper) {
+		kick_proc0();
+	}
+}
+
 /*
  * Prints the stacks of all threads presently sleeping on wchan/queue to
  * the sbuf sb.  Sets count_stacks_printed to the number of stacks actually

Modified: stable/11/sys/sys/proc.h
==============================================================================
--- stable/11/sys/sys/proc.h	Tue Mar 28 23:56:02 2017	(r316119)
+++ stable/11/sys/sys/proc.h	Wed Mar 29 01:21:48 2017	(r316120)
@@ -148,6 +148,7 @@ struct pargs {
  *      o - ktrace lock
  *      q - td_contested lock
  *      r - p_peers lock
+ *      s - see sleepq_switch(), sleeping_on_old_rtc(), and sleep(9)
  *      t - thread lock
  *	u - process stat lock
  *	w - process timer lock
@@ -282,6 +283,7 @@ struct thread {
 	int		td_no_sleeping;	/* (k) Sleeping disabled count. */
 	int		td_dom_rr_idx;	/* (k) RR Numa domain selection. */
 	void		*td_su;		/* (k) FFS SU private */
+	int		td_rtcgen;	/* (s) rtc_generation of abs. sleep */
 #define	td_endzero td_sigmask
 
 /* Copied during fork1() or create_thread(). */

Modified: stable/11/sys/sys/sleepqueue.h
==============================================================================
--- stable/11/sys/sys/sleepqueue.h	Tue Mar 28 23:56:02 2017	(r316119)
+++ stable/11/sys/sys/sleepqueue.h	Wed Mar 29 01:21:48 2017	(r316120)
@@ -90,11 +90,14 @@ void	sleepq_add(void *wchan, struct lock
 	    int flags, int queue);
 struct sleepqueue *sleepq_alloc(void);
 int	sleepq_broadcast(void *wchan, int flags, int pri, int queue);
+void	sleepq_chains_remove_matching(bool (*matches)(struct thread *));
 void	sleepq_free(struct sleepqueue *sq);
 void	sleepq_lock(void *wchan);
 struct sleepqueue *sleepq_lookup(void *wchan);
 void	sleepq_release(void *wchan);
 void	sleepq_remove(struct thread *td, void *wchan);
+int	sleepq_remove_matching(struct sleepqueue *sq, int queue,
+	    bool (*matches)(struct thread *), int pri);
 int	sleepq_signal(void *wchan, int flags, int pri, int queue);
 void	sleepq_set_timeout_sbt(void *wchan, sbintime_t sbt,
 	    sbintime_t pr, int flags);

Modified: stable/11/sys/sys/time.h
==============================================================================
--- stable/11/sys/sys/time.h	Tue Mar 28 23:56:02 2017	(r316119)
+++ stable/11/sys/sys/time.h	Wed Mar 29 01:21:48 2017	(r316120)
@@ -383,6 +383,8 @@ extern struct bintime bt_tickthreshold;
 extern sbintime_t sbt_timethreshold;
 extern sbintime_t sbt_tickthreshold;
 
+extern volatile int rtc_generation;
+
 /*
  * Functions for looking at our clock: [get]{bin,nano,micro}[up]time()
  *


More information about the svn-src-all mailing list