svn commit: r302235 - in head/sys: kern sys

Konstantin Belousov kib at FreeBSD.org
Mon Jun 27 21:52:19 UTC 2016


Author: kib
Date: Mon Jun 27 21:52:17 2016
New Revision: 302235
URL: https://svnweb.freebsd.org/changeset/base/302235

Log:
  When filt_proc() removes event from the knlist due to the process
  exiting (NOTE_EXIT->knlist_remove_inevent()), two things happen:
  - knote kn_knlist pointer is reset
  - INFLUX knote is removed from the process knlist.
  And, there are two consequences:
  - KN_LIST_UNLOCK() on such knote is nop
  - there is nothing which would block exit1() from processing past the
    knlist_destroy() (and knlist_destroy() resets knlist lock pointers).
  Both consequences result either in leaked process lock, or
  dereferencing NULL function pointers for locking.
  
  Handle this by stopping embedding the process knlist into struct proc.
  Instead, the knlist is allocated together with struct proc, but marked
  as autodestroy on the zombie reap, by knlist_detach() function.  The
  knlist is freed when last kevent is removed from the list, in
  particular, at the zombie reap time if the list is empty.  As result,
  the knlist_remove_inevent() is no longer needed and removed.
  
  Other changes:
  
  In filt_procattach(), clear NOTE_EXEC and NOTE_FORK desired events
  from kn_sfflags for knote registered by kernel to only get NOTE_CHILD
  notifications.  The flags leak resulted in excessive
  NOTE_EXEC/NOTE_FORK reports.
  
  Fix immediate note activation in filt_procattach().  Condition should
  be either the immediate CHILD_NOTE activation, or immediate NOTE_EXIT
  report for the exiting process.
  
  In knote_fork(), do not perform racy check for KN_INFLUX before kq
  lock is taken.  Besides being racy, it did not accounted for notes
  just added by scan (KN_SCAN).
  
  Some minor and incomplete style fixes.
  
  Analyzed and tested by:	Eric Badger <eric at badgerio.us>
  Reviewed by:	jhb
  Sponsored by:	The FreeBSD Foundation
  MFC after:	2 weeks
  Approved by:	re (gjb)
  Differential revision:	https://reviews.freebsd.org/D6859

Modified:
  head/sys/kern/init_main.c
  head/sys/kern/kern_event.c
  head/sys/kern/kern_exec.c
  head/sys/kern/kern_exit.c
  head/sys/kern/kern_fork.c
  head/sys/kern/kern_sig.c
  head/sys/sys/event.h
  head/sys/sys/proc.h

Modified: head/sys/kern/init_main.c
==============================================================================
--- head/sys/kern/init_main.c	Mon Jun 27 21:50:30 2016	(r302234)
+++ head/sys/kern/init_main.c	Mon Jun 27 21:52:17 2016	(r302235)
@@ -482,7 +482,7 @@ proc0_init(void *dummy __unused)
 	p->p_flag = P_SYSTEM | P_INMEM | P_KPROC;
 	p->p_flag2 = 0;
 	p->p_state = PRS_NORMAL;
-	knlist_init_mtx(&p->p_klist, &p->p_mtx);
+	p->p_klist = knlist_alloc(&p->p_mtx);
 	STAILQ_INIT(&p->p_ktr);
 	p->p_nice = NZERO;
 	/* pid_max cannot be greater than PID_MAX */

Modified: head/sys/kern/kern_event.c
==============================================================================
--- head/sys/kern/kern_event.c	Mon Jun 27 21:50:30 2016	(r302234)
+++ head/sys/kern/kern_event.c	Mon Jun 27 21:52:17 2016	(r302235)
@@ -227,14 +227,33 @@ SYSCTL_UINT(_kern, OID_AUTO, kq_calloutm
 #define KQ_NOTOWNED(kq) do {						\
 	mtx_assert(&(kq)->kq_lock, MA_NOTOWNED);			\
 } while (0)
-#define KN_LIST_LOCK(kn) do {						\
-	if (kn->kn_knlist != NULL)					\
-		kn->kn_knlist->kl_lock(kn->kn_knlist->kl_lockarg);	\
-} while (0)
-#define KN_LIST_UNLOCK(kn) do {						\
-	if (kn->kn_knlist != NULL) 					\
-		kn->kn_knlist->kl_unlock(kn->kn_knlist->kl_lockarg);	\
-} while (0)
+
+static struct knlist *
+kn_list_lock(struct knote *kn)
+{
+	struct knlist *knl;
+
+	knl = kn->kn_knlist;
+	if (knl != NULL)
+		knl->kl_lock(knl->kl_lockarg);
+	return (knl);
+}
+
+static void
+kn_list_unlock(struct knlist *knl)
+{
+	bool do_free;
+
+	if (knl == NULL)
+		return;
+	do_free = knl->kl_autodestroy && knlist_empty(knl);
+	knl->kl_unlock(knl->kl_lockarg);
+	if (do_free) {
+		knlist_destroy(knl);
+		free(knl, M_KQUEUE);
+	}
+}
+
 #define	KNL_ASSERT_LOCK(knl, islocked) do {				\
 	if (islocked)							\
 		KNL_ASSERT_LOCKED(knl);				\
@@ -350,16 +369,16 @@ static int
 filt_procattach(struct knote *kn)
 {
 	struct proc *p;
-	int immediate;
 	int error;
+	bool exiting, immediate;
 
-	immediate = 0;
+	exiting = immediate = false;
 	p = pfind(kn->kn_id);
 	if (p == NULL && (kn->kn_sfflags & NOTE_EXIT)) {
 		p = zpfind(kn->kn_id);
-		immediate = 1;
+		exiting = true;
 	} else if (p != NULL && (p->p_flag & P_WEXIT)) {
-		immediate = 1;
+		exiting = true;
 	}
 
 	if (p == NULL)
@@ -380,8 +399,8 @@ filt_procattach(struct knote *kn)
 		kn->kn_flags &= ~EV_FLAG2;
 		kn->kn_data = kn->kn_sdata;		/* ppid */
 		kn->kn_fflags = NOTE_CHILD;
-                kn->kn_sfflags &= ~NOTE_EXIT;
-		immediate = 1; /* Force immediate activation of child note. */
+		kn->kn_sfflags &= ~(NOTE_EXIT | NOTE_EXEC | NOTE_FORK);
+		immediate = true; /* Force immediate activation of child note. */
 	}
 	/*
 	 * Internal flag indicating registration done by kernel (for other than
@@ -391,8 +410,7 @@ filt_procattach(struct knote *kn)
 		kn->kn_flags &= ~EV_FLAG1;
 	}
 
-	if (immediate == 0)
-		knlist_add(&p->p_klist, kn, 1);
+	knlist_add(p->p_klist, kn, 1);
 
 	/*
 	 * Immediately activate any child notes or, in the case of a zombie
@@ -400,7 +418,7 @@ filt_procattach(struct knote *kn)
 	 * case where the target process, e.g. a child, dies before the kevent
 	 * is registered.
 	 */
-	if (immediate && filt_proc(kn, NOTE_EXIT))
+	if (immediate || (exiting && filt_proc(kn, NOTE_EXIT)))
 		KNOTE_ACTIVATE(kn, 0);
 
 	PROC_UNLOCK(p);
@@ -420,10 +438,8 @@ filt_procattach(struct knote *kn)
 static void
 filt_procdetach(struct knote *kn)
 {
-	struct proc *p;
 
-	p = kn->kn_ptr.p_proc;
-	knlist_remove(&p->p_klist, kn, 0);
+	knlist_remove(kn->kn_knlist, kn, 0);
 	kn->kn_ptr.p_proc = NULL;
 }
 
@@ -444,8 +460,6 @@ filt_proc(struct knote *kn, long hint)
 
 	/* Process is gone, so flag the event as finished. */
 	if (event == NOTE_EXIT) {
-		if (!(kn->kn_status & KN_DETACHED))
-			knlist_remove_inevent(&p->p_klist, kn);
 		kn->kn_flags |= EV_EOF | EV_ONESHOT;
 		kn->kn_ptr.p_proc = NULL;
 		if (kn->kn_fflags & NOTE_EXIT)
@@ -479,12 +493,6 @@ knote_fork(struct knlist *list, int pid)
 	list->kl_lock(list->kl_lockarg);
 
 	SLIST_FOREACH(kn, &list->kl_list, kn_selnext) {
-		/*
-		 * XXX - Why do we skip the kn if it is _INFLUX?  Does this
-		 * mean we will not properly wake up some notes?
-		 */
-		if ((kn->kn_status & KN_INFLUX) == KN_INFLUX)
-			continue;
 		kq = kn->kn_kq;
 		KQ_LOCK(kq);
 		if ((kn->kn_status & (KN_INFLUX | KN_SCAN)) == KN_INFLUX) {
@@ -525,7 +533,8 @@ knote_fork(struct knlist *list, int pid)
 		 */
 		kev.ident = pid;
 		kev.filter = kn->kn_filter;
-		kev.flags = kn->kn_flags | EV_ADD | EV_ENABLE | EV_ONESHOT | EV_FLAG2;
+		kev.flags = kn->kn_flags | EV_ADD | EV_ENABLE | EV_ONESHOT |
+		    EV_FLAG2;
 		kev.fflags = kn->kn_sfflags;
 		kev.data = kn->kn_id;		/* parent */
 		kev.udata = kn->kn_kevent.udata;/* preserve udata */
@@ -1137,6 +1146,7 @@ kqueue_register(struct kqueue *kq, struc
 	struct filterops *fops;
 	struct file *fp;
 	struct knote *kn, *tkn;
+	struct knlist *knl;
 	cap_rights_t rights;
 	int error, filt, event;
 	int haskqglobal, filedesc_unlock;
@@ -1146,6 +1156,7 @@ kqueue_register(struct kqueue *kq, struc
 
 	fp = NULL;
 	kn = NULL;
+	knl = NULL;
 	error = 0;
 	haskqglobal = 0;
 	filedesc_unlock = 0;
@@ -1300,7 +1311,7 @@ findkn:
 				knote_drop(kn, td);
 				goto done;
 			}
-			KN_LIST_LOCK(kn);
+			knl = kn_list_lock(kn);
 			goto done_ev_add;
 		} else {
 			/* No matching knote and the EV_ADD flag is not set. */
@@ -1331,7 +1342,7 @@ findkn:
 	 */
 	kn->kn_status |= KN_INFLUX | KN_SCAN;
 	KQ_UNLOCK(kq);
-	KN_LIST_LOCK(kn);
+	knl = kn_list_lock(kn);
 	kn->kn_kevent.udata = kev->udata;
 	if (!fops->f_isfd && fops->f_touch != NULL) {
 		fops->f_touch(kn, kev, EVENT_REGISTER);
@@ -1365,7 +1376,7 @@ done_ev_add:
 	    KN_ACTIVE)
 		knote_enqueue(kn);
 	kn->kn_status &= ~(KN_INFLUX | KN_SCAN);
-	KN_LIST_UNLOCK(kn);
+	kn_list_unlock(knl);
 	KQ_UNLOCK_FLUX(kq);
 
 done:
@@ -1535,6 +1546,7 @@ kqueue_scan(struct kqueue *kq, int maxev
 {
 	struct kevent *kevp;
 	struct knote *kn, *marker;
+	struct knlist *knl;
 	sbintime_t asbt, rsbt;
 	int count, error, haskqglobal, influx, nkev, touch;
 
@@ -1660,7 +1672,7 @@ retry:
 			KQ_UNLOCK(kq);
 			if ((kn->kn_status & KN_KQUEUE) == KN_KQUEUE)
 				KQ_GLOBAL_LOCK(&kq_global, haskqglobal);
-			KN_LIST_LOCK(kn);
+			knl = kn_list_lock(kn);
 			if (kn->kn_fop->f_event(kn, 0) == 0) {
 				KQ_LOCK(kq);
 				KQ_GLOBAL_UNLOCK(&kq_global, haskqglobal);
@@ -1668,7 +1680,7 @@ retry:
 				    ~(KN_QUEUED | KN_ACTIVE | KN_INFLUX |
 				    KN_SCAN);
 				kq->kq_count--;
-				KN_LIST_UNLOCK(kn);
+				kn_list_unlock(knl);
 				influx = 1;
 				continue;
 			}
@@ -1697,7 +1709,7 @@ retry:
 				TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe);
 			
 			kn->kn_status &= ~(KN_INFLUX | KN_SCAN);
-			KN_LIST_UNLOCK(kn);
+			kn_list_unlock(knl);
 			influx = 1;
 		}
 
@@ -2062,7 +2074,8 @@ knlist_add(struct knlist *knl, struct kn
 }
 
 static void
-knlist_remove_kq(struct knlist *knl, struct knote *kn, int knlislocked, int kqislocked)
+knlist_remove_kq(struct knlist *knl, struct knote *kn, int knlislocked,
+    int kqislocked)
 {
 	KASSERT(!(!!kqislocked && !knlislocked), ("kq locked w/o knl locked"));
 	KNL_ASSERT_LOCK(knl, knlislocked);
@@ -2075,7 +2088,7 @@ knlist_remove_kq(struct knlist *knl, str
 	SLIST_REMOVE(&knl->kl_list, kn, knote, kn_selnext);
 	kn->kn_knlist = NULL;
 	if (!knlislocked)
-		knl->kl_unlock(knl->kl_lockarg);
+		kn_list_unlock(knl);
 	if (!kqislocked)
 		KQ_LOCK(kn->kn_kq);
 	kn->kn_status |= KN_DETACHED;
@@ -2093,17 +2106,6 @@ knlist_remove(struct knlist *knl, struct
 	knlist_remove_kq(knl, kn, islocked, 0);
 }
 
-/*
- * remove knote from the specified knlist while in f_event handler.
- */
-void
-knlist_remove_inevent(struct knlist *knl, struct knote *kn)
-{
-
-	knlist_remove_kq(knl, kn, 1,
-	    (kn->kn_status & KN_HASKQLOCK) == KN_HASKQLOCK);
-}
-
 int
 knlist_empty(struct knlist *knl)
 {
@@ -2202,6 +2204,7 @@ knlist_init(struct knlist *knl, void *lo
 	else
 		knl->kl_assert_unlocked = kl_assert_unlocked;
 
+	knl->kl_autodestroy = false;
 	SLIST_INIT(&knl->kl_list);
 }
 
@@ -2212,6 +2215,16 @@ knlist_init_mtx(struct knlist *knl, stru
 	knlist_init(knl, lock, NULL, NULL, NULL, NULL);
 }
 
+struct knlist *
+knlist_alloc(struct mtx *lock)
+{
+	struct knlist *knl;
+
+	knl = malloc(sizeof(struct knlist), M_KQUEUE, M_WAITOK);
+	knlist_init_mtx(knl, lock);
+	return (knl);
+}
+
 void
 knlist_init_rw_reader(struct knlist *knl, struct rwlock *lock)
 {
@@ -2237,6 +2250,18 @@ knlist_destroy(struct knlist *knl)
 	SLIST_INIT(&knl->kl_list);
 }
 
+void
+knlist_detach(struct knlist *knl)
+{
+
+	KNL_ASSERT_LOCKED(knl);
+	knl->kl_autodestroy = true;
+	if (knlist_empty(knl)) {
+		knlist_destroy(knl);
+		free(knl, M_KQUEUE);
+	}
+}
+
 /*
  * Even if we are locked, we may need to drop the lock to allow any influx
  * knotes time to "settle".
@@ -2247,6 +2272,7 @@ knlist_cleardel(struct knlist *knl, stru
 	struct knote *kn, *kn2;
 	struct kqueue *kq;
 
+	KASSERT(!knl->kl_autodestroy, ("cleardel for autodestroy %p", knl));
 	if (islocked)
 		KNL_ASSERT_LOCKED(knl);
 	else {

Modified: head/sys/kern/kern_exec.c
==============================================================================
--- head/sys/kern/kern_exec.c	Mon Jun 27 21:50:30 2016	(r302234)
+++ head/sys/kern/kern_exec.c	Mon Jun 27 21:52:17 2016	(r302235)
@@ -832,7 +832,7 @@ interpret:
 	 * Notify others that we exec'd, and clear the P_INEXEC flag
 	 * as we're now a bona fide freshly-execed process.
 	 */
-	KNOTE_LOCKED(&p->p_klist, NOTE_EXEC);
+	KNOTE_LOCKED(p->p_klist, NOTE_EXEC);
 	p->p_flag &= ~P_INEXEC;
 
 	/* clear "fork but no exec" flag, as we _are_ execing */

Modified: head/sys/kern/kern_exit.c
==============================================================================
--- head/sys/kern/kern_exit.c	Mon Jun 27 21:50:30 2016	(r302234)
+++ head/sys/kern/kern_exit.c	Mon Jun 27 21:52:17 2016	(r302235)
@@ -512,7 +512,7 @@ exit1(struct thread *td, int rval, int s
 	/*
 	 * Notify interested parties of our demise.
 	 */
-	KNOTE_LOCKED(&p->p_klist, NOTE_EXIT);
+	KNOTE_LOCKED(p->p_klist, NOTE_EXIT);
 
 #ifdef KDTRACE_HOOKS
 	int reason = CLD_EXITED;
@@ -524,13 +524,6 @@ exit1(struct thread *td, int rval, int s
 #endif
 
 	/*
-	 * Just delete all entries in the p_klist. At this point we won't
-	 * report any more events, and there are nasty race conditions that
-	 * can beat us if we don't.
-	 */
-	knlist_clear(&p->p_klist, 1);
-
-	/*
 	 * If this is a process with a descriptor, we may not need to deliver
 	 * a signal to the parent.  proctree_lock is held over
 	 * procdesc_exit() to serialize concurrent calls to close() and
@@ -603,12 +596,6 @@ exit1(struct thread *td, int rval, int s
 	PROC_UNLOCK(p->p_pptr);
 
 	/*
-	 * Hopefully no one will try to deliver a signal to the process this
-	 * late in the game.
-	 */
-	knlist_destroy(&p->p_klist);
-
-	/*
 	 * Save our children's rusage information in our exit rusage.
 	 */
 	PROC_STATLOCK(p);
@@ -853,6 +840,11 @@ proc_reap(struct thread *td, struct proc
 		procdesc_reap(p);
 	sx_xunlock(&proctree_lock);
 
+	PROC_LOCK(p);
+	knlist_detach(p->p_klist);
+	p->p_klist = NULL;
+	PROC_UNLOCK(p);
+
 	/*
 	 * Removal from allproc list and process group list paired with
 	 * PROC_LOCK which was executed during that time should guarantee

Modified: head/sys/kern/kern_fork.c
==============================================================================
--- head/sys/kern/kern_fork.c	Mon Jun 27 21:50:30 2016	(r302234)
+++ head/sys/kern/kern_fork.c	Mon Jun 27 21:52:17 2016	(r302235)
@@ -748,7 +748,7 @@ do_fork(struct thread *td, struct fork_r
 	/*
 	 * Tell any interested parties about the new process.
 	 */
-	knote_fork(&p1->p_klist, p2->p_pid);
+	knote_fork(p1->p_klist, p2->p_pid);
 	SDT_PROBE3(proc, , , create, p2, p1, fr->fr_flags);
 
 	if (fr->fr_flags & RFPROCDESC) {
@@ -950,7 +950,7 @@ fork1(struct thread *td, struct fork_req
 #ifdef MAC
 	mac_proc_init(newproc);
 #endif
-	knlist_init_mtx(&newproc->p_klist, &newproc->p_mtx);
+	newproc->p_klist = knlist_alloc(&newproc->p_mtx);
 	STAILQ_INIT(&newproc->p_ktr);
 
 	/* We have to lock the process tree while we look for a pid. */

Modified: head/sys/kern/kern_sig.c
==============================================================================
--- head/sys/kern/kern_sig.c	Mon Jun 27 21:50:30 2016	(r302234)
+++ head/sys/kern/kern_sig.c	Mon Jun 27 21:52:17 2016	(r302235)
@@ -2112,7 +2112,7 @@ tdsendsignal(struct proc *p, struct thre
 	}
 
 	ps = p->p_sigacts;
-	KNOTE_LOCKED(&p->p_klist, NOTE_SIGNAL | sig);
+	KNOTE_LOCKED(p->p_klist, NOTE_SIGNAL | sig);
 	prop = sigprop(sig);
 
 	if (td == NULL) {
@@ -3542,7 +3542,7 @@ filt_sigattach(struct knote *kn)
 	kn->kn_ptr.p_proc = p;
 	kn->kn_flags |= EV_CLEAR;		/* automatically set */
 
-	knlist_add(&p->p_klist, kn, 0);
+	knlist_add(p->p_klist, kn, 0);
 
 	return (0);
 }
@@ -3552,7 +3552,7 @@ filt_sigdetach(struct knote *kn)
 {
 	struct proc *p = kn->kn_ptr.p_proc;
 
-	knlist_remove(&p->p_klist, kn, 0);
+	knlist_remove(p->p_klist, kn, 0);
 }
 
 /*

Modified: head/sys/sys/event.h
==============================================================================
--- head/sys/sys/event.h	Mon Jun 27 21:50:30 2016	(r302234)
+++ head/sys/sys/event.h	Mon Jun 27 21:52:17 2016	(r302235)
@@ -158,7 +158,8 @@ struct knlist {
 	void    (*kl_unlock)(void *);
 	void	(*kl_assert_locked)(void *);
 	void	(*kl_assert_unlocked)(void *);
-	void *kl_lockarg;		/* argument passed to kl_lockf() */
+	void	*kl_lockarg;		/* argument passed to lock functions */
+	bool	kl_autodestroy;
 };
 
 
@@ -258,9 +259,10 @@ struct rwlock;
 
 extern void	knote(struct knlist *list, long hint, int lockflags);
 extern void	knote_fork(struct knlist *list, int pid);
+extern struct knlist *knlist_alloc(struct mtx *lock);
+extern void	knlist_detach(struct knlist *knl);
 extern void	knlist_add(struct knlist *knl, struct knote *kn, int islocked);
 extern void	knlist_remove(struct knlist *knl, struct knote *kn, int islocked);
-extern void	knlist_remove_inevent(struct knlist *knl, struct knote *kn);
 extern int	knlist_empty(struct knlist *knl);
 extern void	knlist_init(struct knlist *knl, void *lock,
     void (*kl_lock)(void *), void (*kl_unlock)(void *),

Modified: head/sys/sys/proc.h
==============================================================================
--- head/sys/sys/proc.h	Mon Jun 27 21:50:30 2016	(r302234)
+++ head/sys/sys/proc.h	Mon Jun 27 21:52:17 2016	(r302235)
@@ -611,7 +611,7 @@ struct proc {
 /* End area that is copied on creation. */
 #define	p_endcopy	p_xsig
 	struct pgrp	*p_pgrp;	/* (c + e) Pointer to process group. */
-	struct knlist	p_klist;	/* (c) Knotes attached to this proc. */
+	struct knlist	*p_klist;	/* (c) Knotes attached to this proc. */
 	int		p_numthreads;	/* (c) Number of threads. */
 	struct mdproc	p_md;		/* Any machine-dependent fields. */
 	struct callout	p_itcallout;	/* (h + c) Interval timer callout. */


More information about the svn-src-head mailing list