svn commit: r295012 - in head: sys/kern sys/sys tests/sys/kqueue
Eric van Gyzen
vangyzen at FreeBSD.org
Thu Jan 28 20:24:17 UTC 2016
Author: vangyzen
Date: Thu Jan 28 20:24:15 2016
New Revision: 295012
URL: https://svnweb.freebsd.org/changeset/base/295012
Log:
kqueue EVFILT_PROC: avoid collision between NOTE_CHILD and NOTE_EXIT
NOTE_CHILD and NOTE_EXIT return something in kevent.data: the parent
pid (ppid) for NOTE_CHILD and the exit status for NOTE_EXIT.
Do not let the two events be combined, since one would overwrite
the other's data.
PR: 180385
Submitted by: David A. Bright <david_a_bright at dell.com>
Reviewed by: jhb
MFC after: 1 month
Sponsored by: Dell Inc.
Differential Revision: https://reviews.freebsd.org/D4900
Modified:
head/sys/kern/kern_event.c
head/sys/sys/event.h
head/tests/sys/kqueue/common.h
head/tests/sys/kqueue/main.c
head/tests/sys/kqueue/proc.c
Modified: head/sys/kern/kern_event.c
==============================================================================
--- head/sys/kern/kern_event.c Thu Jan 28 20:21:15 2016 (r295011)
+++ head/sys/kern/kern_event.c Thu Jan 28 20:24:15 2016 (r295012)
@@ -373,11 +373,21 @@ filt_procattach(struct knote *kn)
kn->kn_flags |= EV_CLEAR; /* automatically set */
/*
- * internal flag indicating registration done by kernel
+ * Internal flag indicating registration done by kernel for the
+ * purposes of getting a NOTE_CHILD notification.
*/
- if (kn->kn_flags & EV_FLAG1) {
+ if (kn->kn_flags & EV_FLAG2) {
+ kn->kn_flags &= ~EV_FLAG2;
kn->kn_data = kn->kn_sdata; /* ppid */
kn->kn_fflags = NOTE_CHILD;
+ kn->kn_sfflags &= ~NOTE_EXIT;
+ immediate = 1; /* Force immediate activation of child note. */
+ }
+ /*
+ * Internal flag indicating registration done by kernel (for other than
+ * NOTE_CHILD).
+ */
+ if (kn->kn_flags & EV_FLAG1) {
kn->kn_flags &= ~EV_FLAG1;
}
@@ -385,9 +395,10 @@ filt_procattach(struct knote *kn)
knlist_add(&p->p_klist, kn, 1);
/*
- * Immediately activate any exit notes if the target process is a
- * zombie. This is necessary to handle the case where the target
- * process, e.g. a child, dies before the kevent is registered.
+ * Immediately activate any child notes or, in the case of a zombie
+ * target process, exit notes. The latter is necessary to handle the
+ * case where the target process, e.g. a child, dies before the kevent
+ * is registered.
*/
if (immediate && filt_proc(kn, NOTE_EXIT))
KNOTE_ACTIVATE(kn, 0);
@@ -495,7 +506,7 @@ knote_fork(struct knlist *list, int pid)
/*
* The NOTE_TRACK case. In addition to the activation
- * of the event, we need to register new event to
+ * of the event, we need to register new events to
* track the child. Drop the locks in preparation for
* the call to kqueue_register().
*/
@@ -504,8 +515,27 @@ knote_fork(struct knlist *list, int pid)
list->kl_unlock(list->kl_lockarg);
/*
- * Activate existing knote and register a knote with
+ * Activate existing knote and register tracking knotes with
* new process.
+ *
+ * First register a knote to get just the child notice. This
+ * must be a separate note from a potential NOTE_EXIT
+ * notification since both NOTE_CHILD and NOTE_EXIT are defined
+ * to use the data field (in conflicting ways).
+ */
+ kev.ident = pid;
+ kev.filter = kn->kn_filter;
+ kev.flags = kn->kn_flags | EV_ADD | EV_ENABLE | EV_ONESHOT | EV_FLAG2;
+ kev.fflags = kn->kn_sfflags;
+ kev.data = kn->kn_id; /* parent */
+ kev.udata = kn->kn_kevent.udata;/* preserve udata */
+ error = kqueue_register(kq, &kev, NULL, 0);
+ if (error)
+ kn->kn_fflags |= NOTE_TRACKERR;
+
+ /*
+ * Then register another knote to track other potential events
+ * from the new process.
*/
kev.ident = pid;
kev.filter = kn->kn_filter;
@@ -1129,7 +1159,7 @@ findkn:
if (fp->f_type == DTYPE_KQUEUE) {
/*
- * if we add some inteligence about what we are doing,
+ * If we add some intelligence about what we are doing,
* we should be able to support events on ourselves.
* We need to know when we are doing this to prevent
* getting both the knlist lock and the kq lock since
@@ -1161,7 +1191,18 @@ findkn:
kqueue_expand(kq, fops, kev->ident, waitok);
KQ_LOCK(kq);
- if (kq->kq_knhashmask != 0) {
+
+ /*
+ * If possible, find an existing knote to use for this kevent.
+ */
+ if (kev->filter == EVFILT_PROC &&
+ (kev->flags & (EV_FLAG1 | EV_FLAG2)) != 0) {
+ /* This is an internal creation of a process tracking
+ * note. Don't attempt to coalesce this with an
+ * existing note.
+ */
+ ;
+ } else if (kq->kq_knhashmask != 0) {
struct klist *list;
list = &kq->kq_knhash[
@@ -1173,7 +1214,7 @@ findkn:
}
}
- /* knote is in the process of changing, wait for it to stablize. */
+ /* knote is in the process of changing, wait for it to stabilize. */
if (kn != NULL && (kn->kn_status & KN_INFLUX) == KN_INFLUX) {
KQ_GLOBAL_UNLOCK(&kq_global, haskqglobal);
if (filedesc_unlock) {
Modified: head/sys/sys/event.h
==============================================================================
--- head/sys/sys/event.h Thu Jan 28 20:21:15 2016 (r295011)
+++ head/sys/sys/event.h Thu Jan 28 20:24:15 2016 (r295012)
@@ -80,6 +80,7 @@ struct kevent {
#define EV_SYSFLAGS 0xF000 /* reserved by system */
#define EV_DROP 0x1000 /* note should be dropped */
#define EV_FLAG1 0x2000 /* filter-specific flag */
+#define EV_FLAG2 0x4000 /* filter-specific flag */
/* returned values */
#define EV_EOF 0x8000 /* EOF detected */
Modified: head/tests/sys/kqueue/common.h
==============================================================================
--- head/tests/sys/kqueue/common.h Thu Jan 28 20:21:15 2016 (r295011)
+++ head/tests/sys/kqueue/common.h Thu Jan 28 20:24:15 2016 (r295012)
@@ -46,6 +46,7 @@ int vnode_fd;
extern const char * kevent_to_str(struct kevent *);
struct kevent * kevent_get(int);
+struct kevent * kevent_get_timeout(int, int);
void kevent_cmp(struct kevent *, struct kevent *);
Modified: head/tests/sys/kqueue/main.c
==============================================================================
--- head/tests/sys/kqueue/main.c Thu Jan 28 20:21:15 2016 (r295011)
+++ head/tests/sys/kqueue/main.c Thu Jan 28 20:24:15 2016 (r295012)
@@ -69,6 +69,28 @@ kevent_get(int kqfd)
return (kev);
}
+/* Retrieve a single kevent, specifying a maximum time to wait for it. */
+struct kevent *
+kevent_get_timeout(int kqfd, int seconds)
+{
+ int nfds;
+ struct kevent *kev;
+ struct timespec timeout = {seconds, 0};
+
+ if ((kev = calloc(1, sizeof(*kev))) == NULL)
+ err(1, "out of memory");
+
+ nfds = kevent(kqfd, NULL, 0, kev, 1, &timeout);
+ if (nfds < 0) {
+ err(1, "kevent(2)");
+ } else if (nfds == 0) {
+ free(kev);
+ kev = NULL;
+ }
+
+ return (kev);
+}
+
char *
kevent_fflags_dump(struct kevent *kev)
{
@@ -82,25 +104,39 @@ kevent_fflags_dump(struct kevent *kev)
abort();
/* Not every filter has meaningful fflags */
- if (kev->filter != EVFILT_VNODE) {
- snprintf(buf, 1024, "fflags = %d", kev->fflags);
- return (buf);
- }
-
- snprintf(buf, 1024, "fflags = %d (", kev->fflags);
- KEVFFL_DUMP(NOTE_DELETE);
- KEVFFL_DUMP(NOTE_WRITE);
- KEVFFL_DUMP(NOTE_EXTEND);
+ if (kev->filter == EVFILT_PROC) {
+ snprintf(buf, 1024, "fflags = %x (", kev->fflags);
+ KEVFFL_DUMP(NOTE_EXIT);
+ KEVFFL_DUMP(NOTE_FORK);
+ KEVFFL_DUMP(NOTE_EXEC);
+ KEVFFL_DUMP(NOTE_CHILD);
+ KEVFFL_DUMP(NOTE_TRACKERR);
+ KEVFFL_DUMP(NOTE_TRACK);
+ buf[strlen(buf) - 1] = ')';
+ } else if (kev->filter == EVFILT_PROCDESC) {
+ snprintf(buf, 1024, "fflags = %x (", kev->fflags);
+ KEVFFL_DUMP(NOTE_EXIT);
+ KEVFFL_DUMP(NOTE_FORK);
+ KEVFFL_DUMP(NOTE_EXEC);
+ buf[strlen(buf) - 1] = ')';
+ } else if (kev->filter == EVFILT_VNODE) {
+ snprintf(buf, 1024, "fflags = %x (", kev->fflags);
+ KEVFFL_DUMP(NOTE_DELETE);
+ KEVFFL_DUMP(NOTE_WRITE);
+ KEVFFL_DUMP(NOTE_EXTEND);
#if HAVE_NOTE_TRUNCATE
- KEVFFL_DUMP(NOTE_TRUNCATE);
+ KEVFFL_DUMP(NOTE_TRUNCATE);
#endif
- KEVFFL_DUMP(NOTE_ATTRIB);
- KEVFFL_DUMP(NOTE_LINK);
- KEVFFL_DUMP(NOTE_RENAME);
+ KEVFFL_DUMP(NOTE_ATTRIB);
+ KEVFFL_DUMP(NOTE_LINK);
+ KEVFFL_DUMP(NOTE_RENAME);
#if HAVE_NOTE_REVOKE
- KEVFFL_DUMP(NOTE_REVOKE);
+ KEVFFL_DUMP(NOTE_REVOKE);
#endif
- buf[strlen(buf) - 1] = ')';
+ buf[strlen(buf) - 1] = ')';
+ } else {
+ snprintf(buf, 1024, "fflags = %x", kev->fflags);
+ }
return (buf);
}
@@ -260,6 +296,15 @@ main(int argc, char **argv)
argc--;
}
+ /*
+ * Some tests fork. If output is fully buffered,
+ * the children inherit some buffered data and flush
+ * it when they exit, causing some data to be printed twice.
+ * Use line buffering to avoid this problem.
+ */
+ setlinebuf(stdout);
+ setlinebuf(stderr);
+
test_kqueue();
test_kqueue_close();
Modified: head/tests/sys/kqueue/proc.c
==============================================================================
--- head/tests/sys/kqueue/proc.c Thu Jan 28 20:21:15 2016 (r295011)
+++ head/tests/sys/kqueue/proc.c Thu Jan 28 20:24:15 2016 (r295012)
@@ -74,6 +74,172 @@ add_and_delete(void)
}
+static void
+proc_track(int sleep_time)
+{
+ char test_id[64];
+ struct kevent kev;
+ pid_t pid;
+ int pipe_fd[2];
+ ssize_t result;
+
+ snprintf(test_id, sizeof(test_id),
+ "kevent(EVFILT_PROC, NOTE_TRACK); sleep %d", sleep_time);
+ test_begin(test_id);
+ test_no_kevents();
+
+ if (pipe(pipe_fd)) {
+ err(1, "pipe (parent) failed! (%s() at %s:%d)",
+ __func__, __FILE__, __LINE__);
+ }
+
+ /* Create a child to track. */
+ pid = fork();
+ if (pid == 0) { /* Child */
+ pid_t grandchild = -1;
+
+ /*
+ * Give the parent a chance to start tracking us.
+ */
+ result = read(pipe_fd[1], test_id, 1);
+ if (result != 1) {
+ err(1, "read from pipe in child failed! (ret %zd) (%s() at %s:%d)",
+ result, __func__, __FILE__, __LINE__);
+ }
+
+ /*
+ * Spawn a grandchild that will immediately exit. If the kernel has bug
+ * 180385, the parent will see a kevent with both NOTE_CHILD and
+ * NOTE_EXIT. If that bug is fixed, it will see two separate kevents
+ * for those notes. Note that this triggers the conditions for
+ * detecting the bug quite reliably on a 1 CPU system (or if the test
+ * process is restricted to a single CPU), but may not trigger it on a
+ * multi-CPU system.
+ */
+ grandchild = fork();
+ if (grandchild == 0) { /* Grandchild */
+ if (sleep_time) sleep(sleep_time);
+ exit(1);
+ } else if (grandchild == -1) { /* Error */
+ err(1, "fork (grandchild) failed! (%s() at %s:%d)",
+ __func__, __FILE__, __LINE__);
+ } else { /* Child (Grandchild Parent) */
+ printf(" -- grandchild created (pid %d)\n", (int) grandchild);
+ }
+ if (sleep_time) sleep(sleep_time);
+ exit(0);
+ } else if (pid == -1) { /* Error */
+ err(1, "fork (child) failed! (%s() at %s:%d)",
+ __func__, __FILE__, __LINE__);
+ }
+
+ printf(" -- child created (pid %d)\n", (int) pid);
+
+ kevent_add(kqfd, &kev, pid, EVFILT_PROC, EV_ADD | EV_ENABLE,
+ NOTE_TRACK | NOTE_EXEC | NOTE_EXIT | NOTE_FORK,
+ 0, NULL);
+
+ printf(" -- tracking child (pid %d)\n", (int) pid);
+
+ /* Now that we're tracking the child, tell it to proceed. */
+ result = write(pipe_fd[0], test_id, 1);
+ if (result != 1) {
+ err(1, "write to pipe in parent failed! (ret %zd) (%s() at %s:%d)",
+ result, __func__, __FILE__, __LINE__);
+ }
+
+ /*
+ * Several events should be received:
+ * - NOTE_FORK (from child)
+ * - NOTE_CHILD (from grandchild)
+ * - NOTE_EXIT (from grandchild)
+ * - NOTE_EXIT (from child)
+ *
+ * The NOTE_FORK and NOTE_EXIT from the child could be combined into a
+ * single event, but the NOTE_CHILD and NOTE_EXIT from the grandchild must
+ * not be combined.
+ *
+ * The loop continues until no events are received within a 5 second
+ * period, at which point it is assumed that no more will be coming. The
+ * loop is deliberately designed to attempt to get events even after all
+ * the expected ones are received in case some spurious events are
+ * generated as well as the expected ones.
+ */
+ {
+ int child_exit = 0;
+ int child_fork = 0;
+ int gchild_exit = 0;
+ int gchild_note = 0;
+ pid_t gchild_pid = -1;
+ int done = 0;
+
+ while (!done)
+ {
+ int handled = 0;
+ struct kevent *kevp;
+
+ kevp = kevent_get_timeout(kqfd, 5);
+ if (kevp == NULL) {
+ done = 1;
+ } else {
+ printf(" -- Received kevent: %s\n", kevent_to_str(kevp));
+
+ if ((kevp->fflags & NOTE_CHILD) && (kevp->fflags & NOTE_EXIT)) {
+ errx(1, "NOTE_CHILD and NOTE_EXIT in same kevent: %s", kevent_to_str(kevp));
+ }
+
+ if (kevp->fflags & NOTE_CHILD) {
+ if (kevp->data == pid) {
+ if (!gchild_note) {
+ ++gchild_note;
+ gchild_pid = kevp->ident;
+ ++handled;
+ } else {
+ errx(1, "Spurious NOTE_CHILD: %s", kevent_to_str(kevp));
+ }
+ }
+ }
+
+ if (kevp->fflags & NOTE_EXIT) {
+ if ((kevp->ident == pid) && (!child_exit)) {
+ ++child_exit;
+ ++handled;
+ } else if ((kevp->ident == gchild_pid) && (!gchild_exit)) {
+ ++gchild_exit;
+ ++handled;
+ } else {
+ errx(1, "Spurious NOTE_EXIT: %s", kevent_to_str(kevp));
+ }
+ }
+
+ if (kevp->fflags & NOTE_FORK) {
+ if ((kevp->ident == pid) && (!child_fork)) {
+ ++child_fork;
+ ++handled;
+ } else {
+ errx(1, "Spurious NOTE_FORK: %s", kevent_to_str(kevp));
+ }
+ }
+
+ if (!handled) {
+ errx(1, "Spurious kevent: %s", kevent_to_str(kevp));
+ }
+
+ free(kevp);
+ }
+ }
+
+ /* Make sure all expected events were received. */
+ if (child_exit && child_fork && gchild_exit && gchild_note) {
+ printf(" -- Received all expected events.\n");
+ } else {
+ errx(1, "Did not receive all expected events.");
+ }
+ }
+
+ success();
+}
+
#ifdef TODO
static void
event_trigger(void)
@@ -236,6 +402,8 @@ test_evfilt_proc()
signal(SIGUSR1, sig_handler);
add_and_delete();
+ proc_track(0); /* Run without sleeping before children exit. */
+ proc_track(1); /* Sleep a bit in the children before exiting. */
#if TODO
event_trigger();
More information about the svn-src-head
mailing list