standards/116221: SUS issue -- FreeBSD has not flag WNOWAIT for wait*() calls

Jukka Ukkonen jau at
Sun Sep 9 00:30:08 PDT 2007

>Number:         116221
>Category:       standards
>Synopsis:       SUS issue -- FreeBSD has not flag WNOWAIT for wait*() calls
>Confidential:   no
>Severity:       non-critical
>Priority:       medium
>Responsible:    freebsd-standards
>State:          open
>Class:          change-request
>Submitter-Id:   current-users
>Arrival-Date:   Sun Sep 09 07:30:07 GMT 2007
>Originator:     Jukka Ukkonen
>Release:        6.2-STABLE
FreeBSD mjolnir 6.2-STABLE FreeBSD 6.2-STABLE #0: Sun Sep  2 12:24:49 EET DST 2007     root at mjolnir:/usr/obj/usr/src/sys/Mjolnir  i386
This is a complete rewrite of kern/109946 which was initially and in error
categorized as a kern issue.
Obviously this is much more a standards or compatibility issue.

FreeBSD (and other BSDs) has had no WNOWAIT flag for the wait*() functions
to tell the kernel not to remove the proc structure from which the exit status
is returned.
Sometimes one has only a limited set of functions which are aware of a certain
child process. In threaded programs this is even more probable than when using
traditional single threaded programs.
To avoid a situation in which one part of a program removes an unknown proc
structure before another part, which expects to be able to call wait*()
to collect that exit status, has had the chance to do so, it is worthwhile
to have a flag allowing the program to first peek inside the structure
without deleting the zombie from the process table.
One can always call e.g. waitpid() to remove exactly one known zombie
when one finds out that a known child process has terminated. If one OTOH
finds a zombie whose PID is unknown to the caller, one can always just
give up waiting instead of calling waitpid() with the recently found unknown
PID.

Removing a zombie before the functions knowing of the existence of a "hidden"
child have a chance to wait for it makes other parts within a program
unable to confirm the fate of such a child process.

WNOWAIT is a SUS issue anyhow, and e.g. Sun/Solaris has had support for it
for quite a long time.
This is very quick and simple to change to become more compatible with other
UNIX style systems.

Here are minimal tools to test whether WNOWAIT works on your system.

#include <sys/types.h>
#include <sys/time.h>
#include <sys/resource.h>
#include <sys/wait.h>

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

/*
 * Fallback definitions of the RUSAGE_* selectors, used only when the
 * system headers included above do not already provide them.  The guards
 * prevent redefining names that <sys/resource.h> normally supplies.
 */
#ifndef RUSAGE_SELF
#  define RUSAGE_SELF	    0
#endif

#ifndef RUSAGE_CHILDREN
#  define RUSAGE_CHILDREN   (-1)
#endif

/*
 * dump_rusage -- write the fields of a struct rusage to a stream, one
 * labelled field per line, preceded by a heading line.
 *
 *  file    stream to write to
 *  rusage  resource usage record to print
 *  header  heading text; NULL or "" selects "Resource utilization:".
 *	    A ':' is appended when the heading does not already end in one.
 *
 * Returns 0 on success.  Returns -1 with errno set to EFAULT when either
 * file or rusage is NULL.
 */
int
dump_rusage (FILE *file, const struct rusage *rusage, const char *header)
{
    if (! file || ! rusage) {
	errno = EFAULT;

	return (-1);
    }

    if (! header || ! *header)
	header = "Resource utilization:";

    /* header is non-empty here, so strlen (header) - 1 is a valid index. */
    fprintf (file, "%s%s\n",
	     header,
	     ((header[strlen (header) - 1] == ':') ? "" : ":"));

    /*
     * The timeval members have system-dependent integer types; cast them
     * so the arguments match the %lu conversions exactly.
     */
    fprintf (file, "\tuser time:\t\t\t%lu.%.6lu\n",
	     (unsigned long) rusage->ru_utime.tv_sec,
	     (unsigned long) rusage->ru_utime.tv_usec);

    fprintf (file, "\tsystem time:\t\t\t%lu.%.6lu\n",
	     (unsigned long) rusage->ru_stime.tv_sec,
	     (unsigned long) rusage->ru_stime.tv_usec);

    fprintf (file, "\tmax resident set size:\t\t%ld\n",
	     (long) rusage->ru_maxrss);
    fprintf (file, "\tshared text memory size:\t%ld\n",
	     (long) rusage->ru_ixrss);
    fprintf (file, "\tunshared data size:\t\t%ld\n",
	     (long) rusage->ru_idrss);
    fprintf (file, "\tunshared stack size:\t\t%ld\n",
	     (long) rusage->ru_isrss);

    fprintf (file, "\tpage reclaims:\t\t\t%ld\n",
	     (long) rusage->ru_minflt);
    fprintf (file, "\tpage faults:\t\t\t%ld\n",
	     (long) rusage->ru_majflt);
    fprintf (file, "\tswaps:\t\t\t\t%ld\n",
	     (long) rusage->ru_nswap);
    fprintf (file, "\tblock input operations:\t\t%ld\n",
	     (long) rusage->ru_inblock);
    fprintf (file, "\tblock output operations:\t%ld\n",
	     (long) rusage->ru_oublock);

    fprintf (file, "\tmessages sent:\t\t\t%ld\n",
	     (long) rusage->ru_msgsnd);
    fprintf (file, "\tmessages received:\t\t%ld\n",
	     (long) rusage->ru_msgrcv);

    fprintf (file, "\tsignals received:\t\t%ld\n",
	     (long) rusage->ru_nsignals);
    fprintf (file, "\tvoluntary context switches:\t%ld\n",
	     (long) rusage->ru_nvcsw);
    fprintf (file, "\tinvoluntary context switches:\t%ld\n",
	     (long) rusage->ru_nivcsw);

    return (0);
}

/*
 * Let the test program build on systems whose <sys/wait.h> lacks WNOWAIT.
 * With the flag defined as 0 the first wait4() call in this program is an
 * ordinary wait, so the build-time warning marks the test as meaningless
 * on such a system.
 */
#ifndef	WNOWAIT
#  warning WNOWAIT undefined and will be defined as 0.
#  define WNOWAIT   0
#endif

main (ac, av)
    int	    ac;
    char    *av[];
    pid_t   child;
    pid_t   pid;
    int	    status;
    struct rusage   res;

    child = fork ();

    if (child < 0) {
	perror ("fork()");
	exit (-1);

    if (! child) {
	_exit (0);

    pid = wait4 (-1, &status, WNOWAIT, &res);

    if (pid < 0) {
	perror ("wait4()");
	pid = -1;

    if (pid != child) {
	fprintf (stderr, "wait4(): pid != child\n");

    printf ("PID = %d ...\n", (int) pid);
    dump_rusage (stdout, &res, NULL);

    pid = wait4 (child, &status, 0, &res);

    if (pid < 0) {
	perror ("wait4()");
    else {
	printf ("PID = %d ...\n", (int) pid);
	dump_rusage (stdout, &res, NULL);

    pid = wait4 (child, &status, 0, &res);

    if (pid < 0) {
	perror ("wait4()");
    else {
	printf ("PID = %d ...\n", (int) pid);
	dump_rusage (stdout, &res, NULL);

    return (0);

Apply the attached patch.

I have been running 6-STABLE with this modification since early 2007 with
no ill effects at all.

Patch attached with submission follows:

--- /usr/src/sys/sys/wait.h.orig	2005-01-06 00:19:44.000000000 +0200
+++ /usr/src/sys/sys/wait.h	2007-09-09 05:32:34.000000000 +0300
@@ -78,7 +78,9 @@
 #define	WNOHANG		1	/* Don't hang in wait. */
 #define	WUNTRACED	2	/* Tell about stopped, untraced children. */
+#define	WSTOPPED	WUNTRACED   /* SUS compatibility */
 #define	WCONTINUED	4	/* Report a job control continued process. */
+#define	WNOWAIT		8	/* Poll only. Don't delete the proc entry. */
 #define	WLINUXCLONE 0x80000000	/* Wait for kthread spawned from linux_clone. */
--- /usr/src/sys/kern/kern_exit.c.orig	2007-05-17 16:52:37.000000000 +0300
+++ /usr/src/sys/kern/kern_exit.c	2007-09-09 05:32:34.000000000 +0300
@@ -584,7 +584,7 @@
 		pid = -q->p_pgid;
 		return (EINVAL);
 	if (q->p_flag & P_STATCHILD) {
@@ -644,11 +644,32 @@
 				calcru(p, &rusage->ru_utime, &rusage->ru_stime);
+			if (options & WNOWAIT) {
+				/*
+				 *  SUS compatibility.
+				 *
+				 *  We poll only returning the status.
+				 *  We do not wish to release the proc
+				 *  struct just yet.
+				 *  ==> If another thread created this
+				 *  process, it is sometimes better to
+				 *  leave this one as is for now and let
+				 *  the other thread reap the remnants
+				 *  of the child instead of automatically
+				 *  destroying the proc entry and making
+				 *  it impossible for the other thread to
+				 *  wait for its own child process.
+				 */
+				sx_xunlock(&proctree_lock);
+				return (0);
+			}
 			 * If we got the child via a ptrace 'attach',
 			 * we need to give it back to the old parent.
 			if (p->p_oppid && (t = pfind(p->p_oppid)) != NULL) {
 				p->p_oppid = 0;


