poll()-ing a pipe descriptor, watching for POLLHUP

Kostik Belousov kostikbel at gmail.com
Wed Jun 3 15:07:15 UTC 2009


On Wed, Jun 03, 2009 at 05:30:51PM +0300, Kostik Belousov wrote:
> On Wed, Jun 03, 2009 at 04:10:34PM +0300, Vlad Galu wrote:
> > Hm, I was having an issue with an internal piece of software, but
> > never checked what kind of pipe caused the problem. Turns out it was a
> > FIFO, and I got bitten by the same bug described here:
> > http://lists.freebsd.org/pipermail/freebsd-bugs/2006-March/017591.html
> > 
> > The problem is that the reader process isn't notified when the writer
> > process exits or closes the FIFO fd...
> 
> So you did found the relevant PR with long audit trail and patches
> attached. You obviously should contact the author of the patches,
> Oliver Fromme, who is FreeBSD committer for some time (CCed).
> 
> I agree that the thing shall be fixed finally. Skimming over the
> patches in kern/94772, I have some doubts about removal of
> POLLINIGNEOF flag. The reason is that we are generally do not
> remove exposed user interfaces.

I forward-ported Bruce' patch to the CURRENT. It passes the tests
from tools/regression/fifo and a test from kern/94772.
For my liking, I did not removed POLLINIGNEOF.

diff --git a/sys/fs/fifofs/fifo_vnops.c b/sys/fs/fifofs/fifo_vnops.c
index 66963bc..7e279ca 100644
--- a/sys/fs/fifofs/fifo_vnops.c
+++ b/sys/fs/fifofs/fifo_vnops.c
@@ -226,11 +226,47 @@ fail1:
 	if (ap->a_mode & FREAD) {
 		fip->fi_readers++;
 		if (fip->fi_readers == 1) {
+			SOCKBUF_LOCK(&fip->fi_readsock->so_rcv);
+			if (fip->fi_writers > 0)
+				fip->fi_readsock->so_rcv.sb_state |=
+				    SBS_COULDRCV;
+			else
+				/*
+				 * Sloppy?  Might be necessary to clear it
+				 * in all the places where fi_readers is
+				 * decremented to 0.  I think only writers
+				 * polling for input could be confused by
+				 * having it not set, and there is a problem
+				 * with these anyway now that we have
+				 * reversed the sense of the flag -- they
+				 * now block (?), but shouldn't.
+				 */
+				fip->fi_readsock->so_rcv.sb_state &=
+				    ~SBS_COULDRCV;
+			SOCKBUF_UNLOCK(&fip->fi_readsock->so_rcv);
 			SOCKBUF_LOCK(&fip->fi_writesock->so_snd);
 			fip->fi_writesock->so_snd.sb_state &= ~SBS_CANTSENDMORE;
 			SOCKBUF_UNLOCK(&fip->fi_writesock->so_snd);
 			if (fip->fi_writers > 0) {
 				wakeup(&fip->fi_writers);
+			SOCKBUF_LOCK(&fip->fi_readsock->so_rcv);
+			if (fip->fi_writers > 0)
+				fip->fi_readsock->so_rcv.sb_state |=
+				    SBS_COULDRCV;
+			else
+				/*
+				 * Sloppy?  Might be necessary to clear it
+				 * in all the places where fi_readers is
+				 * decremented to 0.  I think only writers
+				 * polling for input could be confused by
+				 * having it not set, and there is a problem
+				 * with these anyway now that we have
+				 * reversed the sense of the flag -- they
+				 * now block (?), but shouldn't.
+				 */
+				fip->fi_readsock->so_rcv.sb_state &=
+				    ~SBS_COULDRCV;
+			SOCKBUF_UNLOCK(&fip->fi_readsock->so_rcv);
 				sowwakeup(fip->fi_writesock);
 			}
 		}
@@ -244,6 +280,7 @@ fail1:
 		if (fip->fi_writers == 1) {
 			SOCKBUF_LOCK(&fip->fi_readsock->so_rcv);
 			fip->fi_readsock->so_rcv.sb_state &= ~SBS_CANTRCVMORE;
+			fip->fi_readsock->so_rcv.sb_state |= SBS_COULDRCV;
 			SOCKBUF_UNLOCK(&fip->fi_readsock->so_rcv);
 			if (fip->fi_readers > 0) {
 				wakeup(&fip->fi_readers);
@@ -672,28 +709,10 @@ fifo_poll_f(struct file *fp, int events, struct ucred *cred, struct thread *td)
 	levents = events &
 	    (POLLIN | POLLINIGNEOF | POLLPRI | POLLRDNORM | POLLRDBAND);
 	if ((fp->f_flag & FREAD) && levents) {
-		/*
-		 * If POLLIN or POLLRDNORM is requested and POLLINIGNEOF is
-		 * not, then convert the first two to the last one.  This
-		 * tells the socket poll function to ignore EOF so that we
-		 * block if there is no writer (and no data).  Callers can
-		 * set POLLINIGNEOF to get non-blocking behavior.
-		 */
-		if (levents & (POLLIN | POLLRDNORM) &&
-		    !(levents & POLLINIGNEOF)) {
-			levents &= ~(POLLIN | POLLRDNORM);
-			levents |= POLLINIGNEOF;
-		}
-
 		filetmp.f_data = fip->fi_readsock;
 		filetmp.f_cred = cred;
-		revents |= soo_poll(&filetmp, levents, cred, td);
-
-		/* Reverse the above conversion. */
-		if ((revents & POLLINIGNEOF) && !(events & POLLINIGNEOF)) {
-			revents |= (events & (POLLIN | POLLRDNORM));
-			revents &= ~POLLINIGNEOF;
-		}
+		revents |= soo_poll(&filetmp, levents | (events & POLLPOLL),
+		    cred, td);
 	}
 	levents = events & (POLLOUT | POLLWRNORM | POLLWRBAND);
 	if ((fp->f_flag & FWRITE) && levents) {
diff --git a/sys/kern/sys_generic.c b/sys/kern/sys_generic.c
index 616c5b7..98ccc9e 100644
--- a/sys/kern/sys_generic.c
+++ b/sys/kern/sys_generic.c
@@ -1226,8 +1226,8 @@ pollscan(td, fds, nfd)
 				 * POLLERR if appropriate.
 				 */
 				selfdalloc(td, fds);
-				fds->revents = fo_poll(fp, fds->events,
-				    td->td_ucred, td);
+				fds->revents = fo_poll(fp,
+				    fds->events | POLLPOLL, td->td_ucred, td);
 				if (fds->revents != 0)
 					n++;
 			}
diff --git a/sys/kern/uipc_socket.c b/sys/kern/uipc_socket.c
index 7341d3f..a13d648 100644
--- a/sys/kern/uipc_socket.c
+++ b/sys/kern/uipc_socket.c
@@ -2706,6 +2706,42 @@ sopoll_generic(struct socket *so, int events, struct ucred *active_cred,
 		if (sowriteable(so))
 			revents |= events & (POLLOUT | POLLWRNORM);
 
+	/*
+	 * SBS_CANTRCVMORE (which is checked by soreadable()) normally
+	 * implies EOF (and thus readable) and hung up, but for
+	 * compatibility with other systems and to obtain behavior that
+	 * is otherwise unavailable we make the case of poll() on a fifo
+	 * that has never had any writers during the lifetime of any
+	 * current reader special: then we pretend that the fifo is
+	 * unreadable unless it contains non-null data, and that it is
+	 * not hung up.  The POLLPOLL flag is set by poll() to identify
+	 * poll() here, and the SBS_COULDRCV flag is set by the fifo
+	 * layer to indicate a fifo that is not in the special state.
+	 */
+	if (so->so_rcv.sb_state & SBS_CANTRCVMORE) {
+		if (!(events & POLLPOLL) || so->so_rcv.sb_state & SBS_COULDRCV)
+			revents |= POLLHUP;	/* finish settings */
+		else if (!(so->so_rcv.sb_cc >= so->so_rcv.sb_lowat ||
+		    !TAILQ_EMPTY(&so->so_comp) || so->so_error))
+			revents &= ~(POLLIN | POLLRDNORM); /* undo settings */
+	}
+
+	/*
+	 * Testing of hangup for writers could be optimized by combining
+	 * it with testing for writeability, but we keep the test separate
+	 * and with the same organization as the test for readers for
+	 * clarity.  Note that writeable implies not hung up, so if POLLHUP
+	 * is set here then (POLLOUT | POLLWRNORM) is not set above, as
+	 * standards require.  Less obviously, if POLLHUP was set above for
+	 * a reader, then the output flags cannot have been set above for
+	 * a writer.  Even less obviously, we cannot end up with both
+	 * POLLHUP output flags set in revents after ORing the revents for
+	 * the read and write socket in fifo_poll().
+	 */
+	if (so->so_snd.sb_state & SBS_CANTSENDMORE)
+		revents |= POLLHUP;
+
+
 	if (events & (POLLPRI | POLLRDBAND))
 		if (so->so_oobmark || (so->so_rcv.sb_state & SBS_RCVATMARK))
 			revents |= events & (POLLPRI | POLLRDBAND);
diff --git a/sys/sys/poll.h b/sys/sys/poll.h
index c955f32..cfd5f01 100644
--- a/sys/sys/poll.h
+++ b/sys/sys/poll.h
@@ -71,6 +71,10 @@ struct pollfd {
 #define	POLLINIGNEOF	0x2000		/* like POLLIN, except ignore EOF */
 #endif
 
+#ifdef _KERNEL
+#define	POLLPOLL	0x8000		/* system call is actually poll() */
+#endif
+
 /*
  * These events are set if they occur regardless of whether they were
  * requested.
diff --git a/sys/sys/sockbuf.h b/sys/sys/sockbuf.h
index b8e6699..0da4fa1 100644
--- a/sys/sys/sockbuf.h
+++ b/sys/sys/sockbuf.h
@@ -56,6 +56,7 @@
 #define	SBS_CANTSENDMORE	0x0010	/* can't send more data to peer */
 #define	SBS_CANTRCVMORE		0x0020	/* can't receive more data from peer */
 #define	SBS_RCVATMARK		0x0040	/* at mark on input */
+#define	SBS_COULDRCV		0x0080	/* could receive previously (or now) */
 
 struct mbuf;
 struct sockaddr;
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 195 bytes
Desc: not available
Url : http://lists.freebsd.org/pipermail/freebsd-stable/attachments/20090603/3036444f/attachment.pgp


More information about the freebsd-stable mailing list