poll()-ing a pipe descriptor, watching for POLLHUP
Kostik Belousov
kostikbel at gmail.com
Wed Jun 3 15:07:15 UTC 2009
On Wed, Jun 03, 2009 at 05:30:51PM +0300, Kostik Belousov wrote:
> On Wed, Jun 03, 2009 at 04:10:34PM +0300, Vlad Galu wrote:
> > Hm, I was having an issue with an internal piece of software, but
> > never checked what kind of pipe caused the problem. Turns out it was a
> > FIFO, and I got bitten by the same bug described here:
> > http://lists.freebsd.org/pipermail/freebsd-bugs/2006-March/017591.html
> >
> > The problem is that the reader process isn't notified when the writer
> > process exits or closes the FIFO fd...
>
> So you did found the relevant PR with long audit trail and patches
> attached. You obviously should contact the author of the patches,
> Oliver Fromme, who is FreeBSD committer for some time (CCed).
>
> I agree that the thing shall be fixed finally. Skimming over the
> patches in kern/94772, I have some doubts about removal of
> POLLINIGNEOF flag. The reason is that we are generally do not
> remove exposed user interfaces.
I forward-ported Bruce' patch to the CURRENT. It passes the tests
from tools/regression/fifo and a test from kern/94772.
For my liking, I did not removed POLLINIGNEOF.
diff --git a/sys/fs/fifofs/fifo_vnops.c b/sys/fs/fifofs/fifo_vnops.c
index 66963bc..7e279ca 100644
--- a/sys/fs/fifofs/fifo_vnops.c
+++ b/sys/fs/fifofs/fifo_vnops.c
@@ -226,11 +226,47 @@ fail1:
if (ap->a_mode & FREAD) {
fip->fi_readers++;
if (fip->fi_readers == 1) {
+ SOCKBUF_LOCK(&fip->fi_readsock->so_rcv);
+ if (fip->fi_writers > 0)
+ fip->fi_readsock->so_rcv.sb_state |=
+ SBS_COULDRCV;
+ else
+ /*
+ * Sloppy? Might be necessary to clear it
+ * in all the places where fi_readers is
+ * decremented to 0. I think only writers
+ * polling for input could be confused by
+ * having it not set, and there is a problem
+ * with these anyway now that we have
+ * reversed the sense of the flag -- they
+ * now block (?), but shouldn't.
+ */
+ fip->fi_readsock->so_rcv.sb_state &=
+ ~SBS_COULDRCV;
+ SOCKBUF_UNLOCK(&fip->fi_readsock->so_rcv);
SOCKBUF_LOCK(&fip->fi_writesock->so_snd);
fip->fi_writesock->so_snd.sb_state &= ~SBS_CANTSENDMORE;
SOCKBUF_UNLOCK(&fip->fi_writesock->so_snd);
if (fip->fi_writers > 0) {
wakeup(&fip->fi_writers);
+ SOCKBUF_LOCK(&fip->fi_readsock->so_rcv);
+ if (fip->fi_writers > 0)
+ fip->fi_readsock->so_rcv.sb_state |=
+ SBS_COULDRCV;
+ else
+ /*
+ * Sloppy? Might be necessary to clear it
+ * in all the places where fi_readers is
+ * decremented to 0. I think only writers
+ * polling for input could be confused by
+ * having it not set, and there is a problem
+ * with these anyway now that we have
+ * reversed the sense of the flag -- they
+ * now block (?), but shouldn't.
+ */
+ fip->fi_readsock->so_rcv.sb_state &=
+ ~SBS_COULDRCV;
+ SOCKBUF_UNLOCK(&fip->fi_readsock->so_rcv);
sowwakeup(fip->fi_writesock);
}
}
@@ -244,6 +280,7 @@ fail1:
if (fip->fi_writers == 1) {
SOCKBUF_LOCK(&fip->fi_readsock->so_rcv);
fip->fi_readsock->so_rcv.sb_state &= ~SBS_CANTRCVMORE;
+ fip->fi_readsock->so_rcv.sb_state |= SBS_COULDRCV;
SOCKBUF_UNLOCK(&fip->fi_readsock->so_rcv);
if (fip->fi_readers > 0) {
wakeup(&fip->fi_readers);
@@ -672,28 +709,10 @@ fifo_poll_f(struct file *fp, int events, struct ucred *cred, struct thread *td)
levents = events &
(POLLIN | POLLINIGNEOF | POLLPRI | POLLRDNORM | POLLRDBAND);
if ((fp->f_flag & FREAD) && levents) {
- /*
- * If POLLIN or POLLRDNORM is requested and POLLINIGNEOF is
- * not, then convert the first two to the last one. This
- * tells the socket poll function to ignore EOF so that we
- * block if there is no writer (and no data). Callers can
- * set POLLINIGNEOF to get non-blocking behavior.
- */
- if (levents & (POLLIN | POLLRDNORM) &&
- !(levents & POLLINIGNEOF)) {
- levents &= ~(POLLIN | POLLRDNORM);
- levents |= POLLINIGNEOF;
- }
-
filetmp.f_data = fip->fi_readsock;
filetmp.f_cred = cred;
- revents |= soo_poll(&filetmp, levents, cred, td);
-
- /* Reverse the above conversion. */
- if ((revents & POLLINIGNEOF) && !(events & POLLINIGNEOF)) {
- revents |= (events & (POLLIN | POLLRDNORM));
- revents &= ~POLLINIGNEOF;
- }
+ revents |= soo_poll(&filetmp, levents | (events & POLLPOLL),
+ cred, td);
}
levents = events & (POLLOUT | POLLWRNORM | POLLWRBAND);
if ((fp->f_flag & FWRITE) && levents) {
diff --git a/sys/kern/sys_generic.c b/sys/kern/sys_generic.c
index 616c5b7..98ccc9e 100644
--- a/sys/kern/sys_generic.c
+++ b/sys/kern/sys_generic.c
@@ -1226,8 +1226,8 @@ pollscan(td, fds, nfd)
* POLLERR if appropriate.
*/
selfdalloc(td, fds);
- fds->revents = fo_poll(fp, fds->events,
- td->td_ucred, td);
+ fds->revents = fo_poll(fp,
+ fds->events | POLLPOLL, td->td_ucred, td);
if (fds->revents != 0)
n++;
}
diff --git a/sys/kern/uipc_socket.c b/sys/kern/uipc_socket.c
index 7341d3f..a13d648 100644
--- a/sys/kern/uipc_socket.c
+++ b/sys/kern/uipc_socket.c
@@ -2706,6 +2706,42 @@ sopoll_generic(struct socket *so, int events, struct ucred *active_cred,
if (sowriteable(so))
revents |= events & (POLLOUT | POLLWRNORM);
+ /*
+ * SBS_CANTRCVMORE (which is checked by soreadable()) normally
+ * implies EOF (and thus readable) and hung up, but for
+ * compatibility with other systems and to obtain behavior that
+ * is otherwise unavailable we make the case of poll() on a fifo
+ * that has never had any writers during the lifetime of any
+ * current reader special: then we pretend that the fifo is
+ * unreadable unless it contains non-null data, and that it is
+ * not hung up. The POLLPOLL flag is set by poll() to identify
+ * poll() here, and the SBS_COULDRCV flag is set by the fifo
+ * layer to indicate a fifo that is not in the special state.
+ */
+ if (so->so_rcv.sb_state & SBS_CANTRCVMORE) {
+ if (!(events & POLLPOLL) || so->so_rcv.sb_state & SBS_COULDRCV)
+ revents |= POLLHUP; /* finish settings */
+ else if (!(so->so_rcv.sb_cc >= so->so_rcv.sb_lowat ||
+ !TAILQ_EMPTY(&so->so_comp) || so->so_error))
+ revents &= ~(POLLIN | POLLRDNORM); /* undo settings */
+ }
+
+ /*
+ * Testing of hangup for writers could be optimized by combining
+ * it with testing for writeability, but we keep the test separate
+ * and with the same organization as the test for readers for
+ * clarity. Note that writeable implies not hung up, so if POLLHUP
+ * is set here then (POLLOUT | POLLWRNORM) is not set above, as
+ * standards require. Less obviously, if POLLHUP was set above for
+ * a reader, then the output flags cannot have been set above for
+ * a writer. Even less obviously, we cannot end up with both
+ * POLLHUP output flags set in revents after ORing the revents for
+ * the read and write socket in fifo_poll().
+ */
+ if (so->so_snd.sb_state & SBS_CANTSENDMORE)
+ revents |= POLLHUP;
+
+
if (events & (POLLPRI | POLLRDBAND))
if (so->so_oobmark || (so->so_rcv.sb_state & SBS_RCVATMARK))
revents |= events & (POLLPRI | POLLRDBAND);
diff --git a/sys/sys/poll.h b/sys/sys/poll.h
index c955f32..cfd5f01 100644
--- a/sys/sys/poll.h
+++ b/sys/sys/poll.h
@@ -71,6 +71,10 @@ struct pollfd {
#define POLLINIGNEOF 0x2000 /* like POLLIN, except ignore EOF */
#endif
+#ifdef _KERNEL
+#define POLLPOLL 0x8000 /* system call is actually poll() */
+#endif
+
/*
* These events are set if they occur regardless of whether they were
* requested.
diff --git a/sys/sys/sockbuf.h b/sys/sys/sockbuf.h
index b8e6699..0da4fa1 100644
--- a/sys/sys/sockbuf.h
+++ b/sys/sys/sockbuf.h
@@ -56,6 +56,7 @@
#define SBS_CANTSENDMORE 0x0010 /* can't send more data to peer */
#define SBS_CANTRCVMORE 0x0020 /* can't receive more data from peer */
#define SBS_RCVATMARK 0x0040 /* at mark on input */
+#define SBS_COULDRCV 0x0080 /* could receive previously (or now) */
struct mbuf;
struct sockaddr;
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 195 bytes
Desc: not available
Url : http://lists.freebsd.org/pipermail/freebsd-stable/attachments/20090603/3036444f/attachment.pgp
More information about the freebsd-stable
mailing list