svn commit: r192359 - in user/kmacy/releng_7_2_fcs/sys: kern netncp
netsmb sys
Kip Macy
kmacy at FreeBSD.org
Tue May 19 00:02:59 UTC 2009
Author: kmacy
Date: Tue May 19 00:02:58 2009
New Revision: 192359
URL: http://svn.freebsd.org/changeset/base/192359
Log:
merge 174647
Refactor select to reduce contention and hide internal implementation
details from consumers.
- Track individual selectors on a per-descriptor basis such that there
are no longer collisions and after sleeping for events only those
descriptors which triggered events must be rescanned.
- Protect the selinfo (per descriptor) structure with a mtx pool mutex.
mtx pool mutexes were chosen to preserve API compatibility with
existing code which does nothing but bzero() to set up selinfo
structures.
- Use a per-thread wait channel rather than a global wait channel.
- Hide select implementation details in a seltd structure which is
opaque to the rest of the kernel.
- Provide a 'selsocket' interface for those kernel consumers who wish to
select on a socket when they have no fd so they no longer have to
be aware of select implementation details.
Tested by: kris
Reviewed on: arch
Modified:
user/kmacy/releng_7_2_fcs/sys/kern/kern_event.c
user/kmacy/releng_7_2_fcs/sys/kern/kern_thread.c
user/kmacy/releng_7_2_fcs/sys/kern/sys_generic.c
user/kmacy/releng_7_2_fcs/sys/kern/sys_pipe.c
user/kmacy/releng_7_2_fcs/sys/kern/uipc_sockbuf.c
user/kmacy/releng_7_2_fcs/sys/netncp/ncp_rq.c
user/kmacy/releng_7_2_fcs/sys/netncp/ncp_sock.c
user/kmacy/releng_7_2_fcs/sys/netncp/ncp_sock.h
user/kmacy/releng_7_2_fcs/sys/netsmb/smb_trantcp.c
user/kmacy/releng_7_2_fcs/sys/sys/proc.h
user/kmacy/releng_7_2_fcs/sys/sys/selinfo.h
user/kmacy/releng_7_2_fcs/sys/sys/socketvar.h
user/kmacy/releng_7_2_fcs/sys/sys/systm.h
Modified: user/kmacy/releng_7_2_fcs/sys/kern/kern_event.c
==============================================================================
--- user/kmacy/releng_7_2_fcs/sys/kern/kern_event.c Mon May 18 23:36:11 2009 (r192358)
+++ user/kmacy/releng_7_2_fcs/sys/kern/kern_event.c Tue May 19 00:02:58 2009 (r192359)
@@ -1460,7 +1460,8 @@ kqueue_poll(struct file *fp, int events,
revents |= events & (POLLIN | POLLRDNORM);
} else {
selrecord(td, &kq->kq_sel);
- kq->kq_state |= KQ_SEL;
+ if (SEL_WAITING(&kq->kq_sel))
+ kq->kq_state |= KQ_SEL;
}
}
kqueue_release(kq, 1);
@@ -1553,8 +1554,9 @@ kqueue_close(struct file *fp, struct thr
}
if ((kq->kq_state & KQ_SEL) == KQ_SEL) {
- kq->kq_state &= ~KQ_SEL;
selwakeuppri(&kq->kq_sel, PSOCK);
+ if (!SEL_WAITING(&kq->kq_sel))
+ kq->kq_state &= ~KQ_SEL;
}
KQ_UNLOCK(kq);
@@ -1589,8 +1591,9 @@ kqueue_wakeup(struct kqueue *kq)
wakeup(kq);
}
if ((kq->kq_state & KQ_SEL) == KQ_SEL) {
- kq->kq_state &= ~KQ_SEL;
selwakeuppri(&kq->kq_sel, PSOCK);
+ if (!SEL_WAITING(&kq->kq_sel))
+ kq->kq_state &= ~KQ_SEL;
}
if (!knlist_empty(&kq->kq_sel.si_note))
kqueue_schedtask(kq);
Modified: user/kmacy/releng_7_2_fcs/sys/kern/kern_thread.c
==============================================================================
--- user/kmacy/releng_7_2_fcs/sys/kern/kern_thread.c Mon May 18 23:36:11 2009 (r192358)
+++ user/kmacy/releng_7_2_fcs/sys/kern/kern_thread.c Tue May 19 00:02:58 2009 (r192359)
@@ -42,6 +42,7 @@ __FBSDID("$FreeBSD$");
#include <sys/sysctl.h>
#include <sys/sched.h>
#include <sys/sleepqueue.h>
+#include <sys/selinfo.h>
#include <sys/turnstile.h>
#include <sys/ktr.h>
#include <sys/umtx.h>
@@ -214,6 +215,7 @@ thread_fini(void *mem, int size)
turnstile_free(td->td_turnstile);
sleepq_free(td->td_sleepqueue);
umtx_thread_fini(td);
+ seltdfini(td);
}
/*
Modified: user/kmacy/releng_7_2_fcs/sys/kern/sys_generic.c
==============================================================================
--- user/kmacy/releng_7_2_fcs/sys/kern/sys_generic.c Mon May 18 23:36:11 2009 (r192358)
+++ user/kmacy/releng_7_2_fcs/sys/kern/sys_generic.c Tue May 19 00:02:58 2009 (r192359)
@@ -69,18 +69,60 @@ __FBSDID("$FreeBSD$");
#include <sys/ktrace.h>
#endif
+#include <sys/ktr.h>
+
static MALLOC_DEFINE(M_IOCTLOPS, "ioctlops", "ioctl data buffer");
static MALLOC_DEFINE(M_SELECT, "select", "select() buffer");
MALLOC_DEFINE(M_IOV, "iov", "large iov's");
static int pollout(struct pollfd *, struct pollfd *, u_int);
static int pollscan(struct thread *, struct pollfd *, u_int);
+static int pollrescan(struct thread *);
static int selscan(struct thread *, fd_mask **, fd_mask **, int);
+static int selrescan(struct thread *, fd_mask **, fd_mask **);
+static void selfdalloc(struct thread *, void *);
+static void selfdfree(struct seltd *, struct selfd *);
static int dofileread(struct thread *, int, struct file *, struct uio *,
off_t, int);
static int dofilewrite(struct thread *, int, struct file *, struct uio *,
off_t, int);
static void doselwakeup(struct selinfo *, int);
+static void seltdinit(struct thread *);
+static int seltdwait(struct thread *, int);
+static void seltdclear(struct thread *);
+
+/*
+ * One seltd per-thread allocated on demand as needed.
+ *
+ * t - protected by st_mtx
+ * k - Only accessed by curthread or read-only
+ */
+struct seltd {
+ STAILQ_HEAD(, selfd) st_selq; /* (k) List of selfds. */
+ struct selfd *st_free1; /* (k) free fd for read set. */
+ struct selfd *st_free2; /* (k) free fd for write set. */
+ struct mtx st_mtx; /* Protects struct seltd */
+ struct cv st_wait; /* (t) Wait channel. */
+ int st_flags; /* (t) SELTD_ flags. */
+};
+
+#define SELTD_PENDING 0x0001 /* We have pending events. */
+#define SELTD_RESCAN 0x0002 /* Doing a rescan. */
+
+/*
+ * One selfd allocated per-thread per-file-descriptor.
+ * f - protected by sf_mtx
+ */
+struct selfd {
+ STAILQ_ENTRY(selfd) sf_link; /* (k) fds owned by this td. */
+ TAILQ_ENTRY(selfd) sf_threads; /* (f) fds on this selinfo. */
+ struct selinfo *sf_si; /* (f) selinfo when linked. */
+ struct mtx *sf_mtx; /* Pointer to selinfo mtx. */
+ struct seltd *sf_td; /* (k) owning seltd. */
+ void *sf_cookie; /* (k) fd or pollfd. */
+};
+
+static uma_zone_t selfd_zone;
#ifndef _SYS_SYSPROTO_H_
struct read_args {
@@ -630,14 +672,6 @@ out:
return (error);
}
-/*
- * sellock and selwait are initialized in selectinit() via SYSINIT.
- */
-struct mtx sellock;
-struct cv selwait;
-u_int nselcoll; /* Select collisions since boot */
-SYSCTL_UINT(_kern, OID_AUTO, nselcoll, CTLFLAG_RD, &nselcoll, 0, "");
-
int
poll_no_poll(int events)
{
@@ -695,7 +729,7 @@ kern_select(struct thread *td, int nd, f
fd_mask *ibits[3], *obits[3], *selbits, *sbp;
struct timeval atv, rtv, ttv;
int error, timo;
- u_int ncoll, nbufbytes, ncpbytes, nfdbits;
+ u_int nbufbytes, ncpbytes, nfdbits;
if (nd < 0)
return (EINVAL);
@@ -740,7 +774,7 @@ kern_select(struct thread *td, int nd, f
sbp += ncpbytes / sizeof *sbp; \
error = copyin(name, ibits[x], ncpbytes); \
if (error != 0) \
- goto done_nosellock; \
+ goto done; \
} \
} while (0)
getbits(fd_in, 0);
@@ -754,7 +788,7 @@ kern_select(struct thread *td, int nd, f
atv = *tvp;
if (itimerfix(&atv)) {
error = EINVAL;
- goto done_nosellock;
+ goto done;
}
getmicrouptime(&rtv);
timevaladd(&atv, &rtv);
@@ -763,58 +797,31 @@ kern_select(struct thread *td, int nd, f
atv.tv_usec = 0;
}
timo = 0;
- TAILQ_INIT(&td->td_selq);
- mtx_lock(&sellock);
-retry:
- ncoll = nselcoll;
- thread_lock(td);
- td->td_flags |= TDF_SELECT;
- thread_unlock(td);
- mtx_unlock(&sellock);
-
- error = selscan(td, ibits, obits, nd);
- mtx_lock(&sellock);
- if (error || td->td_retval[0])
- goto done;
- if (atv.tv_sec || atv.tv_usec) {
- getmicrouptime(&rtv);
- if (timevalcmp(&rtv, &atv, >=))
- goto done;
- ttv = atv;
- timevalsub(&ttv, &rtv);
- timo = ttv.tv_sec > 24 * 60 * 60 ?
- 24 * 60 * 60 * hz : tvtohz(&ttv);
- }
-
- /*
- * An event of interest may occur while we do not hold
- * sellock, so check TDF_SELECT and the number of
- * collisions and rescan the file descriptors if
- * necessary.
- */
- thread_lock(td);
- if ((td->td_flags & TDF_SELECT) == 0 || nselcoll != ncoll) {
- thread_unlock(td);
- goto retry;
+ seltdinit(td);
+ /* Iterate until the timeout expires or descriptors become ready. */
+ for (;;) {
+ error = selscan(td, ibits, obits, nd);
+ if (error || td->td_retval[0] != 0)
+ break;
+ if (atv.tv_sec || atv.tv_usec) {
+ getmicrouptime(&rtv);
+ if (timevalcmp(&rtv, &atv, >=))
+ break;
+ ttv = atv;
+ timevalsub(&ttv, &rtv);
+ timo = ttv.tv_sec > 24 * 60 * 60 ?
+ 24 * 60 * 60 * hz : tvtohz(&ttv);
+ }
+ error = seltdwait(td, timo);
+ if (error)
+ break;
+ error = selrescan(td, ibits, obits);
+ if (error || td->td_retval[0] != 0)
+ break;
}
- thread_unlock(td);
-
- if (timo > 0)
- error = cv_timedwait_sig(&selwait, &sellock, timo);
- else
- error = cv_wait_sig(&selwait, &sellock);
-
- if (error == 0)
- goto retry;
+ seltdclear(td);
done:
- clear_selinfo_list(td);
- thread_lock(td);
- td->td_flags &= ~TDF_SELECT;
- thread_unlock(td);
- mtx_unlock(&sellock);
-
-done_nosellock:
/* select is not restarted after signals... */
if (error == ERESTART)
error = EINTR;
@@ -837,6 +844,60 @@ done_nosellock:
return (error);
}
+/*
+ * Traverse the list of fds attached to this thread's seltd and check for
+ * completion.
+ */
+static int
+selrescan(struct thread *td, fd_mask **ibits, fd_mask **obits)
+{
+ struct seltd *stp;
+ struct selfd *sfp;
+ struct selfd *sfn;
+ struct selinfo *si;
+ struct file *fp;
+ int msk, fd;
+ int n = 0;
+ /* Note: backend also returns POLLHUP/POLLERR if appropriate. */
+ static int flag[3] = { POLLRDNORM, POLLWRNORM, POLLRDBAND };
+ struct filedesc *fdp = td->td_proc->p_fd;
+
+ stp = td->td_sel;
+ FILEDESC_SLOCK(fdp);
+ STAILQ_FOREACH_SAFE(sfp, &stp->st_selq, sf_link, sfn) {
+ fd = (int)(uintptr_t)sfp->sf_cookie;
+ si = sfp->sf_si;
+ selfdfree(stp, sfp);
+ /* If the selinfo wasn't cleared the event didn't fire. */
+ if (si != NULL)
+ continue;
+ if ((fp = fget_locked(fdp, fd)) == NULL) {
+ FILEDESC_SUNLOCK(fdp);
+ return (EBADF);
+ }
+ for (msk = 0; msk < 3; msk++) {
+ if (ibits[msk] == NULL)
+ continue;
+ if ((ibits[msk][fd/NFDBITS] &
+ ((fd_mask) 1 << (fd % NFDBITS))) == 0)
+ continue;
+ if (fo_poll(fp, flag[msk], td->td_ucred, td)) {
+ obits[msk][(fd)/NFDBITS] |=
+ ((fd_mask)1 << ((fd) % NFDBITS));
+ n++;
+ }
+ }
+ }
+ FILEDESC_SUNLOCK(fdp);
+ stp->st_flags = 0;
+ td->td_retval[0] = n;
+ return (0);
+}
+
+/*
+ * Perform the initial filedescriptor scan and register ourselves with
+ * each selinfo.
+ */
static int
selscan(td, ibits, obits, nfd)
struct thread *td;
@@ -865,6 +926,7 @@ selscan(td, ibits, obits, nfd)
FILEDESC_SUNLOCK(fdp);
return (EBADF);
}
+ selfdalloc(td, (void *)(uintptr_t)fd);
if (fo_poll(fp, flag[msk], td->td_ucred,
td)) {
obits[msk][(fd)/NFDBITS] |=
@@ -895,7 +957,7 @@ poll(td, uap)
struct pollfd smallbits[32];
struct timeval atv, rtv, ttv;
int error = 0, timo;
- u_int ncoll, nfds;
+ u_int nfds;
size_t ni;
nfds = uap->nfds;
@@ -911,8 +973,7 @@ poll(td, uap)
if ((nfds > lim_cur(td->td_proc, RLIMIT_NOFILE)) &&
(nfds > FD_SETSIZE)) {
PROC_UNLOCK(td->td_proc);
- error = EINVAL;
- goto done2;
+ return (EINVAL);
}
PROC_UNLOCK(td->td_proc);
ni = nfds * sizeof(struct pollfd);
@@ -922,13 +983,13 @@ poll(td, uap)
bits = smallbits;
error = copyin(uap->fds, bits, ni);
if (error)
- goto done_nosellock;
+ goto done;
if (uap->timeout != INFTIM) {
atv.tv_sec = uap->timeout / 1000;
atv.tv_usec = (uap->timeout % 1000) * 1000;
if (itimerfix(&atv)) {
error = EINVAL;
- goto done_nosellock;
+ goto done;
}
getmicrouptime(&rtv);
timevaladd(&atv, &rtv);
@@ -937,56 +998,31 @@ poll(td, uap)
atv.tv_usec = 0;
}
timo = 0;
- TAILQ_INIT(&td->td_selq);
- mtx_lock(&sellock);
-retry:
- ncoll = nselcoll;
- thread_lock(td);
- td->td_flags |= TDF_SELECT;
- thread_unlock(td);
- mtx_unlock(&sellock);
-
- error = pollscan(td, bits, nfds);
- mtx_lock(&sellock);
- if (error || td->td_retval[0])
- goto done;
- if (atv.tv_sec || atv.tv_usec) {
- getmicrouptime(&rtv);
- if (timevalcmp(&rtv, &atv, >=))
- goto done;
- ttv = atv;
- timevalsub(&ttv, &rtv);
- timo = ttv.tv_sec > 24 * 60 * 60 ?
- 24 * 60 * 60 * hz : tvtohz(&ttv);
- }
- /*
- * An event of interest may occur while we do not hold
- * sellock, so check TDF_SELECT and the number of collisions
- * and rescan the file descriptors if necessary.
- */
- thread_lock(td);
- if ((td->td_flags & TDF_SELECT) == 0 || nselcoll != ncoll) {
- thread_unlock(td);
- goto retry;
+ seltdinit(td);
+ /* Iterate until the timeout expires or descriptors become ready. */
+ for (;;) {
+ error = pollscan(td, bits, nfds);
+ if (error || td->td_retval[0] != 0)
+ break;
+ if (atv.tv_sec || atv.tv_usec) {
+ getmicrouptime(&rtv);
+ if (timevalcmp(&rtv, &atv, >=))
+ break;
+ ttv = atv;
+ timevalsub(&ttv, &rtv);
+ timo = ttv.tv_sec > 24 * 60 * 60 ?
+ 24 * 60 * 60 * hz : tvtohz(&ttv);
+ }
+ error = seltdwait(td, timo);
+ if (error)
+ break;
+ error = pollrescan(td);
+ if (error || td->td_retval[0] != 0)
+ break;
}
- thread_unlock(td);
-
- if (timo > 0)
- error = cv_timedwait_sig(&selwait, &sellock, timo);
- else
- error = cv_wait_sig(&selwait, &sellock);
-
- if (error == 0)
- goto retry;
+ seltdclear(td);
done:
- clear_selinfo_list(td);
- thread_lock(td);
- td->td_flags &= ~TDF_SELECT;
- thread_unlock(td);
- mtx_unlock(&sellock);
-
-done_nosellock:
/* poll is not restarted after signals... */
if (error == ERESTART)
error = EINTR;
@@ -1000,7 +1036,6 @@ done_nosellock:
out:
if (ni > sizeof(smallbits))
free(bits, M_TEMP);
-done2:
return (error);
}
@@ -1025,12 +1060,56 @@ pollout(fds, ufds, nfd)
}
static int
+pollrescan(struct thread *td)
+{
+ struct seltd *stp;
+ struct selfd *sfp;
+ struct selfd *sfn;
+ struct selinfo *si;
+ struct filedesc *fdp;
+ struct file *fp;
+ struct pollfd *fd;
+ int n;
+
+ n = 0;
+ fdp = td->td_proc->p_fd;
+ stp = td->td_sel;
+ FILEDESC_SLOCK(fdp);
+ STAILQ_FOREACH_SAFE(sfp, &stp->st_selq, sf_link, sfn) {
+ fd = (struct pollfd *)sfp->sf_cookie;
+ si = sfp->sf_si;
+ selfdfree(stp, sfp);
+ /* If the selinfo wasn't cleared the event didn't fire. */
+ if (si != NULL)
+ continue;
+ fp = fdp->fd_ofiles[fd->fd];
+ if (fp == NULL) {
+ fd->revents = POLLNVAL;
+ n++;
+ continue;
+ }
+ /*
+ * Note: backend also returns POLLHUP and
+ * POLLERR if appropriate.
+ */
+ fd->revents = fo_poll(fp, fd->events, td->td_ucred, td);
+ if (fd->revents != 0)
+ n++;
+ }
+ FILEDESC_SUNLOCK(fdp);
+ stp->st_flags = 0;
+ td->td_retval[0] = n;
+ return (0);
+}
+
+
+static int
pollscan(td, fds, nfd)
struct thread *td;
struct pollfd *fds;
u_int nfd;
{
- register struct filedesc *fdp = td->td_proc->p_fd;
+ struct filedesc *fdp = td->td_proc->p_fd;
int i;
struct file *fp;
int n = 0;
@@ -1052,6 +1131,7 @@ pollscan(td, fds, nfd)
* Note: backend also returns POLLHUP and
* POLLERR if appropriate.
*/
+ selfdalloc(td, fds);
fds->revents = fo_poll(fp, fds->events,
td->td_ucred, td);
if (fds->revents != 0)
@@ -1085,23 +1165,90 @@ openbsd_poll(td, uap)
}
/*
- * Remove the references to the thread from all of the objects we were
- * polling.
- *
- * This code assumes that the underlying owner of the selinfo structure will
- * hold sellock before it changes it, and that it will unlink itself from our
- * list if it goes away.
+ * XXX This was created specifically to support netncp and netsmb. This
+ * allows the caller to specify a socket to wait for events on. It returns
+ * 0 if any events matched and an error otherwise. There is no way to
+ * determine which events fired.
*/
-void
-clear_selinfo_list(td)
- struct thread *td;
+int
+selsocket(struct socket *so, int events, struct timeval *tvp, struct thread *td)
{
- struct selinfo *si;
+ struct timeval atv, rtv, ttv;
+ int error, timo;
+
+ if (tvp != NULL) {
+ atv = *tvp;
+ if (itimerfix(&atv))
+ return (EINVAL);
+ getmicrouptime(&rtv);
+ timevaladd(&atv, &rtv);
+ } else {
+ atv.tv_sec = 0;
+ atv.tv_usec = 0;
+ }
- mtx_assert(&sellock, MA_OWNED);
- TAILQ_FOREACH(si, &td->td_selq, si_thrlist)
- si->si_thread = NULL;
- TAILQ_INIT(&td->td_selq);
+ timo = 0;
+ seltdinit(td);
+ /*
+ * Iterate until the timeout expires or the socket becomes ready.
+ */
+ for (;;) {
+ selfdalloc(td, NULL);
+ error = sopoll(so, events, NULL, td);
+ /* error here is actually the ready events. */
+ if (error)
+ return (0);
+ if (atv.tv_sec || atv.tv_usec) {
+ getmicrouptime(&rtv);
+ if (timevalcmp(&rtv, &atv, >=)) {
+ seltdclear(td);
+ return (EWOULDBLOCK);
+ }
+ ttv = atv;
+ timevalsub(&ttv, &rtv);
+ timo = ttv.tv_sec > 24 * 60 * 60 ?
+ 24 * 60 * 60 * hz : tvtohz(&ttv);
+ }
+ error = seltdwait(td, timo);
+ seltdclear(td);
+ if (error)
+ break;
+ }
+ /* XXX Duplicates ncp/smb behavior. */
+ if (error == ERESTART)
+ error = 0;
+ return (error);
+}
+
+/*
+ * Preallocate two selfds associated with 'cookie'. Some fo_poll routines
+ * have two select sets, one for read and another for write.
+ */
+static void
+selfdalloc(struct thread *td, void *cookie)
+{
+ struct seltd *stp;
+
+ stp = td->td_sel;
+ if (stp->st_free1 == NULL)
+ stp->st_free1 = uma_zalloc(selfd_zone, M_WAITOK|M_ZERO);
+ stp->st_free1->sf_td = stp;
+ stp->st_free1->sf_cookie = cookie;
+ if (stp->st_free2 == NULL)
+ stp->st_free2 = uma_zalloc(selfd_zone, M_WAITOK|M_ZERO);
+ stp->st_free2->sf_td = stp;
+ stp->st_free2->sf_cookie = cookie;
+}
+
+static void
+selfdfree(struct seltd *stp, struct selfd *sfp)
+{
+ STAILQ_REMOVE(&stp->st_selq, sfp, selfd, sf_link);
+ mtx_lock(sfp->sf_mtx);
+ if (sfp->sf_si)
+ TAILQ_REMOVE(&sfp->sf_si->si_tdlist, sfp, sf_threads);
+ mtx_unlock(sfp->sf_mtx);
+ uma_zfree(selfd_zone, sfp);
}
/*
@@ -1112,26 +1259,46 @@ selrecord(selector, sip)
struct thread *selector;
struct selinfo *sip;
{
+ struct selfd *sfp;
+ struct seltd *stp;
+ struct mtx *mtxp;
- mtx_lock(&sellock);
+ stp = selector->td_sel;
+ /*
+ * Don't record when doing a rescan.
+ */
+ if (stp->st_flags & SELTD_RESCAN)
+ return;
+ /*
+ * Grab one of the preallocated descriptors.
+ */
+ sfp = NULL;
+ if ((sfp = stp->st_free1) != NULL)
+ stp->st_free1 = NULL;
+ else if ((sfp = stp->st_free2) != NULL)
+ stp->st_free2 = NULL;
+ else
+ panic("selrecord: No free selfd on selq");
+ mtxp = mtx_pool_find(mtxpool_sleep, sip);
+ /*
+ * Initialize the sfp and queue it in the thread.
+ */
+ sfp->sf_si = sip;
+ sfp->sf_mtx = mtxp;
+ STAILQ_INSERT_TAIL(&stp->st_selq, sfp, sf_link);
/*
- * If the selinfo's thread pointer is NULL then take ownership of it.
- *
- * If the thread pointer is not NULL and it points to another
- * thread, then we have a collision.
- *
- * If the thread pointer is not NULL and points back to us then leave
- * it alone as we've already added pointed it at us and added it to
- * our list.
+ * Now that we've locked the sip, check for initialization.
*/
- if (sip->si_thread == NULL) {
- sip->si_thread = selector;
- TAILQ_INSERT_TAIL(&selector->td_selq, sip, si_thrlist);
- } else if (sip->si_thread != selector) {
- sip->si_flags |= SI_COLL;
+ mtx_lock(mtxp);
+ if (sip->si_mtx == NULL) {
+ sip->si_mtx = mtxp;
+ TAILQ_INIT(&sip->si_tdlist);
}
-
- mtx_unlock(&sellock);
+ /*
+ * Add this thread to the list of selfds listening on this selinfo.
+ */
+ TAILQ_INSERT_TAIL(&sip->si_tdlist, sfp, sf_threads);
+ mtx_unlock(sip->si_mtx);
}
/* Wake up a selecting thread. */
@@ -1159,36 +1326,115 @@ doselwakeup(sip, pri)
struct selinfo *sip;
int pri;
{
- struct thread *td;
+ struct selfd *sfp;
+ struct selfd *sfn;
+ struct seltd *stp;
- mtx_lock(&sellock);
- td = sip->si_thread;
- if ((sip->si_flags & SI_COLL) != 0) {
- nselcoll++;
- sip->si_flags &= ~SI_COLL;
- cv_broadcastpri(&selwait, pri);
- }
- if (td == NULL) {
- mtx_unlock(&sellock);
+ /* If it's not initialized there can't be any waiters. */
+ if (sip->si_mtx == NULL)
return;
+ /*
+ * Locking the selinfo locks all selfds associated with it.
+ */
+ mtx_lock(sip->si_mtx);
+ TAILQ_FOREACH_SAFE(sfp, &sip->si_tdlist, sf_threads, sfn) {
+ /*
+ * Once we remove this sfp from the list and clear the
+ * sf_si seltdclear will know to ignore this si.
+ */
+ TAILQ_REMOVE(&sip->si_tdlist, sfp, sf_threads);
+ sfp->sf_si = NULL;
+ stp = sfp->sf_td;
+ mtx_lock(&stp->st_mtx);
+ stp->st_flags |= SELTD_PENDING;
+ cv_broadcastpri(&stp->st_wait, pri);
+ mtx_unlock(&stp->st_mtx);
}
- TAILQ_REMOVE(&td->td_selq, sip, si_thrlist);
- sip->si_thread = NULL;
- thread_lock(td);
- td->td_flags &= ~TDF_SELECT;
- thread_unlock(td);
- sleepq_remove(td, &selwait);
- mtx_unlock(&sellock);
+ mtx_unlock(sip->si_mtx);
}
-static void selectinit(void *);
-SYSINIT(select, SI_SUB_LOCK, SI_ORDER_FIRST, selectinit, NULL)
+static void
+seltdinit(struct thread *td)
+{
+ struct seltd *stp;
+
+ if ((stp = td->td_sel) != NULL)
+ goto out;
+ td->td_sel = stp = malloc(sizeof(*stp), M_SELECT, M_WAITOK|M_ZERO);
+ mtx_init(&stp->st_mtx, "sellck", NULL, MTX_DEF);
+ cv_init(&stp->st_wait, "select");
+out:
+ stp->st_flags = 0;
+ STAILQ_INIT(&stp->st_selq);
+}
+
+static int
+seltdwait(struct thread *td, int timo)
+{
+ struct seltd *stp;
+ int error;
-/* ARGSUSED*/
+ stp = td->td_sel;
+ /*
+ * An event of interest may occur while we do not hold the seltd
+ * locked so check the pending flag before we sleep.
+ */
+ mtx_lock(&stp->st_mtx);
+ /*
+ * Any further calls to selrecord will be a rescan.
+ */
+ stp->st_flags |= SELTD_RESCAN;
+ if (stp->st_flags & SELTD_PENDING) {
+ mtx_unlock(&stp->st_mtx);
+ return (0);
+ }
+ if (timo > 0)
+ error = cv_timedwait_sig(&stp->st_wait, &stp->st_mtx, timo);
+ else
+ error = cv_wait_sig(&stp->st_wait, &stp->st_mtx);
+ mtx_unlock(&stp->st_mtx);
+
+ return (error);
+}
+
+void
+seltdfini(struct thread *td)
+{
+ struct seltd *stp;
+
+ stp = td->td_sel;
+ if (stp == NULL)
+ return;
+ if (stp->st_free1)
+ uma_zfree(selfd_zone, stp->st_free1);
+ if (stp->st_free2)
+ uma_zfree(selfd_zone, stp->st_free2);
+ td->td_sel = NULL;
+ free(stp, M_SELECT);
+}
+
+/*
+ * Remove the references to the thread from all of the objects we were
+ * polling.
+ */
+static void
+seltdclear(struct thread *td)
+{
+ struct seltd *stp;
+ struct selfd *sfp;
+ struct selfd *sfn;
+
+ stp = td->td_sel;
+ STAILQ_FOREACH_SAFE(sfp, &stp->st_selq, sf_link, sfn)
+ selfdfree(stp, sfp);
+ stp->st_flags = 0;
+}
+
+static void selectinit(void *);
+SYSINIT(select, SI_SUB_SYSCALLS, SI_ORDER_ANY, selectinit, NULL);
static void
-selectinit(dummy)
- void *dummy;
+selectinit(void *dummy __unused)
{
- cv_init(&selwait, "select");
- mtx_init(&sellock, "sellck", NULL, MTX_DEF);
+ selfd_zone = uma_zcreate("selfd", sizeof(struct selfd), NULL, NULL,
+ NULL, NULL, UMA_ALIGN_PTR, 0);
}
Modified: user/kmacy/releng_7_2_fcs/sys/kern/sys_pipe.c
==============================================================================
--- user/kmacy/releng_7_2_fcs/sys/kern/sys_pipe.c Mon May 18 23:36:11 2009 (r192358)
+++ user/kmacy/releng_7_2_fcs/sys/kern/sys_pipe.c Tue May 19 00:02:58 2009 (r192359)
@@ -524,8 +524,9 @@ pipeselwakeup(cpipe)
PIPE_LOCK_ASSERT(cpipe, MA_OWNED);
if (cpipe->pipe_state & PIPE_SEL) {
- cpipe->pipe_state &= ~PIPE_SEL;
selwakeuppri(&cpipe->pipe_sel, PSOCK);
+ if (!SEL_WAITING(&cpipe->pipe_sel))
+ cpipe->pipe_state &= ~PIPE_SEL;
}
if ((cpipe->pipe_state & PIPE_ASYNC) && cpipe->pipe_sigio)
pgsigio(&cpipe->pipe_sigio, SIGIO, 0);
@@ -1356,12 +1357,14 @@ pipe_poll(fp, events, active_cred, td)
if (revents == 0) {
if (events & (POLLIN | POLLRDNORM)) {
selrecord(td, &rpipe->pipe_sel);
- rpipe->pipe_state |= PIPE_SEL;
+ if (SEL_WAITING(&rpipe->pipe_sel))
+ rpipe->pipe_state |= PIPE_SEL;
}
if (events & (POLLOUT | POLLWRNORM)) {
selrecord(td, &wpipe->pipe_sel);
- wpipe->pipe_state |= PIPE_SEL;
+ if (SEL_WAITING(&wpipe->pipe_sel))
+ wpipe->pipe_state |= PIPE_SEL;
}
}
#ifdef MAC
Modified: user/kmacy/releng_7_2_fcs/sys/kern/uipc_sockbuf.c
==============================================================================
--- user/kmacy/releng_7_2_fcs/sys/kern/uipc_sockbuf.c Mon May 18 23:36:11 2009 (r192358)
+++ user/kmacy/releng_7_2_fcs/sys/kern/uipc_sockbuf.c Tue May 19 00:02:58 2009 (r192359)
@@ -179,7 +179,8 @@ sowakeup(struct socket *so, struct sockb
SOCKBUF_LOCK_ASSERT(sb);
selwakeuppri(&sb->sb_sel, PSOCK);
- sb->sb_flags &= ~SB_SEL;
+ if (!SEL_WAITING(&sb->sb_sel))
+ sb->sb_flags &= ~SB_SEL;
if (sb->sb_flags & SB_WAIT) {
sb->sb_flags &= ~SB_WAIT;
wakeup(&sb->sb_cc);
Modified: user/kmacy/releng_7_2_fcs/sys/netncp/ncp_rq.c
==============================================================================
--- user/kmacy/releng_7_2_fcs/sys/netncp/ncp_rq.c Mon May 18 23:36:11 2009 (r192358)
+++ user/kmacy/releng_7_2_fcs/sys/netncp/ncp_rq.c Tue May 19 00:02:58 2009 (r192359)
@@ -43,6 +43,8 @@ __FBSDID("$FreeBSD$");
#include <sys/mbuf.h>
#include <sys/poll.h>
#include <sys/proc.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
#include <sys/uio.h>
#include <netncp/ncp.h>
@@ -274,7 +276,9 @@ ncp_request_int(struct ncp_rq *rqp)
/*
* Flush out replies on previous reqs
*/
- while (ncp_poll(so, POLLIN) != 0) {
+ tv.tv_sec = 0;
+ tv.tv_usec = 0;
+ while (selsocket(so, POLLIN, &tv, td) == 0) {
if (ncp_sock_recv(so, &m, &len) != 0)
break;
m_freem(m);
@@ -319,7 +323,7 @@ ncp_request_int(struct ncp_rq *rqp)
}
tv.tv_sec = conn->li.timeout;
tv.tv_usec = 0;
- error = ncp_sock_rselect(so, td, &tv, POLLIN);
+ error = selsocket(so, POLLIN, &tv, td);
if (error == EWOULDBLOCK ) /* timeout expired */
continue;
error = ncp_chkintr(conn, td);
@@ -335,7 +339,9 @@ ncp_request_int(struct ncp_rq *rqp)
dosend = 1; /* resend rq if error */
for (;;) {
error = 0;
- if (ncp_poll(so, POLLIN) == 0)
+ tv.tv_sec = 0;
+ tv.tv_usec = 0;
+ if (selsocket(so, POLLIN, &tv, td) != 0)
break;
/* if (so->so_rcv.sb_cc == 0) {
break;
Modified: user/kmacy/releng_7_2_fcs/sys/netncp/ncp_sock.c
==============================================================================
--- user/kmacy/releng_7_2_fcs/sys/netncp/ncp_sock.c Mon May 18 23:36:11 2009 (r192358)
+++ user/kmacy/releng_7_2_fcs/sys/netncp/ncp_sock.c Tue May 19 00:02:58 2009 (r192359)
@@ -65,7 +65,6 @@ __FBSDID("$FreeBSD$");
#define ipx_setnullhost(x) ((x).x_host.s_host[0] = 0); \
((x).x_host.s_host[1] = 0); ((x).x_host.s_host[2] = 0);
-/*int ncp_poll(struct socket *so, int events);*/
/*static int ncp_getsockname(struct socket *so, caddr_t asa, int *alen);*/
static int ncp_soconnect(struct socket *so, struct sockaddr *target,
struct thread *td);
@@ -181,110 +180,6 @@ ncp_sock_send(struct socket *so, struct
return error;
}
-int
-ncp_poll(struct socket *so, int events)
-{
- struct thread *td = curthread;
- int revents;
-
- /* Fake up enough state to look like we are in poll(2). */
- mtx_lock(&sellock);
- thread_lock(td);
- td->td_flags |= TDF_SELECT;
- thread_unlock(td);
- mtx_unlock(&sellock);
- TAILQ_INIT(&td->td_selq);
-
- revents = sopoll(so, events, NULL, td);
-
- /* Tear down the fake poll(2) state. */
- mtx_lock(&sellock);
- clear_selinfo_list(td);
- thread_lock(td);
- td->td_flags &= ~TDF_SELECT;
- thread_unlock(td);
- mtx_unlock(&sellock);
-
- return (revents);
-}
-
-int
-ncp_sock_rselect(struct socket *so, struct thread *td, struct timeval *tv,
- int events)
-{
- struct timeval atv, rtv, ttv;
- int ncoll, timo, error, revents;
-
- if (tv) {
- atv = *tv;
- if (itimerfix(&atv)) {
- error = EINVAL;
- goto done_noproclock;
- }
- getmicrouptime(&rtv);
- timevaladd(&atv, &rtv);
- }
- timo = 0;
- mtx_lock(&sellock);
-
-retry:
- ncoll = nselcoll;
- thread_lock(td);
- td->td_flags |= TDF_SELECT;
- thread_unlock(td);
- mtx_unlock(&sellock);
-
- TAILQ_INIT(&td->td_selq);
- revents = sopoll(so, events, NULL, td);
- mtx_lock(&sellock);
- if (revents) {
- error = 0;
- goto done;
- }
- if (tv) {
- getmicrouptime(&rtv);
- if (timevalcmp(&rtv, &atv, >=)) {
- error = EWOULDBLOCK;
- goto done;
- }
- ttv = atv;
- timevalsub(&ttv, &rtv);
- timo = tvtohz(&ttv);
- }
- /*
- * An event of our interest may occur during locking a thread.
- * In order to avoid missing the event that occurred during locking
- * the process, test TDF_SELECT and rescan file descriptors if
- * necessary.
- */
- thread_lock(td);
- if ((td->td_flags & TDF_SELECT) == 0 || nselcoll != ncoll) {
- thread_unlock(td);
- goto retry;
- }
- thread_unlock(td);
-
- if (timo > 0)
- error = cv_timedwait(&selwait, &sellock, timo);
- else {
- cv_wait(&selwait, &sellock);
- error = 0;
- }
-
-done:
- clear_selinfo_list(td);
-
- thread_lock(td);
- td->td_flags &= ~TDF_SELECT;
- thread_unlock(td);
- mtx_unlock(&sellock);
-
-done_noproclock:
*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
More information about the svn-src-user
mailing list