svn commit: r266316 - in user/dchagin/lemul/sys: amd64/linux amd64/linux32 compat/linux i386/linux sys
Chagin Dmitry
dchagin at freebsd.org
Mon May 19 19:03:00 UTC 2014
On Sun, May 18, 2014 at 02:44:53PM +0200, Jilles Tjoelker wrote:
> On Sat, May 17, 2014 at 02:39:40PM +0000, Dmitry Chagin wrote:
> > Author: dchagin
> > Date: Sat May 17 14:39:40 2014
> > New Revision: 266316
> > URL: http://svnweb.freebsd.org/changeset/base/266316
>
> > Log:
> > Implement eventfd system call.
> > [snip]
> > Modified: user/dchagin/lemul/sys/compat/linux/linux_event.c
> > ==============================================================================
> > --- user/dchagin/lemul/sys/compat/linux/linux_event.c Sat May 17 14:35:18 2014 (r266315)
> > +++ user/dchagin/lemul/sys/compat/linux/linux_event.c Sat May 17 14:39:40 2014 (r266316)
> > [snip]
> > +static int
> > +eventfd_read(struct file *fp, struct uio *uio, struct ucred *active_cred,
> > + int flags, struct thread *td)
> > +{
> > + struct eventfd *efd;
> > + eventfd_t count;
> > + int error;
> > +
> > + efd = fp->f_data;
> > + if (fp->f_type != DTYPE_LINUXEFD || efd == NULL)
> > + return (EBADF);
> > +
> > + if (uio->uio_resid < sizeof(eventfd_t))
> > + return (EINVAL);
> > +
> > + error = 0;
> > + mtx_lock(&efd->efd_lock);
> > + if (efd->efd_count == 0) {
>
> This should be a while loop: a successful return of mtx_sleep() does not
> guarantee anything about the predicate (for example if two threads are
> trying to read).
>
yes, agree. thanks
> > + if ((efd->efd_flags & LINUX_O_NONBLOCK) != 0) {
>
> > + mtx_unlock(&efd->efd_lock);
> > + return (EAGAIN);
>
> These two lines could be:
> error = EAGAIN;
> break;
>
> > + }
> > + error = mtx_sleep(&efd->efd_count, &efd->efd_lock, PCATCH, "lefdrd", 0);
> > + if (error == ERESTART)
> > + error = EINTR;
>
> Why disable system call restart here? The Linux man page does not say
> so, and it may cause subtle bugs in applications that do not expect an
> [EINTR] failure.
>
whoops, misreads linux code. thanks!
> > + }
> > + if (error == 0) {
> > + if ((efd->efd_flags & LINUX_EFD_SEMAPHORE) != 0) {
> > + count = 1;
> > + --efd->efd_count;
> > + } else {
> > + count = efd->efd_count;
> > + efd->efd_count = 0;
> > + }
> > + error = uiomove_nofault(&count, sizeof(eventfd_t), uio);
>
> This uiomove_nofault() call will fail if the destination user buffer is
> not physically. This may lead to almost irreproducible bugs. Instead, it
> is better to unlock efd->efd_lock and then call uiomove().
>
> > + if (error == 0) {
> > + KNOTE_LOCKED(&efd->efd_sel.si_note, 0);
> > + selwakeup(&efd->efd_sel);
> > + wakeup(&efd->efd_count);
> > + }
>
> I don't think this block needs to be conditional on the success of
> uiomove. The wakeups are because of the change to efd->efd_count which
> happened anyway. Therefore, it can be moved to before uiomove.
>
hm, looks like i overdid here )) as we are in reader context..
> > + }
> > + mtx_unlock(&efd->efd_lock);
> > +
> > + return (error);
> > +}
> > +
> > +static int
> > +eventfd_write(struct file *fp, struct uio *uio, struct ucred *active_cred,
> > + int flags, struct thread *td)
> > +{
> > + struct eventfd *efd;
> > + eventfd_t count;
> > + int error;
> > +
> > + efd = fp->f_data;
> > + if (fp->f_type != DTYPE_LINUXEFD || efd == NULL)
> > + return (EBADF);
> > +
> > + if (uio->uio_resid < sizeof(eventfd_t))
> > + return (EINVAL);
> > +
> > + error = uiomove(&count, sizeof(eventfd_t), uio);
> > + if (error)
> > + return (error);
> > + if (count == UINT64_MAX)
> > + return (EINVAL);
> > +
> > + mtx_lock(&efd->efd_lock);
> > +retry:
> > + if (UINT64_MAX - efd->efd_count <= count) {
> > + if ((efd->efd_flags & LINUX_O_NONBLOCK) != 0) {
> > + mtx_unlock(&efd->efd_lock);
> > + return (EAGAIN);
> > + }
> > + error = mtx_sleep(&efd->efd_count, &efd->efd_lock,
> > + PCATCH, "lefdwr", 0);
>
> > + if (error == ERESTART)
> > + error = EINTR;
>
> As in eventfd_read(), why disable system call restart here?
>
> > + if (error == 0)
> > + goto retry;
> > + }
> > + if (error == 0) {
> > + efd->efd_count += count;
> > + KNOTE_LOCKED(&efd->efd_sel.si_note, 0);
> > + selwakeup(&efd->efd_sel);
> > + wakeup(&efd->efd_count);
> > + }
> > + mtx_unlock(&efd->efd_lock);
> > +
> > + return (error);
> > +}
> > +
> > +static int
> > +eventfd_poll(struct file *fp, int events, struct ucred *active_cred,
> > + struct thread *td)
> > +{
> > + struct eventfd *efd;
> > + int revents = 0;
> > +
> > + efd = fp->f_data;
> > + if (fp->f_type != DTYPE_LINUXEFD || efd == NULL)
> > + return (POLLERR);
> > +
> > + mtx_lock(&efd->efd_lock);
> > + if (efd->efd_count == UINT64_MAX)
> > + revents |= events & POLLERR;
>
> POLLERR should be returned regardless of whether it is set in events.
>
indeed.
> > + if ((events & (POLLIN|POLLRDNORM)) && efd->efd_count > 0)
> > + revents |= events & (POLLIN|POLLRDNORM);
> > + if ((events & (POLLOUT|POLLWRNORM)) && UINT64_MAX - 1 > efd->efd_count)
> > + revents |= events & (POLLOUT|POLLWRNORM);
> > + if (revents == 0)
> > + selrecord(td, &efd->efd_sel);
> > + mtx_unlock(&efd->efd_lock);
> > +
> > + return (revents);
> > +}
> > [snip]
> > +/*ARGSUSED*/
> > +static int
> > +filt_eventfdread(struct knote *kn, long hint)
> > +{
> > + struct eventfd *efd = kn->kn_hook;
> > + int ret;
> > +
> > + mtx_assert(&efd->efd_lock, MA_OWNED);
> > + ret = (efd->efd_count > 0);
> > +
> > + return (ret);
> > +}
> > +
> > +/*ARGSUSED*/
> > +static int
> > +filt_eventfdwrite(struct knote *kn, long hint)
> > +{
> > + struct eventfd *efd = kn->kn_hook;
> > + int ret;
> > +
> > + mtx_assert(&efd->efd_lock, MA_OWNED);
> > + ret = (UINT64_MAX - 1 > efd->efd_count);
>
> Perhaps set EV_EOF or EV_ERROR on overflow, so the epoll emulation can
> reconstruct POLLERR.
>
> Note, though, that sockets set EV_EOF when the corresponding direction
> is closed, while POLLHUP and POLLERR should only be returned if both
> directions are closed (otherwise, applications cannot properly finish
> the other direction when one direction is closed).
>
> > +
> > + return (ret);
> > +}
> > [snip]
> > +/*ARGSUSED*/
> > +static int
> > +eventfd_stat(struct file *fp, struct stat *st, struct ucred *active_cred,
> > + struct thread *td)
> > +{
> > +
> > + return (ENXIO);
> > +}
>
> Linux appears to permit fstat(), though the returned data is rather
> meaningless.
>
no, did not find this in linux code.
> > [snip]
>
> --
> Jilles Tjoelker
Thank you Jilles, glad to see that someone is interested.
--
Have fun!
chd
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 181 bytes
Desc: not available
URL: <http://lists.freebsd.org/pipermail/svn-src-user/attachments/20140519/eb3bf70d/attachment.sig>
More information about the svn-src-user
mailing list