svn commit: r266316 - in user/dchagin/lemul/sys: amd64/linux amd64/linux32 compat/linux i386/linux sys

Jilles Tjoelker jilles at stack.nl
Sun May 18 12:44:57 UTC 2014


On Sat, May 17, 2014 at 02:39:40PM +0000, Dmitry Chagin wrote:
> Author: dchagin
> Date: Sat May 17 14:39:40 2014
> New Revision: 266316
> URL: http://svnweb.freebsd.org/changeset/base/266316

> Log:
>   Implement eventfd system call.
> [snip]
> Modified: user/dchagin/lemul/sys/compat/linux/linux_event.c
> ==============================================================================
> --- user/dchagin/lemul/sys/compat/linux/linux_event.c	Sat May 17 14:35:18 2014	(r266315)
> +++ user/dchagin/lemul/sys/compat/linux/linux_event.c	Sat May 17 14:39:40 2014	(r266316)
> [snip]
> +static int
> +eventfd_read(struct file *fp, struct uio *uio, struct ucred *active_cred,
> +	int flags, struct thread *td)
> +{
> +	struct eventfd *efd;
> +	eventfd_t count;
> +	int error;
> +
> +	efd = fp->f_data;
> +	if (fp->f_type != DTYPE_LINUXEFD || efd == NULL)
> +		return (EBADF);
> +
> +	if (uio->uio_resid < sizeof(eventfd_t))
> +		return (EINVAL);
> +
> +	error = 0;
> +	mtx_lock(&efd->efd_lock);
> +	if (efd->efd_count == 0) {

This should be a while loop: a successful return from mtx_sleep() does not
guarantee anything about the predicate (for example, if two threads are
trying to read, another reader may have drained the count again before this
one runs).

> +		if ((efd->efd_flags & LINUX_O_NONBLOCK) != 0) {

> +			mtx_unlock(&efd->efd_lock);
> +			return (EAGAIN);

With the while loop suggested above, these two lines could simply be:
			error = EAGAIN;
			break;

> +		}
> +		error = mtx_sleep(&efd->efd_count, &efd->efd_lock, PCATCH, "lefdrd", 0);
> +		if (error == ERESTART)
> +			error = EINTR;

Why disable system call restart here? The Linux man page does not say
so, and it may cause subtle bugs in applications that do not expect an
[EINTR] failure.
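
Combining the above, the wait could look something like this (untested
sketch, reusing the names from the patch):

	error = 0;
	mtx_lock(&efd->efd_lock);
	while (error == 0 && efd->efd_count == 0) {
		if ((efd->efd_flags & LINUX_O_NONBLOCK) != 0) {
			error = EAGAIN;
			break;
		}
		/* Re-check the predicate after every wakeup; leave ERESTART alone. */
		error = mtx_sleep(&efd->efd_count, &efd->efd_lock, PCATCH,
		    "lefdrd", 0);
	}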

> +	}
> +	if (error == 0) {
> +		if ((efd->efd_flags & LINUX_EFD_SEMAPHORE) != 0) {
> +			count = 1;
> +			--efd->efd_count;
> +		} else {
> +			count = efd->efd_count;
> +			efd->efd_count = 0;
> +		}
> +		error = uiomove_nofault(&count, sizeof(eventfd_t), uio);

This uiomove_nofault() call will fail if the destination user buffer is not
resident in physical memory, which may lead to almost irreproducible bugs.
Instead, it is better to unlock efd->efd_lock and then call uiomove().

> +		if (error == 0) {
> +			KNOTE_LOCKED(&efd->efd_sel.si_note, 0);
> +			selwakeup(&efd->efd_sel);
> +			wakeup(&efd->efd_count);
> +		}

I don't think this block needs to be conditional on the success of
uiomove(): the wakeups are for the change to efd->efd_count, which has
already happened either way. Therefore, the block can be moved to before
the uiomove() call.
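
Together with the uiomove() point above, the success path could then become
something like (again only a sketch):

	if (error == 0) {
		if ((efd->efd_flags & LINUX_EFD_SEMAPHORE) != 0) {
			count = 1;
			--efd->efd_count;
		} else {
			count = efd->efd_count;
			efd->efd_count = 0;
		}
		/* The count has changed whether or not the copyout succeeds. */
		KNOTE_LOCKED(&efd->efd_sel.si_note, 0);
		selwakeup(&efd->efd_sel);
		wakeup(&efd->efd_count);
		mtx_unlock(&efd->efd_lock);
		/* Copy out without the mutex held, so page faults are allowed. */
		return (uiomove(&count, sizeof(eventfd_t), uio));
	}
	mtx_unlock(&efd->efd_lock);
	return (error);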

> +	}
> +	mtx_unlock(&efd->efd_lock);
> +
> +	return (error);
> +}
> +
> +static int
> +eventfd_write(struct file *fp, struct uio *uio, struct ucred *active_cred,
> +	 int flags, struct thread *td)
> +{
> +	struct eventfd *efd;
> +	eventfd_t count;
> +	int error;
> +
> +	efd = fp->f_data;
> +	if (fp->f_type != DTYPE_LINUXEFD || efd == NULL)
> +		return (EBADF);
> +
> +	if (uio->uio_resid < sizeof(eventfd_t))
> +		return (EINVAL);
> +
> +	error = uiomove(&count, sizeof(eventfd_t), uio);
> +	if (error)
> +		return (error);
> +	if (count == UINT64_MAX)
> +		return (EINVAL);
> +
> +	mtx_lock(&efd->efd_lock);
> +retry:
> +	if (UINT64_MAX - efd->efd_count <= count) {
> +		if ((efd->efd_flags & LINUX_O_NONBLOCK) != 0) {
> +			mtx_unlock(&efd->efd_lock);
> +			return (EAGAIN);
> +		}
> +		error = mtx_sleep(&efd->efd_count, &efd->efd_lock,
> +		    PCATCH, "lefdwr", 0);

> +		if (error == ERESTART)
> +			error = EINTR;

As in eventfd_read(), why disable system call restart here?

> +		if (error == 0)
> +			goto retry;
> +	}
> +	if (error == 0) {
> +		efd->efd_count += count;
> +		KNOTE_LOCKED(&efd->efd_sel.si_note, 0);
> +		selwakeup(&efd->efd_sel);
> +		wakeup(&efd->efd_count);
> +	}
> +	mtx_unlock(&efd->efd_lock);
> +
> +	return (error);
> +}
> +
> +static int
> +eventfd_poll(struct file *fp, int events, struct ucred *active_cred,
> +	struct thread *td)
> +{
> +	struct eventfd *efd;
> +	int revents = 0;
> +
> +	efd = fp->f_data;
> +	if (fp->f_type != DTYPE_LINUXEFD || efd == NULL)
> +		return (POLLERR);
> +
> +	mtx_lock(&efd->efd_lock);
> +	if (efd->efd_count == UINT64_MAX)
> +		revents |= events & POLLERR;

POLLERR should be returned regardless of whether it is set in events.
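
That is, roughly:

	if (efd->efd_count == UINT64_MAX)
		revents |= POLLERR;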

> +	if ((events & (POLLIN|POLLRDNORM)) && efd->efd_count > 0)
> +		revents |= events & (POLLIN|POLLRDNORM);
> +	if ((events & (POLLOUT|POLLWRNORM)) && UINT64_MAX - 1 > efd->efd_count)
> +		revents |= events & (POLLOUT|POLLWRNORM);
> +	if (revents == 0)
> +		selrecord(td, &efd->efd_sel);
> +	mtx_unlock(&efd->efd_lock);
> +
> +	return (revents);
> +}
> [snip]
> +/*ARGSUSED*/
> +static int
> +filt_eventfdread(struct knote *kn, long hint)
> +{
> +	struct eventfd *efd = kn->kn_hook;
> +	int ret;
> +
> +	mtx_assert(&efd->efd_lock, MA_OWNED);
> +	ret = (efd->efd_count > 0);
> +
> +	return (ret);
> +}
> +
> +/*ARGSUSED*/
> +static int
> +filt_eventfdwrite(struct knote *kn, long hint)
> +{
> +	struct eventfd *efd = kn->kn_hook;
> +	int ret;
> +
> +	mtx_assert(&efd->efd_lock, MA_OWNED);
> +	ret = (UINT64_MAX - 1 > efd->efd_count);

Perhaps set EV_EOF or EV_ERROR on overflow, so the epoll emulation can
reconstruct POLLERR.

Note, though, that sockets set EV_EOF when the corresponding direction
is closed, while POLLHUP and POLLERR should only be returned if both
directions are closed (otherwise, applications cannot properly finish
the other direction when one direction is closed).
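
A rough sketch of the first idea (with the above caveat about EV_EOF
semantics in mind):

	static int
	filt_eventfdwrite(struct knote *kn, long hint)
	{
		struct eventfd *efd = kn->kn_hook;

		mtx_assert(&efd->efd_lock, MA_OWNED);
		if (efd->efd_count == UINT64_MAX)
			kn->kn_flags |= EV_EOF; /* overflow; epoll can map this */
		return (UINT64_MAX - 1 > efd->efd_count);
	}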

> +
> +	return (ret);
> +}
> [snip]
> +/*ARGSUSED*/
> +static int
> +eventfd_stat(struct file *fp, struct stat *st, struct ucred *active_cred,
> +	struct thread *td)
> +{
> +
> +	return (ENXIO);
> +}

Linux appears to permit fstat(), though the returned data is rather
meaningless.
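
If that is to be mimicked, something minimal would probably do; a sketch
(the field values here are guesses, not what Linux actually reports):

	static int
	eventfd_stat(struct file *fp, struct stat *st, struct ucred *active_cred,
		struct thread *td)
	{

		bzero(st, sizeof(*st));
		st->st_mode = S_IFIFO | S_IRUSR | S_IWUSR;
		st->st_blksize = PAGE_SIZE;
		return (0);
	}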

> [snip]

-- 
Jilles Tjoelker

