svn commit: r266316 - in user/dchagin/lemul/sys: amd64/linux amd64/linux32 compat/linux i386/linux sys

Chagin Dmitry dchagin at freebsd.org
Mon May 19 19:03:00 UTC 2014


On Sun, May 18, 2014 at 02:44:53PM +0200, Jilles Tjoelker wrote:
> On Sat, May 17, 2014 at 02:39:40PM +0000, Dmitry Chagin wrote:
> > Author: dchagin
> > Date: Sat May 17 14:39:40 2014
> > New Revision: 266316
> > URL: http://svnweb.freebsd.org/changeset/base/266316
> 
> > Log:
> >   Implement eventfd system call.
> > [snip]
> > Modified: user/dchagin/lemul/sys/compat/linux/linux_event.c
> > ==============================================================================
> > --- user/dchagin/lemul/sys/compat/linux/linux_event.c	Sat May 17 14:35:18 2014	(r266315)
> > +++ user/dchagin/lemul/sys/compat/linux/linux_event.c	Sat May 17 14:39:40 2014	(r266316)
> > [snip]
> > +static int
> > +eventfd_read(struct file *fp, struct uio *uio, struct ucred *active_cred,
> > +	int flags, struct thread *td)
> > +{
> > +	struct eventfd *efd;
> > +	eventfd_t count;
> > +	int error;
> > +
> > +	efd = fp->f_data;
> > +	if (fp->f_type != DTYPE_LINUXEFD || efd == NULL)
> > +		return (EBADF);
> > +
> > +	if (uio->uio_resid < sizeof(eventfd_t))
> > +		return (EINVAL);
> > +
> > +	error = 0;
> > +	mtx_lock(&efd->efd_lock);
> > +	if (efd->efd_count == 0) {
> 
> This should be a while loop: a successful return of mtx_sleep() does not
> guarantee anything about the predicate (for example if two threads are
> trying to read).
> 

yes, agree. thanks

> > +		if ((efd->efd_flags & LINUX_O_NONBLOCK) != 0) {
> 
> > +			mtx_unlock(&efd->efd_lock);
> > +			return (EAGAIN);
> 
> These two lines could be:
> 			error = EAGAIN;
> 			break;
> 
> > +		}
> > +		error = mtx_sleep(&efd->efd_count, &efd->efd_lock, PCATCH, "lefdrd", 0);
> > +		if (error == ERESTART)
> > +			error = EINTR;
> 
> Why disable system call restart here? The Linux man page does not say
> so, and it may cause subtle bugs in applications that do not expect an
> [EINTR] failure.
> 

whoops, misreads linux code. thanks!

> > +	}
> > +	if (error == 0) {
> > +		if ((efd->efd_flags & LINUX_EFD_SEMAPHORE) != 0) {
> > +			count = 1;
> > +			--efd->efd_count;
> > +		} else {
> > +			count = efd->efd_count;
> > +			efd->efd_count = 0;
> > +		}
> > +		error = uiomove_nofault(&count, sizeof(eventfd_t), uio);
> 
> This uiomove_nofault() call will fail if the destination user buffer is
> not physically. This may lead to almost irreproducible bugs. Instead, it
> is better to unlock efd->efd_lock and then call uiomove().
> 
> > +		if (error == 0) {
> > +			KNOTE_LOCKED(&efd->efd_sel.si_note, 0);
> > +			selwakeup(&efd->efd_sel);
> > +			wakeup(&efd->efd_count);
> > +		}
> 
> I don't think this block needs to be conditional on the success of
> uiomove. The wakeups are because of the change to efd->efd_count which
> happened anyway. Therefore, it can be moved to before uiomove.
> 

hm, looks like i overdid here )) as we are in reader context..

> > +	}
> > +	mtx_unlock(&efd->efd_lock);
> > +
> > +	return (error);
> > +}
> > +
> > +static int
> > +eventfd_write(struct file *fp, struct uio *uio, struct ucred *active_cred,
> > +	 int flags, struct thread *td)
> > +{
> > +	struct eventfd *efd;
> > +	eventfd_t count;
> > +	int error;
> > +
> > +	efd = fp->f_data;
> > +	if (fp->f_type != DTYPE_LINUXEFD || efd == NULL)
> > +		return (EBADF);
> > +
> > +	if (uio->uio_resid < sizeof(eventfd_t))
> > +		return (EINVAL);
> > +
> > +	error = uiomove(&count, sizeof(eventfd_t), uio);
> > +	if (error)
> > +		return (error);
> > +	if (count == UINT64_MAX)
> > +		return (EINVAL);
> > +
> > +	mtx_lock(&efd->efd_lock);
> > +retry:
> > +	if (UINT64_MAX - efd->efd_count <= count) {
> > +		if ((efd->efd_flags & LINUX_O_NONBLOCK) != 0) {
> > +			mtx_unlock(&efd->efd_lock);
> > +			return (EAGAIN);
> > +		}
> > +		error = mtx_sleep(&efd->efd_count, &efd->efd_lock,
> > +		    PCATCH, "lefdwr", 0);
> 
> > +		if (error == ERESTART)
> > +			error = EINTR;
> 
> As in eventfd_read(), why disable system call restart here?
> 
> > +		if (error == 0)
> > +			goto retry;
> > +	}
> > +	if (error == 0) {
> > +		efd->efd_count += count;
> > +		KNOTE_LOCKED(&efd->efd_sel.si_note, 0);
> > +		selwakeup(&efd->efd_sel);
> > +		wakeup(&efd->efd_count);
> > +	}
> > +	mtx_unlock(&efd->efd_lock);
> > +
> > +	return (error);
> > +}
> > +
> > +static int
> > +eventfd_poll(struct file *fp, int events, struct ucred *active_cred,
> > +	struct thread *td)
> > +{
> > +	struct eventfd *efd;
> > +	int revents = 0;
> > +
> > +	efd = fp->f_data;
> > +	if (fp->f_type != DTYPE_LINUXEFD || efd == NULL)
> > +		return (POLLERR);
> > +
> > +	mtx_lock(&efd->efd_lock);
> > +	if (efd->efd_count == UINT64_MAX)
> > +		revents |= events & POLLERR;
> 
> POLLERR should be returned regardless of whether it is set in events.
> 

indeed.

> > +	if ((events & (POLLIN|POLLRDNORM)) && efd->efd_count > 0)
> > +		revents |= events & (POLLIN|POLLRDNORM);
> > +	if ((events & (POLLOUT|POLLWRNORM)) && UINT64_MAX - 1 > efd->efd_count)
> > +		revents |= events & (POLLOUT|POLLWRNORM);
> > +	if (revents == 0)
> > +		selrecord(td, &efd->efd_sel);
> > +	mtx_unlock(&efd->efd_lock);
> > +
> > +	return (revents);
> > +}
> > [snip]
> > +/*ARGSUSED*/
> > +static int
> > +filt_eventfdread(struct knote *kn, long hint)
> > +{
> > +	struct eventfd *efd = kn->kn_hook;
> > +	int ret;
> > +
> > +	mtx_assert(&efd->efd_lock, MA_OWNED);
> > +	ret = (efd->efd_count > 0);
> > +
> > +	return (ret);
> > +}
> > +
> > +/*ARGSUSED*/
> > +static int
> > +filt_eventfdwrite(struct knote *kn, long hint)
> > +{
> > +	struct eventfd *efd = kn->kn_hook;
> > +	int ret;
> > +
> > +	mtx_assert(&efd->efd_lock, MA_OWNED);
> > +	ret = (UINT64_MAX - 1 > efd->efd_count);
> 
> Perhaps set EV_EOF or EV_ERROR on overflow, so the epoll emulation can
> reconstruct POLLERR.
> 
> Note, though, that sockets set EV_EOF when the corresponding direction
> is closed, while POLLHUP and POLLERR should only be returned if both
> directions are closed (otherwise, applications cannot properly finish
> the other direction when one direction is closed).
> 
> > +
> > +	return (ret);
> > +}
> > [snip]
> > +/*ARGSUSED*/
> > +static int
> > +eventfd_stat(struct file *fp, struct stat *st, struct ucred *active_cred,
> > +	struct thread *td)
> > +{
> > +
> > +	return (ENXIO);
> > +}
> 
> Linux appears to permit fstat(), though the returned data is rather
> meaningless.
> 

no, did not find this in linux code.

> > [snip]
> 
> -- 
> Jilles Tjoelker

Thank you Jilles, glad to see that someone is interested.

-- 
Have fun!
chd
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 181 bytes
Desc: not available
URL: <http://lists.freebsd.org/pipermail/svn-src-user/attachments/20140519/eb3bf70d/attachment.sig>


More information about the svn-src-user mailing list