svn commit: r283441 - in head/sys: amd64/linux amd64/linux32 compat/linux conf i386/linux modules/linux modules/linux64
Julian Elischer
julian at freebsd.org
Mon May 25 02:42:18 UTC 2015
On 5/25/15 12:41 AM, Dmitry Chagin wrote:
> Author: dchagin
> Date: Sun May 24 16:41:39 2015
> New Revision: 283441
> URL: https://svnweb.freebsd.org/changeset/base/283441
>
> Log:
> Implement epoll family system calls. This is a tiny wrapper
> around kqueue() to implement epoll subset of functionality.
> The kqueue user data are 32bit on i386 which is not enough for
> epoll user data, so we keep user data in the proc emuldata.
Have you considered making the in-kernel representation just have more
room?
>
> Initial patch developed by rdivacky@ in 2007, then extended
> by Yuri Victorovich @ r255672 and finished by me
> in collaboration with mjg@ and jillies@.
>
> Differential Revision: https://reviews.freebsd.org/D1092
>
> Added:
> head/sys/compat/linux/linux_event.c (contents, props changed)
> head/sys/compat/linux/linux_event.h (contents, props changed)
> Modified:
> head/sys/amd64/linux/linux_dummy.c
> head/sys/amd64/linux/syscalls.master
> head/sys/amd64/linux32/linux32_dummy.c
> head/sys/amd64/linux32/syscalls.master
> head/sys/compat/linux/linux_emul.c
> head/sys/compat/linux/linux_emul.h
> head/sys/compat/linux/linux_util.c
> head/sys/compat/linux/linux_util.h
> head/sys/conf/files.amd64
> head/sys/conf/files.i386
> head/sys/conf/files.pc98
> head/sys/i386/linux/linux_dummy.c
> head/sys/i386/linux/syscalls.master
> head/sys/modules/linux/Makefile
> head/sys/modules/linux64/Makefile
>
> Modified: head/sys/amd64/linux/linux_dummy.c
> ==============================================================================
> --- head/sys/amd64/linux/linux_dummy.c Sun May 24 16:36:29 2015 (r283440)
> +++ head/sys/amd64/linux/linux_dummy.c Sun May 24 16:41:39 2015 (r283441)
> @@ -69,13 +69,10 @@ DUMMY(tuxcall);
> DUMMY(security);
> DUMMY(set_thread_area);
> DUMMY(lookup_dcookie);
> -DUMMY(epoll_create);
> DUMMY(epoll_ctl_old);
> DUMMY(epoll_wait_old);
> DUMMY(remap_file_pages);
> DUMMY(semtimedop);
> -DUMMY(epoll_ctl);
> -DUMMY(epoll_wait);
> DUMMY(mbind);
> DUMMY(get_mempolicy);
> DUMMY(set_mempolicy);
> @@ -112,7 +109,6 @@ DUMMY(timerfd_settime);
> DUMMY(timerfd_gettime);
> DUMMY(signalfd4);
> DUMMY(eventfd2);
> -DUMMY(epoll_create1);
> DUMMY(inotify_init1);
> DUMMY(preadv);
> DUMMY(pwritev);
>
> Modified: head/sys/amd64/linux/syscalls.master
> ==============================================================================
> --- head/sys/amd64/linux/syscalls.master Sun May 24 16:36:29 2015 (r283440)
> +++ head/sys/amd64/linux/syscalls.master Sun May 24 16:41:39 2015 (r283441)
> @@ -373,7 +373,7 @@
> 210 AUE_NULL UNIMPL linux_io_cancel
> 211 AUE_NULL UNIMPL linux_get_thread_area
> 212 AUE_NULL STD { int linux_lookup_dcookie(void); }
> -213 AUE_NULL STD { int linux_epoll_create(void); }
> +213 AUE_NULL STD { int linux_epoll_create(l_int size); }
> 214 AUE_NULL STD { int linux_epoll_ctl_old(void); }
> 215 AUE_NULL STD { int linux_epoll_wait_old(void); }
> 216 AUE_NULL STD { int linux_remap_file_pages(void); }
> @@ -397,8 +397,10 @@
> 230 AUE_NULL STD { int linux_clock_nanosleep(clockid_t which, int flags, \
> struct l_timespec *rqtp, struct l_timespec *rmtp); }
> 231 AUE_EXIT STD { int linux_exit_group(int error_code); }
> -232 AUE_NULL STD { int linux_epoll_wait(void); }
> -233 AUE_NULL STD { int linux_epoll_ctl(void); }
> +232 AUE_NULL STD { int linux_epoll_wait(l_int epfd, struct epoll_event *events, \
> + l_int maxevents, l_int timeout); }
> +233 AUE_NULL STD { int linux_epoll_ctl(l_int epfd, l_int op, l_int fd, \
> + struct epoll_event *event); }
> 234 AUE_NULL STD { int linux_tgkill(int tgid, int pid, int sig); }
> 235 AUE_UTIMES STD { int linux_utimes(char *fname, \
> struct l_timeval *tptr); }
> @@ -466,7 +468,8 @@
> 278 AUE_NULL STD { int linux_vmsplice(void); }
> 279 AUE_NULL STD { int linux_move_pages(void); }
> 280 AUE_NULL STD { int linux_utimensat(void); }
> -281 AUE_NULL STD { int linux_epoll_pwait(void); }
> +281 AUE_NULL STD { int linux_epoll_pwait(l_int epfd, struct epoll_event *events, \
> + l_int maxevents, l_int timeout, l_sigset_t *mask); }
> 282 AUE_NULL STD { int linux_signalfd(void); }
> 283 AUE_NULL STD { int linux_timerfd(void); }
> 284 AUE_NULL STD { int linux_eventfd(void); }
> @@ -477,7 +480,7 @@
> l_uintptr_t namelen, int flags); }
> 289 AUE_NULL STD { int linux_signalfd4(void); }
> 290 AUE_NULL STD { int linux_eventfd2(void); }
> -291 AUE_NULL STD { int linux_epoll_create1(void); }
> +291 AUE_NULL STD { int linux_epoll_create1(l_int flags); }
> 292 AUE_NULL STD { int linux_dup3(l_int oldfd, \
> l_int newfd, l_int flags); }
> 293 AUE_NULL STD { int linux_pipe2(l_int *pipefds, l_int flags); }
>
> Modified: head/sys/amd64/linux32/linux32_dummy.c
> ==============================================================================
> --- head/sys/amd64/linux32/linux32_dummy.c Sun May 24 16:36:29 2015 (r283440)
> +++ head/sys/amd64/linux32/linux32_dummy.c Sun May 24 16:41:39 2015 (r283441)
> @@ -68,9 +68,6 @@ DUMMY(pivot_root);
> DUMMY(mincore);
> DUMMY(ptrace);
> DUMMY(lookup_dcookie);
> -DUMMY(epoll_create);
> -DUMMY(epoll_ctl);
> -DUMMY(epoll_wait);
> DUMMY(remap_file_pages);
> DUMMY(fstatfs64);
> DUMMY(mbind);
> @@ -120,7 +117,6 @@ DUMMY(timerfd_gettime);
> /* linux 2.6.27: */
> DUMMY(signalfd4);
> DUMMY(eventfd2);
> -DUMMY(epoll_create1);
> DUMMY(inotify_init1);
> /* linux 2.6.30: */
> DUMMY(preadv);
>
> Modified: head/sys/amd64/linux32/syscalls.master
> ==============================================================================
> --- head/sys/amd64/linux32/syscalls.master Sun May 24 16:36:29 2015 (r283440)
> +++ head/sys/amd64/linux32/syscalls.master Sun May 24 16:41:39 2015 (r283441)
> @@ -430,9 +430,11 @@
> 251 AUE_NULL UNIMPL
> 252 AUE_EXIT STD { int linux_exit_group(int error_code); }
> 253 AUE_NULL STD { int linux_lookup_dcookie(void); }
> -254 AUE_NULL STD { int linux_epoll_create(void); }
> -255 AUE_NULL STD { int linux_epoll_ctl(void); }
> -256 AUE_NULL STD { int linux_epoll_wait(void); }
> +254 AUE_NULL STD { int linux_epoll_create(l_int size); }
> +255 AUE_NULL STD { int linux_epoll_ctl(l_int epfd, l_int op, l_int fd, \
> + struct epoll_event *event); }
> +256 AUE_NULL STD { int linux_epoll_wait(l_int epfd, struct epoll_event *events, \
> + l_int maxevents, l_int timeout); }
> 257 AUE_NULL STD { int linux_remap_file_pages(void); }
> 258 AUE_NULL STD { int linux_set_tid_address(int *tidptr); }
> 259 AUE_NULL STD { int linux_timer_create(clockid_t clock_id, \
> @@ -527,7 +529,8 @@
> 317 AUE_NULL STD { int linux_move_pages(void); }
> ; linux 2.6.19:
> 318 AUE_NULL STD { int linux_getcpu(void); }
> -319 AUE_NULL STD { int linux_epoll_pwait(void); }
> +319 AUE_NULL STD { int linux_epoll_pwait(l_int epfd, struct epoll_event *events, \
> + l_int maxevents, l_int timeout, l_osigset_t *mask); }
> ; linux 2.6.22:
> 320 AUE_NULL STD { int linux_utimensat(void); }
> 321 AUE_NULL STD { int linux_signalfd(void); }
> @@ -541,7 +544,7 @@
> ; linux 2.6.27:
> 327 AUE_NULL STD { int linux_signalfd4(void); }
> 328 AUE_NULL STD { int linux_eventfd2(void); }
> -329 AUE_NULL STD { int linux_epoll_create1(void); }
> +329 AUE_NULL STD { int linux_epoll_create1(l_int flags); }
> 330 AUE_NULL STD { int linux_dup3(l_int oldfd, \
> l_int newfd, l_int flags); }
> 331 AUE_NULL STD { int linux_pipe2(l_int *pipefds, l_int flags); }
>
> Modified: head/sys/compat/linux/linux_emul.c
> ==============================================================================
> --- head/sys/compat/linux/linux_emul.c Sun May 24 16:36:29 2015 (r283440)
> +++ head/sys/compat/linux/linux_emul.c Sun May 24 16:41:39 2015 (r283441)
> @@ -42,8 +42,6 @@ __FBSDID("$FreeBSD$");
> #include <sys/proc.h>
> #include <sys/syscallsubr.h>
> #include <sys/sysent.h>
> -#include <sys/sysproto.h>
> -#include <sys/unistd.h>
>
> #include <compat/linux/linux_emul.h>
> #include <compat/linux/linux_misc.h>
> @@ -86,6 +84,7 @@ linux_proc_init(struct thread *td, struc
> {
> struct linux_emuldata *em;
> struct linux_pemuldata *pem;
> + struct epoll_emuldata *emd;
>
> if (newtd != NULL) {
> /* non-exec call */
> @@ -93,8 +92,13 @@ linux_proc_init(struct thread *td, struc
> em->pdeath_signal = 0;
> em->robust_futexes = NULL;
> if (flags & LINUX_CLONE_THREAD) {
> + LINUX_CTR1(proc_init, "thread newtd(%d)",
> + newtd->td_tid);
> +
> em->em_tid = newtd->td_tid;
> } else {
> + LINUX_CTR1(proc_init, "fork newtd(%d)",
> + newtd->td_proc->p_pid);
>
> em->em_tid = newtd->td_proc->p_pid;
>
> @@ -105,12 +109,24 @@ linux_proc_init(struct thread *td, struc
> newtd->td_emuldata = em;
> } else {
> /* exec */
> + LINUX_CTR1(proc_init, "exec newtd(%d)",
> + td->td_proc->p_pid);
>
> /* lookup the old one */
> em = em_find(td);
> KASSERT(em != NULL, ("proc_init: emuldata not found in exec case.\n"));
>
> em->em_tid = td->td_proc->p_pid;
> +
> + /* epoll should be destroyed in a case of exec. */
> + pem = pem_find(td->td_proc);
> + KASSERT(pem != NULL, ("proc_exit: proc emuldata not found.\n"));
> +
> + if (pem->epoll != NULL) {
> + emd = pem->epoll;
> + pem->epoll = NULL;
> + free(emd, M_EPOLL);
> + }
> }
>
> em->child_clear_tid = NULL;
> @@ -121,6 +137,7 @@ void
> linux_proc_exit(void *arg __unused, struct proc *p)
> {
> struct linux_pemuldata *pem;
> + struct epoll_emuldata *emd;
> struct thread *td = curthread;
>
> if (__predict_false(SV_CURPROC_ABI() != SV_ABI_LINUX))
> @@ -133,6 +150,12 @@ linux_proc_exit(void *arg __unused, stru
>
> p->p_emuldata = NULL;
>
> + if (pem->epoll != NULL) {
> + emd = pem->epoll;
> + pem->epoll = NULL;
> + free(emd, M_EPOLL);
> + }
> +
> sx_destroy(&pem->pem_sx);
> free(pem, M_LINUX);
> }
> @@ -141,6 +164,7 @@ int
> linux_common_execve(struct thread *td, struct image_args *eargs)
> {
> struct linux_pemuldata *pem;
> + struct epoll_emuldata *emd;
> struct linux_emuldata *em;
> struct proc *p;
> int error;
> @@ -180,6 +204,12 @@ linux_common_execve(struct thread *td, s
> p->p_emuldata = NULL;
> PROC_UNLOCK(p);
>
> + if (pem->epoll != NULL) {
> + emd = pem->epoll;
> + pem->epoll = NULL;
> + free(emd, M_EPOLL);
> + }
> +
> free(em, M_TEMP);
> free(pem, M_LINUX);
> }
> @@ -197,6 +227,7 @@ linux_proc_exec(void *arg __unused, stru
> */
> if (__predict_false((imgp->sysent->sv_flags & SV_ABI_MASK) ==
> SV_ABI_LINUX)) {
> +
> if (SV_PROC_ABI(p) == SV_ABI_LINUX)
> linux_proc_init(td, NULL, 0);
> else
>
> Modified: head/sys/compat/linux/linux_emul.h
> ==============================================================================
> --- head/sys/compat/linux/linux_emul.h Sun May 24 16:36:29 2015 (r283440)
> +++ head/sys/compat/linux/linux_emul.h Sun May 24 16:41:39 2015 (r283441)
> @@ -60,9 +60,12 @@ int linux_common_execve(struct thread *,
> /* process emuldata flags */
> #define LINUX_XDEPR_REQUEUEOP 0x00000001 /* uses deprecated
> futex REQUEUE op*/
> +#define LINUX_XUNSUP_EPOLL 0x00000002 /* unsupported epoll events */
> +
> struct linux_pemuldata {
> uint32_t flags; /* process emuldata flags */
> struct sx pem_sx; /* lock for this struct */
> + void *epoll; /* epoll data */
> };
>
> #define LINUX_PEM_XLOCK(p) sx_xlock(&(p)->pem_sx)
>
> Added: head/sys/compat/linux/linux_event.c
> ==============================================================================
> --- /dev/null 00:00:00 1970 (empty, because file is newly added)
> +++ head/sys/compat/linux/linux_event.c Sun May 24 16:41:39 2015 (r283441)
> @@ -0,0 +1,500 @@
> +/*-
> + * Copyright (c) 2007 Roman Divacky
> + * Copyright (c) 2014 Dmitry Chagin
> + * All rights reserved.
> + *
> + * Redistribution and use in source and binary forms, with or without
> + * modification, are permitted provided that the following conditions
> + * are met:
> + * 1. Redistributions of source code must retain the above copyright
> + * notice, this list of conditions and the following disclaimer.
> + * 2. Redistributions in binary form must reproduce the above copyright
> + * notice, this list of conditions and the following disclaimer in the
> + * documentation and/or other materials provided with the distribution.
> + *
> + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
> + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
> + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
> + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
> + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
> + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
> + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
> + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
> + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
> + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
> + * SUCH DAMAGE.
> + */
> +
> +#include <sys/cdefs.h>
> +__FBSDID("$FreeBSD$");
> +
> +#include "opt_compat.h"
> +
> +#include <sys/param.h>
> +#include <sys/systm.h>
> +#include <sys/imgact.h>
> +#include <sys/kernel.h>
> +#include <sys/limits.h>
> +#include <sys/lock.h>
> +#include <sys/mutex.h>
> +#include <sys/capability.h>
> +#include <sys/types.h>
> +#include <sys/file.h>
> +#include <sys/filedesc.h>
> +#include <sys/errno.h>
> +#include <sys/event.h>
> +#include <sys/proc.h>
> +#include <sys/sx.h>
> +#include <sys/syscallsubr.h>
> +#include <sys/timespec.h>
> +
> +#ifdef COMPAT_LINUX32
> +#include <machine/../linux32/linux.h>
> +#include <machine/../linux32/linux32_proto.h>
> +#else
> +#include <machine/../linux/linux.h>
> +#include <machine/../linux/linux_proto.h>
> +#endif
> +
> +#include <compat/linux/linux_emul.h>
> +#include <compat/linux/linux_event.h>
> +#include <compat/linux/linux_file.h>
> +#include <compat/linux/linux_util.h>
> +
> +/*
> + * epoll defines 'struct epoll_event' with the field 'data' as 64 bits
> + * on all architectures. But on 32 bit architectures BSD 'struct kevent' only
> + * has 32 bit opaque pointer as 'udata' field. So we can't pass epoll supplied
> + * data verbatim. Therefore we allocate 64-bit memory block to pass
> + * user supplied data for every file descriptor.
> + */
> +
> +typedef uint64_t epoll_udata_t;
> +
> +struct epoll_emuldata {
> + uint32_t fdc; /* epoll udata max index */
> + epoll_udata_t udata[1]; /* epoll user data vector */
> +};
> +
> +#define EPOLL_DEF_SZ 16
> +#define EPOLL_SIZE(fdn) \
> + (sizeof(struct epoll_emuldata)+(fdn) * sizeof(epoll_udata_t))
> +
> +struct epoll_event {
> + uint32_t events;
> + epoll_udata_t data;
> +}
> +#if defined(__amd64__)
> +__attribute__((packed))
> +#endif
> +;
> +
> +#define LINUX_MAX_EVENTS (INT_MAX / sizeof(struct epoll_event))
> +
> +static void epoll_fd_install(struct thread *td, int fd, epoll_udata_t udata);
> +static int epoll_to_kevent(struct thread *td, struct file *epfp,
> + int fd, struct epoll_event *l_event, int *kev_flags,
> + struct kevent *kevent, int *nkevents);
> +static void kevent_to_epoll(struct kevent *kevent, struct epoll_event *l_event);
> +static int epoll_kev_copyout(void *arg, struct kevent *kevp, int count);
> +static int epoll_kev_copyin(void *arg, struct kevent *kevp, int count);
> +static int epoll_delete_event(struct thread *td, struct file *epfp,
> + int fd, int filter);
> +static int epoll_delete_all_events(struct thread *td, struct file *epfp,
> + int fd);
> +
> +struct epoll_copyin_args {
> + struct kevent *changelist;
> +};
> +
> +struct epoll_copyout_args {
> + struct epoll_event *leventlist;
> + struct proc *p;
> + uint32_t count;
> + int error;
> +};
> +
> +
> +static void
> +epoll_fd_install(struct thread *td, int fd, epoll_udata_t udata)
> +{
> + struct linux_pemuldata *pem;
> + struct epoll_emuldata *emd;
> + struct proc *p;
> +
> + p = td->td_proc;
> +
> + pem = pem_find(p);
> + KASSERT(pem != NULL, ("epoll proc emuldata not found.\n"));
> +
> + LINUX_PEM_XLOCK(pem);
> + if (pem->epoll == NULL) {
> + emd = malloc(EPOLL_SIZE(fd), M_EPOLL, M_WAITOK);
> + emd->fdc = fd;
> + pem->epoll = emd;
> + } else {
> + emd = pem->epoll;
> + if (fd > emd->fdc) {
> + emd = realloc(emd, EPOLL_SIZE(fd), M_EPOLL, M_WAITOK);
> + emd->fdc = fd;
> + pem->epoll = emd;
> + }
> + }
> + emd->udata[fd] = udata;
> + LINUX_PEM_XUNLOCK(pem);
> +}
> +
> +static int
> +epoll_create_common(struct thread *td, int flags)
> +{
> + int error;
> +
> + error = kern_kqueue(td, flags);
> + if (error)
> + return (error);
> +
> + epoll_fd_install(td, EPOLL_DEF_SZ, 0);
> +
> + return (0);
> +}
> +
> +int
> +linux_epoll_create(struct thread *td, struct linux_epoll_create_args *args)
> +{
> +
> + /*
> + * args->size is unused. Linux just tests it
> + * and then forgets it as well.
> + */
> + if (args->size <= 0)
> + return (EINVAL);
> +
> + return (epoll_create_common(td, 0));
> +}
> +
> +int
> +linux_epoll_create1(struct thread *td, struct linux_epoll_create1_args *args)
> +{
> + int flags;
> +
> + if ((args->flags & ~(LINUX_O_CLOEXEC)) != 0)
> + return (EINVAL);
> +
> + flags = 0;
> + if ((args->flags & LINUX_O_CLOEXEC) != 0)
> + flags |= O_CLOEXEC;
> +
> + return (epoll_create_common(td, flags));
> +}
> +
> +/* Structure converting function from epoll to kevent. */
> +static int
> +epoll_to_kevent(struct thread *td, struct file *epfp,
> + int fd, struct epoll_event *l_event, int *kev_flags,
> + struct kevent *kevent, int *nkevents)
> +{
> + uint32_t levents = l_event->events;
> + struct linux_pemuldata *pem;
> + struct proc *p;
> +
> + /* flags related to how event is registered */
> + if ((levents & LINUX_EPOLLONESHOT) != 0)
> + *kev_flags |= EV_ONESHOT;
> + if ((levents & LINUX_EPOLLET) != 0)
> + *kev_flags |= EV_CLEAR;
> +
> + /* flags related to what event is registered */
> + if ((levents & LINUX_EPOLL_EVRD) != 0) {
> + EV_SET(kevent++, fd, EVFILT_READ, *kev_flags, 0, 0, 0);
> + ++(*nkevents);
> + }
> + if ((levents & LINUX_EPOLL_EVWR) != 0) {
> + EV_SET(kevent++, fd, EVFILT_WRITE, *kev_flags, 0, 0, 0);
> + ++(*nkevents);
> + }
> +
> + if ((levents & ~(LINUX_EPOLL_EVSUP)) != 0) {
> + p = td->td_proc;
> +
> + pem = pem_find(p);
> + KASSERT(pem != NULL, ("epoll proc emuldata not found.\n"));
> + KASSERT(pem->epoll != NULL, ("epoll proc epolldata not found.\n"));
> +
> + LINUX_PEM_XLOCK(pem);
> + if ((pem->flags & LINUX_XUNSUP_EPOLL) == 0) {
> + pem->flags |= LINUX_XUNSUP_EPOLL;
> + LINUX_PEM_XUNLOCK(pem);
> + linux_msg(td, "epoll_ctl unsupported flags: 0x%x\n",
> + levents);
> + } else
> + LINUX_PEM_XUNLOCK(pem);
> + return (EINVAL);
> + }
> +
> + return (0);
> +}
> +
> +/*
> + * Structure converting function from kevent to epoll. In a case
> + * this is called on error in registration we store the error in
> + * event->data and pick it up later in linux_epoll_ctl().
> + */
> +static void
> +kevent_to_epoll(struct kevent *kevent, struct epoll_event *l_event)
> +{
> +
> + if ((kevent->flags & EV_ERROR) != 0)
> + return;
> +
> + switch (kevent->filter) {
> + case EVFILT_READ:
> + l_event->events = LINUX_EPOLLIN|LINUX_EPOLLRDNORM|LINUX_EPOLLPRI;
> + break;
> + case EVFILT_WRITE:
> + l_event->events = LINUX_EPOLLOUT|LINUX_EPOLLWRNORM;
> + break;
> + }
> +}
> +
> +/*
> + * Copyout callback used by kevent. This converts kevent
> + * events to epoll events and copies them back to the
> + * userspace. This is also called on error on registering
> + * of the filter.
> + */
> +static int
> +epoll_kev_copyout(void *arg, struct kevent *kevp, int count)
> +{
> + struct epoll_copyout_args *args;
> + struct linux_pemuldata *pem;
> + struct epoll_emuldata *emd;
> + struct epoll_event *eep;
> + int error, fd, i;
> +
> + args = (struct epoll_copyout_args*) arg;
> + eep = malloc(sizeof(*eep) * count, M_EPOLL, M_WAITOK | M_ZERO);
> +
> + pem = pem_find(args->p);
> + KASSERT(pem != NULL, ("epoll proc emuldata not found.\n"));
> + LINUX_PEM_SLOCK(pem);
> + emd = pem->epoll;
> + KASSERT(emd != NULL, ("epoll proc epolldata not found.\n"));
> +
> + for (i = 0; i < count; i++) {
> + kevent_to_epoll(&kevp[i], &eep[i]);
> +
> + fd = kevp[i].ident;
> + KASSERT(fd <= emd->fdc, ("epoll user data vector"
> + " is too small.\n"));
> + eep[i].data = emd->udata[fd];
> + }
> + LINUX_PEM_SUNLOCK(pem);
> +
> + error = copyout(eep, args->leventlist, count * sizeof(*eep));
> + if (error == 0) {
> + args->leventlist += count;
> + args->count += count;
> + } else if (args->error == 0)
> + args->error = error;
> +
> + free(eep, M_EPOLL);
> + return (error);
> +}
> +
> +/*
> + * Copyin callback used by kevent. This copies already
> + * converted filters from kernel memory to the kevent
> + * internal kernel memory. Hence the memcpy instead of
> + * copyin.
> + */
> +static int
> +epoll_kev_copyin(void *arg, struct kevent *kevp, int count)
> +{
> + struct epoll_copyin_args *args;
> +
> + args = (struct epoll_copyin_args*) arg;
> +
> + memcpy(kevp, args->changelist, count * sizeof(*kevp));
> + args->changelist += count;
> +
> + return (0);
> +}
> +
> +/*
> + * Load epoll filter, convert it to kevent filter
> + * and load it into kevent subsystem.
> + */
> +int
> +linux_epoll_ctl(struct thread *td, struct linux_epoll_ctl_args *args)
> +{
> + struct file *epfp, *fp;
> + struct epoll_copyin_args ciargs;
> + struct kevent kev[2];
> + struct kevent_copyops k_ops = { &ciargs,
> + NULL,
> + epoll_kev_copyin};
> + struct epoll_event le;
> + cap_rights_t rights;
> + int kev_flags;
> + int nchanges = 0;
> + int error;
> +
> + if (args->op != LINUX_EPOLL_CTL_DEL) {
> + error = copyin(args->event, &le, sizeof(le));
> + if (error != 0)
> + return (error);
> + }
> +
> + error = fget(td, args->epfd,
> + cap_rights_init(&rights, CAP_KQUEUE_CHANGE), &epfp);
> + if (error != 0)
> + return (error);
> + if (epfp->f_type != DTYPE_KQUEUE)
> + goto leave1;
> +
> + /* Protect user data vector from incorrectly supplied fd. */
> + error = fget(td, args->fd, cap_rights_init(&rights, CAP_POLL_EVENT), &fp);
> + if (error != 0)
> + goto leave1;
> +
> + /* Linux disallows spying on himself */
> + if (epfp == fp) {
> + error = EINVAL;
> + goto leave0;
> + }
> +
> + ciargs.changelist = kev;
> +
> + switch (args->op) {
> + case LINUX_EPOLL_CTL_MOD:
> + /*
> + * We don't memorize which events were set for this FD
> + * on this level, so just delete all we could have set:
> + * EVFILT_READ and EVFILT_WRITE, ignoring any errors
> + */
> + error = epoll_delete_all_events(td, epfp, args->fd);
> + if (error)
> + goto leave0;
> + /* FALLTHROUGH */
> +
> + case LINUX_EPOLL_CTL_ADD:
> + kev_flags = EV_ADD | EV_ENABLE;
> + break;
> +
> + case LINUX_EPOLL_CTL_DEL:
> + /* CTL_DEL means unregister this fd with this epoll */
> + error = epoll_delete_all_events(td, epfp, args->fd);
> + goto leave0;
> +
> + default:
> + error = EINVAL;
> + goto leave0;
> + }
> +
> + error = epoll_to_kevent(td, epfp, args->fd, &le, &kev_flags,
> + kev, &nchanges);
> + if (error)
> + goto leave0;
> +
> + epoll_fd_install(td, args->fd, le.data);
> +
> + error = kern_kevent_fp(td, epfp, nchanges, 0, &k_ops, NULL);
> +
> +leave0:
> + fdrop(fp, td);
> +
> +leave1:
> + fdrop(epfp, td);
> + return (error);
> +}
> +
> +/*
> + * Wait for a filter to be triggered on the epoll file descriptor.
> + */
> +int
> +linux_epoll_wait(struct thread *td, struct linux_epoll_wait_args *args)
> +{
> + struct file *epfp;
> + struct timespec ts, *tsp;
> + cap_rights_t rights;
> + struct epoll_copyout_args coargs;
> + struct kevent_copyops k_ops = { &coargs,
> + epoll_kev_copyout,
> + NULL};
> + int error;
> +
> + if (args->maxevents <= 0 || args->maxevents > LINUX_MAX_EVENTS)
> + return (EINVAL);
> +
> + error = fget(td, args->epfd,
> + cap_rights_init(&rights, CAP_KQUEUE_EVENT), &epfp);
> + if (error != 0)
> + return (error);
> +
> + coargs.leventlist = args->events;
> + coargs.p = td->td_proc;
> + coargs.count = 0;
> + coargs.error = 0;
> +
> + if (args->timeout != -1) {
> + if (args->timeout < 0) {
> + error = EINVAL;
> + goto leave;
> + }
> + /* Convert from milliseconds to timespec. */
> + ts.tv_sec = args->timeout / 1000;
> + ts.tv_nsec = (args->timeout % 1000) * 1000000;
> + tsp = &ts;
> + } else {
> + tsp = NULL;
> + }
> +
> + error = kern_kevent_fp(td, epfp, 0, args->maxevents, &k_ops, tsp);
> + if (error == 0 && coargs.error != 0)
> + error = coargs.error;
> +
> + /*
> + * kern_kevent might return ENOMEM which is not expected from epoll_wait.
> + * Maybe we should translate that but I don't think it matters at all.
> + */
> + if (error == 0)
> + td->td_retval[0] = coargs.count;
> +leave:
> + fdrop(epfp, td);
> + return (error);
> +}
> +
> +static int
> +epoll_delete_event(struct thread *td, struct file *epfp, int fd, int filter)
> +{
> + struct epoll_copyin_args ciargs;
> + struct kevent kev;
> + struct kevent_copyops k_ops = { &ciargs,
> + NULL,
> + epoll_kev_copyin};
> + int error;
> +
> + ciargs.changelist = &kev;
> + EV_SET(&kev, fd, filter, EV_DELETE | EV_DISABLE, 0, 0, 0);
> +
> + error = kern_kevent_fp(td, epfp, 1, 0, &k_ops, NULL);
> +
> + /*
> + * here we ignore ENOENT, because we don't keep track of events here
> + */
> + if (error == ENOENT)
> + error = 0;
> + return (error);
> +}
> +
> +static int
> +epoll_delete_all_events(struct thread *td, struct file *epfp, int fd)
> +{
> + int error1, error2;
> +
> + error1 = epoll_delete_event(td, epfp, fd, EVFILT_READ);
> + error2 = epoll_delete_event(td, epfp, fd, EVFILT_WRITE);
> +
> + /* report any errors we got */
> + return (error1 == 0 ? error2 : error1);
> +}
>
> Added: head/sys/compat/linux/linux_event.h
> ==============================================================================
> --- /dev/null 00:00:00 1970 (empty, because file is newly added)
> +++ head/sys/compat/linux/linux_event.h Sun May 24 16:41:39 2015 (r283441)
> @@ -0,0 +1,58 @@
> +/*-
> + * Copyright (c) 2007 Roman Divacky
> + * Copyright (c) 2014 Dmitry Chagin
> + * All rights reserved.
> + *
> + * Redistribution and use in source and binary forms, with or without
> + * modification, are permitted provided that the following conditions
> + * are met:
> + * 1. Redistributions of source code must retain the above copyright
> + * notice, this list of conditions and the following disclaimer.
> + * 2. Redistributions in binary form must reproduce the above copyright
> + * notice, this list of conditions and the following disclaimer in the
> + * documentation and/or other materials provided with the distribution.
> + *
> + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
> + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
> + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
> + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
> + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
> + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
> + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
> + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
> + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
> + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
> + * SUCH DAMAGE.
> + *
> + * $FreeBSD$
> + */
> +
> +#ifndef _LINUX_EVENT_H_
> +#define _LINUX_EVENT_H_
> +
> +#define LINUX_EPOLLIN 0x001
> +#define LINUX_EPOLLPRI 0x002
> +#define LINUX_EPOLLOUT 0x004
> +#define LINUX_EPOLLRDNORM 0x040
> +#define LINUX_EPOLLRDBAND 0x080
> +#define LINUX_EPOLLWRNORM 0x100
> +#define LINUX_EPOLLWRBAND 0x200
> +#define LINUX_EPOLLMSG 0x400
> +#define LINUX_EPOLLERR 0x008
> +#define LINUX_EPOLLHUP 0x010
> +#define LINUX_EPOLLRDHUP 0x2000
> +#define LINUX_EPOLLWAKEUP 1u<<29
> +#define LINUX_EPOLLONESHOT 1u<<30
> +#define LINUX_EPOLLET 1u<<31
> +
> +#define LINUX_EPOLL_EVRD (LINUX_EPOLLIN|LINUX_EPOLLRDNORM \
> + |LINUX_EPOLLHUP|LINUX_EPOLLPRI)
> +#define LINUX_EPOLL_EVWR (LINUX_EPOLLOUT|LINUX_EPOLLWRNORM)
> +#define LINUX_EPOLL_EVSUP (LINUX_EPOLLET|LINUX_EPOLLONESHOT \
> + |LINUX_EPOLL_EVRD|LINUX_EPOLL_EVWR)
> +
> +#define LINUX_EPOLL_CTL_ADD 1
> +#define LINUX_EPOLL_CTL_DEL 2
> +#define LINUX_EPOLL_CTL_MOD 3
> +
> +#endif /* !_LINUX_EVENT_H_ */
>
> Modified: head/sys/compat/linux/linux_util.c
> ==============================================================================
> --- head/sys/compat/linux/linux_util.c Sun May 24 16:36:29 2015 (r283440)
> +++ head/sys/compat/linux/linux_util.c Sun May 24 16:41:39 2015 (r283441)
> @@ -54,6 +54,7 @@ __FBSDID("$FreeBSD$");
> #include <compat/linux/linux_util.h>
>
> MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures");
> +MALLOC_DEFINE(M_EPOLL, "lepoll", "Linux events structures");
> MALLOC_DEFINE(M_FUTEX, "futex", "Linux futexes");
> MALLOC_DEFINE(M_FUTEX_WP, "futex wp", "Linux futex waiting proc");
>
>
> Modified: head/sys/compat/linux/linux_util.h
> ==============================================================================
> --- head/sys/compat/linux/linux_util.h Sun May 24 16:36:29 2015 (r283440)
> +++ head/sys/compat/linux/linux_util.h Sun May 24 16:41:39 2015 (r283441)
> @@ -45,6 +45,7 @@
> #include <sys/uio.h>
>
> MALLOC_DECLARE(M_LINUX);
> +MALLOC_DECLARE(M_EPOLL);
> MALLOC_DECLARE(M_FUTEX);
> MALLOC_DECLARE(M_FUTEX_WP);
>
>
> Modified: head/sys/conf/files.amd64
> ==============================================================================
> --- head/sys/conf/files.amd64 Sun May 24 16:36:29 2015 (r283440)
> +++ head/sys/conf/files.amd64 Sun May 24 16:41:39 2015 (r283441)
> @@ -509,6 +509,7 @@ compat/linux/linux_uid16.c optional comp
> compat/linux/linux_util.c optional compat_linux32
> compat/linux/linux_vdso.c optional compat_linux32
> compat/linux/linux_common.c optional compat_linux32
> +compat/linux/linux_event.c optional compat_linux32
> dev/amr/amr_linux.c optional compat_linux32 amr
> dev/mfi/mfi_linux.c optional compat_linux32 mfi
> #
>
> Modified: head/sys/conf/files.i386
> ==============================================================================
> --- head/sys/conf/files.i386 Sun May 24 16:36:29 2015 (r283440)
> +++ head/sys/conf/files.i386 Sun May 24 16:41:39 2015 (r283441)
> @@ -81,6 +81,7 @@ hptrr_lib.o optional hptrr \
> cddl/contrib/opensolaris/common/atomic/i386/opensolaris_atomic.S optional zfs compile-with "${ZFS_S}"
> compat/linprocfs/linprocfs.c optional linprocfs
> compat/linsysfs/linsysfs.c optional linsysfs
> +compat/linux/linux_event.c optional compat_linux
> compat/linux/linux_emul.c optional compat_linux
> compat/linux/linux_file.c optional compat_linux
> compat/linux/linux_fork.c optional compat_linux
>
> Modified: head/sys/conf/files.pc98
> ==============================================================================
> --- head/sys/conf/files.pc98 Sun May 24 16:36:29 2015 (r283440)
> +++ head/sys/conf/files.pc98 Sun May 24 16:41:39 2015 (r283441)
> @@ -41,6 +41,7 @@ ukbdmap.h optional ukbd_dflt_keymap \
> cddl/contrib/opensolaris/common/atomic/i386/opensolaris_atomic.S optional zfs compile-with "${ZFS_S}"
> compat/linprocfs/linprocfs.c optional linprocfs
> compat/linsysfs/linsysfs.c optional linsysfs
> +compat/linux/linux_event.c optional compat_linux
> compat/linux/linux_emul.c optional compat_linux
> compat/linux/linux_file.c optional compat_linux
> compat/linux/linux_fork.c optional compat_linux
>
> Modified: head/sys/i386/linux/linux_dummy.c
> ==============================================================================
> --- head/sys/i386/linux/linux_dummy.c Sun May 24 16:36:29 2015 (r283440)
> +++ head/sys/i386/linux/linux_dummy.c Sun May 24 16:41:39 2015 (r283441)
> @@ -70,9 +70,6 @@ DUMMY(setfsgid);
> DUMMY(pivot_root);
> DUMMY(mincore);
> DUMMY(lookup_dcookie);
> -DUMMY(epoll_create);
> -DUMMY(epoll_ctl);
> -DUMMY(epoll_wait);
> DUMMY(remap_file_pages);
> DUMMY(fstatfs64);
> DUMMY(mbind);
> @@ -116,7 +113,6 @@ DUMMY(timerfd_gettime);
> /* linux 2.6.27: */
> DUMMY(signalfd4);
> DUMMY(eventfd2);
> -DUMMY(epoll_create1);
> DUMMY(inotify_init1);
> /* linux 2.6.30: */
> DUMMY(preadv);
>
> Modified: head/sys/i386/linux/syscalls.master
> ==============================================================================
> --- head/sys/i386/linux/syscalls.master Sun May 24 16:36:29 2015 (r283440)
> +++ head/sys/i386/linux/syscalls.master Sun May 24 16:41:39 2015 (r283441)
> @@ -432,9 +432,11 @@
> 251 AUE_NULL UNIMPL
> 252 AUE_EXIT STD { int linux_exit_group(int error_code); }
> 253 AUE_NULL STD { int linux_lookup_dcookie(void); }
> -254 AUE_NULL STD { int linux_epoll_create(void); }
> -255 AUE_NULL STD { int linux_epoll_ctl(void); }
> -256 AUE_NULL STD { int linux_epoll_wait(void); }
> +254 AUE_NULL STD { int linux_epoll_create(l_int size); }
> +255 AUE_NULL STD { int linux_epoll_ctl(l_int epfd, l_int op, l_int fd, \
> + struct epoll_event *event); }
> +256 AUE_NULL STD { int linux_epoll_wait(l_int epfd, struct epoll_event *events, \
> + l_int maxevents, l_int timeout); }
> 257 AUE_NULL STD { int linux_remap_file_pages(void); }
> 258 AUE_NULL STD { int linux_set_tid_address(int *tidptr); }
> 259 AUE_NULL STD { int linux_timer_create(clockid_t clock_id, \
> @@ -535,7 +537,8 @@
> 317 AUE_NULL STD { int linux_move_pages(void); }
> ; linux 2.6.19:
> 318 AUE_NULL STD { int linux_getcpu(void); }
> -319 AUE_NULL STD { int linux_epoll_pwait(void); }
> +319 AUE_NULL STD { int linux_epoll_pwait(l_int epfd, struct epoll_event *events, \
> + l_int maxevents, l_int timeout, l_osigset_t *mask); }
> ; linux 2.6.22:
> 320 AUE_NULL STD { int linux_utimensat(void); }
> 321 AUE_NULL STD { int linux_signalfd(void); }
> @@ -549,7 +552,7 @@
> ; linux 2.6.27:
> 327 AUE_NULL STD { int linux_signalfd4(void); }
> 328 AUE_NULL STD { int linux_eventfd2(void); }
> -329 AUE_NULL STD { int linux_epoll_create1(void); }
> +329 AUE_NULL STD { int linux_epoll_create1(l_int flags); }
> 330 AUE_NULL STD { int linux_dup3(l_int oldfd, \
> l_int newfd, l_int flags); }
> 331 AUE_NULL STD { int linux_pipe2(l_int *pipefds, l_int flags); }
>
> Modified: head/sys/modules/linux/Makefile
> ==============================================================================
> --- head/sys/modules/linux/Makefile Sun May 24 16:36:29 2015 (r283440)
> +++ head/sys/modules/linux/Makefile Sun May 24 16:41:39 2015 (r283441)
> @@ -10,7 +10,7 @@ CFLAGS+=-DCOMPAT_FREEBSD32 -DCOMPAT_LINU
> VDSO= linux${SFX}_vdso
>
> KMOD= linux
> -SRCS= linux_fork.c linux${SFX}_dummy.c linux_file.c \
> +SRCS= linux_fork.c linux${SFX}_dummy.c linux_file.c linux_event.c \
> linux_futex.c linux_getcwd.c linux_ioctl.c linux_ipc.c \
> linux${SFX}_machdep.c linux_misc.c linux_signal.c \
> linux_socket.c linux_stats.c linux_sysctl.c linux${SFX}_sysent.c \
>
> Modified: head/sys/modules/linux64/Makefile
> ==============================================================================
> --- head/sys/modules/linux64/Makefile Sun May 24 16:36:29 2015 (r283440)
> +++ head/sys/modules/linux64/Makefile Sun May 24 16:41:39 2015 (r283441)
> @@ -5,7 +5,7 @@
> VDSO= linux_vdso
>
> KMOD= linux64
> -SRCS= linux_fork.c linux_dummy.c linux_file.c \
> +SRCS= linux_fork.c linux_dummy.c linux_file.c linux_event.c \
> linux_futex.c linux_getcwd.c linux_ioctl.c linux_ipc.c \
> linux_machdep.c linux_misc.c linux_signal.c \
> linux_socket.c linux_stats.c linux_sysctl.c linux_sysent.c \
>
>
More information about the svn-src-all
mailing list