git: fd140c52160a - stable/13 - epoll: Store epoll_event udata member in ext member of kevent.

Vladimir Kondratyev wulf at FreeBSD.org
Sun Apr 11 21:49:57 UTC 2021


The branch stable/13 has been updated by wulf:

URL: https://cgit.FreeBSD.org/src/commit/?id=fd140c52160a8cc21d665d5bc6fa91d9d6ab8efe

commit fd140c52160a8cc21d665d5bc6fa91d9d6ab8efe
Author:     Vladimir Kondratyev <wulf at FreeBSD.org>
AuthorDate: 2021-02-07 23:46:14 +0000
Commit:     Vladimir Kondratyev <wulf at FreeBSD.org>
CommitDate: 2021-04-11 21:47:39 +0000

    epoll: Store epoll_event udata member in ext member of kevent.
    
    The current epoll implementation stores the udata field of the
    epoll_event structure in a special dynamically-sized table rather than
    in the udata field of the backing kevent structure, for 2 reasons:
    1. Kevent's udata is smaller than epoll's on 32-bit archs.
    2. Kevent's udata can be clobbered when EPOLL_CTL_ADD is executed, as
       kqueue modifies an existing event while epoll returns an error in
       this case.
    
    Since r320043 introduced four new 64-bit user data members (ext[]),
    we can store the epoll udata in one of them and drop the aforementioned
    table. According to the kqueue_register() source code, ext members are
    not updated when an existing kevent is modified, which fixes item 2.
    
    As a side effect, the patch fixes PR/252582.
    
    Reviewed by:    trasz
    MFC after:      1 month
    Differential revision:  https://reviews.freebsd.org/D28169
    
    (cherry picked from commit b3c6fe663bb90240f8bda6b5ba9c6a761f09f078)
---
 sys/compat/linux/linux_emul.c  | 21 ----------
 sys/compat/linux/linux_emul.h  |  1 -
 sys/compat/linux/linux_event.c | 88 ++++++------------------------------------
 3 files changed, 11 insertions(+), 99 deletions(-)

diff --git a/sys/compat/linux/linux_emul.c b/sys/compat/linux/linux_emul.c
index 1dfbe239ccc4..499bebe8926a 100644
--- a/sys/compat/linux/linux_emul.c
+++ b/sys/compat/linux/linux_emul.c
@@ -143,7 +143,6 @@ linux_proc_init(struct thread *td, struct thread *newtd, int flags)
 {
 	struct linux_emuldata *em;
 	struct linux_pemuldata *pem;
-	struct epoll_emuldata *emd;
 	struct proc *p;
 
 	if (newtd != NULL) {
@@ -185,15 +184,9 @@ linux_proc_init(struct thread *td, struct thread *newtd, int flags)
 		em->child_clear_tid = NULL;
 		em->child_set_tid = NULL;
 
-		 /* epoll should be destroyed in a case of exec. */
 		pem = pem_find(p);
 		KASSERT(pem != NULL, ("proc_exit: proc emuldata not found.\n"));
 		pem->persona = 0;
-		if (pem->epoll != NULL) {
-			emd = pem->epoll;
-			pem->epoll = NULL;
-			free(emd, M_EPOLL);
-		}
 	}
 
 }
@@ -202,7 +195,6 @@ void
 linux_on_exit(struct proc *p)
 {
 	struct linux_pemuldata *pem;
-	struct epoll_emuldata *emd;
 	struct thread *td = curthread;
 
 	MPASS(SV_CURPROC_ABI() == SV_ABI_LINUX);
@@ -217,12 +209,6 @@ linux_on_exit(struct proc *p)
 
 	p->p_emuldata = NULL;
 
-	if (pem->epoll != NULL) {
-		emd = pem->epoll;
-		pem->epoll = NULL;
-		free(emd, M_EPOLL);
-	}
-
 	sx_destroy(&pem->pem_sx);
 	free(pem, M_LINUX);
 }
@@ -267,7 +253,6 @@ int
 linux_common_execve(struct thread *td, struct image_args *eargs)
 {
 	struct linux_pemuldata *pem;
-	struct epoll_emuldata *emd;
 	struct vmspace *oldvmspace;
 	struct linux_emuldata *em;
 	struct proc *p;
@@ -299,12 +284,6 @@ linux_common_execve(struct thread *td, struct image_args *eargs)
 		p->p_emuldata = NULL;
 		PROC_UNLOCK(p);
 
-		if (pem->epoll != NULL) {
-			emd = pem->epoll;
-			pem->epoll = NULL;
-			free(emd, M_EPOLL);
-		}
-
 		free(em, M_TEMP);
 		free(pem, M_LINUX);
 	}
diff --git a/sys/compat/linux/linux_emul.h b/sys/compat/linux/linux_emul.h
index 1bbc69ad98be..de66a7a4c82a 100644
--- a/sys/compat/linux/linux_emul.h
+++ b/sys/compat/linux/linux_emul.h
@@ -68,7 +68,6 @@ int	linux_common_execve(struct thread *, struct image_args *);
 struct linux_pemuldata {
 	uint32_t	flags;		/* process emuldata flags */
 	struct sx	pem_sx;		/* lock for this struct */
-	void		*epoll;		/* epoll data */
 	uint32_t	persona;	/* process execution domain */
 	uint32_t	ptrace_flags;	/* used by ptrace(2) */
 };
diff --git a/sys/compat/linux/linux_event.c b/sys/compat/linux/linux_event.c
index 54f6b083adf3..370bf4d6485f 100644
--- a/sys/compat/linux/linux_event.c
+++ b/sys/compat/linux/linux_event.c
@@ -71,25 +71,8 @@ __FBSDID("$FreeBSD$");
 #include <compat/linux/linux_timer.h>
 #include <compat/linux/linux_util.h>
 
-/*
- * epoll defines 'struct epoll_event' with the field 'data' as 64 bits
- * on all architectures. But on 32 bit architectures BSD 'struct kevent' only
- * has 32 bit opaque pointer as 'udata' field. So we can't pass epoll supplied
- * data verbatuim. Therefore we allocate 64-bit memory block to pass
- * user supplied data for every file descriptor.
- */
-
 typedef uint64_t	epoll_udata_t;
 
-struct epoll_emuldata {
-	uint32_t	fdc;		/* epoll udata max index */
-	epoll_udata_t	udata[1];	/* epoll user data vector */
-};
-
-#define	EPOLL_DEF_SZ		16
-#define	EPOLL_SIZE(fdn)			\
-	(sizeof(struct epoll_emuldata)+(fdn) * sizeof(epoll_udata_t))
-
 struct epoll_event {
 	uint32_t	events;
 	epoll_udata_t	data;
@@ -101,7 +84,6 @@ __attribute__((packed))
 
 #define	LINUX_MAX_EVENTS	(INT_MAX / sizeof(struct epoll_event))
 
-static void	epoll_fd_install(struct thread *td, int fd, epoll_udata_t udata);
 static int	epoll_to_kevent(struct thread *td, int fd,
 		    struct epoll_event *l_event, struct kevent *kevent,
 		    int *nkevents);
@@ -175,47 +157,11 @@ struct timerfd {
 static void	linux_timerfd_expire(void *);
 static void	linux_timerfd_curval(struct timerfd *, struct itimerspec *);
 
-static void
-epoll_fd_install(struct thread *td, int fd, epoll_udata_t udata)
-{
-	struct linux_pemuldata *pem;
-	struct epoll_emuldata *emd;
-	struct proc *p;
-
-	p = td->td_proc;
-
-	pem = pem_find(p);
-	KASSERT(pem != NULL, ("epoll proc emuldata not found.\n"));
-
-	LINUX_PEM_XLOCK(pem);
-	if (pem->epoll == NULL) {
-		emd = malloc(EPOLL_SIZE(fd), M_EPOLL, M_WAITOK);
-		emd->fdc = fd;
-		pem->epoll = emd;
-	} else {
-		emd = pem->epoll;
-		if (fd > emd->fdc) {
-			emd = realloc(emd, EPOLL_SIZE(fd), M_EPOLL, M_WAITOK);
-			emd->fdc = fd;
-			pem->epoll = emd;
-		}
-	}
-	emd->udata[fd] = udata;
-	LINUX_PEM_XUNLOCK(pem);
-}
-
 static int
 epoll_create_common(struct thread *td, int flags)
 {
-	int error;
-
-	error = kern_kqueue(td, flags, NULL);
-	if (error != 0)
-		return (error);
-
-	epoll_fd_install(td, EPOLL_DEF_SZ, 0);
 
-	return (0);
+	return (kern_kqueue(td, flags, NULL));
 }
 
 #ifdef LINUX_LEGACY_SYSCALLS
@@ -271,11 +217,15 @@ epoll_to_kevent(struct thread *td, int fd, struct epoll_event *l_event,
 
 	/* flags related to what event is registered */
 	if ((levents & LINUX_EPOLL_EVRD) != 0) {
-		EV_SET(kevent++, fd, EVFILT_READ, kev_flags, 0, 0, 0);
+		EV_SET(kevent, fd, EVFILT_READ, kev_flags, 0, 0, 0);
+		kevent->ext[0] = l_event->data;
+		++kevent;
 		++(*nkevents);
 	}
 	if ((levents & LINUX_EPOLL_EVWR) != 0) {
-		EV_SET(kevent++, fd, EVFILT_WRITE, kev_flags, 0, 0, 0);
+		EV_SET(kevent, fd, EVFILT_WRITE, kev_flags, 0, 0, 0);
+		kevent->ext[0] = l_event->data;
+		++kevent;
 		++(*nkevents);
 	}
 	/* zero event mask is legal */
@@ -289,7 +239,6 @@ epoll_to_kevent(struct thread *td, int fd, struct epoll_event *l_event,
 
 		pem = pem_find(p);
 		KASSERT(pem != NULL, ("epoll proc emuldata not found.\n"));
-		KASSERT(pem->epoll != NULL, ("epoll proc epolldata not found.\n"));
 
 		LINUX_PEM_XLOCK(pem);
 		if ((pem->flags & LINUX_XUNSUP_EPOLL) == 0) {
@@ -314,6 +263,8 @@ static void
 kevent_to_epoll(struct kevent *kevent, struct epoll_event *l_event)
 {
 
+	l_event->data = kevent->ext[0];
+
 	if ((kevent->flags & EV_ERROR) != 0) {
 		l_event->events = LINUX_EPOLLERR;
 		return;
@@ -342,30 +293,15 @@ static int
 epoll_kev_copyout(void *arg, struct kevent *kevp, int count)
 {
 	struct epoll_copyout_args *args;
-	struct linux_pemuldata *pem;
-	struct epoll_emuldata *emd;
 	struct epoll_event *eep;
-	int error, fd, i;
+	int error, i;
 
 	args = (struct epoll_copyout_args*) arg;
 	eep = malloc(sizeof(*eep) * count, M_EPOLL, M_WAITOK | M_ZERO);
 
-	pem = pem_find(args->p);
-	KASSERT(pem != NULL, ("epoll proc emuldata not found.\n"));
-	LINUX_PEM_SLOCK(pem);
-	emd = pem->epoll;
-	KASSERT(emd != NULL, ("epoll proc epolldata not found.\n"));
-
-	for (i = 0; i < count; i++) {
+	for (i = 0; i < count; i++)
 		kevent_to_epoll(&kevp[i], &eep[i]);
 
-		fd = kevp[i].ident;
-		KASSERT(fd <= emd->fdc, ("epoll user data vector"
-						    " is too small.\n"));
-		eep[i].data = emd->udata[fd];
-	}
-	LINUX_PEM_SUNLOCK(pem);
-
 	error = copyout(eep, args->leventlist, count * sizeof(*eep));
 	if (error == 0) {
 		args->leventlist += count;
@@ -473,8 +409,6 @@ linux_epoll_ctl(struct thread *td, struct linux_epoll_ctl_args *args)
 		goto leave0;
 	}
 
-	epoll_fd_install(td, args->fd, le.data);
-
 	error = kern_kevent_fp(td, epfp, nchanges, 0, &k_ops, NULL);
 
 leave0:


More information about the dev-commits-src-all mailing list