git: b11289f87123 - main - kqueuex(2): add KQUEUE_CPONFORK
- Go to: [ bottom of page ] [ top of archives ] [ this month ]
Date: Sat, 18 Oct 2025 05:14:14 UTC
The branch main has been updated by kib:
URL: https://cgit.FreeBSD.org/src/commit/?id=b11289f87123f8ae06fc70bc70d26a25d4356a65
commit b11289f87123f8ae06fc70bc70d26a25d4356a65
Author: Konstantin Belousov <kib@FreeBSD.org>
AuthorDate: 2025-08-19 04:34:04 +0000
Commit: Konstantin Belousov <kib@FreeBSD.org>
CommitDate: 2025-10-18 05:12:36 +0000
kqueuex(2): add KQUEUE_CPONFORK
The created kqueue is copied on fork, together with the registered
events. This means that a new kqueue is created at the same fd index
as the parent' kqueue, and all registered events are copied into the
new kqueue (when possible). The current active events list is also
duplicated.
Reviewed by: markj
Tested by: pho
Sponsored by: The FreeBSD Foundation
MFC after: 2 weeks
Differential revision: https://reviews.freebsd.org/D52045
---
sys/compat/linux/linux_event.c | 2 +-
sys/kern/kern_event.c | 174 ++++++++++++++++++++++++++++++++++++++---
sys/sys/event.h | 2 +
sys/sys/eventvar.h | 2 +
sys/sys/syscallsubr.h | 3 +-
5 files changed, 168 insertions(+), 15 deletions(-)
diff --git a/sys/compat/linux/linux_event.c b/sys/compat/linux/linux_event.c
index e88791659f1f..fc3ef7c3e90a 100644
--- a/sys/compat/linux/linux_event.c
+++ b/sys/compat/linux/linux_event.c
@@ -104,7 +104,7 @@ static int
epoll_create_common(struct thread *td, int flags)
{
- return (kern_kqueue(td, flags, NULL));
+ return (kern_kqueue(td, flags, false, NULL));
}
#ifdef LINUX_LEGACY_SYSCALLS
diff --git a/sys/kern/kern_event.c b/sys/kern/kern_event.c
index 80c289f7d802..1f3030995ec6 100644
--- a/sys/kern/kern_event.c
+++ b/sys/kern/kern_event.c
@@ -134,6 +134,7 @@ static fo_kqfilter_t kqueue_kqfilter;
static fo_stat_t kqueue_stat;
static fo_close_t kqueue_close;
static fo_fill_kinfo_t kqueue_fill_kinfo;
+static fo_fork_t kqueue_fork;
static const struct fileops kqueueops = {
.fo_read = invfo_rdwr,
@@ -148,7 +149,9 @@ static const struct fileops kqueueops = {
.fo_chown = invfo_chown,
.fo_sendfile = invfo_sendfile,
.fo_cmp = file_kcmp_generic,
+ .fo_fork = kqueue_fork,
.fo_fill_kinfo = kqueue_fill_kinfo,
+ .fo_flags = DFLAG_FORK,
};
static int knote_attach(struct knote *kn, struct kqueue *kq);
@@ -1151,7 +1154,7 @@ int
sys_kqueue(struct thread *td, struct kqueue_args *uap)
{
- return (kern_kqueue(td, 0, NULL));
+ return (kern_kqueue(td, 0, false, NULL));
}
int
@@ -1159,27 +1162,30 @@ sys_kqueuex(struct thread *td, struct kqueuex_args *uap)
{
int flags;
- if ((uap->flags & ~(KQUEUE_CLOEXEC)) != 0)
+ if ((uap->flags & ~(KQUEUE_CLOEXEC | KQUEUE_CPONFORK)) != 0)
return (EINVAL);
flags = 0;
if ((uap->flags & KQUEUE_CLOEXEC) != 0)
flags |= O_CLOEXEC;
- return (kern_kqueue(td, flags, NULL));
+ return (kern_kqueue(td, flags, (uap->flags & KQUEUE_CPONFORK) != 0,
+ NULL));
}
static void
-kqueue_init(struct kqueue *kq)
+kqueue_init(struct kqueue *kq, bool cponfork)
{
mtx_init(&kq->kq_lock, "kqueue", NULL, MTX_DEF | MTX_DUPOK);
TAILQ_INIT(&kq->kq_head);
knlist_init_mtx(&kq->kq_sel.si_note, &kq->kq_lock);
TASK_INIT(&kq->kq_task, 0, kqueue_task, kq);
+ if (cponfork)
+ kq->kq_state |= KQ_CPONFORK;
}
static int
kern_kqueue_alloc(struct thread *td, struct filedesc *fdp, int *fdip,
- struct file **fpp, int flags, struct filecaps *fcaps,
+ struct file **fpp, int flags, struct filecaps *fcaps, bool cponfork,
struct kqueue **kqp)
{
struct ucred *cred;
@@ -1191,7 +1197,7 @@ kern_kqueue_alloc(struct thread *td, struct filedesc *fdp, int *fdip,
return (ENOMEM);
error = fdip != NULL ? falloc_caps(td, fpp, fdip, flags, fcaps) :
- _falloc_noinstall(td, fpp, 2);
+ _falloc_noinstall(td, fpp, 1);
if (error != 0) {
chgkqcnt(cred->cr_ruidinfo, -1, 0);
return (error);
@@ -1199,31 +1205,33 @@ kern_kqueue_alloc(struct thread *td, struct filedesc *fdp, int *fdip,
/* An extra reference on `fp' has been held for us by falloc(). */
kq = malloc(sizeof(*kq), M_KQUEUE, M_WAITOK | M_ZERO);
- kqueue_init(kq);
+ kqueue_init(kq, cponfork);
kq->kq_fdp = fdp;
kq->kq_cred = crhold(cred);
- FILEDESC_XLOCK(fdp);
+ if (fdip != NULL)
+ FILEDESC_XLOCK(fdp);
TAILQ_INSERT_HEAD(&fdp->fd_kqlist, kq, kq_list);
- FILEDESC_XUNLOCK(fdp);
+ if (fdip != NULL)
+ FILEDESC_XUNLOCK(fdp);
+ finit(*fpp, FREAD | FWRITE, DTYPE_KQUEUE, kq, &kqueueops);
*kqp = kq;
return (0);
}
int
-kern_kqueue(struct thread *td, int flags, struct filecaps *fcaps)
+kern_kqueue(struct thread *td, int flags, bool cponfork, struct filecaps *fcaps)
{
struct kqueue *kq;
struct file *fp;
int fd, error;
error = kern_kqueue_alloc(td, td->td_proc->p_fd, &fd, &fp, flags,
- fcaps, &kq);
+ fcaps, cponfork, &kq);
if (error != 0)
return (error);
- finit(fp, FREAD | FWRITE, DTYPE_KQUEUE, kq, &kqueueops);
fdrop(fp, td);
td->td_retval[0] = fd;
@@ -1504,7 +1512,7 @@ kern_kevent_anonymous(struct thread *td, int nevents,
struct kqueue kq = {};
int error;
- kqueue_init(&kq);
+ kqueue_init(&kq, false);
kq.kq_refcnt = 1;
error = kqueue_kevent(&kq, td, nevents, nevents, k_ops, NULL);
kqueue_drain(&kq, td);
@@ -2958,6 +2966,146 @@ noacquire:
return (error);
}
+static int
+kqueue_fork_alloc(struct filedesc *fdp, struct file *fp, struct file **fp1,
+ struct thread *td)
+{
+ struct kqueue *kq, *kq1;
+ int error;
+
+ MPASS(fp->f_type == DTYPE_KQUEUE);
+ kq = fp->f_data;
+ if ((kq->kq_state & KQ_CPONFORK) == 0)
+ return (EOPNOTSUPP);
+ error = kqueue_acquire_ref(kq);
+ if (error != 0)
+ return (error);
+ error = kern_kqueue_alloc(td, fdp, NULL, fp1, 0, NULL, true, &kq1);
+ if (error == 0) {
+ kq1->kq_forksrc = kq;
+ (*fp1)->f_flag = fp->f_flag & (FREAD | FWRITE | FEXEC |
+ O_CLOEXEC | O_CLOFORK);
+ } else {
+ kqueue_release(kq, 0);
+ }
+ return (error);
+}
+
+static void
+kqueue_fork_copy_knote(struct kqueue *kq1, struct knote *kn, struct proc *p1,
+ struct filedesc *fdp)
+{
+ struct knote *kn1;
+ const struct filterops *fop;
+ int error;
+
+ fop = kn->kn_fop;
+ if (fop->f_copy == NULL || (fop->f_isfd &&
+ fdp->fd_files->fdt_ofiles[kn->kn_kevent.ident].fde_file == NULL))
+ return;
+ error = kqueue_expand(kq1, fop, kn->kn_kevent.ident, M_WAITOK);
+ if (error != 0)
+ return;
+
+ kn1 = knote_alloc(M_WAITOK);
+ *kn1 = *kn;
+ kn1->kn_status |= KN_DETACHED;
+ kn1->kn_status &= ~KN_QUEUED;
+ kn1->kn_kq = kq1;
+ error = fop->f_copy(kn1, p1);
+ if (error != 0) {
+ knote_free(kn1);
+ return;
+ }
+ (void)kqueue_fo_find(kn->kn_kevent.filter);
+ if (fop->f_isfd && !fhold(kn1->kn_fp)) {
+ fop->f_detach(kn1);
+ kqueue_fo_release(kn->kn_kevent.filter);
+ knote_free(kn1);
+ return;
+ }
+ if (kn->kn_knlist != NULL)
+ knlist_add(kn->kn_knlist, kn1, 0);
+ KQ_LOCK(kq1);
+ knote_attach(kn1, kq1);
+ kn1->kn_influx = 0;
+ if ((kn->kn_status & KN_QUEUED) != 0)
+ knote_enqueue(kn1);
+ KQ_UNLOCK(kq1);
+}
+
+static void
+kqueue_fork_copy_list(struct klist *knlist, struct knote *marker,
+ struct kqueue *kq, struct kqueue *kq1, struct proc *p1,
+ struct filedesc *fdp)
+{
+ struct knote *kn;
+
+ KQ_OWNED(kq);
+ kn = SLIST_FIRST(knlist);
+ while (kn != NULL) {
+ if ((kn->kn_status & KN_DETACHED) != 0 ||
+ (kn_in_flux(kn) && (kn->kn_status & KN_SCAN) == 0)) {
+ kn = SLIST_NEXT(kn, kn_link);
+ continue;
+ }
+ kn_enter_flux(kn);
+ SLIST_INSERT_AFTER(kn, marker, kn_link);
+ KQ_UNLOCK(kq);
+ kqueue_fork_copy_knote(kq1, kn, p1, fdp);
+ KQ_LOCK(kq);
+ kn_leave_flux(kn);
+ kn = SLIST_NEXT(marker, kn_link);
+ /* XXXKIB switch kn_link to LIST? */
+ SLIST_REMOVE(knlist, marker, knote, kn_link);
+ }
+}
+
+static int
+kqueue_fork_copy(struct filedesc *fdp, struct file *fp, struct file *fp1,
+ struct proc *p1, struct thread *td)
+{
+ struct kqueue *kq, *kq1;
+ struct knote *marker;
+ int error, i;
+
+ error = 0;
+ MPASS(fp == NULL);
+ MPASS(fp1->f_type == DTYPE_KQUEUE);
+
+ kq1 = fp1->f_data;
+ kq = kq1->kq_forksrc;
+ marker = knote_alloc(M_WAITOK);
+ marker->kn_status = KN_MARKER;
+
+ KQ_LOCK(kq);
+ for (i = 0; i < kq->kq_knlistsize; i++) {
+ kqueue_fork_copy_list(&kq->kq_knlist[i], marker, kq, kq1,
+ p1, fdp);
+ }
+ if (kq->kq_knhashmask != 0) {
+ for (i = 0; i <= kq->kq_knhashmask; i++) {
+ kqueue_fork_copy_list(&kq->kq_knhash[i], marker, kq,
+ kq1, p1, fdp);
+ }
+ }
+ kqueue_release(kq, 1);
+ kq1->kq_forksrc = NULL;
+ KQ_UNLOCK(kq);
+
+ knote_free(marker);
+ return (error);
+}
+
+static int
+kqueue_fork(struct filedesc *fdp, struct file *fp, struct file **fp1,
+ struct proc *p1, struct thread *td)
+{
+ if (*fp1 == NULL)
+ return (kqueue_fork_alloc(fdp, fp, fp1, td));
+ return (kqueue_fork_copy(fdp, fp, *fp1, p1, td));
+}
+
struct knote_status_export_bit {
int kn_status_bit;
int knt_status_bit;
diff --git a/sys/sys/event.h b/sys/sys/event.h
index 084eaafcbdc0..6e71445f03b0 100644
--- a/sys/sys/event.h
+++ b/sys/sys/event.h
@@ -228,6 +228,7 @@ struct freebsd11_kevent32 {
/* Flags for kqueuex(2) */
#define KQUEUE_CLOEXEC 0x00000001 /* close on exec */
+#define KQUEUE_CPONFORK 0x00000002 /* copy on fork */
struct knote;
SLIST_HEAD(klist, knote);
@@ -283,6 +284,7 @@ struct filterops {
void (*f_touch)(struct knote *kn, struct kevent *kev, u_long type);
int (*f_userdump)(struct proc *p, struct knote *kn,
struct kinfo_knote *kin);
+ int (*f_copy)(struct knote *kn, struct proc *p1);
};
/*
diff --git a/sys/sys/eventvar.h b/sys/sys/eventvar.h
index 7fec444447f9..7cb3269f1fdf 100644
--- a/sys/sys/eventvar.h
+++ b/sys/sys/eventvar.h
@@ -55,12 +55,14 @@ struct kqueue {
#define KQ_CLOSING 0x10
#define KQ_TASKSCHED 0x20 /* task scheduled */
#define KQ_TASKDRAIN 0x40 /* waiting for task to drain */
+#define KQ_CPONFORK 0x80
int kq_knlistsize; /* size of knlist */
struct klist *kq_knlist; /* list of knotes */
u_long kq_knhashmask; /* size of knhash */
struct klist *kq_knhash; /* hash table for knotes */
struct task kq_task;
struct ucred *kq_cred;
+ struct kqueue *kq_forksrc;
};
#endif /* !_SYS_EVENTVAR_H_ */
diff --git a/sys/sys/syscallsubr.h b/sys/sys/syscallsubr.h
index 8237165b84ce..d32690634059 100644
--- a/sys/sys/syscallsubr.h
+++ b/sys/sys/syscallsubr.h
@@ -211,7 +211,8 @@ int kern_kevent_fp(struct thread *td, struct file *fp, int nchanges,
int nevents, struct kevent_copyops *k_ops,
const struct timespec *timeout);
int kern_kill(struct thread *td, pid_t pid, int signum);
-int kern_kqueue(struct thread *td, int flags, struct filecaps *fcaps);
+int kern_kqueue(struct thread *td, int flags, bool cponfork,
+ struct filecaps *fcaps);
int kern_kldload(struct thread *td, const char *file, int *fileid);
int kern_kldstat(struct thread *td, int fileid, struct kld_file_stat *stat);
int kern_kldunload(struct thread *td, int fileid, int flags);