git: af93fea71038 - main - timerfd: Move implementation from linux compat to sys/kern
- Reply: Konstantin Belousov : "Re: git: af93fea71038 - main - timerfd: Move implementation from linux compat to sys/kern"
- Reply: Dmitry Chagin : "Re: git: af93fea71038 - main - timerfd: Move implementation from linux compat to sys/kern"
- Reply: Alexey Dokuchaev : "Re: git: af93fea71038 - main - timerfd: Move implementation from linux compat to sys/kern"
- Go to: [ bottom of page ] [ top of archives ] [ this month ]
Date: Thu, 24 Aug 2023 20:29:48 UTC
The branch main has been updated by imp:
URL: https://cgit.FreeBSD.org/src/commit/?id=af93fea710385b2b11f0cabd377e7ed6f3d97c34
commit af93fea710385b2b11f0cabd377e7ed6f3d97c34
Author: Jake Freeland <jfree@freebsd.org>
AuthorDate: 2023-08-24 04:39:54 +0000
Commit: Warner Losh <imp@FreeBSD.org>
CommitDate: 2023-08-24 20:28:56 +0000
timerfd: Move implementation from linux compat to sys/kern
Move the timerfd impelemntation from linux compat code to sys/kern. Use
it to implement the new system calls for timerfd. Add a hook to kern_tc
to allow timerfd to know when the system time has stepped. Add kqueue
support to timerfd. Adjust a few names to be less Linux centric.
RelNotes: YES
Reviewed by: markj (on irc), imp, kib (with reservations), jhb (slack)
Differential Revision: https://reviews.freebsd.org/D38459
---
lib/libc/sys/Symbol.map | 3 +
sys/bsm/audit_kevents.h | 1 +
sys/compat/freebsd32/freebsd32_proto.h | 14 +
sys/compat/freebsd32/freebsd32_syscall.h | 5 +-
sys/compat/freebsd32/freebsd32_syscalls.c | 3 +
sys/compat/freebsd32/freebsd32_sysent.c | 3 +
sys/compat/freebsd32/freebsd32_systrace_args.c | 86 ++++
sys/compat/linux/linux_event.c | 443 ++---------------
sys/compat/linux/linux_event.h | 11 -
sys/conf/files | 1 +
sys/kern/init_sysent.c | 3 +
sys/kern/kern_descrip.c | 4 +-
sys/kern/kern_tc.c | 2 +
sys/kern/sys_timerfd.c | 632 +++++++++++++++++++++++++
sys/kern/syscalls.c | 3 +
sys/kern/syscalls.master | 20 +
sys/kern/systrace_args.c | 86 ++++
sys/sys/file.h | 2 +-
sys/sys/syscall.h | 5 +-
sys/sys/syscall.mk | 5 +-
sys/sys/sysproto.h | 20 +
sys/sys/timerfd.h | 66 +++
sys/sys/user.h | 6 +
23 files changed, 999 insertions(+), 425 deletions(-)
diff --git a/lib/libc/sys/Symbol.map b/lib/libc/sys/Symbol.map
index 9a07bb457eb8..7937661e3787 100644
--- a/lib/libc/sys/Symbol.map
+++ b/lib/libc/sys/Symbol.map
@@ -421,6 +421,9 @@ FBSD_1.7 {
kqueuex;
membarrier;
swapoff;
+ timerfd_create;
+ timerfd_gettime;
+ timerfd_settime;
};
FBSDprivate_1.0 {
diff --git a/sys/bsm/audit_kevents.h b/sys/bsm/audit_kevents.h
index a6b50a67ee6a..d06381837aad 100644
--- a/sys/bsm/audit_kevents.h
+++ b/sys/bsm/audit_kevents.h
@@ -661,6 +661,7 @@
#define AUE_AIO_WRITEV 43267 /* FreeBSD-specific. */
#define AUE_AIO_READV 43268 /* FreeBSD-specific. */
#define AUE_FSPACECTL 43269 /* FreeBSD-specific. */
+#define AUE_TIMERFD 43270 /* FreeBSD/Linux. */
/*
* Darwin BSM uses a number of AUE_O_* definitions, which are aliased to the
diff --git a/sys/compat/freebsd32/freebsd32_proto.h b/sys/compat/freebsd32/freebsd32_proto.h
index bb333e0321a0..50448b6dce16 100644
--- a/sys/compat/freebsd32/freebsd32_proto.h
+++ b/sys/compat/freebsd32/freebsd32_proto.h
@@ -684,6 +684,16 @@ struct freebsd32_aio_writev_args {
struct freebsd32_aio_readv_args {
char aiocbp_l_[PADL_(struct aiocb32 *)]; struct aiocb32 * aiocbp; char aiocbp_r_[PADR_(struct aiocb32 *)];
};
+struct freebsd32_timerfd_gettime_args {
+ char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)];
+ char curr_value_l_[PADL_(struct itimerspec32 *)]; struct itimerspec32 * curr_value; char curr_value_r_[PADR_(struct itimerspec32 *)];
+};
+struct freebsd32_timerfd_settime_args {
+ char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)];
+ char flags_l_[PADL_(int)]; int flags; char flags_r_[PADR_(int)];
+ char new_value_l_[PADL_(const struct itimerspec32 *)]; const struct itimerspec32 * new_value; char new_value_r_[PADR_(const struct itimerspec32 *)];
+ char old_value_l_[PADL_(struct itimerspec32 *)]; struct itimerspec32 * old_value; char old_value_r_[PADR_(struct itimerspec32 *)];
+};
int freebsd32_wait4(struct thread *, struct freebsd32_wait4_args *);
int freebsd32_ptrace(struct thread *, struct freebsd32_ptrace_args *);
int freebsd32_recvmsg(struct thread *, struct freebsd32_recvmsg_args *);
@@ -799,6 +809,8 @@ int freebsd32_cpuset_setdomain(struct thread *, struct freebsd32_cpuset_setdomai
int freebsd32___sysctlbyname(struct thread *, struct freebsd32___sysctlbyname_args *);
int freebsd32_aio_writev(struct thread *, struct freebsd32_aio_writev_args *);
int freebsd32_aio_readv(struct thread *, struct freebsd32_aio_readv_args *);
+int freebsd32_timerfd_gettime(struct thread *, struct freebsd32_timerfd_gettime_args *);
+int freebsd32_timerfd_settime(struct thread *, struct freebsd32_timerfd_settime_args *);
#ifdef COMPAT_43
@@ -1292,6 +1304,8 @@ int freebsd11_freebsd32_fstatat(struct thread *, struct freebsd11_freebsd32_fsta
#define FREEBSD32_SYS_AUE_freebsd32___sysctlbyname AUE_SYSCTL
#define FREEBSD32_SYS_AUE_freebsd32_aio_writev AUE_AIO_WRITEV
#define FREEBSD32_SYS_AUE_freebsd32_aio_readv AUE_AIO_READV
+#define FREEBSD32_SYS_AUE_freebsd32_timerfd_gettime AUE_TIMERFD
+#define FREEBSD32_SYS_AUE_freebsd32_timerfd_settime AUE_TIMERFD
#undef PAD_
#undef PADL_
diff --git a/sys/compat/freebsd32/freebsd32_syscall.h b/sys/compat/freebsd32/freebsd32_syscall.h
index c3d8617abf4b..e3777730be1c 100644
--- a/sys/compat/freebsd32/freebsd32_syscall.h
+++ b/sys/compat/freebsd32/freebsd32_syscall.h
@@ -502,4 +502,7 @@
#define FREEBSD32_SYS_swapoff 582
#define FREEBSD32_SYS_kqueuex 583
#define FREEBSD32_SYS_membarrier 584
-#define FREEBSD32_SYS_MAXSYSCALL 585
+#define FREEBSD32_SYS_timerfd_create 585
+#define FREEBSD32_SYS_freebsd32_timerfd_gettime 586
+#define FREEBSD32_SYS_freebsd32_timerfd_settime 587
+#define FREEBSD32_SYS_MAXSYSCALL 588
diff --git a/sys/compat/freebsd32/freebsd32_syscalls.c b/sys/compat/freebsd32/freebsd32_syscalls.c
index 19d454743c55..ccc910ee5ca9 100644
--- a/sys/compat/freebsd32/freebsd32_syscalls.c
+++ b/sys/compat/freebsd32/freebsd32_syscalls.c
@@ -590,4 +590,7 @@ const char *freebsd32_syscallnames[] = {
"swapoff", /* 582 = swapoff */
"kqueuex", /* 583 = kqueuex */
"membarrier", /* 584 = membarrier */
+ "timerfd_create", /* 585 = timerfd_create */
+ "freebsd32_timerfd_gettime", /* 586 = freebsd32_timerfd_gettime */
+ "freebsd32_timerfd_settime", /* 587 = freebsd32_timerfd_settime */
};
diff --git a/sys/compat/freebsd32/freebsd32_sysent.c b/sys/compat/freebsd32/freebsd32_sysent.c
index 971f06a643c5..fec6f4a47bd6 100644
--- a/sys/compat/freebsd32/freebsd32_sysent.c
+++ b/sys/compat/freebsd32/freebsd32_sysent.c
@@ -646,4 +646,7 @@ struct sysent freebsd32_sysent[] = {
{ .sy_narg = AS(swapoff_args), .sy_call = (sy_call_t *)sys_swapoff, .sy_auevent = AUE_SWAPOFF, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 582 = swapoff */
{ .sy_narg = AS(kqueuex_args), .sy_call = (sy_call_t *)sys_kqueuex, .sy_auevent = AUE_KQUEUE, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 583 = kqueuex */
{ .sy_narg = AS(membarrier_args), .sy_call = (sy_call_t *)sys_membarrier, .sy_auevent = AUE_NULL, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 584 = membarrier */
+ { .sy_narg = AS(timerfd_create_args), .sy_call = (sy_call_t *)sys_timerfd_create, .sy_auevent = AUE_TIMERFD, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 585 = timerfd_create */
+ { .sy_narg = AS(freebsd32_timerfd_gettime_args), .sy_call = (sy_call_t *)freebsd32_timerfd_gettime, .sy_auevent = AUE_TIMERFD, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 586 = freebsd32_timerfd_gettime */
+ { .sy_narg = AS(freebsd32_timerfd_settime_args), .sy_call = (sy_call_t *)freebsd32_timerfd_settime, .sy_auevent = AUE_TIMERFD, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 587 = freebsd32_timerfd_settime */
};
diff --git a/sys/compat/freebsd32/freebsd32_systrace_args.c b/sys/compat/freebsd32/freebsd32_systrace_args.c
index 5dfc82c30b7b..2c26a0ddab2f 100644
--- a/sys/compat/freebsd32/freebsd32_systrace_args.c
+++ b/sys/compat/freebsd32/freebsd32_systrace_args.c
@@ -3336,6 +3336,32 @@ systrace_args(int sysnum, void *params, uint64_t *uarg, int *n_args)
*n_args = 3;
break;
}
+ /* timerfd_create */
+ case 585: {
+ struct timerfd_create_args *p = params;
+ iarg[a++] = p->clockid; /* int */
+ iarg[a++] = p->flags; /* int */
+ *n_args = 2;
+ break;
+ }
+ /* freebsd32_timerfd_gettime */
+ case 586: {
+ struct freebsd32_timerfd_gettime_args *p = params;
+ iarg[a++] = p->fd; /* int */
+ uarg[a++] = (intptr_t)p->curr_value; /* struct itimerspec32 * */
+ *n_args = 2;
+ break;
+ }
+ /* freebsd32_timerfd_settime */
+ case 587: {
+ struct freebsd32_timerfd_settime_args *p = params;
+ iarg[a++] = p->fd; /* int */
+ iarg[a++] = p->flags; /* int */
+ uarg[a++] = (intptr_t)p->new_value; /* const struct itimerspec32 * */
+ uarg[a++] = (intptr_t)p->old_value; /* struct itimerspec32 * */
+ *n_args = 4;
+ break;
+ }
default:
*n_args = 0;
break;
@@ -9005,6 +9031,51 @@ systrace_entry_setargdesc(int sysnum, int ndx, char *desc, size_t descsz)
break;
};
break;
+ /* timerfd_create */
+ case 585:
+ switch (ndx) {
+ case 0:
+ p = "int";
+ break;
+ case 1:
+ p = "int";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* freebsd32_timerfd_gettime */
+ case 586:
+ switch (ndx) {
+ case 0:
+ p = "int";
+ break;
+ case 1:
+ p = "userland struct itimerspec32 *";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* freebsd32_timerfd_settime */
+ case 587:
+ switch (ndx) {
+ case 0:
+ p = "int";
+ break;
+ case 1:
+ p = "int";
+ break;
+ case 2:
+ p = "userland const struct itimerspec32 *";
+ break;
+ case 3:
+ p = "userland struct itimerspec32 *";
+ break;
+ default:
+ break;
+ };
+ break;
default:
break;
};
@@ -10873,6 +10944,21 @@ systrace_return_setargdesc(int sysnum, int ndx, char *desc, size_t descsz)
if (ndx == 0 || ndx == 1)
p = "int";
break;
+ /* timerfd_create */
+ case 585:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* freebsd32_timerfd_gettime */
+ case 586:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* freebsd32_timerfd_settime */
+ case 587:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
default:
break;
};
diff --git a/sys/compat/linux/linux_event.c b/sys/compat/linux/linux_event.c
index a7db8516e5f0..816c68a90f1d 100644
--- a/sys/compat/linux/linux_event.c
+++ b/sys/compat/linux/linux_event.c
@@ -44,6 +44,7 @@
#include <sys/specialfd.h>
#include <sys/sx.h>
#include <sys/syscallsubr.h>
+#include <sys/timerfd.h>
#include <sys/timespec.h>
#include <sys/user.h>
@@ -99,55 +100,6 @@ struct epoll_copyout_args {
int error;
};
-/* timerfd */
-typedef uint64_t timerfd_t;
-
-static fo_rdwr_t timerfd_read;
-static fo_ioctl_t timerfd_ioctl;
-static fo_poll_t timerfd_poll;
-static fo_kqfilter_t timerfd_kqfilter;
-static fo_stat_t timerfd_stat;
-static fo_close_t timerfd_close;
-static fo_fill_kinfo_t timerfd_fill_kinfo;
-
-static struct fileops timerfdops = {
- .fo_read = timerfd_read,
- .fo_write = invfo_rdwr,
- .fo_truncate = invfo_truncate,
- .fo_ioctl = timerfd_ioctl,
- .fo_poll = timerfd_poll,
- .fo_kqfilter = timerfd_kqfilter,
- .fo_stat = timerfd_stat,
- .fo_close = timerfd_close,
- .fo_chmod = invfo_chmod,
- .fo_chown = invfo_chown,
- .fo_sendfile = invfo_sendfile,
- .fo_fill_kinfo = timerfd_fill_kinfo,
- .fo_flags = DFLAG_PASSABLE
-};
-
-static void filt_timerfddetach(struct knote *kn);
-static int filt_timerfdread(struct knote *kn, long hint);
-
-static struct filterops timerfd_rfiltops = {
- .f_isfd = 1,
- .f_detach = filt_timerfddetach,
- .f_event = filt_timerfdread
-};
-
-struct timerfd {
- clockid_t tfd_clockid;
- struct itimerspec tfd_time;
- struct callout tfd_callout;
- timerfd_t tfd_count;
- bool tfd_canceled;
- struct selinfo tfd_sel;
- struct mtx tfd_lock;
-};
-
-static void linux_timerfd_expire(void *);
-static void linux_timerfd_curval(struct timerfd *, struct itimerspec *);
-
static int
epoll_create_common(struct thread *td, int flags)
{
@@ -658,255 +610,14 @@ linux_eventfd2(struct thread *td, struct linux_eventfd2_args *args)
int
linux_timerfd_create(struct thread *td, struct linux_timerfd_create_args *args)
{
- struct timerfd *tfd;
- struct file *fp;
clockid_t clockid;
- int fflags, fd, error;
-
- if ((args->flags & ~LINUX_TFD_CREATE_FLAGS) != 0)
- return (EINVAL);
-
- error = linux_to_native_clockid(&clockid, args->clockid);
- if (error != 0)
- return (error);
- if (clockid != CLOCK_REALTIME && clockid != CLOCK_MONOTONIC)
- return (EINVAL);
-
- fflags = 0;
- if ((args->flags & LINUX_TFD_CLOEXEC) != 0)
- fflags |= O_CLOEXEC;
-
- error = falloc(td, &fp, &fd, fflags);
- if (error != 0)
- return (error);
-
- tfd = malloc(sizeof(*tfd), M_EPOLL, M_WAITOK | M_ZERO);
- tfd->tfd_clockid = clockid;
- mtx_init(&tfd->tfd_lock, "timerfd", NULL, MTX_DEF);
-
- callout_init_mtx(&tfd->tfd_callout, &tfd->tfd_lock, 0);
- knlist_init_mtx(&tfd->tfd_sel.si_note, &tfd->tfd_lock);
-
- fflags = FREAD;
- if ((args->flags & LINUX_O_NONBLOCK) != 0)
- fflags |= FNONBLOCK;
-
- finit(fp, fflags, DTYPE_LINUXTFD, tfd, &timerfdops);
- fdrop(fp, td);
-
- td->td_retval[0] = fd;
- return (error);
-}
-
-static int
-timerfd_close(struct file *fp, struct thread *td)
-{
- struct timerfd *tfd;
-
- tfd = fp->f_data;
- if (fp->f_type != DTYPE_LINUXTFD || tfd == NULL)
- return (EINVAL);
-
- timespecclear(&tfd->tfd_time.it_value);
- timespecclear(&tfd->tfd_time.it_interval);
-
- callout_drain(&tfd->tfd_callout);
-
- seldrain(&tfd->tfd_sel);
- knlist_destroy(&tfd->tfd_sel.si_note);
-
- fp->f_ops = &badfileops;
- mtx_destroy(&tfd->tfd_lock);
- free(tfd, M_EPOLL);
-
- return (0);
-}
-
-static int
-timerfd_read(struct file *fp, struct uio *uio, struct ucred *active_cred,
- int flags, struct thread *td)
-{
- struct timerfd *tfd;
- timerfd_t count;
- int error;
-
- tfd = fp->f_data;
- if (fp->f_type != DTYPE_LINUXTFD || tfd == NULL)
- return (EINVAL);
-
- if (uio->uio_resid < sizeof(timerfd_t))
- return (EINVAL);
-
- error = 0;
- mtx_lock(&tfd->tfd_lock);
-retry:
- if (tfd->tfd_canceled) {
- tfd->tfd_count = 0;
- mtx_unlock(&tfd->tfd_lock);
- return (ECANCELED);
- }
- if (tfd->tfd_count == 0) {
- if ((fp->f_flag & FNONBLOCK) != 0) {
- mtx_unlock(&tfd->tfd_lock);
- return (EAGAIN);
- }
- error = mtx_sleep(&tfd->tfd_count, &tfd->tfd_lock, PCATCH, "ltfdrd", 0);
- if (error == 0)
- goto retry;
- }
- if (error == 0) {
- count = tfd->tfd_count;
- tfd->tfd_count = 0;
- mtx_unlock(&tfd->tfd_lock);
- error = uiomove(&count, sizeof(timerfd_t), uio);
- } else
- mtx_unlock(&tfd->tfd_lock);
-
- return (error);
-}
-
-static int
-timerfd_poll(struct file *fp, int events, struct ucred *active_cred,
- struct thread *td)
-{
- struct timerfd *tfd;
- int revents = 0;
-
- tfd = fp->f_data;
- if (fp->f_type != DTYPE_LINUXTFD || tfd == NULL)
- return (POLLERR);
-
- mtx_lock(&tfd->tfd_lock);
- if ((events & (POLLIN|POLLRDNORM)) && tfd->tfd_count > 0)
- revents |= events & (POLLIN|POLLRDNORM);
- if (revents == 0)
- selrecord(td, &tfd->tfd_sel);
- mtx_unlock(&tfd->tfd_lock);
-
- return (revents);
-}
-
-static int
-timerfd_kqfilter(struct file *fp, struct knote *kn)
-{
- struct timerfd *tfd;
-
- tfd = fp->f_data;
- if (fp->f_type != DTYPE_LINUXTFD || tfd == NULL)
- return (EINVAL);
-
- if (kn->kn_filter == EVFILT_READ)
- kn->kn_fop = &timerfd_rfiltops;
- else
- return (EINVAL);
-
- kn->kn_hook = tfd;
- knlist_add(&tfd->tfd_sel.si_note, kn, 0);
-
- return (0);
-}
-
-static void
-filt_timerfddetach(struct knote *kn)
-{
- struct timerfd *tfd = kn->kn_hook;
-
- mtx_lock(&tfd->tfd_lock);
- knlist_remove(&tfd->tfd_sel.si_note, kn, 1);
- mtx_unlock(&tfd->tfd_lock);
-}
-
-static int
-filt_timerfdread(struct knote *kn, long hint)
-{
- struct timerfd *tfd = kn->kn_hook;
-
- return (tfd->tfd_count > 0);
-}
-
-static int
-timerfd_ioctl(struct file *fp, u_long cmd, void *data,
- struct ucred *active_cred, struct thread *td)
-{
-
- if (fp->f_data == NULL || fp->f_type != DTYPE_LINUXTFD)
- return (EINVAL);
-
- switch (cmd) {
- case FIONBIO:
- case FIOASYNC:
- return (0);
- }
-
- return (ENOTTY);
-}
-
-static int
-timerfd_stat(struct file *fp, struct stat *st, struct ucred *active_cred)
-{
-
- return (ENXIO);
-}
-
-static int
-timerfd_fill_kinfo(struct file *fp, struct kinfo_file *kif, struct filedesc *fdp)
-{
-
- kif->kf_type = KF_TYPE_UNKNOWN;
- return (0);
-}
-
-static void
-linux_timerfd_clocktime(struct timerfd *tfd, struct timespec *ts)
-{
-
- if (tfd->tfd_clockid == CLOCK_REALTIME)
- getnanotime(ts);
- else /* CLOCK_MONOTONIC */
- getnanouptime(ts);
-}
-
-static void
-linux_timerfd_curval(struct timerfd *tfd, struct itimerspec *ots)
-{
- struct timespec cts;
-
- linux_timerfd_clocktime(tfd, &cts);
- *ots = tfd->tfd_time;
- if (ots->it_value.tv_sec != 0 || ots->it_value.tv_nsec != 0) {
- timespecsub(&ots->it_value, &cts, &ots->it_value);
- if (ots->it_value.tv_sec < 0 ||
- (ots->it_value.tv_sec == 0 &&
- ots->it_value.tv_nsec == 0)) {
- ots->it_value.tv_sec = 0;
- ots->it_value.tv_nsec = 1;
- }
- }
-}
-
-static int
-linux_timerfd_gettime_common(struct thread *td, int fd, struct itimerspec *ots)
-{
- struct timerfd *tfd;
- struct file *fp;
int error;
- error = fget(td, fd, &cap_read_rights, &fp);
+ error = linux_to_native_clockid(&clockid, args->clockid);
if (error != 0)
return (error);
- tfd = fp->f_data;
- if (fp->f_type != DTYPE_LINUXTFD || tfd == NULL) {
- error = EINVAL;
- goto out;
- }
-
- mtx_lock(&tfd->tfd_lock);
- linux_timerfd_curval(tfd, ots);
- mtx_unlock(&tfd->tfd_lock);
-out:
- fdrop(fp, td);
- return (error);
+ return (kern_timerfd_create(td, clockid, args->flags));
}
int
@@ -916,84 +627,14 @@ linux_timerfd_gettime(struct thread *td, struct linux_timerfd_gettime_args *args
struct itimerspec ots;
int error;
- error = linux_timerfd_gettime_common(td, args->fd, &ots);
+ error = kern_timerfd_gettime(td, args->fd, &ots);
if (error != 0)
return (error);
- error = native_to_linux_itimerspec(&lots, &ots);
- if (error == 0)
- error = copyout(&lots, args->old_value, sizeof(lots));
- return (error);
-}
-
-#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
-int
-linux_timerfd_gettime64(struct thread *td, struct linux_timerfd_gettime64_args *args)
-{
- struct l_itimerspec64 lots;
- struct itimerspec ots;
- int error;
- error = linux_timerfd_gettime_common(td, args->fd, &ots);
- if (error != 0)
- return (error);
- error = native_to_linux_itimerspec64(&lots, &ots);
+ error = native_to_linux_itimerspec(&lots, &ots);
if (error == 0)
error = copyout(&lots, args->old_value, sizeof(lots));
- return (error);
-}
-#endif
-
-static int
-linux_timerfd_settime_common(struct thread *td, int fd, int flags,
- struct itimerspec *nts, struct itimerspec *oval)
-{
- struct timespec cts, ts;
- struct timerfd *tfd;
- struct timeval tv;
- struct file *fp;
- int error;
-
- if ((flags & ~LINUX_TFD_SETTIME_FLAGS) != 0)
- return (EINVAL);
-
- error = fget(td, fd, &cap_write_rights, &fp);
- if (error != 0)
- return (error);
- tfd = fp->f_data;
- if (fp->f_type != DTYPE_LINUXTFD || tfd == NULL) {
- error = EINVAL;
- goto out;
- }
-
- mtx_lock(&tfd->tfd_lock);
- if (!timespecisset(&nts->it_value))
- timespecclear(&nts->it_interval);
- if (oval != NULL)
- linux_timerfd_curval(tfd, oval);
-
- bcopy(nts, &tfd->tfd_time, sizeof(*nts));
- tfd->tfd_count = 0;
- if (timespecisset(&nts->it_value)) {
- linux_timerfd_clocktime(tfd, &cts);
- ts = nts->it_value;
- if ((flags & LINUX_TFD_TIMER_ABSTIME) == 0) {
- timespecadd(&tfd->tfd_time.it_value, &cts,
- &tfd->tfd_time.it_value);
- } else {
- timespecsub(&ts, &cts, &ts);
- }
- TIMESPEC_TO_TIMEVAL(&tv, &ts);
- callout_reset(&tfd->tfd_callout, tvtohz(&tv),
- linux_timerfd_expire, tfd);
- tfd->tfd_canceled = false;
- } else {
- tfd->tfd_canceled = true;
- callout_stop(&tfd->tfd_callout);
- }
- mtx_unlock(&tfd->tfd_lock);
-out:
- fdrop(fp, td);
return (error);
}
@@ -1001,7 +642,7 @@ int
linux_timerfd_settime(struct thread *td, struct linux_timerfd_settime_args *args)
{
struct l_itimerspec lots;
- struct itimerspec nts, ots, *pots;
+ struct itimerspec nts, ots;
int error;
error = copyin(args->new_value, &lots, sizeof(lots));
@@ -1010,23 +651,43 @@ linux_timerfd_settime(struct thread *td, struct linux_timerfd_settime_args *args
error = linux_to_native_itimerspec(&nts, &lots);
if (error != 0)
return (error);
- pots = (args->old_value != NULL ? &ots : NULL);
- error = linux_timerfd_settime_common(td, args->fd, args->flags,
- &nts, pots);
+ if (args->old_value == NULL)
+ error = kern_timerfd_settime(td, args->fd, args->flags, &nts, NULL);
+ else
+ error = kern_timerfd_settime(td, args->fd, args->flags, &nts, &ots);
if (error == 0 && args->old_value != NULL) {
error = native_to_linux_itimerspec(&lots, &ots);
if (error == 0)
error = copyout(&lots, args->old_value, sizeof(lots));
}
+
return (error);
}
#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
+int
+linux_timerfd_gettime64(struct thread *td, struct linux_timerfd_gettime64_args *args)
+{
+ struct l_itimerspec64 lots;
+ struct itimerspec ots;
+ int error;
+
+ error = kern_timerfd_gettime(td, args->fd, &ots);
+ if (error != 0)
+ return (error);
+
+ error = native_to_linux_itimerspec64(&lots, &ots);
+ if (error == 0)
+ error = copyout(&lots, args->old_value, sizeof(lots));
+
+ return (error);
+}
+
int
linux_timerfd_settime64(struct thread *td, struct linux_timerfd_settime64_args *args)
{
struct l_itimerspec64 lots;
- struct itimerspec nts, ots, *pots;
+ struct itimerspec nts, ots;
int error;
error = copyin(args->new_value, &lots, sizeof(lots));
@@ -1035,50 +696,16 @@ linux_timerfd_settime64(struct thread *td, struct linux_timerfd_settime64_args *
error = linux_to_native_itimerspec64(&nts, &lots);
if (error != 0)
return (error);
- pots = (args->old_value != NULL ? &ots : NULL);
- error = linux_timerfd_settime_common(td, args->fd, args->flags,
- &nts, pots);
+ if (args->old_value == NULL)
+ error = kern_timerfd_settime(td, args->fd, args->flags, &nts, NULL);
+ else
+ error = kern_timerfd_settime(td, args->fd, args->flags, &nts, &ots);
if (error == 0 && args->old_value != NULL) {
error = native_to_linux_itimerspec64(&lots, &ots);
if (error == 0)
error = copyout(&lots, args->old_value, sizeof(lots));
}
+
return (error);
}
#endif
-
-static void
-linux_timerfd_expire(void *arg)
-{
- struct timespec cts, ts;
- struct timeval tv;
- struct timerfd *tfd;
-
- tfd = (struct timerfd *)arg;
-
- linux_timerfd_clocktime(tfd, &cts);
- if (timespeccmp(&cts, &tfd->tfd_time.it_value, >=)) {
- if (timespecisset(&tfd->tfd_time.it_interval))
- timespecadd(&tfd->tfd_time.it_value,
- &tfd->tfd_time.it_interval,
- &tfd->tfd_time.it_value);
- else
- /* single shot timer */
- timespecclear(&tfd->tfd_time.it_value);
- if (timespecisset(&tfd->tfd_time.it_value)) {
- timespecsub(&tfd->tfd_time.it_value, &cts, &ts);
- TIMESPEC_TO_TIMEVAL(&tv, &ts);
- callout_reset(&tfd->tfd_callout, tvtohz(&tv),
- linux_timerfd_expire, tfd);
- }
- tfd->tfd_count++;
- KNOTE_LOCKED(&tfd->tfd_sel.si_note, 0);
- selwakeup(&tfd->tfd_sel);
- wakeup(&tfd->tfd_count);
- } else if (timespecisset(&tfd->tfd_time.it_value)) {
- timespecsub(&tfd->tfd_time.it_value, &cts, &ts);
- TIMESPEC_TO_TIMEVAL(&tv, &ts);
- callout_reset(&tfd->tfd_callout, tvtohz(&tv),
- linux_timerfd_expire, tfd);
- }
-}
diff --git a/sys/compat/linux/linux_event.h b/sys/compat/linux/linux_event.h
index 32269b0070bc..fa63371b5170 100644
--- a/sys/compat/linux/linux_event.h
+++ b/sys/compat/linux/linux_event.h
@@ -54,15 +54,4 @@
#define LINUX_EFD_SEMAPHORE (1 << 0)
-#define LINUX_TFD_TIMER_ABSTIME (1 << 0)
-#define LINUX_TFD_TIMER_CANCEL_ON_SET (1 << 1)
-#define LINUX_TFD_CLOEXEC LINUX_O_CLOEXEC
-#define LINUX_TFD_NONBLOCK LINUX_O_NONBLOCK
-
-#define LINUX_TFD_SHARED_FCNTL_FLAGS (LINUX_TFD_CLOEXEC \
- |LINUX_TFD_NONBLOCK)
-#define LINUX_TFD_CREATE_FLAGS LINUX_TFD_SHARED_FCNTL_FLAGS
-#define LINUX_TFD_SETTIME_FLAGS (LINUX_TFD_TIMER_ABSTIME \
- |LINUX_TFD_TIMER_CANCEL_ON_SET)
-
#endif /* !_LINUX_EVENT_H_ */
diff --git a/sys/conf/files b/sys/conf/files
index 3f79ce752c80..8d38b9cc8a2e 100644
--- a/sys/conf/files
+++ b/sys/conf/files
@@ -3908,6 +3908,7 @@ kern/sys_pipe.c standard
kern/sys_procdesc.c standard
kern/sys_process.c standard
kern/sys_socket.c standard
+kern/sys_timerfd.c standard
kern/syscalls.c standard
kern/sysv_ipc.c standard
kern/sysv_msg.c optional sysvmsg
diff --git a/sys/kern/init_sysent.c b/sys/kern/init_sysent.c
index 1e62c46b8be0..d44fec54fcd7 100644
--- a/sys/kern/init_sysent.c
+++ b/sys/kern/init_sysent.c
@@ -645,4 +645,7 @@ struct sysent sysent[] = {
{ .sy_narg = AS(swapoff_args), .sy_call = (sy_call_t *)sys_swapoff, .sy_auevent = AUE_SWAPOFF, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 582 = swapoff */
{ .sy_narg = AS(kqueuex_args), .sy_call = (sy_call_t *)sys_kqueuex, .sy_auevent = AUE_KQUEUE, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 583 = kqueuex */
{ .sy_narg = AS(membarrier_args), .sy_call = (sy_call_t *)sys_membarrier, .sy_auevent = AUE_NULL, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 584 = membarrier */
+ { .sy_narg = AS(timerfd_create_args), .sy_call = (sy_call_t *)sys_timerfd_create, .sy_auevent = AUE_TIMERFD, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 585 = timerfd_create */
+ { .sy_narg = AS(timerfd_gettime_args), .sy_call = (sy_call_t *)sys_timerfd_gettime, .sy_auevent = AUE_TIMERFD, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 586 = timerfd_gettime */
+ { .sy_narg = AS(timerfd_settime_args), .sy_call = (sy_call_t *)sys_timerfd_settime, .sy_auevent = AUE_TIMERFD, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 587 = timerfd_settime */
};
diff --git a/sys/kern/kern_descrip.c b/sys/kern/kern_descrip.c
index c5226288afc5..35046c856d54 100644
--- a/sys/kern/kern_descrip.c
+++ b/sys/kern/kern_descrip.c
@@ -5001,8 +5001,8 @@ file_type_to_name(short type)
return ("proc");
case DTYPE_EVENTFD:
return ("eventfd");
- case DTYPE_LINUXTFD:
- return ("ltimer");
+ case DTYPE_TIMERFD:
+ return ("timerfd");
default:
return ("unkn");
}
diff --git a/sys/kern/kern_tc.c b/sys/kern/kern_tc.c
index 170f35830923..26f09cb60260 100644
--- a/sys/kern/kern_tc.c
+++ b/sys/kern/kern_tc.c
@@ -34,6 +34,7 @@
#include <sys/systm.h>
#include <sys/timeffc.h>
#include <sys/timepps.h>
+#include <sys/timerfd.h>
#include <sys/timetc.h>
#include <sys/timex.h>
#include <sys/vdso.h>
@@ -1305,6 +1306,7 @@ tc_setclock(struct timespec *ts)
/* Avoid rtc_generation == 0, since td_rtcgen == 0 is special. */
atomic_add_rel_int(&rtc_generation, 2);
+ timerfd_jumped();
sleepq_chains_remove_matching(sleeping_on_old_rtc);
if (timestepwarnings) {
nanotime(&taft);
diff --git a/sys/kern/sys_timerfd.c b/sys/kern/sys_timerfd.c
new file mode 100644
index 000000000000..6948fa059b8c
--- /dev/null
+++ b/sys/kern/sys_timerfd.c
@@ -0,0 +1,632 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2014 Dmitry Chagin <dchagin@FreeBSD.org>
+ * Copyright (c) 2023 Jake Freeland <jfree@FreeBSD.org>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/callout.h>
+#include <sys/fcntl.h>
+#include <sys/file.h>
+#include <sys/filedesc.h>
+#include <sys/filio.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/mount.h>
+#include <sys/mutex.h>
+#include <sys/poll.h>
+#include <sys/proc.h>
+#include <sys/queue.h>
+#include <sys/selinfo.h>
+#include <sys/stat.h>
+#include <sys/sysctl.h>
+#include <sys/sysent.h>
+#include <sys/sysproto.h>
+#include <sys/timerfd.h>
+#include <sys/timespec.h>
+#include <sys/uio.h>
+#include <sys/user.h>
+
+#include <security/audit/audit.h>
+
+#ifdef COMPAT_FREEBSD32
+#include <compat/freebsd32/freebsd32.h>
+#include <compat/freebsd32/freebsd32_proto.h>
+#endif
+
+static MALLOC_DEFINE(M_TIMERFD, "timerfd", "timerfd structures");
+static LIST_HEAD(, timerfd) timerfd_head;
+static struct unrhdr64 tfdino_unr;
+
+#define TFD_NOJUMP 0 /* Realtime clock has not jumped. */
+#define TFD_READ 1 /* Jumped, tfd has been read since. */
+#define TFD_ZREAD 2 /* Jumped backwards, CANCEL_ON_SET=false. */
+#define TFD_CANCELED 4 /* Jumped, CANCEL_ON_SET=true. */
+#define TFD_JUMPED (TFD_ZREAD | TFD_CANCELED)
+
+struct timerfd {
+ /* User specified. */
+ struct itimerspec tfd_time; /* tfd timer */
+ clockid_t tfd_clockid; /* timing base */
+ int tfd_flags; /* creation flags */
+ int tfd_timflags; /* timer flags */
+
+ /* Used internally. */
+ timerfd_t tfd_count; /* expiration count since last read */
+ bool tfd_expired; /* true upon initial expiration */
+ struct mtx tfd_lock; /* mtx lock */
+ struct callout tfd_callout; /* expiration notification */
+ struct selinfo tfd_sel; /* I/O alerts */
+ struct timespec tfd_boottim; /* cached boottime */
+ int tfd_jumped; /* timer jump status */
+ LIST_ENTRY(timerfd) entry; /* entry in list */
+
+ /* For stat(2). */
+ ino_t tfd_ino; /* inode number */
+ struct timespec tfd_atim; /* time of last read */
+ struct timespec tfd_mtim; /* time of last settime */
+ struct timespec tfd_birthtim; /* creation time */
+};
+
+static void
+timerfd_init(void *data)
+{
+ new_unrhdr64(&tfdino_unr, 1);
+}
+
+SYSINIT(timerfd, SI_SUB_VFS, SI_ORDER_ANY, timerfd_init, NULL);
+
+static inline void
+timerfd_getboottime(struct timespec *ts)
+{
+ struct timeval tv;
+ getboottime(&tv);
+ TIMEVAL_TO_TIMESPEC(&tv, ts);
+}
+
+/*
+ * Call when a discontinuous jump has occured in CLOCK_REALTIME and
+ * update timerfd's cached boottime. A jump can be triggered using
+ * functions like clock_settime(2) or settimeofday(2).
+ *
+ * Timer is marked TFD_CANCELED if TFD_TIMER_CANCEL_ON_SET is set
*** 850 LINES SKIPPED ***