git: 3965de642c29 - main - linux: Add inotify support

From: Mark Johnston <markj_at_FreeBSD.org>
Date: Fri, 04 Jul 2025 14:56:04 UTC
The branch main has been updated by markj:

URL: https://cgit.FreeBSD.org/src/commit/?id=3965de642c29d831649c8307203303de560d721a

commit 3965de642c29d831649c8307203303de560d721a
Author:     Mark Johnston <markj@FreeBSD.org>
AuthorDate: 2025-06-06 13:25:09 +0000
Commit:     Mark Johnston <markj@FreeBSD.org>
CommitDate: 2025-07-04 14:42:34 +0000

    linux: Add inotify support
    
    Implement the Linux inotify system calls using the native implementation
    in vfs_inotify.c.
    
    PR:             240874
    Reviewed by:    brooks
    MFC after:      3 months
    Sponsored by:   Klara, Inc.
    Differential Revision:  https://reviews.freebsd.org/D50761
---
 sys/amd64/linux/syscalls.master   |  11 +++-
 sys/amd64/linux32/syscalls.master |  15 ++++-
 sys/arm64/linux/syscalls.master   |  11 +++-
 sys/compat/linux/linux_dummy.c    |   4 --
 sys/compat/linux/linux_file.c     | 121 ++++++++++++++++++++++++++++++++++++++
 sys/compat/linux/linux_file.h     |  32 ++++++++++
 sys/i386/linux/syscalls.master    |  15 ++++-
 sys/kern/vfs_inotify.c            |   4 +-
 sys/sys/inotify.h                 |   4 ++
 sys/x86/linux/linux_dummy_x86.c   |   2 -
 10 files changed, 201 insertions(+), 18 deletions(-)

diff --git a/sys/amd64/linux/syscalls.master b/sys/amd64/linux/syscalls.master
index fd08c9b0279d..5e1394751ef6 100644
--- a/sys/amd64/linux/syscalls.master
+++ b/sys/amd64/linux/syscalls.master
@@ -1476,10 +1476,17 @@
 		int linux_inotify_init(void);
 	}
 254	AUE_NULL	STD {
-		int linux_inotify_add_watch(void);
+		int linux_inotify_add_watch(
+		    l_int fd,
+		    const char *pathname,
+		    uint32_t mask
+		);
 	}
 255	AUE_NULL	STD {
-		int linux_inotify_rm_watch(void);
+		int linux_inotify_rm_watch(
+		    l_int fd,
+		    uint32_t wd
+		);
 	}
 256	AUE_NULL	STD {
 		int linux_migrate_pages(void);
diff --git a/sys/amd64/linux32/syscalls.master b/sys/amd64/linux32/syscalls.master
index 92d5f09c423f..7bd522a598e8 100644
--- a/sys/amd64/linux32/syscalls.master
+++ b/sys/amd64/linux32/syscalls.master
@@ -1589,10 +1589,17 @@
 		int linux_inotify_init(void);
 	}
 292	AUE_NULL	STD {
-		int linux_inotify_add_watch(void);
+		int linux_inotify_add_watch(
+		    l_int fd,
+		    const char *pathname,
+		    uint32_t mask
+		);
 	}
 293	AUE_NULL	STD {
-		int linux_inotify_rm_watch(void);
+		int linux_inotify_rm_watch(
+		    l_int fd,
+		    uint32_t wd
+		);
 	}
 ; Linux 2.6.16:
 294	AUE_NULL	STD {
@@ -1860,7 +1867,9 @@
 		);
 	}
 332	AUE_NULL	STD {
-		int linux_inotify_init1(void);
+		int linux_inotify_init1(
+		    l_int flags
+		);
 	}
 ; Linux 2.6.30:
 333	AUE_NULL	STD {
diff --git a/sys/arm64/linux/syscalls.master b/sys/arm64/linux/syscalls.master
index 79c04c398e00..2babdcaf03bf 100644
--- a/sys/arm64/linux/syscalls.master
+++ b/sys/arm64/linux/syscalls.master
@@ -170,10 +170,17 @@
 		);
 	}
 27	AUE_NULL	STD	{
-		int linux_inotify_add_watch(void);
+		int linux_inotify_add_watch(
+		    l_int fd,
+		    const char *pathname,
+		    uint32_t mask
+		);
 	}
 28	AUE_NULL	STD	{
-		int linux_inotify_rm_watch(void);
+		int linux_inotify_rm_watch(
+		    l_int fd,
+		    uint32_t wd
+		);
 	}
 29	AUE_IOCTL	STD	{
 		int linux_ioctl(
diff --git a/sys/compat/linux/linux_dummy.c b/sys/compat/linux/linux_dummy.c
index 35d6debe0da9..19cd55849f65 100644
--- a/sys/compat/linux/linux_dummy.c
+++ b/sys/compat/linux/linux_dummy.c
@@ -74,9 +74,6 @@ DUMMY(kexec_load);
 DUMMY(add_key);
 DUMMY(request_key);
 DUMMY(keyctl);
-/* Linux 2.6.13: */
-DUMMY(inotify_add_watch);
-DUMMY(inotify_rm_watch);
 /* Linux 2.6.16: */
 DUMMY(migrate_pages);
 DUMMY(unshare);
@@ -87,7 +84,6 @@ DUMMY(vmsplice);
 DUMMY(move_pages);
 /* Linux 2.6.27: */
 DUMMY(signalfd4);
-DUMMY(inotify_init1);
 /* Linux 2.6.31: */
 DUMMY(perf_event_open);
 /* Linux 2.6.36: */
diff --git a/sys/compat/linux/linux_file.c b/sys/compat/linux/linux_file.c
index 246bc26d85d4..86834a7ecea8 100644
--- a/sys/compat/linux/linux_file.c
+++ b/sys/compat/linux/linux_file.c
@@ -32,11 +32,13 @@
 #include <sys/fcntl.h>
 #include <sys/file.h>
 #include <sys/filedesc.h>
+#include <sys/inotify.h>
 #include <sys/lock.h>
 #include <sys/mman.h>
 #include <sys/selinfo.h>
 #include <sys/pipe.h>
 #include <sys/proc.h>
+#include <sys/specialfd.h>
 #include <sys/stat.h>
 #include <sys/sx.h>
 #include <sys/syscallsubr.h>
@@ -1877,3 +1879,122 @@ linux_writev(struct thread *td, struct linux_writev_args *args)
 	freeuio(auio);
 	return (linux_enobufs2eagain(td, args->fd, error));
 }
+
+/*
+ * Translate inotify_init1() flags into the native O_CLOEXEC/O_NONBLOCK bits.
+ * Any other bits are reported through linux_msg() and then ignored.
+ */
+static int
+linux_inotify_init_flags(int l_flags)
+{
+	int bsd_flags;
+
+	if ((l_flags & ~(LINUX_IN_CLOEXEC | LINUX_IN_NONBLOCK)) != 0)
+		linux_msg(NULL, "inotify_init1 unsupported flags 0x%x",
+		    l_flags);
+
+	bsd_flags = 0;
+	if ((l_flags & LINUX_IN_CLOEXEC) != 0)
+		bsd_flags |= O_CLOEXEC;
+	if ((l_flags & LINUX_IN_NONBLOCK) != 0)
+		bsd_flags |= O_NONBLOCK;
+	return (bsd_flags);
+}
+
+/*
+ * Shared by inotify_init() and inotify_init1(): convert the Linux flags and
+ * pass the request to kern_specialfd() as SPECIALFD_INOTIFY.
+ */
+static int
+inotify_init_common(struct thread *td, int flags)
+{
+	struct specialfd_inotify si;
+
+	si.flags = linux_inotify_init_flags(flags);
+	return (kern_specialfd(td, SPECIALFD_INOTIFY, &si));
+}
+
+#if defined(__i386__) || defined(__amd64__)
+/* inotify_init() has no flags argument; behave like inotify_init1(0). */
+int
+linux_inotify_init(struct thread *td, struct linux_inotify_init_args *args)
+{
+	return (inotify_init_common(td, 0));
+}
+#endif /* __i386__ || __amd64__ */
+
+/* inotify_init1(): as inotify_init(), with caller-supplied flags. */
+int
+linux_inotify_init1(struct thread *td, struct linux_inotify_init1_args *args)
+{
+	return (inotify_init_common(td, args->flags));
+}
+
+/*
+ * The native implementation uses the same values for inotify events as
+ * libinotify, which gives us binary compatibility with Linux.  This simplifies
+ * the shim implementation a lot, as otherwise we would have to handle read(2)
+ * calls on inotify descriptors and translate events to Linux's ABI.  Each
+ * assertion below fails the build if a LINUX_IN_* value and its native
+ * counterpart diverge.
+ */
+#define	LINUX_INOTIFY_ASSERT(name)					\
+	_Static_assert(LINUX_##name == name, #name " mismatch")
+
+LINUX_INOTIFY_ASSERT(IN_ACCESS);
+LINUX_INOTIFY_ASSERT(IN_MODIFY);
+LINUX_INOTIFY_ASSERT(IN_ATTRIB);
+LINUX_INOTIFY_ASSERT(IN_CLOSE_WRITE);
+LINUX_INOTIFY_ASSERT(IN_CLOSE_NOWRITE);
+LINUX_INOTIFY_ASSERT(IN_OPEN);
+LINUX_INOTIFY_ASSERT(IN_MOVED_FROM);
+LINUX_INOTIFY_ASSERT(IN_MOVED_TO);
+LINUX_INOTIFY_ASSERT(IN_CREATE);
+LINUX_INOTIFY_ASSERT(IN_DELETE);
+LINUX_INOTIFY_ASSERT(IN_DELETE_SELF);
+LINUX_INOTIFY_ASSERT(IN_MOVE_SELF);
+
+LINUX_INOTIFY_ASSERT(IN_UNMOUNT);
+LINUX_INOTIFY_ASSERT(IN_Q_OVERFLOW);
+LINUX_INOTIFY_ASSERT(IN_IGNORED);
+
+LINUX_INOTIFY_ASSERT(IN_ISDIR);
+LINUX_INOTIFY_ASSERT(IN_ONLYDIR);
+LINUX_INOTIFY_ASSERT(IN_DONT_FOLLOW);
+LINUX_INOTIFY_ASSERT(IN_MASK_CREATE);
+LINUX_INOTIFY_ASSERT(IN_MASK_ADD);
+LINUX_INOTIFY_ASSERT(IN_ONESHOT);
+LINUX_INOTIFY_ASSERT(IN_EXCL_UNLINK);
+
+#undef LINUX_INOTIFY_ASSERT
+
+/*
+ * Report unknown inotify_add_watch() mask bits.  The mask is returned as is
+ * because the Linux and native bit values are asserted to be identical.
+ */
+static uint32_t
+linux_inotify_watch_flags(uint32_t l_flags)
+{
+	if ((l_flags & ~(LINUX_IN_ALL_EVENTS | LINUX_IN_ALL_FLAGS)) != 0)
+		linux_msg(NULL, "inotify_add_watch unsupported flags 0x%x",
+		    l_flags);
+
+	return (l_flags);
+}
+
+/* inotify_add_watch(): Linux passes no directory fd, so use AT_FDCWD. */
+int
+linux_inotify_add_watch(struct thread *td,
+    struct linux_inotify_add_watch_args *args)
+{
+	return (kern_inotify_add_watch(args->fd, AT_FDCWD, args->pathname,
+	    linux_inotify_watch_flags(args->mask), td));
+}
+
+/* inotify_rm_watch(): forwarded unchanged to the native implementation. */
+int
+linux_inotify_rm_watch(struct thread *td,
+    struct linux_inotify_rm_watch_args *args)
+{
+	return (kern_inotify_rm_watch(args->fd, args->wd, td));
+}
diff --git a/sys/compat/linux/linux_file.h b/sys/compat/linux/linux_file.h
index 2e56942b0f40..7448dc597230 100644
--- a/sys/compat/linux/linux_file.h
+++ b/sys/compat/linux/linux_file.h
@@ -189,6 +189,38 @@
 #define LINUX_HUGETLB_FLAG_ENCODE_2GB	(31 << LINUX_HUGETLB_FLAG_ENCODE_SHIFT)
 #define LINUX_HUGETLB_FLAG_ENCODE_16GB	(34U << LINUX_HUGETLB_FLAG_ENCODE_SHIFT)
 
+/* inotify flags */
+#define	LINUX_IN_ACCESS		0x00000001
+#define	LINUX_IN_MODIFY		0x00000002
+#define	LINUX_IN_ATTRIB		0x00000004
+#define	LINUX_IN_CLOSE_WRITE	0x00000008
+#define	LINUX_IN_CLOSE_NOWRITE	0x00000010
+#define	LINUX_IN_OPEN		0x00000020
+#define	LINUX_IN_MOVED_FROM	0x00000040
+#define	LINUX_IN_MOVED_TO	0x00000080
+#define	LINUX_IN_CREATE		0x00000100
+#define	LINUX_IN_DELETE		0x00000200
+#define	LINUX_IN_DELETE_SELF	0x00000400
+#define	LINUX_IN_MOVE_SELF	0x00000800
+
+#define	LINUX_IN_UNMOUNT	0x00002000
+#define	LINUX_IN_Q_OVERFLOW	0x00004000
+#define	LINUX_IN_IGNORED	0x00008000
+
+#define	LINUX_IN_ONLYDIR	0x01000000
+#define	LINUX_IN_DONT_FOLLOW	0x02000000
+#define	LINUX_IN_EXCL_UNLINK	0x04000000
+#define	LINUX_IN_MASK_CREATE	0x10000000
+#define	LINUX_IN_MASK_ADD	0x20000000
+#define	LINUX_IN_ISDIR		0x40000000
+#define	LINUX_IN_ONESHOT	0x80000000
+
+#define	LINUX_IN_ALL_EVENTS	0x00000fff	/* IN_ACCESS..IN_MOVE_SELF */
+#define	LINUX_IN_ALL_FLAGS	0xf700e000	/* IN_UNMOUNT..IN_ONESHOT */
+
+#define	LINUX_IN_NONBLOCK	0x00000800	/* inotify_init1() flag */
+#define	LINUX_IN_CLOEXEC	0x00080000	/* inotify_init1() flag */
+
 #if defined(_KERNEL)
 struct l_file_handle {
 	l_uint handle_bytes;
diff --git a/sys/i386/linux/syscalls.master b/sys/i386/linux/syscalls.master
index 958336be0f08..2113ea51ac5d 100644
--- a/sys/i386/linux/syscalls.master
+++ b/sys/i386/linux/syscalls.master
@@ -1605,10 +1605,17 @@
 		int linux_inotify_init(void);
 	}
 292	AUE_NULL	STD {
-		int linux_inotify_add_watch(void);
+		int linux_inotify_add_watch(
+		    l_int fd,
+		    const char *pathname,
+		    uint32_t mask
+		);
 	}
 293	AUE_NULL	STD {
-		int linux_inotify_rm_watch(void);
+		int linux_inotify_rm_watch(
+		    l_int fd,
+		    uint32_t wd
+		);
 	}
 ; Linux 2.6.16:
 294	AUE_NULL	STD {
@@ -1872,7 +1879,9 @@
 		);
 	}
 332	AUE_NULL	STD {
-		int linux_inotify_init1(void);
+		int linux_inotify_init1(
+		    l_int flags
+		);
 	}
 ; Linux 2.6.30:
 333	AUE_NULL	STD {
diff --git a/sys/kern/vfs_inotify.c b/sys/kern/vfs_inotify.c
index 929ce0426ee8..9562350c897f 100644
--- a/sys/kern/vfs_inotify.c
+++ b/sys/kern/vfs_inotify.c
@@ -872,7 +872,7 @@ fget_inotify(struct thread *td, int fd, const cap_rights_t *needrightsp,
 	return (0);
 }
 
-static int
+int
 kern_inotify_add_watch(int fd, int dfd, const char *path, uint32_t mask,
     struct thread *td)
 {
@@ -958,7 +958,7 @@ sys_inotify_add_watch_at(struct thread *td,
 	    uap->mask, td));
 }
 
-static int
+int
 kern_inotify_rm_watch(int fd, uint32_t wd, struct thread *td)
 {
 	struct file *fp;
diff --git a/sys/sys/inotify.h b/sys/sys/inotify.h
index 6a266aacce32..65dc5dba43f3 100644
--- a/sys/sys/inotify.h
+++ b/sys/sys/inotify.h
@@ -89,6 +89,10 @@ struct vnode;
 int	inotify_create_file(struct thread *, struct file *, int, int *);
 void	inotify_log(struct vnode *, const char *, size_t, int, __uint32_t);
 
+int	kern_inotify_rm_watch(int, uint32_t, struct thread *);
+int	kern_inotify_add_watch(int, int, const char *, uint32_t,
+	    struct thread *);
+
 void	vn_inotify(struct vnode *, struct vnode *, struct componentname *, int,
 	    uint32_t);
 int	vn_inotify_add_watch(struct vnode *, struct inotify_softc *,
diff --git a/sys/x86/linux/linux_dummy_x86.c b/sys/x86/linux/linux_dummy_x86.c
index ae1d23e811e7..221f5dbf5ba3 100644
--- a/sys/x86/linux/linux_dummy_x86.c
+++ b/sys/x86/linux/linux_dummy_x86.c
@@ -46,7 +46,5 @@ LIN_SDT_PROVIDER_DECLARE(LINUX_DTRACE);
 
 DUMMY(sysfs);
 DUMMY(quotactl);
-/* Linux 2.6.13: */
-DUMMY(inotify_init);
 /* Linux 2.6.22: */
 DUMMY(signalfd);