git: 77a5c63f7923 - stable/15 - struct file: add DFLAG_FORK, indicate copying on fork

From: Konstantin Belousov <kib_at_FreeBSD.org>
Date: Sat, 01 Nov 2025 00:45:01 UTC
The branch stable/15 has been updated by kib:

URL: https://cgit.FreeBSD.org/src/commit/?id=77a5c63f79236301dece4f3045e254ccc5f49716

commit 77a5c63f79236301dece4f3045e254ccc5f49716
Author:     Konstantin Belousov <kib@FreeBSD.org>
AuthorDate: 2025-08-20 19:07:43 +0000
Commit:     Konstantin Belousov <kib@FreeBSD.org>
CommitDate: 2025-11-01 00:43:13 +0000

    struct file: add DFLAG_FORK, indicate copying on fork
    
    (cherry picked from commit 1265516c5c610fbc0d65451140e3f17b358cd015)
---
 sys/kern/kern_descrip.c | 51 +++++++++++++++++++++++++++++++++++++++++++++++--
 sys/sys/file.h          |  6 +++++-
 2 files changed, 54 insertions(+), 3 deletions(-)

diff --git a/sys/kern/kern_descrip.c b/sys/kern/kern_descrip.c
index 057235574eb5..6934a5a83081 100644
--- a/sys/kern/kern_descrip.c
+++ b/sys/kern/kern_descrip.c
@@ -2510,10 +2510,13 @@ fdcopy(struct filedesc *fdp)
 {
 	struct filedesc *newfdp;
 	struct filedescent *nfde, *ofde;
+	struct file *fp;
 	int i, lastfile;
+	bool fork_pass;
 
 	MPASS(fdp != NULL);
 
+	fork_pass = false;
 	newfdp = fdinit();
 	FILEDESC_SLOCK(fdp);
 	for (;;) {
@@ -2524,10 +2527,35 @@ fdcopy(struct filedesc *fdp)
 		fdgrowtable(newfdp, lastfile + 1);
 		FILEDESC_SLOCK(fdp);
 	}
-	/* copy all passable descriptors (i.e. not kqueue) */
+
+	/*
+	 * Copy all passable descriptors (i.e. not kqueues), and
+	 * prepare to handle descriptors that are copyable but not
+	 * passable (kqueues).
+	 *
+	 * The copying pass runs only after all passable files have
+	 * been installed into the new file descriptor table, because
+	 * a kqueue needs every file descriptor it references,
+	 * including other kqueues, to already be valid. For the same
+	 * reason the copying itself takes two passes: the first
+	 * installs not yet fully initialized ('empty') copyable files
+	 * into the new fd table, and the second gives each subsystem
+	 * a chance to fill the backing structure of the copied file
+	 * with its real content.
+	 */
 	newfdp->fd_freefile = fdp->fd_freefile;
 	FILEDESC_FOREACH_FDE(fdp, i, ofde) {
-		if ((ofde->fde_file->f_ops->fo_flags & DFLAG_PASSABLE) == 0 ||
+		const struct fileops *ops;
+
+		ops = ofde->fde_file->f_ops;
+		fp = NULL;
+		if ((ops->fo_flags & DFLAG_FORK) != 0 &&
+		    (ofde->fde_flags & UF_FOCLOSE) == 0) {
+			if (ops->fo_fork(newfdp, ofde->fde_file, &fp, p1,
+			    curthread) != 0)
+				continue;
+			fork_pass = true;
+		} else if ((ops->fo_flags & DFLAG_PASSABLE) == 0 ||
 		    (ofde->fde_flags & UF_FOCLOSE) != 0 ||
 		    !fhold(ofde->fde_file)) {
 			if (newfdp->fd_freefile == fdp->fd_freefile)
@@ -2536,11 +2564,30 @@ fdcopy(struct filedesc *fdp)
 		}
 		nfde = &newfdp->fd_ofiles[i];
 		*nfde = *ofde;
+		if (fp != NULL)
+			nfde->fde_file = fp;
 		filecaps_copy(&ofde->fde_caps, &nfde->fde_caps, true);
 		fdused_init(newfdp, i);
 	}
 	MPASS(newfdp->fd_freefile != -1);
 	FILEDESC_SUNLOCK(fdp);
+
+	/*
+	 * Now handle copying kqueues, since all fds, including
+	 * kqueues, are in place.
+	 */
+	if (__predict_false(fork_pass)) {
+		FILEDESC_FOREACH_FDE(newfdp, i, nfde) {
+			const struct fileops *ops;
+
+			ops = nfde->fde_file->f_ops;
+			if ((ops->fo_flags & DFLAG_FORK) == 0 ||
+			    nfde->fde_file == NULL)
+				continue;
+			ops->fo_fork(newfdp, NULL, &nfde->fde_file, p1,
+			    curthread);
+		}
+	}
 	return (newfdp);
 }
 
diff --git a/sys/sys/file.h b/sys/sys/file.h
index 9a072121e25f..b2bd62f244da 100644
--- a/sys/sys/file.h
+++ b/sys/sys/file.h
@@ -137,6 +137,8 @@ typedef int fo_fspacectl_t(struct file *fp, int cmd,
 		    off_t *offset, off_t *length, int flags,
 		    struct ucred *active_cred, struct thread *td);
 typedef int fo_cmp_t(struct file *fp, struct file *fp1, struct thread *td);
+typedef	int fo_fork_t(struct filedesc *fdp, struct file *fp, struct file **fp1,
+		    struct proc *p1, struct thread *td);
 typedef int fo_spare_t(struct file *fp);
 typedef	int fo_flags_t;
 
@@ -161,12 +163,14 @@ struct fileops {
 	fo_fallocate_t	*fo_fallocate;
 	fo_fspacectl_t	*fo_fspacectl;
 	fo_cmp_t	*fo_cmp;
-	fo_spare_t	*fo_spares[8];	/* Spare slots */
+	fo_fork_t	*fo_fork;
+	fo_spare_t	*fo_spares[7];	/* Spare slots */
 	fo_flags_t	fo_flags;	/* DFLAG_* below */
 };
 
 #define DFLAG_PASSABLE	0x01	/* may be passed via unix sockets. */
 #define DFLAG_SEEKABLE	0x02	/* seekable / nonsequential */
+#define	DFLAG_FORK	0x04	/* copy on fork */
 #endif /* _KERNEL */
 
 #if defined(_KERNEL) || defined(_WANT_FILE)