Re: git: 37b2cb5ecb0f - main - vfs: Add support for file cloning to VOP_COPY_FILE_RANGE
Date: Fri, 08 Aug 2025 05:45:41 UTC
On Fri, Aug 08, 2025 at 12:55:34AM +0000, Rick Macklem wrote:
> The branch main has been updated by rmacklem:
>
> URL: https://cgit.FreeBSD.org/src/commit/?id=37b2cb5ecb0fb1b1f5a98ff4c08b61b8c05ec1d7
>
> commit 37b2cb5ecb0fb1b1f5a98ff4c08b61b8c05ec1d7
> Author: Rick Macklem <rmacklem@FreeBSD.org>
> AuthorDate: 2025-08-08 00:52:23 +0000
> Commit: Rick Macklem <rmacklem@FreeBSD.org>
> CommitDate: 2025-08-08 00:52:23 +0000
>
> vfs: Add support for file cloning to VOP_COPY_FILE_RANGE
>
> NFSv4 has a separate CLONE operation from COPY with
> a couple of semantics differences. Unlike COPY, CLONE
> must complete the "copy on write" and cannot return
> partially copied. It also is required to use offsets (and
> the length if not to EOF) that are aligned to a buffer
> boundary.
>
> Since VOP_COPY_FILE_RANGE() can already do "copy on write"
> for file systems that support it, such as ZFS with block
> cloning enabled, all this patch does is add a flag called
> COPY_FILE_RANGE_CLONE so that it will conform to the
> rule that it must do a "copy on write" to completion.
>
> The patch also adds a new pathconf(2) name _PC_CLONE_BLKSIZE,
> which acquires the blocksize requirement for cloning and
> returns 0 for file systems that do not support the
> "copy on write" feature. (This is needed for the NFSv4.2
> clone_blksize attribute.)
>
> This patch will allow the implementation of CLONE
> for NFSv4.2.
>
> Reviewed by: asomers
> Differential Revision: https://reviews.freebsd.org/D51808
> ---
> sys/fs/fuse/fuse_vnops.c | 3 +++
> sys/kern/vfs_default.c | 1 +
> sys/kern/vfs_syscalls.c | 2 +-
> sys/kern/vfs_vnops.c | 5 +++++
> sys/sys/unistd.h | 1 +
> sys/sys/vnode.h | 17 +++++++++++++----
> 6 files changed, 24 insertions(+), 5 deletions(-)
>
> diff --git a/sys/fs/fuse/fuse_vnops.c b/sys/fs/fuse/fuse_vnops.c
> index b90ce60ec664..b782146b7278 100644
> --- a/sys/fs/fuse/fuse_vnops.c
> +++ b/sys/fs/fuse/fuse_vnops.c
> @@ -877,6 +877,9 @@ fuse_vnop_copy_file_range(struct vop_copy_file_range_args *ap)
> pid_t pid;
> int err;
>
> + if ((ap->a_flags & COPY_FILE_RANGE_CLONE) != 0)
> + return (EXTERROR(ENOSYS, "Cannot clone"));
> +
> if (mp == NULL || mp != vnode_mount(outvp))
> return (EXTERROR(ENOSYS, "Mount points do not match"));
>
> diff --git a/sys/kern/vfs_default.c b/sys/kern/vfs_default.c
> index fd6202a1424c..85f67731e1cc 100644
> --- a/sys/kern/vfs_default.c
> +++ b/sys/kern/vfs_default.c
> @@ -457,6 +457,7 @@ vop_stdpathconf(struct vop_pathconf_args *ap)
> case _PC_NAMEDATTR_ENABLED:
> case _PC_HAS_NAMEDATTR:
> case _PC_HAS_HIDDENSYSTEM:
> + case _PC_CLONE_BLKSIZE:
> *ap->a_retval = 0;
> return (0);
> default:
> diff --git a/sys/kern/vfs_syscalls.c b/sys/kern/vfs_syscalls.c
> index 9704e9c160a8..c64618036733 100644
> --- a/sys/kern/vfs_syscalls.c
> +++ b/sys/kern/vfs_syscalls.c
> @@ -5058,7 +5058,7 @@ kern_copy_file_range(struct thread *td, int infd, off_t *inoffp, int outfd,
> error = 0;
> retlen = 0;
>
> - if (flags != 0) {
> + if ((flags & ~COPY_FILE_RANGE_USERFLAGS) != 0) {
> error = EINVAL;
> goto out;
> }
> diff --git a/sys/kern/vfs_vnops.c b/sys/kern/vfs_vnops.c
> index 6451c9e07a60..93f87ddae4de 100644
> --- a/sys/kern/vfs_vnops.c
> +++ b/sys/kern/vfs_vnops.c
> @@ -3443,6 +3443,11 @@ vn_generic_copy_file_range(struct vnode *invp, off_t *inoffp,
> interrupted = 0;
> dat = NULL;
>
> + if ((flags & COPY_FILE_RANGE_CLONE) != 0) {
> + error = ENOSYS;
> + goto out;
> + }
> +
> error = vn_lock(invp, LK_SHARED);
> if (error != 0)
> goto out;
> diff --git a/sys/sys/unistd.h b/sys/sys/unistd.h
> index 85ed93fd359d..7ab2f021e408 100644
> --- a/sys/sys/unistd.h
> +++ b/sys/sys/unistd.h
> @@ -159,6 +159,7 @@
> #define _PC_XATTR_ENABLED _PC_NAMEDATTR_ENABLED /* Solaris Compatible */
> #define _PC_XATTR_EXISTS _PC_HAS_NAMEDATTR /* Solaris Compatible */
> #define _PC_HAS_HIDDENSYSTEM 68
> +#define _PC_CLONE_BLKSIZE 69
> #endif
>
> /* From OpenSolaris, used by SEEK_DATA/SEEK_HOLE. */
> diff --git a/sys/sys/vnode.h b/sys/sys/vnode.h
> index 074769d55c2d..8080e9edd8c3 100644
> --- a/sys/sys/vnode.h
> +++ b/sys/sys/vnode.h
> @@ -397,8 +397,21 @@ struct vattr {
> */
> #define VLKTIMEOUT (hz / 20 + 1)
>
> +/* copy_file_range flags */
> +#define COPY_FILE_RANGE_KFLAGS 0xff000000
> +
> +/*
> + * copy_file_range flags visible to user space.
> + * Allocate high bits first, to try and avoid conflicting with Linux.
> + */
> +#define COPY_FILE_RANGE_CLONE 0x00800000 /* Require cloning. */
> +#define COPY_FILE_RANGE_USERFLAGS (COPY_FILE_RANGE_CLONE)
> +
These are userspace flags for copy_file_range(2). Now you require
userspace to include non-std-compliant and highly offensive WRT namespace
pollution sys/vnode.h header to get to some bits of the copy_file_range(2)
interface. This is not right thing to do, IMO.
> #ifdef _KERNEL
>
> +/* copy_file_range flags only usable in the kernel */
> +#define COPY_FILE_RANGE_TIMEO1SEC 0x01000000 /* Return after 1sec. */
> +
> #ifdef MALLOC_DECLARE
> MALLOC_DECLARE(M_VNODE);
> #endif
> @@ -621,10 +634,6 @@ typedef void vop_getpages_iodone_t(void *, vm_page_t *, int, int);
> #define VN_OPEN_INVFS 0x00000008
> #define VN_OPEN_WANTIOCTLCAPS 0x00000010
>
> -/* copy_file_range kernel flags */
> -#define COPY_FILE_RANGE_KFLAGS 0xff000000
> -#define COPY_FILE_RANGE_TIMEO1SEC 0x01000000 /* Return after 1sec. */
> -
> /*
> * Public vnode manipulation functions.
> */