git: a5e284038edc - main - open(2): Add O_DSYNC flag.

Thomas Munro tmunro at FreeBSD.org
Fri Jan 8 00:16:58 UTC 2021


The branch main has been updated by tmunro:

URL: https://cgit.FreeBSD.org/src/commit/?id=a5e284038edc36b0447f1e6337419a3c0ea1788d

commit a5e284038edc36b0447f1e6337419a3c0ea1788d
Author:     Thomas Munro <tmunro at FreeBSD.org>
AuthorDate: 2021-01-07 10:46:51 +0000
Commit:     Thomas Munro <tmunro at FreeBSD.org>
CommitDate: 2021-01-08 00:15:56 +0000

    open(2): Add O_DSYNC flag.
    
    POSIX O_DSYNC means that writes include an implicit fdatasync(2), just
    as O_SYNC implies fsync(2).
    
    VOP_WRITE() functions that understand the new IO_DATASYNC flag can act
    accordingly, but we'll still pass down IO_SYNC so that file systems that
    don't understand it will continue to provide the stronger O_SYNC
    behaviour.
    
    The flag can also be queried and changed with fcntl(2) (F_GETFL/F_SETFL).
    
    Reviewed by: kib, delphij
    Differential Revision: https://reviews.freebsd.org/D25090
---
 lib/libc/sys/fcntl.2                                 | 19 +++++++++++++++++--
 lib/libc/sys/open.2                                  | 20 +++++++++++++-------
 .../openzfs/include/os/freebsd/spl/sys/ccompile.h    |  1 -
 .../openzfs/include/os/freebsd/spl/sys/vnode.h       |  2 ++
 sys/kern/vfs_vnops.c                                 |  7 +++++++
 sys/sys/fcntl.h                                      | 15 +++++++++------
 sys/sys/vnode.h                                      |  1 +
 7 files changed, 49 insertions(+), 16 deletions(-)

diff --git a/lib/libc/sys/fcntl.2 b/lib/libc/sys/fcntl.2
index 9793024f48ed..33ad7a5673e1 100644
--- a/lib/libc/sys/fcntl.2
+++ b/lib/libc/sys/fcntl.2
@@ -28,7 +28,7 @@
 .\"     @(#)fcntl.2	8.2 (Berkeley) 1/12/94
 .\" $FreeBSD$
 .\"
-.Dd January 17, 2020
+.Dd January 6, 2021
 .Dt FCNTL 2
 .Os
 .Sh NAME
@@ -196,7 +196,7 @@ The flags for the
 .Dv F_GETFL
 and
 .Dv F_SETFL
-flags are as follows:
+commands are as follows:
 .Bl -tag -width O_NONBLOCKX
 .It Dv O_NONBLOCK
 Non-blocking I/O; if no data is available to a
@@ -225,6 +225,21 @@ Enable the
 signal to be sent to the process group
 when I/O is possible, e.g.,
 upon availability of data to be read.
+.It Dv O_SYNC
+Enable synchronous writes.
+Corresponds to the
+.Dv O_SYNC
+flag of
+.Xr open 2 .
+.Dv O_FSYNC
+is an historical synonym for
+.Dv O_SYNC .
+.It Dv O_DSYNC
+Enable synchronous data writes.
+Corresponds to the
+.Dv O_DSYNC
+flag of
+.Xr open 2 .
 .El
 .Pp
 The seals that may be applied with
diff --git a/lib/libc/sys/open.2 b/lib/libc/sys/open.2
index d0918fb02eee..e43d012770df 100644
--- a/lib/libc/sys/open.2
+++ b/lib/libc/sys/open.2
@@ -181,8 +181,9 @@ O_EXCL		error if create and file exists
 O_SHLOCK	atomically obtain a shared lock
 O_EXLOCK	atomically obtain an exclusive lock
 O_DIRECT	eliminate or reduce cache effects
-O_FSYNC		synchronous writes
+O_FSYNC		synchronous writes (historical synonym for O_SYNC)
 O_SYNC		synchronous writes
+O_DSYNC		synchronous data writes
 O_NOFOLLOW	do not follow symlinks
 O_NOCTTY	ignored
 O_TTY_INIT	ignored
@@ -230,15 +231,18 @@ returns immediately.
 The descriptor remains in non-blocking mode for subsequent operations.
 .Pp
 If
-.Dv O_FSYNC
+.Dv O_SYNC
 is used in the mask, all writes will
 immediately and synchronously be written to disk.
-.Pp
-.Dv O_SYNC
-is a synonym for
 .Dv O_FSYNC
-required by
-.Tn POSIX .
+is an historical synonym for
+.Dv O_SYNC .
+.Pp
+If
+.Dv O_DSYNC
+is used in the mask, all data and metadata required to read the data will be
+synchronously written to disk, but changes to metadata such as file access and
+modification timestamps may be written later.
 .Pp
 If
 .Dv O_NOFOLLOW
@@ -651,6 +655,8 @@ The
 .Fn openat
 function was introduced in
 .Fx 8.0 .
+.Dv O_DSYNC
+appeared in 13.0.
 .Sh BUGS
 The Open Group Extended API Set 2 specification requires that the test
 for whether
diff --git a/sys/contrib/openzfs/include/os/freebsd/spl/sys/ccompile.h b/sys/contrib/openzfs/include/os/freebsd/spl/sys/ccompile.h
index a02e8f098540..524b81e68e61 100644
--- a/sys/contrib/openzfs/include/os/freebsd/spl/sys/ccompile.h
+++ b/sys/contrib/openzfs/include/os/freebsd/spl/sys/ccompile.h
@@ -168,7 +168,6 @@ extern "C" {
 
 #define	O_LARGEFILE 0
 #define	O_RSYNC 0
-#define	O_DSYNC 0
 
 #ifndef LOCORE
 #ifndef HAVE_RPC_TYPES
diff --git a/sys/contrib/openzfs/include/os/freebsd/spl/sys/vnode.h b/sys/contrib/openzfs/include/os/freebsd/spl/sys/vnode.h
index 6a6146132765..fa7bbd88c6c8 100644
--- a/sys/contrib/openzfs/include/os/freebsd/spl/sys/vnode.h
+++ b/sys/contrib/openzfs/include/os/freebsd/spl/sys/vnode.h
@@ -127,7 +127,9 @@ vn_is_readonly(vnode_t *vp)
 #define	FCREAT		O_CREAT
 #define	FTRUNC		O_TRUNC
 #define	FEXCL		O_EXCL
+#ifndef FDSYNC
 #define	FDSYNC		FFSYNC
+#endif
 #define	FRSYNC		FFSYNC
 #define	FSYNC		FFSYNC
 #define	FOFFMAX		0x00
diff --git a/sys/kern/vfs_vnops.c b/sys/kern/vfs_vnops.c
index 2ca2cf124c23..de5cd68501a7 100644
--- a/sys/kern/vfs_vnops.c
+++ b/sys/kern/vfs_vnops.c
@@ -1066,6 +1066,13 @@ vn_write(struct file *fp, struct uio *uio, struct ucred *active_cred, int flags,
 	if ((fp->f_flag & O_FSYNC) ||
 	    (vp->v_mount && (vp->v_mount->mnt_flag & MNT_SYNCHRONOUS)))
 		ioflag |= IO_SYNC;
+	/*
+	 * For O_DSYNC we set both IO_SYNC and IO_DATASYNC, so that VOP_WRITE()
+	 * implementations that don't understand IO_DATASYNC fall back to full
+	 * O_SYNC behavior.
+	 */
+	if (fp->f_flag & O_DSYNC)
+		ioflag |= IO_SYNC | IO_DATASYNC;
 	mp = NULL;
 	if (vp->v_type != VCHR &&
 	    (error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
diff --git a/sys/sys/fcntl.h b/sys/sys/fcntl.h
index 2f424d173949..e2597726c53b 100644
--- a/sys/sys/fcntl.h
+++ b/sys/sys/fcntl.h
@@ -141,8 +141,10 @@ typedef	__pid_t		pid_t;
 					   return back */
 #endif
 
+#define	O_DSYNC		0x00800000	/* POSIX data sync */
+
 /*
- * XXX missing O_DSYNC, O_RSYNC.
+ * XXX missing O_RSYNC.
  */
 
 #ifdef _KERNEL
@@ -158,9 +160,9 @@ typedef	__pid_t		pid_t;
 #define	OFLAGS(fflags)	((fflags) & O_EXEC ? (fflags) : (fflags) - 1)
 
 /* bits to save after open */
-#define	FMASK	(FREAD|FWRITE|FAPPEND|FASYNC|FFSYNC|FNONBLOCK|O_DIRECT|FEXEC)
+#define	FMASK	(FREAD|FWRITE|FAPPEND|FASYNC|FFSYNC|FDSYNC|FNONBLOCK|O_DIRECT|FEXEC)
 /* bits settable by fcntl(F_SETFL, ...) */
-#define	FCNTLFLAGS	(FAPPEND|FASYNC|FFSYNC|FNONBLOCK|FRDAHEAD|O_DIRECT)
+#define	FCNTLFLAGS	(FAPPEND|FASYNC|FFSYNC|FDSYNC|FNONBLOCK|FRDAHEAD|O_DIRECT)
 
 #if defined(COMPAT_FREEBSD7) || defined(COMPAT_FREEBSD6) || \
     defined(COMPAT_FREEBSD5) || defined(COMPAT_FREEBSD4)
@@ -185,15 +187,16 @@ typedef	__pid_t		pid_t;
 #define	FAPPEND		O_APPEND	/* kernel/compat */
 #define	FASYNC		O_ASYNC		/* kernel/compat */
 #define	FFSYNC		O_FSYNC		/* kernel */
+#define	FDSYNC		O_DSYNC		/* kernel */
 #define	FNONBLOCK	O_NONBLOCK	/* kernel */
 #define	FNDELAY		O_NONBLOCK	/* compat */
 #define	O_NDELAY	O_NONBLOCK	/* compat */
 #endif
 
 /*
- * We are out of bits in f_flag (which is a short).  However,
- * the flag bits not set in FMASK are only meaningful in the
- * initial open syscall.  Those bits can thus be given a
+ * Historically, we ran out of bits in f_flag (which was once a short).
+ * However, the flag bits not set in FMASK are only meaningful in the
+ * initial open syscall.  Those bits were thus given a
  * different meaning for fcntl(2).
  */
 #if __BSD_VISIBLE
diff --git a/sys/sys/vnode.h b/sys/sys/vnode.h
index ff1323cb8719..0eadfec02313 100644
--- a/sys/sys/vnode.h
+++ b/sys/sys/vnode.h
@@ -326,6 +326,7 @@ struct vattr {
 #define	IO_NOMACCHECK	0x1000		/* MAC checks unnecessary */
 #define	IO_BUFLOCKED	0x2000		/* ffs flag; indir buf is locked */
 #define	IO_RANGELOCKED	0x4000		/* range locked */
+#define	IO_DATASYNC	0x8000		/* do only data I/O synchronously */
 
 #define IO_SEQMAX	0x7F		/* seq heuristic max value */
 #define IO_SEQSHIFT	16		/* seq heuristic in upper 16 bits */


More information about the dev-commits-src-all mailing list