git: 51485f81b01a - stable/13 - FIOSEEKHOLE/FIOSEEKDATA: correct consistency for bmap-based implementation

From: Konstantin Belousov <kib_at_FreeBSD.org>
Date: Sat, 11 Feb 2023 00:36:01 UTC
The branch stable/13 has been updated by kib:

URL: https://cgit.FreeBSD.org/src/commit/?id=51485f81b01a03752ffaf530abfb570ae4593fae

commit 51485f81b01a03752ffaf530abfb570ae4593fae
Author:     Konstantin Belousov <kib@FreeBSD.org>
AuthorDate: 2023-02-04 01:20:19 +0000
Commit:     Konstantin Belousov <kib@FreeBSD.org>
CommitDate: 2023-02-11 00:25:02 +0000

    FIOSEEKHOLE/FIOSEEKDATA: correct consistency for bmap-based implementation
    
    PR:     269261
    
    (cherry picked from commit 3b6056204dd80cc866b7998ef0776247ebc42ce4)
---
 sys/kern/vfs_vnops.c    | 12 +++++++++++-
 sys/ufs/ufs/ufs_bmap.c  | 18 ++++++++++++++++++
 sys/ufs/ufs/ufs_vnops.c |  2 +-
 3 files changed, 30 insertions(+), 2 deletions(-)

diff --git a/sys/kern/vfs_vnops.c b/sys/kern/vfs_vnops.c
index f4274ee38689..566b781098ad 100644
--- a/sys/kern/vfs_vnops.c
+++ b/sys/kern/vfs_vnops.c
@@ -2546,6 +2546,7 @@ vn_pages_remove_valid(struct vnode *vp, vm_pindex_t start, vm_pindex_t end)
 int
 vn_bmap_seekhole(struct vnode *vp, u_long cmd, off_t *off, struct ucred *cred)
 {
+	vm_object_t obj;
 	struct vattr va;
 	daddr_t bn, bnp;
 	uint64_t bsize;
@@ -2555,7 +2556,7 @@ vn_bmap_seekhole(struct vnode *vp, u_long cmd, off_t *off, struct ucred *cred)
 	KASSERT(cmd == FIOSEEKHOLE || cmd == FIOSEEKDATA,
 	    ("Wrong command %lu", cmd));
 
-	if (vn_lock(vp, LK_SHARED) != 0)
+	if (vn_lock(vp, LK_EXCLUSIVE) != 0)
 		return (EBADF);
 	if (vp->v_type != VREG) {
 		error = ENOTTY;
@@ -2569,6 +2570,15 @@ vn_bmap_seekhole(struct vnode *vp, u_long cmd, off_t *off, struct ucred *cred)
 		error = ENXIO;
 		goto unlock;
 	}
+
+	/* See the comment in ufs_bmap_seekdata(). */
+	obj = vp->v_object;
+	if (obj != NULL) {
+		VM_OBJECT_WLOCK(obj);
+		vm_object_page_clean(obj, 0, 0, OBJPC_SYNC);
+		VM_OBJECT_WUNLOCK(obj);
+	}
+
 	bsize = vp->v_mount->mnt_stat.f_iosize;
 	for (bn = noff / bsize; noff < va.va_size; bn++, noff += bsize -
 	    noff % bsize) {
diff --git a/sys/ufs/ufs/ufs_bmap.c b/sys/ufs/ufs/ufs_bmap.c
index 4ac8ca149279..acdd334f6c7b 100644
--- a/sys/ufs/ufs/ufs_bmap.c
+++ b/sys/ufs/ufs/ufs_bmap.c
@@ -44,12 +44,16 @@ __FBSDID("$FreeBSD$");
 #include <sys/bio.h>
 #include <sys/buf.h>
 #include <sys/proc.h>
+#include <sys/rwlock.h>
 #include <sys/vnode.h>
 #include <sys/mount.h>
 #include <sys/racct.h>
 #include <sys/resourcevar.h>
 #include <sys/stat.h>
 
+#include <vm/vm.h>
+#include <vm/vm_object.h>
+
 #include <ufs/ufs/extattr.h>
 #include <ufs/ufs/quota.h>
 #include <ufs/ufs/inode.h>
@@ -348,6 +352,7 @@ ufs_bmap_seekdata(struct vnode *vp, off_t *offp)
 	struct inode *ip;
 	struct mount *mp;
 	struct ufsmount *ump;
+	vm_object_t obj;
 	ufs2_daddr_t bn, daddr, nextbn;
 	uint64_t bsize;
 	off_t numblks;
@@ -364,6 +369,19 @@ ufs_bmap_seekdata(struct vnode *vp, off_t *offp)
 	if (*offp < 0 || *offp >= ip->i_size)
 		return (ENXIO);
 
+	/*
+	 * We could have pages on the vnode's object queue which still
+	 * do not have the data blocks allocated.  Convert all dirty
+	 * pages into buffer writes to ensure that we see all
+	 * allocated data.
+	 */
+	obj = vp->v_object;
+	if (obj != NULL) {
+		VM_OBJECT_WLOCK(obj);
+		vm_object_page_clean(obj, 0, 0, OBJPC_SYNC);
+		VM_OBJECT_WUNLOCK(obj);
+	}
+
 	bsize = mp->mnt_stat.f_iosize;
 	for (bn = *offp / bsize, numblks = howmany(ip->i_size, bsize);
 	    bn < numblks; bn = nextbn) {
diff --git a/sys/ufs/ufs/ufs_vnops.c b/sys/ufs/ufs/ufs_vnops.c
index 57560efffbbf..1ebb3597b925 100644
--- a/sys/ufs/ufs/ufs_vnops.c
+++ b/sys/ufs/ufs/ufs_vnops.c
@@ -2971,7 +2971,7 @@ ufs_ioctl(struct vop_ioctl_args *ap)
 	vp = ap->a_vp;
 	switch (ap->a_command) {
 	case FIOSEEKDATA:
-		error = vn_lock(vp, LK_SHARED);
+		error = vn_lock(vp, LK_EXCLUSIVE);
 		if (error == 0) {
 			error = ufs_bmap_seekdata(vp, (off_t *)ap->a_data);
 			VOP_UNLOCK(vp);