git: 3b6056204dd8 - main - FIOSEEKHOLE/FIOSEEKDATA: correct consistency for bmap-based implementation
- Go to: [ bottom of page ] [ top of archives ] [ this month ]
Date: Sat, 04 Feb 2023 18:32:45 UTC
The branch main has been updated by kib:
URL: https://cgit.FreeBSD.org/src/commit/?id=3b6056204dd80cc866b7998ef0776247ebc42ce4
commit 3b6056204dd80cc866b7998ef0776247ebc42ce4
Author: Konstantin Belousov <kib@FreeBSD.org>
AuthorDate: 2023-02-04 01:20:19 +0000
Commit: Konstantin Belousov <kib@FreeBSD.org>
CommitDate: 2023-02-04 18:32:07 +0000
FIOSEEKHOLE/FIOSEEKDATA: correct consistency for bmap-based implementation
Writes on UFS through a mapped region do not allocate disk blocks in
holes immediately. The blocks are allocated when the pages are paged out
for the first time.
This breaks the algorithm in vn_bmap_seekhole() and ufs_bmap_seekdata(),
because VOP_BMAP() reports a hole for a place which already contains
valid data.
Clean the pages before doing VOP_BMAP() in the affected functions. In
principle, we could clean less by only requesting cleaning of the pages
starting from the offset, but it is probably not very important.
PR: 269261
Reported by: asomers
Reviewed by: asomers, markj
Sponsored by: The FreeBSD Foundation
MFC after: 1 week
Differential revision: https://reviews.freebsd.org/D38379
---
sys/kern/vfs_vnops.c | 14 ++++++++++++--
sys/ufs/ufs/ufs_bmap.c | 18 ++++++++++++++++++
sys/ufs/ufs/ufs_vnops.c | 2 +-
3 files changed, 31 insertions(+), 3 deletions(-)
diff --git a/sys/kern/vfs_vnops.c b/sys/kern/vfs_vnops.c
index a49070b6060d..1d90f76f9cd6 100644
--- a/sys/kern/vfs_vnops.c
+++ b/sys/kern/vfs_vnops.c
@@ -2556,6 +2556,7 @@ int
vn_bmap_seekhole_locked(struct vnode *vp, u_long cmd, off_t *off,
struct ucred *cred)
{
+ vm_object_t obj;
off_t size;
daddr_t bn, bnp;
uint64_t bsize;
@@ -2564,7 +2565,7 @@ vn_bmap_seekhole_locked(struct vnode *vp, u_long cmd, off_t *off,
KASSERT(cmd == FIOSEEKHOLE || cmd == FIOSEEKDATA,
("%s: Wrong command %lu", __func__, cmd));
- ASSERT_VOP_LOCKED(vp, "vn_bmap_seekhole_locked");
+ ASSERT_VOP_ELOCKED(vp, "vn_bmap_seekhole_locked");
if (vp->v_type != VREG) {
error = ENOTTY;
@@ -2578,6 +2579,15 @@ vn_bmap_seekhole_locked(struct vnode *vp, u_long cmd, off_t *off,
error = ENXIO;
goto out;
}
+
+ /* See the comment in ufs_bmap_seekdata(). */
+ obj = vp->v_object;
+ if (obj != NULL) {
+ VM_OBJECT_WLOCK(obj);
+ vm_object_page_clean(obj, 0, 0, OBJPC_SYNC);
+ VM_OBJECT_WUNLOCK(obj);
+ }
+
bsize = vp->v_mount->mnt_stat.f_iosize;
for (bn = noff / bsize; noff < size; bn++, noff += bsize -
noff % bsize) {
@@ -2613,7 +2623,7 @@ vn_bmap_seekhole(struct vnode *vp, u_long cmd, off_t *off, struct ucred *cred)
KASSERT(cmd == FIOSEEKHOLE || cmd == FIOSEEKDATA,
("%s: Wrong command %lu", __func__, cmd));
- if (vn_lock(vp, LK_SHARED) != 0)
+ if (vn_lock(vp, LK_EXCLUSIVE) != 0)
return (EBADF);
error = vn_bmap_seekhole_locked(vp, cmd, off, cred);
VOP_UNLOCK(vp);
diff --git a/sys/ufs/ufs/ufs_bmap.c b/sys/ufs/ufs/ufs_bmap.c
index 4ac8ca149279..acdd334f6c7b 100644
--- a/sys/ufs/ufs/ufs_bmap.c
+++ b/sys/ufs/ufs/ufs_bmap.c
@@ -44,12 +44,16 @@ __FBSDID("$FreeBSD$");
#include <sys/bio.h>
#include <sys/buf.h>
#include <sys/proc.h>
+#include <sys/rwlock.h>
#include <sys/vnode.h>
#include <sys/mount.h>
#include <sys/racct.h>
#include <sys/resourcevar.h>
#include <sys/stat.h>
+#include <vm/vm.h>
+#include <vm/vm_object.h>
+
#include <ufs/ufs/extattr.h>
#include <ufs/ufs/quota.h>
#include <ufs/ufs/inode.h>
@@ -348,6 +352,7 @@ ufs_bmap_seekdata(struct vnode *vp, off_t *offp)
struct inode *ip;
struct mount *mp;
struct ufsmount *ump;
+ vm_object_t obj;
ufs2_daddr_t bn, daddr, nextbn;
uint64_t bsize;
off_t numblks;
@@ -364,6 +369,19 @@ ufs_bmap_seekdata(struct vnode *vp, off_t *offp)
if (*offp < 0 || *offp >= ip->i_size)
return (ENXIO);
+ /*
+ * We could have pages on the vnode' object queue which still
+ * do not have the data blocks allocated. Convert all dirty
+ * pages into buffer writes to ensure that we see all
+ * allocated data.
+ */
+ obj = vp->v_object;
+ if (obj != NULL) {
+ VM_OBJECT_WLOCK(obj);
+ vm_object_page_clean(obj, 0, 0, OBJPC_SYNC);
+ VM_OBJECT_WUNLOCK(obj);
+ }
+
bsize = mp->mnt_stat.f_iosize;
for (bn = *offp / bsize, numblks = howmany(ip->i_size, bsize);
bn < numblks; bn = nextbn) {
diff --git a/sys/ufs/ufs/ufs_vnops.c b/sys/ufs/ufs/ufs_vnops.c
index 93a5b173b785..ae6d963920f3 100644
--- a/sys/ufs/ufs/ufs_vnops.c
+++ b/sys/ufs/ufs/ufs_vnops.c
@@ -2944,7 +2944,7 @@ ufs_ioctl(struct vop_ioctl_args *ap)
vp = ap->a_vp;
switch (ap->a_command) {
case FIOSEEKDATA:
- error = vn_lock(vp, LK_SHARED);
+ error = vn_lock(vp, LK_EXCLUSIVE);
if (error == 0) {
error = ufs_bmap_seekdata(vp, (off_t *)ap->a_data);
VOP_UNLOCK(vp);