git: 065b9549d356 - releng/13.0 - sendfile: Use the pager size to determine the file extent when possible
Mark Johnston
markj at FreeBSD.org
Thu Mar 4 14:09:46 UTC 2021
The branch releng/13.0 has been updated by markj:
URL: https://cgit.FreeBSD.org/src/commit/?id=065b9549d35676dcd7af45ca3ebd05e9ca082cbe
commit 065b9549d35676dcd7af45ca3ebd05e9ca082cbe
Author: Mark Johnston <markj at FreeBSD.org>
AuthorDate: 2021-02-25 15:04:44 +0000
Commit: Mark Johnston <markj at FreeBSD.org>
CommitDate: 2021-03-04 14:09:36 +0000
sendfile: Use the pager size to determine the file extent when possible
Previously sendfile would issue a VOP_GETATTR and use the returned size,
i.e., the file size. When paging in file data, sendfile_swapin() will
use the pager to determine whether it needs to zero-fill, most often
because of a hole in a sparse file. An attempt to page in beyond the
end of a file is also zero-filled; this case is detected when the
requested page is past the end of the pager. In other words, the file
size and the pager size were used interchangeably.
With ZFS, updates to the pager and file sizes are not synchronized by
the exclusive vnode lock, at least partially due to its use of
MNTK_SHARED_WRITES. In particular, the pager size is updated after the
file size, so in the presence of a writer concurrently extending the
file, sendfile could incorrectly instantiate "holes" in the page cache
pages backing the file, which manifests as data corruption when reading
the file back from the page cache. The on-disk copy is unaffected.
Fix this by consistently using the pager size when available.
Approved by: re (gjb)
Reported by: dumbbell
Reviewed by: chs, kib
Tested by: dumbbell, pho
Sponsored by: The FreeBSD Foundation
Differential Revision: https://reviews.freebsd.org/D28811
(cherry picked from commit faa998f6ff69573fe82765c77c7268ee89ac945e)
(cherry picked from commit 095558ac894977906fbbdf74c34b6d13fea7c8ed)
---
sys/kern/kern_sendfile.c | 55 ++++++++++++++++++++++++++++++++++++------------
1 file changed, 41 insertions(+), 14 deletions(-)
diff --git a/sys/kern/kern_sendfile.c b/sys/kern/kern_sendfile.c
index 7d7be6f072e9..95bfba0538dc 100644
--- a/sys/kern/kern_sendfile.c
+++ b/sys/kern/kern_sendfile.c
@@ -588,28 +588,40 @@ sendfile_getobj(struct thread *td, struct file *fp, vm_object_t *obj_res,
goto out;
}
*bsize = vp->v_mount->mnt_stat.f_iosize;
- error = VOP_GETATTR(vp, &va, td->td_ucred);
- if (error != 0)
- goto out;
- *obj_size = va.va_size;
obj = vp->v_object;
if (obj == NULL) {
error = EINVAL;
goto out;
}
+
+ /*
+ * Use the pager size when available to simplify synchronization
+ * with filesystems, which otherwise must atomically update both
+ * the vnode pager size and file size.
+ */
+ if (obj->type == OBJT_VNODE) {
+ VM_OBJECT_RLOCK(obj);
+ *obj_size = obj->un_pager.vnp.vnp_size;
+ } else {
+ error = VOP_GETATTR(vp, &va, td->td_ucred);
+ if (error != 0)
+ goto out;
+ *obj_size = va.va_size;
+ VM_OBJECT_RLOCK(obj);
+ }
} else if (fp->f_type == DTYPE_SHM) {
error = 0;
shmfd = fp->f_data;
obj = shmfd->shm_object;
+ VM_OBJECT_RLOCK(obj);
*obj_size = shmfd->shm_size;
} else {
error = EINVAL;
goto out;
}
- VM_OBJECT_WLOCK(obj);
if ((obj->flags & OBJ_DEAD) != 0) {
- VM_OBJECT_WUNLOCK(obj);
+ VM_OBJECT_RUNLOCK(obj);
error = EBADF;
goto out;
}
@@ -620,7 +632,7 @@ sendfile_getobj(struct thread *td, struct file *fp, vm_object_t *obj_res,
* immediately destroy it.
*/
vm_object_reference_locked(obj);
- VM_OBJECT_WUNLOCK(obj);
+ VM_OBJECT_RUNLOCK(obj);
*obj_res = obj;
*vp_res = vp;
*shmfd_res = shmfd;
@@ -679,7 +691,7 @@ vn_sendfile(struct file *fp, int sockfd, struct uio *hdr_uio,
struct shmfd *shmfd;
struct sendfile_sync *sfs;
struct vattr va;
- off_t off, sbytes, rem, obj_size;
+ off_t off, sbytes, rem, obj_size, nobj_size;
int bsize, error, ext_pgs_idx, hdrlen, max_pgs, softerr;
#ifdef KERN_TLS
int tls_enq_cnt;
@@ -852,15 +864,30 @@ retry_space:
error = vn_lock(vp, LK_SHARED);
if (error != 0)
goto done;
- error = VOP_GETATTR(vp, &va, td->td_ucred);
- if (error != 0 || off >= va.va_size) {
+
+ /*
+ * Check to see if the file size has changed.
+ */
+ if (obj->type == OBJT_VNODE) {
+ VM_OBJECT_RLOCK(obj);
+ nobj_size = obj->un_pager.vnp.vnp_size;
+ VM_OBJECT_RUNLOCK(obj);
+ } else {
+ error = VOP_GETATTR(vp, &va, td->td_ucred);
+ if (error != 0) {
+ VOP_UNLOCK(vp);
+ goto done;
+ }
+ nobj_size = va.va_size;
+ }
+ if (off >= nobj_size) {
VOP_UNLOCK(vp);
goto done;
}
- if (va.va_size != obj_size) {
- obj_size = va.va_size;
- rem = nbytes ?
- omin(nbytes + offset, obj_size) : obj_size;
+ if (nobj_size != obj_size) {
+ obj_size = nobj_size;
+ rem = nbytes ? omin(nbytes + offset, obj_size) :
+ obj_size;
rem -= off;
}
}
More information about the dev-commits-src-branches
mailing list