git: de2e15295966 - main - Add vnode_pager_purge_range(9) KPI

Ka Ho Ng khng at FreeBSD.org
Thu Aug 5 15:23:34 UTC 2021


The branch main has been updated by khng:

URL: https://cgit.FreeBSD.org/src/commit/?id=de2e152959668756333db8a502a3d17a19dac393

commit de2e152959668756333db8a502a3d17a19dac393
Author:     Ka Ho Ng <khng at FreeBSD.org>
AuthorDate: 2021-08-04 19:20:37 +0000
Commit:     Ka Ho Ng <khng at FreeBSD.org>
CommitDate: 2021-08-05 14:52:26 +0000

    Add vnode_pager_purge_range(9) KPI
    
    This KPI is created in addition to the existing vnode_pager_setsize(9)
    KPI. The KPI is intended for file systems that are able to turn a range
    of a file into a sparse range, also known as hole-punching.
    
    Sponsored by:   The FreeBSD Foundation
    Reviewed by:    kib
    Differential Revision:  https://reviews.freebsd.org/D27194
---
 share/man/man9/Makefile                  |   1 +
 share/man/man9/vnode_pager_purge_range.9 |  85 +++++++++++++++++++
 sys/vm/vm_extern.h                       |   1 +
 sys/vm/vnode_pager.c                     | 140 +++++++++++++++++++++++--------
 4 files changed, 194 insertions(+), 33 deletions(-)

diff --git a/share/man/man9/Makefile b/share/man/man9/Makefile
index a335f53b27f3..d0012301d889 100644
--- a/share/man/man9/Makefile
+++ b/share/man/man9/Makefile
@@ -409,6 +409,7 @@ MAN=	accept_filter.9 \
 	vnet.9 \
 	vnode.9 \
 	vnode_pager_setsize.9 \
+	vnode_pager_purge_range.9 \
 	VOP_ACCESS.9 \
 	VOP_ACLCHECK.9 \
 	VOP_ADVISE.9 \
diff --git a/share/man/man9/vnode_pager_purge_range.9 b/share/man/man9/vnode_pager_purge_range.9
new file mode 100644
index 000000000000..16a240c2a34b
--- /dev/null
+++ b/share/man/man9/vnode_pager_purge_range.9
@@ -0,0 +1,85 @@
+.\"
+.\" SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+.\"
+.\" Copyright (c) 2021 The FreeBSD Foundation
+.\"
+.\" This manual page was written by Ka Ho Ng under sponsorship from
+.\" the FreeBSD Foundation.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\"    notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\"    notice, this list of conditions and the following disclaimer in the
+.\"    documentation and/or other materials provided with the distribution.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.Dd August 2, 2021
+.Dt VNODE_PAGER_PURGE_RANGE 9
+.Os
+.Sh NAME
+.Nm vnode_pager_purge_range
+.Nd "invalidate the cached contents within the given byte range"
+.Sh SYNOPSIS
+.In sys/param.h
+.In vm/vm.h
+.In vm/vm_extern.h
+.Ft void
+.Fo vnode_pager_purge_range
+.Fa "struct vnode *vp"
+.Fa "vm_ooffset_t start"
+.Fa "vm_ooffset_t end"
+.Fc
+.Sh DESCRIPTION
+.Nm
+invalidates the cached contents within the given byte range from the
+specified vnode
+.Fa vp .
+The range to be purged is
+.Eo [
+.Fa start , end
+.Ec ) .
+If the
+.Fa end
+parameter is the value zero, the affected range starts from
+.Fa start
+and continues to the end of the object.
+Pages within the specified range will be removed from the object's queue.
+If
+.Fa start
+or
+.Fa end
+is not aligned to a page boundary, the invalidated part of the page is zeroed.
+This function only cleans the resident pages in the affected region; it is up to
+the callers to ensure that reading the backing store returns zeroes.
+.Pp
+In case the vnode
+.Fa vp
+does not have a VM object allocated, the effect of calling this function is a
+no-op.
+.Sh LOCKS
+The vnode must be locked on entry and will still be locked on exit.
+.Sh SEE ALSO
+.Xr vnode 9
+.Sh HISTORY
+The
+.Nm
+manual page first appeared in
+.Fx 14 .
+.Sh AUTHORS
+This
+manual page was written by
+.An Ka Ho Ng Aq Mt khng@FreeBSD.org .
diff --git a/sys/vm/vm_extern.h b/sys/vm/vm_extern.h
index acdb361d3262..ed365bd41689 100644
--- a/sys/vm/vm_extern.h
+++ b/sys/vm/vm_extern.h
@@ -120,6 +120,7 @@ void vmspace_free(struct vmspace *);
 void vmspace_exitfree(struct proc *);
 void vmspace_switch_aio(struct vmspace *);
 void vnode_pager_setsize(struct vnode *, vm_ooffset_t);
+void vnode_pager_purge_range(struct vnode *, vm_ooffset_t, vm_ooffset_t);
 int vslock(void *, size_t);
 void vsunlock(void *, size_t);
 struct sf_buf *vm_imgact_map_page(vm_object_t object, vm_ooffset_t offset);
diff --git a/sys/vm/vnode_pager.c b/sys/vm/vnode_pager.c
index d167fcc555fb..4330c17c2033 100644
--- a/sys/vm/vnode_pager.c
+++ b/sys/vm/vnode_pager.c
@@ -427,6 +427,53 @@ vnode_pager_haspage(vm_object_t object, vm_pindex_t pindex, int *before,
 	return TRUE;
 }
 
+/*
+ * Internal routine zeroing bytes [base, end) of a page and marking them valid.
+ */
+static void
+vnode_pager_subpage_purge(struct vm_page *m, int base, int end)
+{
+	int size;
+
+	KASSERT(end > base && end <= PAGE_SIZE,
+	    ("%s: start %d end %d", __func__, base, end));
+	size = end - base;
+
+	/*
+	 * Clear out partial-page garbage in case
+	 * the page has been mapped.
+	 */
+	pmap_zero_page_area(m, base, size);
+
+	/*
+	 * Update the valid bits to reflect the blocks
+	 * that have been zeroed.  Some of these valid
+	 * bits may have already been set.
+	 */
+	vm_page_set_valid_range(m, base, size);
+
+	/*
+	 * Shrink the range to whole DEV_BSIZE blocks:
+	 * round "base" up and "end" down, so the dirty bit
+	 * of a partially zeroed block is not cleared.
+	 */
+	base = roundup2(base, DEV_BSIZE);
+	end = rounddown2(end, DEV_BSIZE);
+
+	if (end > base) {
+		/*
+		 * Clear out partial-page dirty bits.
+		 *
+		 * Note that we do not clear out the
+		 * valid bits.  This would prevent
+		 * bogus_page replacement from working
+		 * properly.
+		 */
+		vm_page_clear_dirty(m, base, end - base);
+	}
+
+}
+
 /*
  * Lets the VM system know about a change in size for a file.
  * We adjust our own internal size and flush any cached pages in
@@ -489,39 +536,9 @@ vnode_pager_setsize(struct vnode *vp, vm_ooffset_t nsize)
 		m = vm_page_grab(object, OFF_TO_IDX(nsize), VM_ALLOC_NOCREAT);
 		if (m == NULL)
 			goto out;
-		if (!vm_page_none_valid(m)) {
-			int base = (int)nsize & PAGE_MASK;
-			int size = PAGE_SIZE - base;
-
-			/*
-			 * Clear out partial-page garbage in case
-			 * the page has been mapped.
-			 */
-			pmap_zero_page_area(m, base, size);
-
-			/*
-			 * Update the valid bits to reflect the blocks that
-			 * have been zeroed.  Some of these valid bits may
-			 * have already been set.
-			 */
-			vm_page_set_valid_range(m, base, size);
-
-			/*
-			 * Round "base" to the next block boundary so that the
-			 * dirty bit for a partially zeroed block is not
-			 * cleared.
-			 */
-			base = roundup2(base, DEV_BSIZE);
-
-			/*
-			 * Clear out partial-page dirty bits.
-			 *
-			 * note that we do not clear out the valid
-			 * bits.  This would prevent bogus_page
-			 * replacement from working properly.
-			 */
-			vm_page_clear_dirty(m, base, PAGE_SIZE - base);
-		}
+		if (!vm_page_none_valid(m))
+			vnode_pager_subpage_purge(m, (int)nsize & PAGE_MASK,
+			    PAGE_SIZE);
 		vm_page_xunbusy(m);
 	}
 out:
@@ -534,6 +551,63 @@ out:
 	VM_OBJECT_WUNLOCK(object);
 }
 
+/*
+ * Lets the VM system know about the purged range [start, end) of a file; an
+ * end of zero extends the range to the end of the object.  Cached pages wholly
+ * inside the range are tossed away.  Resident pages only partly covered at an
+ * edge are zeroed there; dirty bits of partly covered blocks are kept.
+ */
+void
+vnode_pager_purge_range(struct vnode *vp, vm_ooffset_t start, vm_ooffset_t end)
+{
+	struct vm_page *m;
+	struct vm_object *object;
+	vm_pindex_t pi, pistart, piend;
+	bool same_page;
+	int base, pend;
+
+	ASSERT_VOP_LOCKED(vp, "vnode_pager_purge_range");
+
+	object = vp->v_object;
+	pi = start + PAGE_MASK < start ? OBJ_MAX_SIZE :
+	    OFF_TO_IDX(start + PAGE_MASK);
+	pistart = OFF_TO_IDX(start);
+	piend = end == 0 ? OBJ_MAX_SIZE : OFF_TO_IDX(end);
+	same_page = pistart == piend;
+	if ((end != 0 && end <= start) || object == NULL)
+		return;
+
+	VM_OBJECT_WLOCK(object);
+
+	if (pi < piend)
+		vm_object_page_remove(object, pi, piend, 0);
+
+	if ((start & PAGE_MASK) != 0) {
+		base = (int)start & PAGE_MASK;
+		pend = same_page ? (int)end & PAGE_MASK : PAGE_SIZE;
+		m = vm_page_grab(object, pistart, VM_ALLOC_NOCREAT);
+		if (m != NULL) {
+			if (!vm_page_none_valid(m))
+				vnode_pager_subpage_purge(m, base, pend);
+			vm_page_xunbusy(m);
+		}
+		if (same_page)
+			goto out;
+	}
+	if ((end & PAGE_MASK) != 0) {
+		base = same_page ? (int)start & PAGE_MASK : 0 ;
+		pend = (int)end & PAGE_MASK;
+		m = vm_page_grab(object, piend, VM_ALLOC_NOCREAT);
+		if (m != NULL) {
+			if (!vm_page_none_valid(m))
+				vnode_pager_subpage_purge(m, base, pend);
+			vm_page_xunbusy(m);
+		}
+	}
+out:
+	VM_OBJECT_WUNLOCK(object);
+}
+
 /*
  * calculate the linear (byte) disk address of specified virtual
  * file address


More information about the dev-commits-src-all mailing list