svn commit: r229821 - in head/sys: fs/tmpfs kern

Alan Cox alc at FreeBSD.org
Sun Jan 8 20:09:27 UTC 2012


Author: alc
Date: Sun Jan  8 20:09:26 2012
New Revision: 229821
URL: http://svn.freebsd.org/changeset/base/229821

Log:
  Correct an error of omission in the implementation of the truncation
  operation on POSIX shared memory objects and tmpfs.  Previously, neither of
  these modules correctly handled the case in which the new size of the object
  or file was not a multiple of the page size.  Specifically, they did not
  handle partial page truncation of data stored on swap.  As a result, stale
  data might later be returned to an application.
  
  Interestingly, a data inconsistency was less likely to occur under tmpfs
  than under POSIX shared memory objects, because a different mistake in the
  tmpfs truncation operation helped avoid the inconsistency.  If the
  data was still resident in memory in a PG_CACHED page, then the tmpfs
  truncation operation would reactivate that page, zero the truncated portion,
  and leave the page pinned in memory.  More precisely, the benevolent error
  was that the truncation operation didn't add the reactivated page to any of
  the paging queues, effectively pinning the page.  This page would remain
  pinned until the file was destroyed or the page was read or written.  With
  this change, the page is now added to the inactive queue.
  
  Discussed with:	jhb
  Reviewed by:	kib (an earlier version)
  MFC after:	3 weeks
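
To illustrate the failure mode, here is a hypothetical userland sketch; it is
not part of this commit, and SHM_ANON usage, the page-size handling, and the
swap step are assumptions noted in the comments.  Before this change, the
bytes beyond the truncation point could read back as stale data if the page
had been written out to swap.

#include <sys/mman.h>

#include <assert.h>
#include <fcntl.h>
#include <string.h>
#include <unistd.h>

int
main(void)
{
	long psz = sysconf(_SC_PAGESIZE);
	char *p;
	int fd;

	/* SHM_ANON is a FreeBSD extension; a named object works as well. */
	fd = shm_open(SHM_ANON, O_RDWR, 0600);
	assert(fd >= 0);
	assert(ftruncate(fd, psz) == 0);
	p = mmap(NULL, psz, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	assert(p != MAP_FAILED);
	memset(p, 0xff, psz);			/* dirty the whole page */
	assert(munmap(p, psz) == 0);
	/*
	 * Suppose memory pressure pushes the object's page out to swap
	 * here; the sketch cannot force that, it only marks the spot.
	 */
	assert(ftruncate(fd, psz / 2) == 0);	/* shrink to mid-page */
	assert(ftruncate(fd, psz) == 0);	/* grow back */
	p = mmap(NULL, psz, PROT_READ, MAP_SHARED, fd, 0);
	assert(p != MAP_FAILED);
	for (long i = psz / 2; i < psz; i++)
		assert(p[i] == 0);	/* stale 0xff without the fix */
	return (0);
}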

Modified:
  head/sys/fs/tmpfs/tmpfs_subr.c
  head/sys/kern/uipc_shm.c

Modified: head/sys/fs/tmpfs/tmpfs_subr.c
==============================================================================
--- head/sys/fs/tmpfs/tmpfs_subr.c	Sun Jan  8 19:52:56 2012	(r229820)
+++ head/sys/fs/tmpfs/tmpfs_subr.c	Sun Jan  8 20:09:26 2012	(r229821)
@@ -48,6 +48,7 @@ __FBSDID("$FreeBSD$");
 #include <vm/vm.h>
 #include <vm/vm_object.h>
 #include <vm/vm_page.h>
+#include <vm/vm_pageout.h>
 #include <vm/vm_pager.h>
 #include <vm/vm_extern.h>
 
@@ -886,10 +887,10 @@ tmpfs_reg_resize(struct vnode *vp, off_t
 	struct tmpfs_mount *tmp;
 	struct tmpfs_node *node;
 	vm_object_t uobj;
-	vm_page_t m;
-	vm_pindex_t newpages, oldpages;
+	vm_page_t m, ma[1];
+	vm_pindex_t idx, newpages, oldpages;
 	off_t oldsize;
-	size_t zerolen;
+	int base, rv;
 
 	MPASS(vp->v_type == VREG);
 	MPASS(newsize >= 0);
@@ -912,15 +913,57 @@ tmpfs_reg_resize(struct vnode *vp, off_t
 	    newpages - oldpages > TMPFS_PAGES_AVAIL(tmp))
 		return (ENOSPC);
 
-	TMPFS_LOCK(tmp);
-	tmp->tm_pages_used += (newpages - oldpages);
-	TMPFS_UNLOCK(tmp);
-
-	node->tn_size = newsize;
-	vnode_pager_setsize(vp, newsize);
 	VM_OBJECT_LOCK(uobj);
 	if (newsize < oldsize) {
 		/*
+		 * Zero the truncated part of the last page.
+		 */
+		base = newsize & PAGE_MASK;
+		if (base != 0) {
+			idx = OFF_TO_IDX(newsize);
+retry:
+			m = vm_page_lookup(uobj, idx);
+			if (m != NULL) {
+				if ((m->oflags & VPO_BUSY) != 0 ||
+				    m->busy != 0) {
+					vm_page_sleep(m, "tmfssz");
+					goto retry;
+				}
+			} else if (vm_pager_has_page(uobj, idx, NULL, NULL)) {
+				m = vm_page_alloc(uobj, idx, VM_ALLOC_NORMAL);
+				if (m == NULL) {
+					VM_OBJECT_UNLOCK(uobj);
+					VM_WAIT;
+					VM_OBJECT_LOCK(uobj);
+					goto retry;
+				} else if (m->valid != VM_PAGE_BITS_ALL) {
+					ma[0] = m;
+					rv = vm_pager_get_pages(uobj, ma, 1, 0);
+					m = vm_page_lookup(uobj, idx);
+				} else
+					/* A cached page was reactivated. */
+					rv = VM_PAGER_OK;
+				vm_page_lock(m);
+				if (rv == VM_PAGER_OK) {
+					vm_page_deactivate(m);
+					vm_page_unlock(m);
+					vm_page_wakeup(m);
+				} else {
+					vm_page_free(m);
+					vm_page_unlock(m);
+					VM_OBJECT_UNLOCK(uobj);
+					return (EIO);
+				}
+			}
+			if (m != NULL) {
+				pmap_zero_page_area(m, base, PAGE_SIZE - base);
+				MPASS(m->valid == VM_PAGE_BITS_ALL);
+				vm_page_dirty(m);
+				vm_pager_page_unswapped(m);
+			}
+		}
+
+		/*
 		 * Release any swap space and free any whole pages.
 		 */
 		if (newpages < oldpages) {
@@ -928,19 +971,16 @@ tmpfs_reg_resize(struct vnode *vp, off_t
 			    newpages);
 			vm_object_page_remove(uobj, newpages, 0, 0);
 		}
-
-		/*
-		 * Zero the truncated part of the last page.
-		 */
-		zerolen = round_page(newsize) - newsize;
-		if (zerolen > 0) {
-			m = vm_page_grab(uobj, OFF_TO_IDX(newsize),
-			    VM_ALLOC_NOBUSY | VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
-			pmap_zero_page_area(m, PAGE_SIZE - zerolen, zerolen);
-		}
 	}
 	uobj->size = newpages;
 	VM_OBJECT_UNLOCK(uobj);
+
+	TMPFS_LOCK(tmp);
+	tmp->tm_pages_used += (newpages - oldpages);
+	TMPFS_UNLOCK(tmp);
+
+	node->tn_size = newsize;
+	vnode_pager_setsize(vp, newsize);
 	return (0);
 }
 

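Both files now share the same logic for the partial last page.  The condensed
sketch below restates that flow with explanatory comments; it assumes the
caller holds the object lock, elides the busy-page retry loop and the
VM_WAIT path, and its function name is invented for exposition, so it is a
reading aid rather than compilable kernel code.

/*
 * Zero the tail of the page that straddles the new EOF, paging it in
 * from swap first if that is the only copy of the data.
 */
static int
zero_partial_page(vm_object_t obj, off_t newsize)
{
	vm_page_t m, ma[1];
	vm_pindex_t idx;
	int base, rv;

	base = newsize & PAGE_MASK;	/* offset of new EOF in its page */
	if (base == 0)
		return (0);		/* page-aligned: nothing to zero */
	idx = OFF_TO_IDX(newsize);
	m = vm_page_lookup(obj, idx);
	if (m == NULL && vm_pager_has_page(obj, idx, NULL, NULL)) {
		/* The data lives only on swap: bring it in before zeroing. */
		m = vm_page_alloc(obj, idx, VM_ALLOC_NORMAL);
		ma[0] = m;
		rv = vm_pager_get_pages(obj, ma, 1, 0);
		if (rv != VM_PAGER_OK)
			return (EIO);
		/*
		 * The real code also re-looks the page up, unbusies it,
		 * and moves it to the inactive queue rather than pinning it.
		 */
		m = vm_page_lookup(obj, idx);
	}
	if (m != NULL) {
		pmap_zero_page_area(m, base, PAGE_SIZE - base);
		vm_page_dirty(m);		/* the zeroes must reach swap */
		vm_pager_page_unswapped(m);	/* drop the stale swap copy */
	}
	return (0);
}
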
Modified: head/sys/kern/uipc_shm.c
==============================================================================
--- head/sys/kern/uipc_shm.c	Sun Jan  8 19:52:56 2012	(r229820)
+++ head/sys/kern/uipc_shm.c	Sun Jan  8 20:09:26 2012	(r229821)
@@ -39,13 +39,6 @@
  *
  * (3) Resource limits?  Does this need its own resource limits or are the
  *     existing limits in mmap(2) sufficient?
- *
- * (4) Partial page truncation.  vnode_pager_setsize() will zero any parts
- *     of a partially mapped page as a result of ftruncate(2)/truncate(2).
- *     We can do the same (with the same pmap evil), but do we need to
- *     worry about the bits on disk if the page is swapped out or will the
- *     swapper zero the parts of a page that are invalid if the page is
- *     swapped back in for us?
  */
 
 #include <sys/cdefs.h>
@@ -86,6 +79,7 @@ __FBSDID("$FreeBSD$");
 #include <vm/vm_kern.h>
 #include <vm/vm_object.h>
 #include <vm/vm_page.h>
+#include <vm/vm_pageout.h>
 #include <vm/vm_pager.h>
 #include <vm/swap_pager.h>
 
@@ -253,9 +247,10 @@ static int
 shm_dotruncate(struct shmfd *shmfd, off_t length)
 {
 	vm_object_t object;
-	vm_page_t m;
-	vm_pindex_t nobjsize;
+	vm_page_t m, ma[1];
+	vm_pindex_t idx, nobjsize;
 	vm_ooffset_t delta;
+	int base, rv;
 
 	object = shmfd->shm_object;
 	VM_OBJECT_LOCK(object);
@@ -275,6 +270,56 @@ shm_dotruncate(struct shmfd *shmfd, off_
 			VM_OBJECT_UNLOCK(object);
 			return (EBUSY);
 		}
+
+		/*
+		 * Zero the truncated part of the last page.
+		 */
+		base = length & PAGE_MASK;
+		if (base != 0) {
+			idx = OFF_TO_IDX(length);
+retry:
+			m = vm_page_lookup(object, idx);
+			if (m != NULL) {
+				if ((m->oflags & VPO_BUSY) != 0 ||
+				    m->busy != 0) {
+					vm_page_sleep(m, "shmtrc");
+					goto retry;
+				}
+			} else if (vm_pager_has_page(object, idx, NULL, NULL)) {
+				m = vm_page_alloc(object, idx, VM_ALLOC_NORMAL);
+				if (m == NULL) {
+					VM_OBJECT_UNLOCK(object);
+					VM_WAIT;
+					VM_OBJECT_LOCK(object);
+					goto retry;
+				} else if (m->valid != VM_PAGE_BITS_ALL) {
+					ma[0] = m;
+					rv = vm_pager_get_pages(object, ma, 1,
+					    0);
+					m = vm_page_lookup(object, idx);
+				} else
+					/* A cached page was reactivated. */
+					rv = VM_PAGER_OK;
+				vm_page_lock(m);
+				if (rv == VM_PAGER_OK) {
+					vm_page_deactivate(m);
+					vm_page_unlock(m);
+					vm_page_wakeup(m);
+				} else {
+					vm_page_free(m);
+					vm_page_unlock(m);
+					VM_OBJECT_UNLOCK(object);
+					return (EIO);
+				}
+			}
+			if (m != NULL) {
+				pmap_zero_page_area(m, base, PAGE_SIZE - base);
+				KASSERT(m->valid == VM_PAGE_BITS_ALL,
+				    ("shm_dotruncate: page %p is invalid", m));
+				vm_page_dirty(m);
+				vm_pager_page_unswapped(m);
+			}
+		}
 		delta = ptoa(object->size - nobjsize);
 
 		/* Toss in memory pages. */
@@ -289,45 +334,7 @@ shm_dotruncate(struct shmfd *shmfd, off_
 		/* Free the swap accounted for shm */
 		swap_release_by_cred(delta, object->cred);
 		object->charge -= delta;
-
-		/*
-		 * If the last page is partially mapped, then zero out
-		 * the garbage at the end of the page.  See comments
-		 * in vnode_pager_setsize() for more details.
-		 *
-		 * XXXJHB: This handles in memory pages, but what about
-		 * a page swapped out to disk?
-		 */
-		if ((length & PAGE_MASK) &&
-		    (m = vm_page_lookup(object, OFF_TO_IDX(length))) != NULL &&
-		    m->valid != 0) {
-			int base = (int)length & PAGE_MASK;
-			int size = PAGE_SIZE - base;
-
-			pmap_zero_page_area(m, base, size);
-
-			/*
-			 * Update the valid bits to reflect the blocks that
-			 * have been zeroed.  Some of these valid bits may
-			 * have already been set.
-			 */
-			vm_page_set_valid_range(m, base, size);
-
-			/*
-			 * Round "base" to the next block boundary so that the
-			 * dirty bit for a partially zeroed block is not
-			 * cleared.
-			 */
-			base = roundup2(base, DEV_BSIZE);
-
-			vm_page_clear_dirty(m, base, PAGE_SIZE - base);
-		} else if ((length & PAGE_MASK) &&
-		    __predict_false(object->cache != NULL)) {
-			vm_page_cache_free(object, OFF_TO_IDX(length),
-			    nobjsize);
-		}
 	} else {
-
 		/* Attempt to reserve the swap */
 		delta = ptoa(nobjsize - object->size);
 		if (!swap_reserve_by_cred(delta, object->cred)) {

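The tmpfs side can be exercised analogously through the file system.  A
hypothetical sketch follows; the path, the 4096-byte page size, and the
assumption that the page reaches swap between the two ftruncate(2) calls
are all illustrative, not part of this commit.

#include <assert.h>
#include <fcntl.h>
#include <string.h>
#include <unistd.h>

int
main(void)
{
	char buf[4096], zeroes[2048];
	int fd;

	/* Assumes /tmp is a tmpfs mount and a 4096-byte page size. */
	fd = open("/tmp/trunc_test", O_RDWR | O_CREAT | O_TRUNC, 0600);
	assert(fd >= 0);
	memset(buf, 0xff, sizeof(buf));
	assert(write(fd, buf, sizeof(buf)) == (ssize_t)sizeof(buf));
	/* Stale data only reappears if the page is on swap by now. */
	assert(ftruncate(fd, 2048) == 0);	/* partial page truncation */
	assert(ftruncate(fd, 4096) == 0);	/* extend past the old EOF */
	assert(pread(fd, buf, 2048, 2048) == 2048);
	memset(zeroes, 0, sizeof(zeroes));
	assert(memcmp(buf, zeroes, 2048) == 0); /* 0xff here without the fix */
	assert(close(fd) == 0);
	assert(unlink("/tmp/trunc_test") == 0);
	return (0);
}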
