svn commit: r250030 - in head/sys: fs/tmpfs vm

Konstantin Belousov kib at FreeBSD.org
Sun Apr 28 19:39:01 UTC 2013


Author: kib
Date: Sun Apr 28 19:38:59 2013
New Revision: 250030
URL: http://svnweb.freebsd.org/changeset/base/250030

Log:
  Rework the handling of the tmpfs node backing swap object and the
  tmpfs vnode v_object to avoid double-buffering.  Use the same object
  both as the backing store for the tmpfs node and as the v_object.
  
  Besides reducing memory use by up to 2x when mapping files from
  tmpfs, this also halves the number of bytes copied by tmpfs read and
  write operations.
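  
  For illustration only (a sketch, not code from the commit): in the
  old tmpfs_mappedwrite(), removed below, a write that found a valid
  page in the vnode object moved the data twice, while the new path
  copies it once into the page of the single shared object:
  
	/* Old path: user buffer -> v_object page -> backing object page. */
	error = uiomove_fromphys(&vpg, offset, tlen, uio);
	pmap_copy_page(vpg, tpg);

	/* New path: user buffer -> page of the shared OBJT_SWAP object. */
	error = uiomove_fromphys(&tpg, offset, tlen, uio);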
  
  The VM subsystem was already slightly adapted to tolerate an
  OBJT_SWAP object as v_object.  Now vm_object_deallocate() is
  modified to not reinstantiate the OBJ_ONEMAPPING flag and to help
  the VFS correctly handle the VV_TEXT flag on the last dereference of
  the tmpfs backing object.
  
  Reviewed by:	alc
  Tested by:	pho, bf
  MFC after:	1 month

Modified:
  head/sys/fs/tmpfs/tmpfs_subr.c
  head/sys/fs/tmpfs/tmpfs_vnops.c
  head/sys/vm/vm_object.c
  head/sys/vm/vm_object.h

Modified: head/sys/fs/tmpfs/tmpfs_subr.c
==============================================================================
--- head/sys/fs/tmpfs/tmpfs_subr.c	Sun Apr 28 19:25:09 2013	(r250029)
+++ head/sys/fs/tmpfs/tmpfs_subr.c	Sun Apr 28 19:38:59 2013	(r250030)
@@ -166,6 +166,7 @@ tmpfs_alloc_node(struct tmpfs_mount *tmp
     char *target, dev_t rdev, struct tmpfs_node **node)
 {
 	struct tmpfs_node *nnode;
+	vm_object_t obj;
 
 	/* If the root directory of the 'tmp' file system is not yet
 	 * allocated, this must be the request to do it. */
@@ -227,9 +228,14 @@ tmpfs_alloc_node(struct tmpfs_mount *tmp
 		break;
 
 	case VREG:
-		nnode->tn_reg.tn_aobj =
+		obj = nnode->tn_reg.tn_aobj =
 		    vm_pager_allocate(OBJT_SWAP, NULL, 0, VM_PROT_DEFAULT, 0,
 			NULL /* XXXKIB - tmpfs needs swap reservation */);
+		VM_OBJECT_WLOCK(obj);
+		/* OBJ_TMPFS is set together with the setting of vp->v_object */
+		vm_object_set_flag(obj, OBJ_NOSPLIT);
+		vm_object_clear_flag(obj, OBJ_ONEMAPPING);
+		VM_OBJECT_WUNLOCK(obj);
 		break;
 
 	default:
@@ -434,9 +440,11 @@ int
 tmpfs_alloc_vp(struct mount *mp, struct tmpfs_node *node, int lkflag,
     struct vnode **vpp)
 {
-	int error = 0;
 	struct vnode *vp;
+	vm_object_t object;
+	int error;
 
+	error = 0;
 loop:
 	TMPFS_NODE_LOCK(node);
 	if ((vp = node->tn_vnode) != NULL) {
@@ -506,13 +514,22 @@ loop:
 		/* FALLTHROUGH */
 	case VLNK:
 		/* FALLTHROUGH */
-	case VREG:
-		/* FALLTHROUGH */
 	case VSOCK:
 		break;
 	case VFIFO:
 		vp->v_op = &tmpfs_fifoop_entries;
 		break;
+	case VREG:
+		object = node->tn_reg.tn_aobj;
+		VM_OBJECT_WLOCK(object);
+		VI_LOCK(vp);
+		KASSERT(vp->v_object == NULL, ("Not NULL v_object in tmpfs"));
+		vp->v_object = object;
+		object->un_pager.swp.swp_tmpfs = vp;
+		vm_object_set_flag(object, OBJ_TMPFS);
+		VI_UNLOCK(vp);
+		VM_OBJECT_WUNLOCK(object);
+		break;
 	case VDIR:
 		MPASS(node->tn_dir.tn_parent != NULL);
 		if (node->tn_dir.tn_parent == node)
@@ -523,7 +540,6 @@ loop:
 		panic("tmpfs_alloc_vp: type %p %d", node, (int)node->tn_type);
 	}
 
-	vnode_pager_setsize(vp, node->tn_size);
 	error = insmntque(vp, mp);
 	if (error)
 		vp = NULL;
@@ -1343,7 +1359,6 @@ retry:
 	TMPFS_UNLOCK(tmp);
 
 	node->tn_size = newsize;
-	vnode_pager_setsize(vp, newsize);
 	return (0);
 }
 

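A minimal sketch (not code from the commit) of the invariants that hold
for a VREG node once tmpfs_alloc_node() and tmpfs_alloc_vp() above have
both run:

	struct vnode *vp = node->tn_vnode;
	vm_object_t obj = node->tn_reg.tn_aobj;

	MPASS(obj->type == OBJT_SWAP);
	MPASS((obj->flags & (OBJ_TMPFS | OBJ_NOSPLIT)) ==
	    (OBJ_TMPFS | OBJ_NOSPLIT));
	MPASS((obj->flags & OBJ_ONEMAPPING) == 0);
	MPASS(vp->v_object == obj);
	MPASS(obj->un_pager.swp.swp_tmpfs == vp);

tmpfs_reclaim() in tmpfs_vnops.c below clears OBJ_TMPFS, swp_tmpfs and
v_object again before the vnode is recycled.
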
Modified: head/sys/fs/tmpfs/tmpfs_vnops.c
==============================================================================
--- head/sys/fs/tmpfs/tmpfs_vnops.c	Sun Apr 28 19:25:09 2013	(r250029)
+++ head/sys/fs/tmpfs/tmpfs_vnops.c	Sun Apr 28 19:38:59 2013	(r250030)
@@ -278,8 +278,6 @@ tmpfs_close(struct vop_close_args *v)
 {
 	struct vnode *vp = v->a_vp;
 
-	MPASS(VOP_ISLOCKED(vp));
-
 	/* Update node times. */
 	tmpfs_update(vp);
 
@@ -439,7 +437,6 @@ tmpfs_setattr(struct vop_setattr_args *v
 	return error;
 }
 
-/* --------------------------------------------------------------------- */
 static int
 tmpfs_nocacheread(vm_object_t tobj, vm_pindex_t idx,
     vm_offset_t offset, size_t tlen, struct uio *uio)
@@ -448,12 +445,35 @@ tmpfs_nocacheread(vm_object_t tobj, vm_p
 	int		error, rv;
 
 	VM_OBJECT_WLOCK(tobj);
-	m = vm_page_grab(tobj, idx, VM_ALLOC_WIRED |
-	    VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
+
+	/*
+	 * The kern_sendfile() code calls vn_rdwr() with the page
+	 * soft-busied.  Ignore the soft-busy state here. Parallel
+	 * reads of the page content from disk are prevented by
+	 * VPO_BUSY.
+	 *
+	 * Although the tmpfs vnode lock is held here, it is
+	 * nonetheless safe to sleep waiting for a free page.  The
+	 * pageout daemon does not need to acquire the tmpfs vnode
+	 * lock to page out tobj's pages because tobj is an OBJT_SWAP
+	 * type object.
+	 */
+	m = vm_page_grab(tobj, idx, VM_ALLOC_NORMAL | VM_ALLOC_RETRY |
+	    VM_ALLOC_IGN_SBUSY);
 	if (m->valid != VM_PAGE_BITS_ALL) {
 		if (vm_pager_has_page(tobj, idx, NULL, NULL)) {
 			rv = vm_pager_get_pages(tobj, &m, 1, 0);
+			m = vm_page_lookup(tobj, idx);
+			if (m == NULL) {
+				printf(
+		    "tmpfs: vm_obj %p idx %jd null lookup rv %d\n",
+				    tobj, idx, rv);
+				return (EIO);
+			}
 			if (rv != VM_PAGER_OK) {
+				printf(
+		    "tmpfs: vm_obj %p idx %jd valid %x pager error %d\n",
+				    tobj, idx, m->valid, rv);
 				vm_page_lock(m);
 				vm_page_free(m);
 				vm_page_unlock(m);
@@ -463,127 +483,38 @@ tmpfs_nocacheread(vm_object_t tobj, vm_p
 		} else
 			vm_page_zero_invalid(m, TRUE);
 	}
+	vm_page_lock(m);
+	vm_page_hold(m);
+	vm_page_wakeup(m);
+	vm_page_unlock(m);
 	VM_OBJECT_WUNLOCK(tobj);
 	error = uiomove_fromphys(&m, offset, tlen, uio);
 	VM_OBJECT_WLOCK(tobj);
 	vm_page_lock(m);
-	vm_page_unwire(m, TRUE);
+	vm_page_unhold(m);
+	vm_page_deactivate(m);
+	/* Requeue to maintain LRU ordering. */
+	vm_page_requeue(m);
 	vm_page_unlock(m);
-	vm_page_wakeup(m);
 	VM_OBJECT_WUNLOCK(tobj);
 
 	return (error);
 }
 
-static __inline int
-tmpfs_nocacheread_buf(vm_object_t tobj, vm_pindex_t idx,
-    vm_offset_t offset, size_t tlen, void *buf)
-{
-	struct uio uio;
-	struct iovec iov;
-
-	uio.uio_iovcnt = 1;
-	uio.uio_iov = &iov;
-	iov.iov_base = buf;
-	iov.iov_len = tlen;
-
-	uio.uio_offset = 0;
-	uio.uio_resid = tlen;
-	uio.uio_rw = UIO_READ;
-	uio.uio_segflg = UIO_SYSSPACE;
-	uio.uio_td = curthread;
-
-	return (tmpfs_nocacheread(tobj, idx, offset, tlen, &uio));
-}
-
-static int
-tmpfs_mappedread(vm_object_t vobj, vm_object_t tobj, size_t len, struct uio *uio)
-{
-	struct sf_buf	*sf;
-	vm_pindex_t	idx;
-	vm_page_t	m;
-	vm_offset_t	offset;
-	off_t		addr;
-	size_t		tlen;
-	char		*ma;
-	int		error;
-
-	addr = uio->uio_offset;
-	idx = OFF_TO_IDX(addr);
-	offset = addr & PAGE_MASK;
-	tlen = MIN(PAGE_SIZE - offset, len);
-
-	VM_OBJECT_WLOCK(vobj);
-lookupvpg:
-	if (((m = vm_page_lookup(vobj, idx)) != NULL) &&
-	    vm_page_is_valid(m, offset, tlen)) {
-		if ((m->oflags & VPO_BUSY) != 0) {
-			/*
-			 * Reference the page before unlocking and sleeping so
-			 * that the page daemon is less likely to reclaim it.  
-			 */
-			vm_page_reference(m);
-			vm_page_sleep(m, "tmfsmr");
-			goto lookupvpg;
-		}
-		vm_page_busy(m);
-		VM_OBJECT_WUNLOCK(vobj);
-		error = uiomove_fromphys(&m, offset, tlen, uio);
-		VM_OBJECT_WLOCK(vobj);
-		vm_page_wakeup(m);
-		VM_OBJECT_WUNLOCK(vobj);
-		return	(error);
-	} else if (m != NULL && uio->uio_segflg == UIO_NOCOPY) {
-		KASSERT(offset == 0,
-		    ("unexpected offset in tmpfs_mappedread for sendfile"));
-		if ((m->oflags & VPO_BUSY) != 0) {
-			/*
-			 * Reference the page before unlocking and sleeping so
-			 * that the page daemon is less likely to reclaim it.  
-			 */
-			vm_page_reference(m);
-			vm_page_sleep(m, "tmfsmr");
-			goto lookupvpg;
-		}
-		vm_page_busy(m);
-		VM_OBJECT_WUNLOCK(vobj);
-		sched_pin();
-		sf = sf_buf_alloc(m, SFB_CPUPRIVATE);
-		ma = (char *)sf_buf_kva(sf);
-		error = tmpfs_nocacheread_buf(tobj, idx, 0, tlen, ma);
-		if (error == 0) {
-			if (tlen != PAGE_SIZE)
-				bzero(ma + tlen, PAGE_SIZE - tlen);
-			uio->uio_offset += tlen;
-			uio->uio_resid -= tlen;
-		}
-		sf_buf_free(sf);
-		sched_unpin();
-		VM_OBJECT_WLOCK(vobj);
-		if (error == 0)
-			m->valid = VM_PAGE_BITS_ALL;
-		vm_page_wakeup(m);
-		VM_OBJECT_WUNLOCK(vobj);
-		return	(error);
-	}
-	VM_OBJECT_WUNLOCK(vobj);
-	error = tmpfs_nocacheread(tobj, idx, offset, tlen, uio);
-
-	return	(error);
-}
-
 static int
 tmpfs_read(struct vop_read_args *v)
 {
 	struct vnode *vp = v->a_vp;
 	struct uio *uio = v->a_uio;
-
 	struct tmpfs_node *node;
 	vm_object_t uobj;
 	size_t len;
 	int resid;
-
 	int error = 0;
+	vm_pindex_t	idx;
+	vm_offset_t	offset;
+	off_t		addr;
+	size_t		tlen;
 
 	node = VP_TO_TMPFS_NODE(vp);
 
@@ -607,7 +538,11 @@ tmpfs_read(struct vop_read_args *v)
 		len = MIN(node->tn_size - uio->uio_offset, resid);
 		if (len == 0)
 			break;
-		error = tmpfs_mappedread(vp->v_object, uobj, len, uio);
+		addr = uio->uio_offset;
+		idx = OFF_TO_IDX(addr);
+		offset = addr & PAGE_MASK;
+		tlen = MIN(PAGE_SIZE - offset, len);
+		error = tmpfs_nocacheread(uobj, idx, offset, tlen, uio);
 		if ((error != 0) || (resid == uio->uio_resid))
 			break;
 	}
@@ -620,10 +555,10 @@ out:
 /* --------------------------------------------------------------------- */
 
 static int
-tmpfs_mappedwrite(vm_object_t vobj, vm_object_t tobj, size_t len, struct uio *uio)
+tmpfs_mappedwrite(vm_object_t tobj, size_t len, struct uio *uio)
 {
 	vm_pindex_t	idx;
-	vm_page_t	vpg, tpg;
+	vm_page_t	tpg;
 	vm_offset_t	offset;
 	off_t		addr;
 	size_t		tlen;
@@ -636,69 +571,47 @@ tmpfs_mappedwrite(vm_object_t vobj, vm_o
 	offset = addr & PAGE_MASK;
 	tlen = MIN(PAGE_SIZE - offset, len);
 
-	VM_OBJECT_WLOCK(vobj);
-lookupvpg:
-	if (((vpg = vm_page_lookup(vobj, idx)) != NULL) &&
-	    vm_page_is_valid(vpg, offset, tlen)) {
-		if ((vpg->oflags & VPO_BUSY) != 0) {
-			/*
-			 * Reference the page before unlocking and sleeping so
-			 * that the page daemon is less likely to reclaim it.  
-			 */
-			vm_page_reference(vpg);
-			vm_page_sleep(vpg, "tmfsmw");
-			goto lookupvpg;
-		}
-		vm_page_busy(vpg);
-		vm_page_undirty(vpg);
-		VM_OBJECT_WUNLOCK(vobj);
-		error = uiomove_fromphys(&vpg, offset, tlen, uio);
-	} else {
-		if (vm_page_is_cached(vobj, idx))
-			vm_page_cache_free(vobj, idx, idx + 1);
-		VM_OBJECT_WUNLOCK(vobj);
-		vpg = NULL;
-	}
 	VM_OBJECT_WLOCK(tobj);
-	tpg = vm_page_grab(tobj, idx, VM_ALLOC_WIRED |
-	    VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
+	tpg = vm_page_grab(tobj, idx, VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
 	if (tpg->valid != VM_PAGE_BITS_ALL) {
 		if (vm_pager_has_page(tobj, idx, NULL, NULL)) {
 			rv = vm_pager_get_pages(tobj, &tpg, 1, 0);
+			tpg = vm_page_lookup(tobj, idx);
+			if (tpg == NULL) {
+				printf(
+		    "tmpfs: vm_obj %p idx %jd null lookup rv %d\n",
+				    tobj, idx, rv);
+				return (EIO);
+			}
 			if (rv != VM_PAGER_OK) {
+				printf(
+		    "tmpfs: vm_obj %p idx %jd valid %x pager error %d\n",
+				    tobj, idx, tpg->valid, rv);
 				vm_page_lock(tpg);
 				vm_page_free(tpg);
 				vm_page_unlock(tpg);
-				error = EIO;
-				goto out;
+				VM_OBJECT_WUNLOCK(tobj);
+				return (EIO);
 			}
 		} else
 			vm_page_zero_invalid(tpg, TRUE);
 	}
+	vm_page_lock(tpg);
+	vm_page_hold(tpg);
+	vm_page_wakeup(tpg);
+	vm_page_unlock(tpg);
 	VM_OBJECT_WUNLOCK(tobj);
-	if (vpg == NULL)
-		error = uiomove_fromphys(&tpg, offset, tlen, uio);
-	else {
-		KASSERT(vpg->valid == VM_PAGE_BITS_ALL, ("parts of vpg invalid"));
-		pmap_copy_page(vpg, tpg);
-	}
+	error = uiomove_fromphys(&tpg, offset, tlen, uio);
 	VM_OBJECT_WLOCK(tobj);
-	if (error == 0) {
-		KASSERT(tpg->valid == VM_PAGE_BITS_ALL,
-		    ("parts of tpg invalid"));
+	if (error == 0)
 		vm_page_dirty(tpg);
-	}
 	vm_page_lock(tpg);
-	vm_page_unwire(tpg, TRUE);
+	vm_page_unhold(tpg);
+	vm_page_deactivate(tpg);
+	/* Requeue to maintain LRU ordering. */
+	vm_page_requeue(tpg);
 	vm_page_unlock(tpg);
-	vm_page_wakeup(tpg);
-out:
 	VM_OBJECT_WUNLOCK(tobj);
-	if (vpg != NULL) {
-		VM_OBJECT_WLOCK(vobj);
-		vm_page_wakeup(vpg);
-		VM_OBJECT_WUNLOCK(vobj);
-	}
 
 	return	(error);
 }
@@ -756,7 +669,7 @@ tmpfs_write(struct vop_write_args *v)
 		len = MIN(node->tn_size - uio->uio_offset, resid);
 		if (len == 0)
 			break;
-		error = tmpfs_mappedwrite(vp->v_object, uobj, len, uio);
+		error = tmpfs_mappedwrite(uobj, len, uio);
 		if ((error != 0) || (resid == uio->uio_resid))
 			break;
 	}
@@ -1536,8 +1449,6 @@ tmpfs_inactive(struct vop_inactive_args 
 
 	struct tmpfs_node *node;
 
-	MPASS(VOP_ISLOCKED(vp));
-
 	node = VP_TO_TMPFS_NODE(vp);
 
 	if (node->tn_links == 0)
@@ -1555,11 +1466,24 @@ tmpfs_reclaim(struct vop_reclaim_args *v
 
 	struct tmpfs_mount *tmp;
 	struct tmpfs_node *node;
+	vm_object_t obj;
 
 	node = VP_TO_TMPFS_NODE(vp);
 	tmp = VFS_TO_TMPFS(vp->v_mount);
 
-	vnode_destroy_vobject(vp);
+	if (node->tn_type == VREG) {
+		obj = node->tn_reg.tn_aobj;
+		if (obj != NULL) {
+			/* Instead of vnode_destroy_vobject() */
+			VM_OBJECT_WLOCK(obj);
+			VI_LOCK(vp);
+			vm_object_clear_flag(obj, OBJ_TMPFS);
+			obj->un_pager.swp.swp_tmpfs = NULL;
+			VI_UNLOCK(vp);
+			VM_OBJECT_WUNLOCK(obj);
+		}
+	}
+	vp->v_object = NULL;
 	cache_purge(vp);
 
 	TMPFS_NODE_LOCK(node);

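The rewritten read and write paths above share one pattern: hold the
page across the copy instead of wiring it.  A condensed sketch, assuming
the object lock is held and the page has already been made fully valid:

	m = vm_page_grab(obj, idx, VM_ALLOC_NORMAL | VM_ALLOC_RETRY |
	    VM_ALLOC_IGN_SBUSY);	/* sendfile may enter soft-busied */
	vm_page_lock(m);
	vm_page_hold(m);		/* pin the page without wiring it */
	vm_page_wakeup(m);		/* drop VPO_BUSY before the copy */
	vm_page_unlock(m);
	VM_OBJECT_WUNLOCK(obj);
	error = uiomove_fromphys(&m, offset, tlen, uio);
	VM_OBJECT_WLOCK(obj);
	vm_page_lock(m);
	vm_page_unhold(m);
	vm_page_deactivate(m);
	/* Requeue to maintain LRU ordering. */
	vm_page_requeue(m);
	vm_page_unlock(m);
	VM_OBJECT_WUNLOCK(obj);

Unlike a wiring, a hold leaves the page on its paging queue, so the
explicit deactivate and requeue restore a sane LRU position once the
hold is dropped.
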
Modified: head/sys/vm/vm_object.c
==============================================================================
--- head/sys/vm/vm_object.c	Sun Apr 28 19:25:09 2013	(r250029)
+++ head/sys/vm/vm_object.c	Sun Apr 28 19:38:59 2013	(r250030)
@@ -505,6 +505,7 @@ void
 vm_object_deallocate(vm_object_t object)
 {
 	vm_object_t temp;
+	struct vnode *vp;
 
 	while (object != NULL) {
 		VM_OBJECT_WLOCK(object);
@@ -527,15 +528,36 @@ vm_object_deallocate(vm_object_t object)
 			VM_OBJECT_WUNLOCK(object);
 			return;
 		} else if (object->ref_count == 1) {
+			if (object->type == OBJT_SWAP &&
+			    (object->flags & OBJ_TMPFS) != 0) {
+				vp = object->un_pager.swp.swp_tmpfs;
+				vhold(vp);
+				VM_OBJECT_WUNLOCK(object);
+				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
+				vdrop(vp);
+				VM_OBJECT_WLOCK(object);
+				if (object->type == OBJT_DEAD) {
+					VM_OBJECT_WUNLOCK(object);
+					VOP_UNLOCK(vp, 0);
+					return;
+				} else if ((object->flags & OBJ_TMPFS) != 0) {
+					if (object->ref_count == 1)
+						VOP_UNSET_TEXT(vp);
+					VOP_UNLOCK(vp, 0);
+				}
+			}
 			if (object->shadow_count == 0 &&
 			    object->handle == NULL &&
 			    (object->type == OBJT_DEFAULT ||
-			     object->type == OBJT_SWAP)) {
+			    (object->type == OBJT_SWAP &&
+			    (object->flags & OBJ_TMPFS) == 0))) {
 				vm_object_set_flag(object, OBJ_ONEMAPPING);
 			} else if ((object->shadow_count == 1) &&
 			    (object->handle == NULL) &&
 			    (object->type == OBJT_DEFAULT ||
 			     object->type == OBJT_SWAP)) {
+				KASSERT((object->flags & OBJ_TMPFS) == 0,
+				    ("Shadowed tmpfs v_object"));
 				vm_object_t robject;
 
 				robject = LIST_FIRST(&object->shadow_head);

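The heart of the vm_object_deallocate() change above is a lock-order
dance: the vnode lock cannot be acquired while the object lock is held,
so the object is unlocked around vn_lock() and the vnode is held to keep
it from being recycled in the window.  Condensed from the hunk:

	struct vnode *vp;

	vp = object->un_pager.swp.swp_tmpfs;
	vhold(vp);			/* keep vp alive across the unlock */
	VM_OBJECT_WUNLOCK(object);	/* vn_lock() may sleep */
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	vdrop(vp);
	VM_OBJECT_WLOCK(object);
	/* Recheck: the object may have been reclaimed while unlocked. */

If the object became OBJT_DEAD in the window, the function returns; if
it is still a tmpfs v_object and this is the last reference, holding the
vnode lock makes the VOP_UNSET_TEXT() call safe.
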
Modified: head/sys/vm/vm_object.h
==============================================================================
--- head/sys/vm/vm_object.h	Sun Apr 28 19:25:09 2013	(r250029)
+++ head/sys/vm/vm_object.h	Sun Apr 28 19:38:59 2013	(r250030)
@@ -154,11 +154,21 @@ struct vm_object {
 		/*
 		 * Swap pager
 		 *
+		 *	swp_tmpfs - back-pointer to the tmpfs vnode,
+		 *		     if any, which uses the vm object
+		 *		     as backing store.  The handle
+		 *		     cannot be reused for linking,
+		 *		     because the vnode can be
+		 *		     reclaimed and recreated, which
+		 *		     would change the handle and
+		 *		     invalidate the hash chain.
+		 *
 		 *	swp_bcount - number of swap 'swblock' metablocks, each
 		 *		     contains up to 16 swapblk assignments.
 		 *		     see vm/swap_pager.h
 		 */
 		struct {
+			void *swp_tmpfs;
 			int swp_bcount;
 		} swp;
 	} un_pager;
@@ -179,6 +189,7 @@ struct vm_object {
 #define	OBJ_COLORED	0x1000		/* pg_color is defined */
 #define	OBJ_ONEMAPPING	0x2000		/* One USE (a single, non-forked) mapping flag */
 #define	OBJ_DISCONNECTWNT 0x4000	/* disconnect from vnode wanted */
+#define	OBJ_TMPFS	0x8000		/* object backs a tmpfs vnode */
 
 #define IDX_TO_OFF(idx) (((vm_ooffset_t)(idx)) << PAGE_SHIFT)
 #define OFF_TO_IDX(off) ((vm_pindex_t)(((vm_ooffset_t)(off)) >> PAGE_SHIFT))


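With the new swp_tmpfs field and OBJ_TMPFS flag, VM code can map a
tmpfs-backed object back to its vnode; a minimal sketch of the idiom
used in vm_object_deallocate() above:

	struct vnode *vp;

	if (object->type == OBJT_SWAP &&
	    (object->flags & OBJ_TMPFS) != 0) {
		/*
		 * swp_tmpfs and OBJ_TMPFS are set and cleared together
		 * under the object lock and vnode interlock, so the
		 * back-pointer is valid whenever the flag is seen.
		 */
		vp = object->un_pager.swp.swp_tmpfs;
	}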