svn commit: r253939 - in head/sys: cddl/contrib/opensolaris/uts/common/fs/zfs fs/tmpfs kern vm

Attilio Rao attilio at FreeBSD.org
Sun Aug 4 21:07:26 UTC 2013


Author: attilio
Date: Sun Aug  4 21:07:24 2013
New Revision: 253939
URL: http://svnweb.freebsd.org/changeset/base/253939

Log:
  The page hold mechanism is fast, but it has a couple of drawbacks:
  - It does not let pages respect the LRU policy
  - It bloats the active/inactive queues with a few pages
  
  Try to avoid it as much as possible, with the long-term goal of
  removing it completely.
  Use the soft-busy mechanism to protect page content accesses during
  short-term operations (like uiomove_fromphys()).
  
  After this change, only vm_fault_quick_hold_pages() still uses the
  hold mechanism for page content access.
  There is additional complexity there: the quick path cannot
  immediately access the page object in order to busy the page, while
  the slow path cannot busy more than one page at a time (to avoid
  deadlocks).
  
  Fixing that primitive can lead to the complete removal of the page
  hold mechanism.
  
  Sponsored by:	EMC / Isilon storage division
  Discussed with:	alc
  Reviewed by:	jeff
  Tested by:	pho
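
For context, here is a minimal sketch (not part of the commit; the
helper names are hypothetical and error handling is elided) contrasting
the old hold pattern with the new soft-busy pattern around
uiomove_fromphys().  Both helpers are entered with the object write
lock held and return with it released:

	#include <sys/param.h>
	#include <sys/systm.h>
	#include <sys/uio.h>
	#include <vm/vm.h>
	#include <vm/vm_object.h>
	#include <vm/vm_page.h>

	/* Old pattern: hold the page; it stays on the paging queues. */
	static int
	copy_with_hold(vm_object_t obj, vm_page_t m, vm_offset_t off,
	    int len, struct uio *uio)
	{
		int error;

		vm_page_lock(m);
		vm_page_hold(m);		/* pin the physical page */
		vm_page_unlock(m);
		VM_OBJECT_WUNLOCK(obj);
		error = uiomove_fromphys(&m, off, len, uio);
		vm_page_lock(m);
		vm_page_unhold(m);
		vm_page_unlock(m);
		return (error);
	}

	/* New pattern: soft-busy the page under the object lock. */
	static int
	copy_with_sbusy(vm_object_t obj, vm_page_t m, vm_offset_t off,
	    int len, struct uio *uio)
	{
		int error;

		vm_page_io_start(m);		/* soft-busy the page */
		VM_OBJECT_WUNLOCK(obj);
		error = uiomove_fromphys(&m, off, len, uio);
		VM_OBJECT_WLOCK(obj);
		vm_page_io_finish(m);
		VM_OBJECT_WUNLOCK(obj);
		return (error);
	}

The soft-busy variant never touches the page lock or the paging queues,
which is what lets the pages follow the LRU policy.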

Modified:
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c
  head/sys/fs/tmpfs/tmpfs_vnops.c
  head/sys/kern/imgact_elf.c
  head/sys/kern/kern_exec.c
  head/sys/kern/sys_process.c
  head/sys/vm/vm_extern.h
  head/sys/vm/vm_fault.c
  head/sys/vm/vm_glue.c
  head/sys/vm/vm_map.h

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c	Sun Aug  4 21:00:22 2013	(r253938)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c	Sun Aug  4 21:07:24 2013	(r253939)
@@ -324,7 +324,8 @@ zfs_ioctl(vnode_t *vp, u_long com, intpt
 }
 
 static vm_page_t
-page_busy(vnode_t *vp, int64_t start, int64_t off, int64_t nbytes)
+page_busy(vnode_t *vp, int64_t start, int64_t off, int64_t nbytes,
+    boolean_t alloc)
 {
 	vm_object_t obj;
 	vm_page_t pp;
@@ -346,6 +347,8 @@ page_busy(vnode_t *vp, int64_t start, in
 				continue;
 			}
 		} else if (pp == NULL) {
+			if (!alloc)
+				break;
 			pp = vm_page_alloc(obj, OFF_TO_IDX(start),
 			    VM_ALLOC_SYSTEM | VM_ALLOC_IFCACHED |
 			    VM_ALLOC_NOBUSY);
@@ -356,8 +359,10 @@ page_busy(vnode_t *vp, int64_t start, in
 
 		if (pp != NULL) {
 			ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL);
-			vm_object_pip_add(obj, 1);
 			vm_page_io_start(pp);
+			if (!alloc)
+				break;
+			vm_object_pip_add(obj, 1);
 			pmap_remove_write(pp);
 			vm_page_clear_dirty(pp, off, nbytes);
 		}
@@ -367,55 +372,12 @@ page_busy(vnode_t *vp, int64_t start, in
 }
 
 static void
-page_unbusy(vm_page_t pp)
+page_unbusy(vm_page_t pp, boolean_t unalloc)
 {
 
 	vm_page_io_finish(pp);
-	vm_object_pip_subtract(pp->object, 1);
-}
-
-static vm_page_t
-page_hold(vnode_t *vp, int64_t start)
-{
-	vm_object_t obj;
-	vm_page_t pp;
-
-	obj = vp->v_object;
-	zfs_vmobject_assert_wlocked(obj);
-
-	for (;;) {
-		if ((pp = vm_page_lookup(obj, OFF_TO_IDX(start))) != NULL &&
-		    pp->valid) {
-			if ((pp->oflags & VPO_BUSY) != 0) {
-				/*
-				 * Reference the page before unlocking and
-				 * sleeping so that the page daemon is less
-				 * likely to reclaim it.
-				 */
-				vm_page_reference(pp);
-				vm_page_sleep(pp, "zfsmwb");
-				continue;
-			}
-
-			ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL);
-			vm_page_lock(pp);
-			vm_page_hold(pp);
-			vm_page_unlock(pp);
-
-		} else
-			pp = NULL;
-		break;
-	}
-	return (pp);
-}
-
-static void
-page_unhold(vm_page_t pp)
-{
-
-	vm_page_lock(pp);
-	vm_page_unhold(pp);
-	vm_page_unlock(pp);
+	if (unalloc)
+		vm_object_pip_subtract(pp->object, 1);
 }
 
 static caddr_t
@@ -479,7 +441,8 @@ update_pages(vnode_t *vp, int64_t start,
 
 			zfs_vmobject_wlock(obj);
 			vm_page_undirty(pp);
-		} else if ((pp = page_busy(vp, start, off, nbytes)) != NULL) {
+		} else if ((pp = page_busy(vp, start, off, nbytes,
+		    TRUE)) != NULL) {
 			zfs_vmobject_wunlock(obj);
 
 			va = zfs_map_page(pp, &sf);
@@ -488,7 +451,7 @@ update_pages(vnode_t *vp, int64_t start,
 			zfs_unmap_page(sf);
 
 			zfs_vmobject_wlock(obj);
-			page_unbusy(pp);
+			page_unbusy(pp, TRUE);
 		}
 		len -= nbytes;
 		off = 0;
@@ -598,7 +561,7 @@ mappedread(vnode_t *vp, int nbytes, uio_
 		vm_page_t pp;
 		uint64_t bytes = MIN(PAGESIZE - off, len);
 
-		if (pp = page_hold(vp, start)) {
+		if (pp = page_busy(vp, start, 0, 0, FALSE)) {
 			struct sf_buf *sf;
 			caddr_t va;
 
@@ -607,7 +570,7 @@ mappedread(vnode_t *vp, int nbytes, uio_
 			error = uiomove(va + off, bytes, UIO_READ, uio);
 			zfs_unmap_page(sf);
 			zfs_vmobject_wlock(obj);
-			page_unhold(pp);
+			page_unbusy(pp, FALSE);
 		} else {
 			zfs_vmobject_wunlock(obj);
 			error = dmu_read_uio(os, zp->z_id, uio, bytes);

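The reworked page_busy()/page_unbusy() pair now covers both ZFS paths.
A condensed view of the two calling conventions (a sketch mirroring the
hunks above, with unrelated code elided):

	/*
	 * Write path (update_pages()): allocate the page if missing,
	 * soft-busy it, take a pip reference and clean the range.
	 */
	pp = page_busy(vp, start, off, nbytes, TRUE);
	/* ... copy new data into the page ... */
	page_unbusy(pp, TRUE);		/* also drops the pip reference */

	/*
	 * Read path (mappedread(), replacing page_hold()): lookup only,
	 * soft-busy the page if it is resident and fully valid.
	 */
	pp = page_busy(vp, start, 0, 0, FALSE);
	/* ... uiomove() out of the page ... */
	page_unbusy(pp, FALSE);
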
Modified: head/sys/fs/tmpfs/tmpfs_vnops.c
==============================================================================
--- head/sys/fs/tmpfs/tmpfs_vnops.c	Sun Aug  4 21:00:22 2013	(r253938)
+++ head/sys/fs/tmpfs/tmpfs_vnops.c	Sun Aug  4 21:07:24 2013	(r253939)
@@ -485,13 +485,13 @@ tmpfs_nocacheread(vm_object_t tobj, vm_p
 			vm_page_zero_invalid(m, TRUE);
 		vm_page_wakeup(m);
 	}
-	vm_page_lock(m);
-	vm_page_hold(m);
-	vm_page_unlock(m);
+	vm_page_io_start(m);
 	VM_OBJECT_WUNLOCK(tobj);
 	error = uiomove_fromphys(&m, offset, tlen, uio);
+	VM_OBJECT_WLOCK(tobj);
+	vm_page_io_finish(m);
+	VM_OBJECT_WUNLOCK(tobj);
 	vm_page_lock(m);
-	vm_page_unhold(m);
 	if (m->queue == PQ_NONE) {
 		vm_page_deactivate(m);
 	} else {
@@ -602,16 +602,14 @@ tmpfs_mappedwrite(vm_object_t tobj, size
 			vm_page_zero_invalid(tpg, TRUE);
 		vm_page_wakeup(tpg);
 	}
-	vm_page_lock(tpg);
-	vm_page_hold(tpg);
-	vm_page_unlock(tpg);
+	vm_page_io_start(tpg);
 	VM_OBJECT_WUNLOCK(tobj);
 	error = uiomove_fromphys(&tpg, offset, tlen, uio);
 	VM_OBJECT_WLOCK(tobj);
+	vm_page_io_finish(tpg);
 	if (error == 0)
 		vm_page_dirty(tpg);
 	vm_page_lock(tpg);
-	vm_page_unhold(tpg);
 	if (tpg->queue == PQ_NONE) {
 		vm_page_deactivate(tpg);
 	} else {

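A note on lock ordering in the two tmpfs paths above: vm_page_io_finish()
must be called with the object write lock held, while the queue
housekeeping still runs under the page lock, so the relock sequence
after uiomove_fromphys() is deliberate.  The common shape (sketch only):

	VM_OBJECT_WUNLOCK(tobj);
	error = uiomove_fromphys(&m, offset, tlen, uio);
	VM_OBJECT_WLOCK(tobj);
	vm_page_io_finish(m);	/* requires the object lock */
	VM_OBJECT_WUNLOCK(tobj);
	vm_page_lock(m);	/* queue handling needs the page lock only */
	if (m->queue == PQ_NONE)
		vm_page_deactivate(m);
	else
		vm_page_requeue(m);
	vm_page_unlock(m);
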
Modified: head/sys/kern/imgact_elf.c
==============================================================================
--- head/sys/kern/imgact_elf.c	Sun Aug  4 21:00:22 2013	(r253938)
+++ head/sys/kern/imgact_elf.c	Sun Aug  4 21:07:24 2013	(r253939)
@@ -378,7 +378,7 @@ __elfN(map_partial)(vm_map_t map, vm_obj
 		off = offset - trunc_page(offset);
 		error = copyout((caddr_t)sf_buf_kva(sf) + off, (caddr_t)start,
 		    end - start);
-		vm_imgact_unmap_page(sf);
+		vm_imgact_unmap_page(object, sf);
 		if (error) {
 			return (KERN_FAILURE);
 		}
@@ -433,7 +433,7 @@ __elfN(map_insert)(vm_map_t map, vm_obje
 					sz = PAGE_SIZE - off;
 				error = copyout((caddr_t)sf_buf_kva(sf) + off,
 				    (caddr_t)start, sz);
-				vm_imgact_unmap_page(sf);
+				vm_imgact_unmap_page(object, sf);
 				if (error) {
 					return (KERN_FAILURE);
 				}
@@ -553,7 +553,7 @@ __elfN(load_section)(struct image_params
 		    trunc_page(offset + filsz);
 		error = copyout((caddr_t)sf_buf_kva(sf) + off,
 		    (caddr_t)map_addr, copy_len);
-		vm_imgact_unmap_page(sf);
+		vm_imgact_unmap_page(object, sf);
 		if (error) {
 			return (error);
 		}

Modified: head/sys/kern/kern_exec.c
==============================================================================
--- head/sys/kern/kern_exec.c	Sun Aug  4 21:00:22 2013	(r253938)
+++ head/sys/kern/kern_exec.c	Sun Aug  4 21:07:24 2013	(r253939)
@@ -973,7 +973,7 @@ exec_map_first_page(imgp)
 		vm_page_wakeup(ma[0]);
 	}
 	vm_page_lock(ma[0]);
-	vm_page_hold(ma[0]);
+	vm_page_wire(ma[0]);
 	vm_page_unlock(ma[0]);
 	VM_OBJECT_WUNLOCK(object);
 
@@ -994,7 +994,7 @@ exec_unmap_first_page(imgp)
 		sf_buf_free(imgp->firstpage);
 		imgp->firstpage = NULL;
 		vm_page_lock(m);
-		vm_page_unhold(m);
+		vm_page_unwire(m, 0);
 		vm_page_unlock(m);
 	}
 }

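exec_map_first_page() switches to wiring rather than soft-busying,
presumably because imgp->firstpage stays mapped well beyond a single
short copy: a wired page is taken off the paging queues entirely, while
a soft-busied page would block writers for the whole time.  The release
pairs vm_page_unwire() with the earlier vm_page_wire(), both under the
page lock (sketch):

	vm_page_lock(m);
	vm_page_wire(m);	/* long-lived pin, off the paging queues */
	vm_page_unlock(m);
	/* ... page stays mapped via imgp->firstpage ... */
	vm_page_lock(m);
	vm_page_unwire(m, 0);	/* 0: release to the inactive queue */
	vm_page_unlock(m);
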
Modified: head/sys/kern/sys_process.c
==============================================================================
--- head/sys/kern/sys_process.c	Sun Aug  4 21:00:22 2013	(r253938)
+++ head/sys/kern/sys_process.c	Sun Aug  4 21:07:24 2013	(r253939)
@@ -263,6 +263,7 @@ proc_rwmem(struct proc *p, struct uio *u
 	writing = uio->uio_rw == UIO_WRITE;
 	reqprot = writing ? VM_PROT_COPY | VM_PROT_READ : VM_PROT_READ;
 	fault_flags = writing ? VM_FAULT_DIRTY : VM_FAULT_NORMAL;
+	fault_flags |= VM_FAULT_IOBUSY;
 
 	/*
 	 * Only map in one page at a time.  We don't have to, but it
@@ -287,9 +288,9 @@ proc_rwmem(struct proc *p, struct uio *u
 		len = min(PAGE_SIZE - page_offset, uio->uio_resid);
 
 		/*
-		 * Fault and hold the page on behalf of the process.
+		 * Fault and busy the page on behalf of the process.
 		 */
-		error = vm_fault_hold(map, pageno, reqprot, fault_flags, &m);
+		error = vm_fault_handle(map, pageno, reqprot, fault_flags, &m);
 		if (error != KERN_SUCCESS) {
 			if (error == KERN_RESOURCE_SHORTAGE)
 				error = ENOMEM;
@@ -315,9 +316,9 @@ proc_rwmem(struct proc *p, struct uio *u
 		/*
 		 * Release the page.
 		 */
-		vm_page_lock(m);
-		vm_page_unhold(m);
-		vm_page_unlock(m);
+		VM_OBJECT_WLOCK(m->object);
+		vm_page_io_finish(m);
+		VM_OBJECT_WUNLOCK(m->object);
 
 	} while (error == 0 && uio->uio_resid > 0);
 

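With VM_FAULT_IOBUSY set, vm_fault_handle() hands the page back
soft-busied instead of held, and proc_rwmem() releases it with
vm_page_io_finish() under the object write lock.  The new contract in
condensed form (sketch mirroring the hunks above):

	vm_page_t m;
	int error;

	error = vm_fault_handle(map, pageno, reqprot,
	    fault_flags | VM_FAULT_IOBUSY, &m);
	if (error == KERN_SUCCESS) {
		/* ... uiomove_fromphys() against m ... */
		VM_OBJECT_WLOCK(m->object);
		vm_page_io_finish(m);
		VM_OBJECT_WUNLOCK(m->object);
	}
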
Modified: head/sys/vm/vm_extern.h
==============================================================================
--- head/sys/vm/vm_extern.h	Sun Aug  4 21:00:22 2013	(r253938)
+++ head/sys/vm/vm_extern.h	Sun Aug  4 21:07:24 2013	(r253939)
@@ -63,7 +63,7 @@ void vm_fault_copy_entry(vm_map_t, vm_ma
     vm_ooffset_t *);
 int vm_fault_disable_pagefaults(void);
 void vm_fault_enable_pagefaults(int save);
-int vm_fault_hold(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type,
+int vm_fault_handle(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type,
     int fault_flags, vm_page_t *m_hold);
 int vm_fault_quick_hold_pages(vm_map_t map, vm_offset_t addr, vm_size_t len,
     vm_prot_t prot, vm_page_t *ma, int max_count);
@@ -87,7 +87,7 @@ void vnode_pager_setsize(struct vnode *,
 int vslock(void *, size_t);
 void vsunlock(void *, size_t);
 struct sf_buf *vm_imgact_map_page(vm_object_t object, vm_ooffset_t offset);
-void vm_imgact_unmap_page(struct sf_buf *sf);
+void vm_imgact_unmap_page(vm_object_t, struct sf_buf *sf);
 void vm_thread_dispose(struct thread *td);
 int vm_thread_new(struct thread *td, int pages);
 int vm_mlock(struct proc *, struct ucred *, const void *, size_t);

Modified: head/sys/vm/vm_fault.c
==============================================================================
--- head/sys/vm/vm_fault.c	Sun Aug  4 21:00:22 2013	(r253938)
+++ head/sys/vm/vm_fault.c	Sun Aug  4 21:07:24 2013	(r253939)
@@ -221,8 +221,8 @@ vm_fault(vm_map_t map, vm_offset_t vaddr
 	if (map != kernel_map && KTRPOINT(td, KTR_FAULT))
 		ktrfault(vaddr, fault_type);
 #endif
-	result = vm_fault_hold(map, trunc_page(vaddr), fault_type, fault_flags,
-	    NULL);
+	result = vm_fault_handle(map, trunc_page(vaddr), fault_type,
+	    fault_flags, NULL);
 #ifdef KTRACE
 	if (map != kernel_map && KTRPOINT(td, KTR_FAULTEND))
 		ktrfaultend(result);
@@ -231,7 +231,7 @@ vm_fault(vm_map_t map, vm_offset_t vaddr
 }
 
 int
-vm_fault_hold(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type,
+vm_fault_handle(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type,
     int fault_flags, vm_page_t *m_hold)
 {
 	vm_prot_t prot;
@@ -943,7 +943,10 @@ vnode_locked:
 		vm_page_activate(fs.m);
 	if (m_hold != NULL) {
 		*m_hold = fs.m;
-		vm_page_hold(fs.m);
+		if (fault_flags & VM_FAULT_IOBUSY)
+			vm_page_io_start(fs.m);
+		else
+			vm_page_hold(fs.m);
 	}
 	vm_page_unlock(fs.m);
 	vm_page_wakeup(fs.m);
@@ -1145,7 +1148,7 @@ vm_fault_quick_hold_pages(vm_map_t map, 
 		 * and hold these pages.
 		 */
 		for (mp = ma, va = addr; va < end; mp++, va += PAGE_SIZE)
-			if (*mp == NULL && vm_fault_hold(map, va, prot,
+			if (*mp == NULL && vm_fault_handle(map, va, prot,
 			    VM_FAULT_NORMAL, mp) != KERN_SUCCESS)
 				goto error;
 	}

Modified: head/sys/vm/vm_glue.c
==============================================================================
--- head/sys/vm/vm_glue.c	Sun Aug  4 21:00:22 2013	(r253938)
+++ head/sys/vm/vm_glue.c	Sun Aug  4 21:07:24 2013	(r253939)
@@ -223,7 +223,7 @@ vsunlock(void *addr, size_t len)
  * Return the pinned page if successful; otherwise, return NULL.
  */
 static vm_page_t
-vm_imgact_hold_page(vm_object_t object, vm_ooffset_t offset)
+vm_imgact_page_iostart(vm_object_t object, vm_ooffset_t offset)
 {
 	vm_page_t m, ma[1];
 	vm_pindex_t pindex;
@@ -249,9 +249,7 @@ vm_imgact_hold_page(vm_object_t object, 
 		}
 		vm_page_wakeup(m);
 	}
-	vm_page_lock(m);
-	vm_page_hold(m);
-	vm_page_unlock(m);
+	vm_page_io_start(m);
 out:
 	VM_OBJECT_WUNLOCK(object);
 	return (m);
@@ -266,7 +264,7 @@ vm_imgact_map_page(vm_object_t object, v
 {
 	vm_page_t m;
 
-	m = vm_imgact_hold_page(object, offset);
+	m = vm_imgact_page_iostart(object, offset);
 	if (m == NULL)
 		return (NULL);
 	sched_pin();
@@ -277,16 +275,16 @@ vm_imgact_map_page(vm_object_t object, v
  * Destroy the given CPU private mapping and unpin the page that it mapped.
  */
 void
-vm_imgact_unmap_page(struct sf_buf *sf)
+vm_imgact_unmap_page(vm_object_t object, struct sf_buf *sf)
 {
 	vm_page_t m;
 
 	m = sf_buf_page(sf);
 	sf_buf_free(sf);
 	sched_unpin();
-	vm_page_lock(m);
-	vm_page_unhold(m);
-	vm_page_unlock(m);
+	VM_OBJECT_WLOCK(object);
+	vm_page_io_finish(m);
+	VM_OBJECT_WUNLOCK(object);
 }
 
 void

Modified: head/sys/vm/vm_map.h
==============================================================================
--- head/sys/vm/vm_map.h	Sun Aug  4 21:00:22 2013	(r253938)
+++ head/sys/vm/vm_map.h	Sun Aug  4 21:07:24 2013	(r253939)
@@ -329,6 +329,7 @@ long vmspace_resident_count(struct vmspa
 #define VM_FAULT_NORMAL 0		/* Nothing special */
 #define VM_FAULT_CHANGE_WIRING 1	/* Change the wiring as appropriate */
 #define	VM_FAULT_DIRTY 2		/* Dirty the page; use w/VM_PROT_COPY */
+#define	VM_FAULT_IOBUSY 4		/* Busy the faulted page */
 
 /*
  * Initially, mappings are slightly sequential.  The maximum window size must

