svn commit: r255608 - in head/sys: conf kern modules/cxgb/cxgb modules/sfxge modules/ti sys vm

Konstantin Belousov kib at FreeBSD.org
Mon Sep 16 06:25:57 UTC 2013


Author: kib
Date: Mon Sep 16 06:25:54 2013
New Revision: 255608
URL: http://svnweb.freebsd.org/changeset/base/255608

Log:
  Remove zero-copy sockets code.  It only worked for anonymous memory,
  and the equivalent functionality is now provided by sendfile(2) over
  posix shared memory filedescriptor.
  
  Remove the cow member of struct vm_page, and rearrange the remaining
  members.  While there, make hold_count unsigned.
  
  Requested and reviewed by:	alc
  Tested by:	pho
  Sponsored by:	The FreeBSD Foundation
  Approved by:	re (delphij)

Modified:
  head/sys/conf/NOTES
  head/sys/conf/options
  head/sys/kern/subr_uio.c
  head/sys/kern/uipc_socket.c
  head/sys/modules/cxgb/cxgb/Makefile
  head/sys/modules/sfxge/Makefile
  head/sys/modules/ti/Makefile
  head/sys/sys/socketvar.h
  head/sys/sys/uio.h
  head/sys/vm/vm_fault.c
  head/sys/vm/vm_page.c
  head/sys/vm/vm_page.h

Modified: head/sys/conf/NOTES
==============================================================================
--- head/sys/conf/NOTES	Mon Sep 16 06:15:15 2013	(r255607)
+++ head/sys/conf/NOTES	Mon Sep 16 06:25:54 2013	(r255608)
@@ -996,21 +996,6 @@ options 	TCP_SIGNATURE		#include support
 # a smooth scheduling of the traffic.
 options 	DUMMYNET
 
-# "Zero copy" sockets support is split into the send and receive path
-# which operate very differently.
-# For the send path the VM page with the data is wired into the kernel
-# and marked as COW (copy-on-write).  If the application touches the
-# data while it is still in the send socket buffer the page is copied
-# and divorced from its kernel wiring (no longer zero copy).
-# The receive side requires explicit NIC driver support to create
-# disposable pages which are flipped from kernel to user-space VM.
-# See zero_copy(9) for more details.
-# XXX: The COW based send mechanism is not safe and may result in
-# kernel crashes.
-# XXX: None of the current NIC drivers support disposable pages.
-options		SOCKET_SEND_COW
-options		SOCKET_RECV_PFLIP
-
 #####################################################################
 # FILESYSTEM OPTIONS
 

Modified: head/sys/conf/options
==============================================================================
--- head/sys/conf/options	Mon Sep 16 06:15:15 2013	(r255607)
+++ head/sys/conf/options	Mon Sep 16 06:25:54 2013	(r255608)
@@ -528,8 +528,6 @@ NGATM_CCATM		opt_netgraph.h
 # DRM options
 DRM_DEBUG		opt_drm.h
 
-SOCKET_SEND_COW		opt_zero.h
-SOCKET_RECV_PFLIP	opt_zero.h
 TI_SF_BUF_JUMBO		opt_ti.h
 TI_JUMBO_HDRSPLIT	opt_ti.h
 

Modified: head/sys/kern/subr_uio.c
==============================================================================
--- head/sys/kern/subr_uio.c	Mon Sep 16 06:15:15 2013	(r255607)
+++ head/sys/kern/subr_uio.c	Mon Sep 16 06:25:54 2013	(r255608)
@@ -37,8 +37,6 @@
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
-#include "opt_zero.h"
-
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
@@ -58,84 +56,12 @@ __FBSDID("$FreeBSD$");
 #include <vm/vm_page.h>
 #include <vm/vm_pageout.h>
 #include <vm/vm_map.h>
-#ifdef SOCKET_SEND_COW
-#include <vm/vm_object.h>
-#endif
 
 SYSCTL_INT(_kern, KERN_IOV_MAX, iov_max, CTLFLAG_RD, NULL, UIO_MAXIOV,
 	"Maximum number of elements in an I/O vector; sysconf(_SC_IOV_MAX)");
 
 static int uiomove_faultflag(void *cp, int n, struct uio *uio, int nofault);
 
-#ifdef SOCKET_SEND_COW
-/* Declared in uipc_socket.c */
-extern int so_zero_copy_receive;
-
-/*
- * Identify the physical page mapped at the given kernel virtual
- * address.  Insert this physical page into the given address space at
- * the given virtual address, replacing the physical page, if any,
- * that already exists there.
- */
-static int
-vm_pgmoveco(vm_map_t mapa, vm_offset_t kaddr, vm_offset_t uaddr)
-{
-	vm_map_t map = mapa;
-	vm_page_t kern_pg, user_pg;
-	vm_object_t uobject;
-	vm_map_entry_t entry;
-	vm_pindex_t upindex;
-	vm_prot_t prot;
-	boolean_t wired;
-
-	KASSERT((uaddr & PAGE_MASK) == 0,
-	    ("vm_pgmoveco: uaddr is not page aligned"));
-
-	/*
-	 * Herein the physical page is validated and dirtied.  It is
-	 * unwired in sf_buf_mext().
-	 */
-	kern_pg = PHYS_TO_VM_PAGE(vtophys(kaddr));
-	kern_pg->valid = VM_PAGE_BITS_ALL;
-	KASSERT(kern_pg->queue == PQ_NONE && kern_pg->wire_count == 1,
-	    ("vm_pgmoveco: kern_pg is not correctly wired"));
-
-	if ((vm_map_lookup(&map, uaddr,
-			   VM_PROT_WRITE, &entry, &uobject,
-			   &upindex, &prot, &wired)) != KERN_SUCCESS) {
-		return(EFAULT);
-	}
-	VM_OBJECT_WLOCK(uobject);
-retry:
-	if ((user_pg = vm_page_lookup(uobject, upindex)) != NULL) {
-		if (vm_page_sleep_if_busy(user_pg, "vm_pgmoveco"))
-			goto retry;
-		vm_page_lock(user_pg);
-		pmap_remove_all(user_pg);
-		vm_page_free(user_pg);
-		vm_page_unlock(user_pg);
-	} else {
-		/*
-		 * Even if a physical page does not exist in the
-		 * object chain's first object, a physical page from a
-		 * backing object may be mapped read only.
-		 */
-		if (uobject->backing_object != NULL)
-			pmap_remove(map->pmap, uaddr, uaddr + PAGE_SIZE);
-	}
-	if (vm_page_insert(kern_pg, uobject, upindex)) {
-		VM_OBJECT_WUNLOCK(uobject);
-		VM_WAIT;
-		VM_OBJECT_WLOCK(uobject);
-		goto retry;
-	}
-	vm_page_dirty(kern_pg);
-	VM_OBJECT_WUNLOCK(uobject);
-	vm_map_lookup_done(map, entry);
-	return(KERN_SUCCESS);
-}
-#endif /* SOCKET_SEND_COW */
-
 int
 copyin_nofault(const void *udaddr, void *kaddr, size_t len)
 {
@@ -313,103 +239,6 @@ uiomove_frombuf(void *buf, int buflen, s
 	return (uiomove((char *)buf + offset, n, uio));
 }
 
-#ifdef SOCKET_RECV_PFLIP
-/*
- * Experimental support for zero-copy I/O
- */
-static int
-userspaceco(void *cp, u_int cnt, struct uio *uio, int disposable)
-{
-	struct iovec *iov;
-	int error;
-
-	iov = uio->uio_iov;
-	if (uio->uio_rw == UIO_READ) {
-		if ((so_zero_copy_receive != 0)
-		 && ((cnt & PAGE_MASK) == 0)
-		 && ((((intptr_t) iov->iov_base) & PAGE_MASK) == 0)
-		 && ((uio->uio_offset & PAGE_MASK) == 0)
-		 && ((((intptr_t) cp) & PAGE_MASK) == 0)
-		 && (disposable != 0)) {
-			/* SOCKET: use page-trading */
-			/*
-			 * We only want to call vm_pgmoveco() on
-			 * disposeable pages, since it gives the
-			 * kernel page to the userland process.
-			 */
-			error =	vm_pgmoveco(&curproc->p_vmspace->vm_map,
-			    (vm_offset_t)cp, (vm_offset_t)iov->iov_base);
-
-			/*
-			 * If we get an error back, attempt
-			 * to use copyout() instead.  The
-			 * disposable page should be freed
-			 * automatically if we weren't able to move
-			 * it into userland.
-			 */
-			if (error != 0)
-				error = copyout(cp, iov->iov_base, cnt);
-		} else {
-			error = copyout(cp, iov->iov_base, cnt);
-		}
-	} else {
-		error = copyin(iov->iov_base, cp, cnt);
-	}
-	return (error);
-}
-
-int
-uiomoveco(void *cp, int n, struct uio *uio, int disposable)
-{
-	struct iovec *iov;
-	u_int cnt;
-	int error;
-
-	KASSERT(uio->uio_rw == UIO_READ || uio->uio_rw == UIO_WRITE,
-	    ("uiomoveco: mode"));
-	KASSERT(uio->uio_segflg != UIO_USERSPACE || uio->uio_td == curthread,
-	    ("uiomoveco proc"));
-
-	while (n > 0 && uio->uio_resid) {
-		iov = uio->uio_iov;
-		cnt = iov->iov_len;
-		if (cnt == 0) {
-			uio->uio_iov++;
-			uio->uio_iovcnt--;
-			continue;
-		}
-		if (cnt > n)
-			cnt = n;
-
-		switch (uio->uio_segflg) {
-
-		case UIO_USERSPACE:
-			maybe_yield();
-			error = userspaceco(cp, cnt, uio, disposable);
-			if (error)
-				return (error);
-			break;
-
-		case UIO_SYSSPACE:
-			if (uio->uio_rw == UIO_READ)
-				bcopy(cp, iov->iov_base, cnt);
-			else
-				bcopy(iov->iov_base, cp, cnt);
-			break;
-		case UIO_NOCOPY:
-			break;
-		}
-		iov->iov_base = (char *)iov->iov_base + cnt;
-		iov->iov_len -= cnt;
-		uio->uio_resid -= cnt;
-		uio->uio_offset += cnt;
-		cp = (char *)cp + cnt;
-		n -= cnt;
-	}
-	return (0);
-}
-#endif /* SOCKET_RECV_PFLIP */
-
 /*
  * Give next character to user as result of read.
  */

Modified: head/sys/kern/uipc_socket.c
==============================================================================
--- head/sys/kern/uipc_socket.c	Mon Sep 16 06:15:15 2013	(r255607)
+++ head/sys/kern/uipc_socket.c	Mon Sep 16 06:25:54 2013	(r255608)
@@ -105,7 +105,6 @@ __FBSDID("$FreeBSD$");
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
-#include "opt_zero.h"
 #include "opt_compat.h"
 
 #include <sys/param.h>
@@ -221,21 +220,6 @@ static int numopensockets;
 SYSCTL_INT(_kern_ipc, OID_AUTO, numopensockets, CTLFLAG_RD,
     &numopensockets, 0, "Number of open sockets");
 
-#if defined(SOCKET_SEND_COW) || defined(SOCKET_RECV_PFLIP)
-SYSCTL_NODE(_kern_ipc, OID_AUTO, zero_copy, CTLFLAG_RD, 0,
-    "Zero copy controls");
-#ifdef SOCKET_RECV_PFLIP
-int so_zero_copy_receive = 1;
-SYSCTL_INT(_kern_ipc_zero_copy, OID_AUTO, receive, CTLFLAG_RW,
-    &so_zero_copy_receive, 0, "Enable zero copy receive");
-#endif
-#ifdef SOCKET_SEND_COW
-int so_zero_copy_send = 1;
-SYSCTL_INT(_kern_ipc_zero_copy, OID_AUTO, send, CTLFLAG_RW,
-    &so_zero_copy_send, 0, "Enable zero copy send");
-#endif /* SOCKET_SEND_COW */
-#endif /* SOCKET_SEND_COW || SOCKET_RECV_PFLIP */
-
 /*
  * accept_mtx locks down per-socket fields relating to accept queues.  See
  * socketvar.h for an annotation of the protected fields of struct socket.
@@ -978,113 +962,6 @@ sodisconnect(struct socket *so)
 	return (error);
 }
 
-#ifdef SOCKET_SEND_COW
-struct so_zerocopy_stats{
-	int size_ok;
-	int align_ok;
-	int found_ifp;
-};
-struct so_zerocopy_stats so_zerocp_stats = {0,0,0};
-
-/*
- * sosend_copyin() is only used if zero copy sockets are enabled.  Otherwise
- * sosend_dgram() and sosend_generic() use m_uiotombuf().
- *
- * sosend_copyin() accepts a uio and prepares an mbuf chain holding part or
- * all of the data referenced by the uio.  If desired, it uses zero-copy.
- * *space will be updated to reflect data copied in.
- *
- * NB: If atomic I/O is requested, the caller must already have checked that
- * space can hold resid bytes.
- *
- * NB: In the event of an error, the caller may need to free the partial
- * chain pointed to by *mpp.  The contents of both *uio and *space may be
- * modified even in the case of an error.
- */
-static int
-sosend_copyin(struct uio *uio, struct mbuf **retmp, int atomic, long *space,
-    int flags)
-{
-	struct mbuf *m, **mp, *top;
-	long len;
-	ssize_t resid;
-	int error;
-	int cow_send;
-
-	*retmp = top = NULL;
-	mp = &top;
-	len = 0;
-	resid = uio->uio_resid;
-	error = 0;
-	do {
-		cow_send = 0;
-		if (resid >= MINCLSIZE) {
-			if (top == NULL) {
-				m = m_gethdr(M_WAITOK, MT_DATA);
-				m->m_pkthdr.len = 0;
-				m->m_pkthdr.rcvif = NULL;
-			} else
-				m = m_get(M_WAITOK, MT_DATA);
-			if (so_zero_copy_send &&
-			    resid >= PAGE_SIZE &&
-			    *space >= PAGE_SIZE &&
-			    uio->uio_iov->iov_len >= PAGE_SIZE) {
-				so_zerocp_stats.size_ok++;
-				so_zerocp_stats.align_ok++;
-				cow_send = socow_setup(m, uio);
-				len = cow_send;
-			}
-			if (!cow_send) {
-				m_clget(m, M_WAITOK);
-				len = min(min(MCLBYTES, resid), *space);
-			}
-		} else {
-			if (top == NULL) {
-				m = m_gethdr(M_WAITOK, MT_DATA);
-				m->m_pkthdr.len = 0;
-				m->m_pkthdr.rcvif = NULL;
-
-				len = min(min(MHLEN, resid), *space);
-				/*
-				 * For datagram protocols, leave room
-				 * for protocol headers in first mbuf.
-				 */
-				if (atomic && m && len < MHLEN)
-					MH_ALIGN(m, len);
-			} else {
-				m = m_get(M_WAITOK, MT_DATA);
-				len = min(min(MLEN, resid), *space);
-			}
-		}
-		if (m == NULL) {
-			error = ENOBUFS;
-			goto out;
-		}
-
-		*space -= len;
-		if (cow_send)
-			error = 0;
-		else
-			error = uiomove(mtod(m, void *), (int)len, uio);
-		resid = uio->uio_resid;
-		m->m_len = len;
-		*mp = m;
-		top->m_pkthdr.len += len;
-		if (error)
-			goto out;
-		mp = &m->m_next;
-		if (resid <= 0) {
-			if (flags & MSG_EOR)
-				top->m_flags |= M_EOR;
-			break;
-		}
-	} while (*space > 0 && atomic);
-out:
-	*retmp = top;
-	return (error);
-}
-#endif /* SOCKET_SEND_COW */
-
 #define	SBLOCKWAIT(f)	(((f) & MSG_DONTWAIT) ? 0 : SBL_WAIT)
 
 int
@@ -1094,9 +971,6 @@ sosend_dgram(struct socket *so, struct s
 	long space;
 	ssize_t resid;
 	int clen = 0, error, dontroute;
-#ifdef SOCKET_SEND_COW
-	int atomic = sosendallatonce(so) || top;
-#endif
 
 	KASSERT(so->so_type == SOCK_DGRAM, ("sosend_dgram: !SOCK_DGRAM"));
 	KASSERT(so->so_proto->pr_flags & PR_ATOMIC,
@@ -1179,11 +1053,6 @@ sosend_dgram(struct socket *so, struct s
 		if (flags & MSG_EOR)
 			top->m_flags |= M_EOR;
 	} else {
-#ifdef SOCKET_SEND_COW
-		error = sosend_copyin(uio, &top, atomic, &space, flags);
-		if (error)
-			goto out;
-#else
 		/*
 		 * Copy the data from userland into a mbuf chain.
 		 * If no data is to be copied in, a single empty mbuf
@@ -1196,7 +1065,6 @@ sosend_dgram(struct socket *so, struct s
 			goto out;
 		}
 		space -= resid - uio->uio_resid;
-#endif /* SOCKET_SEND_COW */
 		resid = uio->uio_resid;
 	}
 	KASSERT(resid == 0, ("sosend_dgram: resid != 0"));
@@ -1368,12 +1236,6 @@ restart:
 				if (flags & MSG_EOR)
 					top->m_flags |= M_EOR;
 			} else {
-#ifdef SOCKET_SEND_COW
-				error = sosend_copyin(uio, &top, atomic,
-				    &space, flags);
-				if (error != 0)
-					goto release;
-#else
 				/*
 				 * Copy the data from userland into a mbuf
 				 * chain.  If no data is to be copied in,
@@ -1388,7 +1250,6 @@ restart:
 					goto release;
 				}
 				space -= resid - uio->uio_resid;
-#endif /* SOCKET_SEND_COW */
 				resid = uio->uio_resid;
 			}
 			if (dontroute) {
@@ -1480,20 +1341,6 @@ soreceive_rcvoob(struct socket *so, stru
 	if (error)
 		goto bad;
 	do {
-#ifdef SOCKET_RECV_PFLIP
-		if (so_zero_copy_receive) {
-			int disposable;
-
-			if ((m->m_flags & M_EXT)
-			 && (m->m_ext.ext_type == EXT_DISPOSABLE))
-				disposable = 1;
-			else
-				disposable = 0;
-
-			error = uiomoveco(mtod(m, void *),
-			    min(uio->uio_resid, m->m_len), uio, disposable);
-		} else
-#endif /* SOCKET_RECV_PFLIP */
 		error = uiomove(mtod(m, void *),
 		    (int) min(uio->uio_resid, m->m_len), uio);
 		m = m_free(m);
@@ -1816,20 +1663,6 @@ dontblock:
 			SBLASTRECORDCHK(&so->so_rcv);
 			SBLASTMBUFCHK(&so->so_rcv);
 			SOCKBUF_UNLOCK(&so->so_rcv);
-#ifdef SOCKET_RECV_PFLIP
-			if (so_zero_copy_receive) {
-				int disposable;
-
-				if ((m->m_flags & M_EXT)
-				 && (m->m_ext.ext_type == EXT_DISPOSABLE))
-					disposable = 1;
-				else
-					disposable = 0;
-
-				error = uiomoveco(mtod(m, char *) + moff,
-				    (int)len, uio, disposable);
-			} else
-#endif /* SOCKET_RECV_PFLIP */
 			error = uiomove(mtod(m, char *) + moff, (int)len, uio);
 			SOCKBUF_LOCK(&so->so_rcv);
 			if (error) {

Modified: head/sys/modules/cxgb/cxgb/Makefile
==============================================================================
--- head/sys/modules/cxgb/cxgb/Makefile	Mon Sep 16 06:15:15 2013	(r255607)
+++ head/sys/modules/cxgb/cxgb/Makefile	Mon Sep 16 06:25:54 2013	(r255608)
@@ -10,7 +10,7 @@ SRCS=	cxgb_mc5.c cxgb_vsc8211.c cxgb_ael
 SRCS+=	cxgb_xgmac.c cxgb_vsc7323.c cxgb_t3_hw.c cxgb_main.c cxgb_aq100x.c
 SRCS+=  cxgb_sge.c cxgb_tn1010.c
 SRCS+=	device_if.h bus_if.h pci_if.h
-SRCS+=	opt_inet.h opt_inet6.h opt_zero.h opt_sched.h
+SRCS+=	opt_inet.h opt_inet6.h opt_sched.h
 SRCS+=	uipc_mvec.c
 
 CFLAGS+= -g -DDEFAULT_JUMBO -I${CXGB}

Modified: head/sys/modules/sfxge/Makefile
==============================================================================
--- head/sys/modules/sfxge/Makefile	Mon Sep 16 06:15:15 2013	(r255607)
+++ head/sys/modules/sfxge/Makefile	Mon Sep 16 06:25:54 2013	(r255608)
@@ -5,7 +5,7 @@ KMOD=	sfxge
 SFXGE= ${.CURDIR}/../../dev/sfxge
 
 SRCS=	device_if.h bus_if.h pci_if.h
-SRCS+=	opt_inet.h opt_zero.h opt_sched.h
+SRCS+=	opt_inet.h opt_sched.h
 
 .PATH: ${.CURDIR}/../../dev/sfxge
 SRCS+=	sfxge.c sfxge_dma.c sfxge_ev.c

Modified: head/sys/modules/ti/Makefile
==============================================================================
--- head/sys/modules/ti/Makefile	Mon Sep 16 06:15:15 2013	(r255607)
+++ head/sys/modules/ti/Makefile	Mon Sep 16 06:25:54 2013	(r255608)
@@ -3,6 +3,6 @@
 .PATH: ${.CURDIR}/../../dev/ti
 
 KMOD=	if_ti
-SRCS=	if_ti.c device_if.h bus_if.h pci_if.h opt_ti.h opt_zero.h
+SRCS=	if_ti.c device_if.h bus_if.h pci_if.h opt_ti.h
 
 .include <bsd.kmod.mk>

Modified: head/sys/sys/socketvar.h
==============================================================================
--- head/sys/sys/socketvar.h	Mon Sep 16 06:15:15 2013	(r255607)
+++ head/sys/sys/socketvar.h	Mon Sep 16 06:25:54 2013	(r255608)
@@ -325,7 +325,6 @@ int	soconnect(struct socket *so, struct 
 int	soconnectat(int fd, struct socket *so, struct sockaddr *nam,
 	    struct thread *td);
 int	soconnect2(struct socket *so1, struct socket *so2);
-int	socow_setup(struct mbuf *m0, struct uio *uio);
 int	socreate(int dom, struct socket **aso, int type, int proto,
 	    struct ucred *cred, struct thread *td);
 int	sodisconnect(struct socket *so);

Modified: head/sys/sys/uio.h
==============================================================================
--- head/sys/sys/uio.h	Mon Sep 16 06:15:15 2013	(r255607)
+++ head/sys/sys/uio.h	Mon Sep 16 06:25:54 2013	(r255608)
@@ -104,7 +104,6 @@ int	uiomove_fromphys(struct vm_page *ma[
 	    struct uio *uio);
 int	uiomove_nofault(void *cp, int n, struct uio *uio);
 int	uiomove_object(struct vm_object *obj, off_t obj_size, struct uio *uio);
-int	uiomoveco(void *cp, int n, struct uio *uio, int disposable);
 
 #else /* !_KERNEL */
 

Modified: head/sys/vm/vm_fault.c
==============================================================================
--- head/sys/vm/vm_fault.c	Mon Sep 16 06:15:15 2013	(r255607)
+++ head/sys/vm/vm_fault.c	Mon Sep 16 06:25:54 2013	(r255608)
@@ -333,24 +333,6 @@ RetryFault:;
 		 */
 		fs.m = vm_page_lookup(fs.object, fs.pindex);
 		if (fs.m != NULL) {
-			/* 
-			 * check for page-based copy on write.
-			 * We check fs.object == fs.first_object so
-			 * as to ensure the legacy COW mechanism is
-			 * used when the page in question is part of
-			 * a shadow object.  Otherwise, vm_page_cowfault()
-			 * removes the page from the backing object, 
-			 * which is not what we want.
-			 */
-			vm_page_lock(fs.m);
-			if ((fs.m->cow) && 
-			    (fault_type & VM_PROT_WRITE) &&
-			    (fs.object == fs.first_object)) {
-				vm_page_cowfault(fs.m);
-				unlock_and_deallocate(&fs);
-				goto RetryFault;
-			}
-
 			/*
 			 * Wait/Retry if the page is busy.  We have to do this
 			 * if the page is either exclusive or shared busy
@@ -374,7 +356,6 @@ RetryFault:;
 				 * likely to reclaim it. 
 				 */
 				vm_page_aflag_set(fs.m, PGA_REFERENCED);
-				vm_page_unlock(fs.m);
 				if (fs.object != fs.first_object) {
 					if (!VM_OBJECT_TRYWLOCK(
 					    fs.first_object)) {
@@ -400,6 +381,7 @@ RetryFault:;
 				vm_object_deallocate(fs.first_object);
 				goto RetryFault;
 			}
+			vm_page_lock(fs.m);
 			vm_page_remque(fs.m);
 			vm_page_unlock(fs.m);
 

Modified: head/sys/vm/vm_page.c
==============================================================================
--- head/sys/vm/vm_page.c	Mon Sep 16 06:15:15 2013	(r255607)
+++ head/sys/vm/vm_page.c	Mon Sep 16 06:25:54 2013	(r255608)
@@ -674,8 +674,8 @@ vm_page_unhold(vm_page_t mem)
 {
 
 	vm_page_lock_assert(mem, MA_OWNED);
+	KASSERT(mem->hold_count >= 1, ("vm_page_unhold: hold count < 0!!!"));
 	--mem->hold_count;
-	KASSERT(mem->hold_count >= 0, ("vm_page_unhold: hold count < 0!!!"));
 	if (mem->hold_count == 0 && (mem->flags & PG_UNHOLDFREE) != 0)
 		vm_page_free_toq(mem);
 }
@@ -3108,108 +3108,6 @@ vm_page_lock_assert_KBI(vm_page_t m, int
 }
 #endif
 
-int so_zerocp_fullpage = 0;
-
-/*
- *	Replace the given page with a copy.  The copied page assumes
- *	the portion of the given page's "wire_count" that is not the
- *	responsibility of this copy-on-write mechanism.
- *
- *	The object containing the given page must have a non-zero
- *	paging-in-progress count and be locked.
- */
-void
-vm_page_cowfault(vm_page_t m)
-{
-	vm_page_t mnew;
-	vm_object_t object;
-	vm_pindex_t pindex;
-
-	vm_page_lock_assert(m, MA_OWNED);
-	object = m->object;
-	VM_OBJECT_ASSERT_WLOCKED(object);
-	KASSERT(object->paging_in_progress != 0,
-	    ("vm_page_cowfault: object %p's paging-in-progress count is zero.",
-	    object)); 
-	pindex = m->pindex;
-
- retry_alloc:
-	mnew = vm_page_alloc(NULL, pindex, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ);
-	if (mnew == NULL) {
-		vm_page_unlock(m);
-		VM_OBJECT_WUNLOCK(object);
-		VM_WAIT;
-		VM_OBJECT_WLOCK(object);
-		if (m == vm_page_lookup(object, pindex)) {
-			vm_page_lock(m);
-			goto retry_alloc;
-		} else {
-			/*
-			 * Page disappeared during the wait.
-			 */
-			return;
-		}
-	}
-
-	if (m->cow == 0) {
-		/* 
-		 * check to see if we raced with an xmit complete when 
-		 * waiting to allocate a page.  If so, put things back 
-		 * the way they were 
-		 */
-		vm_page_unlock(m);
-		vm_page_lock(mnew);
-		vm_page_free(mnew);
-		vm_page_unlock(mnew);
-	} else { /* clear COW & copy page */
-		pmap_remove_all(m);
-		mnew->object = object;
-		if (object->memattr != VM_MEMATTR_DEFAULT &&
-		    (object->flags & OBJ_FICTITIOUS) == 0)
-			pmap_page_set_memattr(mnew, object->memattr);
-		if (vm_page_replace(mnew, object, pindex) != m)
-			panic("vm_page_cowfault: invalid page replacement");
-		if (!so_zerocp_fullpage)
-			pmap_copy_page(m, mnew);
-		mnew->valid = VM_PAGE_BITS_ALL;
-		vm_page_dirty(mnew);
-		mnew->wire_count = m->wire_count - m->cow;
-		m->wire_count = m->cow;
-		vm_page_unlock(m);
-	}
-}
-
-void 
-vm_page_cowclear(vm_page_t m)
-{
-
-	vm_page_lock_assert(m, MA_OWNED);
-	if (m->cow) {
-		m->cow--;
-		/* 
-		 * let vm_fault add back write permission  lazily
-		 */
-	} 
-	/*
-	 *  sf_buf_free() will free the page, so we needn't do it here
-	 */ 
-}
-
-int
-vm_page_cowsetup(vm_page_t m)
-{
-
-	vm_page_lock_assert(m, MA_OWNED);
-	if ((m->flags & PG_FICTITIOUS) != 0 ||
-	    (m->oflags & VPO_UNMANAGED) != 0 ||
-	    m->cow == USHRT_MAX - 1 || !VM_OBJECT_TRYWLOCK(m->object))
-		return (EBUSY);
-	m->cow++;
-	pmap_remove_write(m);
-	VM_OBJECT_WUNLOCK(m->object);
-	return (0);
-}
-
 #ifdef INVARIANTS
 void
 vm_page_object_lock_assert(vm_page_t m)

Modified: head/sys/vm/vm_page.h
==============================================================================
--- head/sys/vm/vm_page.h	Mon Sep 16 06:15:15 2013	(r255607)
+++ head/sys/vm/vm_page.h	Mon Sep 16 06:25:54 2013	(r255608)
@@ -142,23 +142,21 @@ struct vm_page {
 	vm_pindex_t pindex;		/* offset into object (O,P) */
 	vm_paddr_t phys_addr;		/* physical address of page */
 	struct md_page md;		/* machine dependant stuff */
+	u_int wire_count;		/* wired down maps refs (P) */
+	volatile u_int busy_lock;	/* busy owners lock */
+	uint16_t hold_count;		/* page hold count (P) */
+	uint16_t flags;			/* page PG_* flags (P) */
+	uint8_t aflags;			/* access is atomic */
+	uint8_t oflags;			/* page VPO_* flags (O) */
 	uint8_t	queue;			/* page queue index (P,Q) */
 	int8_t segind;
-	short hold_count;		/* page hold count (P) */
 	uint8_t	order;			/* index of the buddy queue */
 	uint8_t pool;
-	u_short cow;			/* page cow mapping count (P) */
-	u_int wire_count;		/* wired down maps refs (P) */
-	uint8_t aflags;			/* access is atomic */
-	uint8_t oflags;			/* page VPO_* flags (O) */
-	uint16_t flags;			/* page PG_* flags (P) */
 	u_char	act_count;		/* page usage count (P) */
-	u_char __pad0;			/* unused padding */
 	/* NOTE that these must support one bit per DEV_BSIZE in a page */
 	/* so, on normal X86 kernels, they must be at least 8 bits wide */
 	vm_page_bits_t valid;		/* map of valid DEV_BSIZE chunks (O) */
 	vm_page_bits_t dirty;		/* map of dirty DEV_BSIZE chunks (M) */
-	volatile u_int busy_lock;	/* busy owners lock */
 };
 
 /*
@@ -482,9 +480,6 @@ vm_page_bits_t vm_page_bits(int base, in
 void vm_page_zero_invalid(vm_page_t m, boolean_t setvalid);
 void vm_page_free_toq(vm_page_t m);
 void vm_page_zero_idle_wakeup(void);
-void vm_page_cowfault (vm_page_t);
-int vm_page_cowsetup(vm_page_t);
-void vm_page_cowclear (vm_page_t);
 
 void vm_page_dirty_KBI(vm_page_t m);
 void vm_page_lock_KBI(vm_page_t m, const char *file, int line);


More information about the svn-src-all mailing list