svn commit: r216699 - in head/sys: dev/cxgb/ulp/tom dev/drm kern net vm
Alan Cox alc at FreeBSD.org
Sat Dec 25 21:26:56 UTC 2010
Author: alc
Date: Sat Dec 25 21:26:56 2010
New Revision: 216699
URL: http://svn.freebsd.org/changeset/base/216699
Log:
Introduce and use a new VM interface for temporarily pinning pages. This
new interface replaces the combined use of vm_fault_quick() and
pmap_extract_and_hold() throughout the kernel.
In collaboration with: kib@
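
[Editorial note: for readers new to the interface, a minimal usage sketch follows. It is illustrative only -- pin_user_range(), ma, and MAXPAGES are hypothetical names, not part of this commit; the hold/unhold pattern mirrors the converted callers in the diff below, and pages are released with vm_page_unhold_pages() as in the old sys_pipe.c code.]

/*
 * Illustrative sketch, not part of the commit: temporarily pin the
 * user pages backing [uaddr, uaddr + len) for read.
 */
#include <sys/param.h>
#include <sys/errno.h>
#include <sys/proc.h>
#include <vm/vm.h>
#include <vm/pmap.h>
#include <vm/vm_extern.h>
#include <vm/vm_map.h>
#include <vm/vm_page.h>

#define	MAXPAGES	8	/* hypothetical upper bound */

static int
pin_user_range(vm_offset_t uaddr, vm_size_t len)
{
	vm_page_t ma[MAXPAGES];
	int count;

	/* Fault in and hold the pages; returns -1 on failure. */
	count = vm_fault_quick_hold_pages(&curproc->p_vmspace->vm_map,
	    uaddr, len, VM_PROT_READ, ma, MAXPAGES);
	if (count < 0)
		return (EFAULT);

	/* ... access the held pages, e.g. by DMA or sf_buf mapping ... */

	/* Drop the holds once the I/O has completed. */
	vm_page_unhold_pages(ma, count);
	return (0);
}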
Deleted:
head/sys/dev/cxgb/ulp/tom/cxgb_vm.c
head/sys/dev/cxgb/ulp/tom/cxgb_vm.h
Modified:
head/sys/dev/cxgb/ulp/tom/cxgb_cpl_socket.c
head/sys/dev/cxgb/ulp/tom/cxgb_ddp.c
head/sys/dev/drm/via_dmablit.c
head/sys/kern/sys_pipe.c
head/sys/kern/uipc_cow.c
head/sys/kern/vfs_bio.c
head/sys/net/bpf_zerocopy.c
head/sys/vm/vm_extern.h
head/sys/vm/vm_fault.c
Modified: head/sys/dev/cxgb/ulp/tom/cxgb_cpl_socket.c
==============================================================================
--- head/sys/dev/cxgb/ulp/tom/cxgb_cpl_socket.c Sat Dec 25 17:35:30 2010 (r216698)
+++ head/sys/dev/cxgb/ulp/tom/cxgb_cpl_socket.c Sat Dec 25 21:26:56 2010 (r216699)
@@ -90,7 +90,6 @@ __FBSDID("$FreeBSD$");
#include <ulp/tom/cxgb_t3_ddp.h>
#include <ulp/tom/cxgb_toepcb.h>
#include <ulp/tom/cxgb_tcp.h>
-#include <ulp/tom/cxgb_vm.h>
static int (*pru_sosend)(struct socket *so, struct sockaddr *addr,
@@ -218,8 +217,9 @@ cxgb_hold_iovec_pages(struct uio *uio, v
count = min(count, npages);
- err = vm_fault_hold_user_pages(map,
- (vm_offset_t)iov->iov_base, mp, count, prot);
+ /* The following return value is not used. XXX */
+ err = vm_fault_quick_hold_pages(map,
+ (vm_offset_t)iov->iov_base, iov->iov_len, prot, mp, count);
mp += count;
totcount += count;
curbytes = iov->iov_len;
@@ -503,7 +503,7 @@ cxgb_sosend(struct socket *so, struct so
* - the number of bytes to be transferred exceeds the threshold
* - the number of bytes currently in flight won't exceed the in-flight
* threshold XXX TODO
- * - vm_fault_hold_user_pages succeeds
+ * - vm_fault_quick_hold_pages succeeds
* - blocking socket XXX for now
*
*/
@@ -970,7 +970,7 @@ cxgb_soreceive(struct socket *so, struct
* - the number of bytes to be transferred exceeds the threshold
* - the number of bytes currently in flight won't exceed the in-flight
* threshold XXX TODO
- * - vm_fault_hold_user_pages succeeds
+ * - vm_fault_quick_hold_pages succeeds
* - blocking socket XXX for now
* - iovcnt is 1
*
Modified: head/sys/dev/cxgb/ulp/tom/cxgb_ddp.c
==============================================================================
--- head/sys/dev/cxgb/ulp/tom/cxgb_ddp.c Sat Dec 25 17:35:30 2010 (r216698)
+++ head/sys/dev/cxgb/ulp/tom/cxgb_ddp.c Sat Dec 25 21:26:56 2010 (r216699)
@@ -90,7 +90,6 @@ __FBSDID("$FreeBSD$");
#include <ulp/tom/cxgb_t3_ddp.h>
#include <ulp/tom/cxgb_toepcb.h>
#include <ulp/tom/cxgb_tcp.h>
-#include <ulp/tom/cxgb_vm.h>
#define MAX_SCHEDULE_TIMEOUT 300
@@ -130,14 +129,6 @@ t3_pin_pages(bus_dma_tag_t tag, bus_dmam
struct ddp_gather_list *p;
vm_map_t map;
- /*
- * XXX need x86 agnostic check
- */
- if (addr + len > VM_MAXUSER_ADDRESS)
- return (EFAULT);
-
-
-
pg_off = addr & PAGE_MASK;
npages = (pg_off + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
p = malloc(sizeof(struct ddp_gather_list) + npages * sizeof(vm_page_t *),
@@ -146,10 +137,11 @@ t3_pin_pages(bus_dma_tag_t tag, bus_dmam
return (ENOMEM);
map = &curthread->td_proc->p_vmspace->vm_map;
- err = vm_fault_hold_user_pages(map, addr, p->dgl_pages, npages,
- VM_PROT_READ | VM_PROT_WRITE);
- if (err)
+ if (vm_fault_quick_hold_pages(map, addr, len, VM_PROT_READ |
+ VM_PROT_WRITE, p->dgl_pages, npages) < 0) {
+ err = EFAULT;
goto free_gl;
+ }
if (gl && gl->dgl_offset == pg_off && gl->dgl_nelem >= npages &&
gl->dgl_length >= len) {
Modified: head/sys/dev/drm/via_dmablit.c
==============================================================================
--- head/sys/dev/drm/via_dmablit.c Sat Dec 25 17:35:30 2010 (r216698)
+++ head/sys/dev/drm/via_dmablit.c Sat Dec 25 21:26:56 2010 (r216699)
@@ -177,11 +177,10 @@ via_free_sg_info(drm_via_sg_info_t *vsg)
free(vsg->desc_pages, DRM_MEM_DRIVER);
case dr_via_pages_locked:
for (i=0; i < vsg->num_pages; ++i) {
- if ( NULL != (page = vsg->pages[i])) {
- vm_page_lock(page);
- vm_page_unwire(page, 0);
- vm_page_unlock(page);
- }
+ page = vsg->pages[i];
+ vm_page_lock(page);
+ vm_page_unwire(page, 0);
+ vm_page_unlock(page);
}
case dr_via_pages_alloc:
free(vsg->pages, DRM_MEM_DRIVER);
@@ -224,41 +223,31 @@ via_lock_all_dma_pages(drm_via_sg_info_t
{
unsigned long first_pfn = VIA_PFN(xfer->mem_addr);
vm_page_t m;
- vm_map_t map;
int i;
- map = &curproc->p_vmspace->vm_map;
-
vsg->num_pages = VIA_PFN(xfer->mem_addr +
(xfer->num_lines * xfer->mem_stride -1)) - first_pfn + 1;
- /* Make sure that the user has access to these pages */
- for(i = 0; i < vsg->num_pages; i++) {
- if (vm_fault_quick((caddr_t)xfer->mem_addr + IDX_TO_OFF(i),
- VM_PROT_RW) < 0)
- return (-EACCES);
- }
-
if (NULL == (vsg->pages = malloc(sizeof(vm_page_t) * vsg->num_pages,
- DRM_MEM_DRIVER, M_NOWAIT | M_ZERO)))
+ DRM_MEM_DRIVER, M_NOWAIT)))
return -ENOMEM;
- for(i = 0; i < vsg->num_pages; i++) {
- m = pmap_extract_and_hold(map->pmap,
- (vm_offset_t)xfer->mem_addr + IDX_TO_OFF(i), VM_PROT_RW);
- if (m == NULL)
- break;
+ vsg->state = dr_via_pages_alloc;
+
+ if (vm_fault_quick_hold_pages(&curproc->p_vmspace->vm_map,
+ (vm_offset_t)xfer->mem_addr, vsg->num_pages * PAGE_SIZE,
+ VM_PROT_READ | VM_PROT_WRITE, vsg->pages, vsg->num_pages) < 0)
+ return -EACCES;
+
+ for (i = 0; i < vsg->num_pages; i++) {
+ m = vsg->pages[i];
vm_page_lock(m);
vm_page_wire(m);
vm_page_unhold(m);
vm_page_unlock(m);
- vsg->pages[i] = m;
}
vsg->state = dr_via_pages_locked;
- if (i != vsg->num_pages)
- return -EINVAL;
-
DRM_DEBUG("DMA pages locked\n");
return 0;
Modified: head/sys/kern/sys_pipe.c
==============================================================================
--- head/sys/kern/sys_pipe.c Sat Dec 25 17:35:30 2010 (r216698)
+++ head/sys/kern/sys_pipe.c Sat Dec 25 21:26:56 2010 (r216699)
@@ -747,10 +747,8 @@ pipe_build_write_buffer(wpipe, uio)
struct pipe *wpipe;
struct uio *uio;
{
- pmap_t pmap;
u_int size;
int i;
- vm_offset_t addr, endaddr;
PIPE_LOCK_ASSERT(wpipe, MA_NOTOWNED);
KASSERT(wpipe->pipe_state & PIPE_DIRECTW,
@@ -760,25 +758,10 @@ pipe_build_write_buffer(wpipe, uio)
if (size > wpipe->pipe_buffer.size)
size = wpipe->pipe_buffer.size;
- pmap = vmspace_pmap(curproc->p_vmspace);
- endaddr = round_page((vm_offset_t)uio->uio_iov->iov_base + size);
- addr = trunc_page((vm_offset_t)uio->uio_iov->iov_base);
- if (endaddr < addr)
+ if ((i = vm_fault_quick_hold_pages(&curproc->p_vmspace->vm_map,
+ (vm_offset_t)uio->uio_iov->iov_base, size, VM_PROT_READ,
+ wpipe->pipe_map.ms, PIPENPAGES)) < 0)
return (EFAULT);
- for (i = 0; addr < endaddr; addr += PAGE_SIZE, i++) {
- /*
- * vm_fault_quick() can sleep.
- */
- race:
- if (vm_fault_quick((caddr_t)addr, VM_PROT_READ) < 0) {
- vm_page_unhold_pages(wpipe->pipe_map.ms, i);
- return (EFAULT);
- }
- wpipe->pipe_map.ms[i] = pmap_extract_and_hold(pmap, addr,
- VM_PROT_READ);
- if (wpipe->pipe_map.ms[i] == NULL)
- goto race;
- }
/*
* set up the control block
Modified: head/sys/kern/uipc_cow.c
==============================================================================
--- head/sys/kern/uipc_cow.c Sat Dec 25 17:35:30 2010 (r216698)
+++ head/sys/kern/uipc_cow.c Sat Dec 25 21:26:56 2010 (r216699)
@@ -103,24 +103,20 @@ socow_setup(struct mbuf *m0, struct uio
struct vmspace *vmspace;
struct vm_map *map;
vm_offset_t offset, uva;
+ vm_size_t len;
socow_stats.attempted++;
vmspace = curproc->p_vmspace;
map = &vmspace->vm_map;
uva = (vm_offset_t) uio->uio_iov->iov_base;
offset = uva & PAGE_MASK;
+ len = PAGE_SIZE - offset;
/*
* Verify that access to the given address is allowed from user-space.
*/
- if (vm_fault_quick((caddr_t)uva, VM_PROT_READ) < 0)
- return (0);
-
- /*
- * verify page is mapped & not already wired for i/o
- */
- pp = pmap_extract_and_hold(map->pmap, uva, VM_PROT_READ);
- if (pp == NULL) {
+ if (vm_fault_quick_hold_pages(map, uva, len, VM_PROT_READ, &pp, 1) <
+ 0) {
socow_stats.fail_not_mapped++;
return(0);
}
@@ -165,7 +161,7 @@ socow_setup(struct mbuf *m0, struct uio
*/
MEXTADD(m0, sf_buf_kva(sf), PAGE_SIZE, socow_iodone,
(void*)sf_buf_kva(sf), sf, M_RDONLY, EXT_SFBUF);
- m0->m_len = PAGE_SIZE - offset;
+ m0->m_len = len;
m0->m_data = (caddr_t)sf_buf_kva(sf) + offset;
socow_stats.success++;
Modified: head/sys/kern/vfs_bio.c
==============================================================================
--- head/sys/kern/vfs_bio.c Sat Dec 25 17:35:30 2010 (r216698)
+++ head/sys/kern/vfs_bio.c Sat Dec 25 21:26:56 2010 (r216699)
@@ -3855,46 +3855,19 @@ vm_hold_free_pages(struct buf *bp, int n
int
vmapbuf(struct buf *bp)
{
- caddr_t addr, kva;
+ caddr_t kva;
vm_prot_t prot;
- int pidx, i;
- struct vm_page *m;
- struct pmap *pmap = &curproc->p_vmspace->vm_pmap;
+ int pidx;
if (bp->b_bufsize < 0)
return (-1);
prot = VM_PROT_READ;
if (bp->b_iocmd == BIO_READ)
prot |= VM_PROT_WRITE; /* Less backwards than it looks */
- for (addr = (caddr_t)trunc_page((vm_offset_t)bp->b_data), pidx = 0;
- addr < bp->b_data + bp->b_bufsize;
- addr += PAGE_SIZE, pidx++) {
- /*
- * Do the vm_fault if needed; do the copy-on-write thing
- * when reading stuff off device into memory.
- *
- * NOTE! Must use pmap_extract() because addr may be in
- * the userland address space, and kextract is only guarenteed
- * to work for the kernland address space (see: sparc64 port).
- */
-retry:
- if (vm_fault_quick(addr >= bp->b_data ? addr : bp->b_data,
- prot) < 0) {
- for (i = 0; i < pidx; ++i) {
- vm_page_lock(bp->b_pages[i]);
- vm_page_unhold(bp->b_pages[i]);
- vm_page_unlock(bp->b_pages[i]);
- bp->b_pages[i] = NULL;
- }
- return(-1);
- }
- m = pmap_extract_and_hold(pmap, (vm_offset_t)addr, prot);
- if (m == NULL)
- goto retry;
- bp->b_pages[pidx] = m;
- }
- if (pidx > btoc(MAXPHYS))
- panic("vmapbuf: mapped more than MAXPHYS");
+ if ((pidx = vm_fault_quick_hold_pages(&curproc->p_vmspace->vm_map,
+ (vm_offset_t)bp->b_data, bp->b_bufsize, prot, bp->b_pages,
+ btoc(MAXPHYS))) < 0)
+ return (-1);
pmap_qenter((vm_offset_t)bp->b_saveaddr, bp->b_pages, pidx);
kva = bp->b_saveaddr;
Modified: head/sys/net/bpf_zerocopy.c
==============================================================================
--- head/sys/net/bpf_zerocopy.c Sat Dec 25 17:35:30 2010 (r216698)
+++ head/sys/net/bpf_zerocopy.c Sat Dec 25 21:26:56 2010 (r216699)
@@ -161,12 +161,8 @@ zbuf_sfbuf_get(struct vm_map *map, vm_of
struct sf_buf *sf;
vm_page_t pp;
- if (vm_fault_quick((caddr_t) uaddr, VM_PROT_READ | VM_PROT_WRITE) <
- 0)
- return (NULL);
- pp = pmap_extract_and_hold(map->pmap, uaddr, VM_PROT_READ |
- VM_PROT_WRITE);
- if (pp == NULL)
+ if (vm_fault_quick_hold_pages(map, uaddr, PAGE_SIZE, VM_PROT_READ |
+ VM_PROT_WRITE, &pp, 1) < 0)
return (NULL);
vm_page_lock(pp);
vm_page_wire(pp);
Modified: head/sys/vm/vm_extern.h
==============================================================================
--- head/sys/vm/vm_extern.h Sat Dec 25 17:35:30 2010 (r216698)
+++ head/sys/vm/vm_extern.h Sat Dec 25 21:26:56 2010 (r216699)
@@ -63,6 +63,8 @@ void vm_fault_copy_entry(vm_map_t, vm_ma
vm_ooffset_t *);
int vm_fault_hold(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type,
int fault_flags, vm_page_t *m_hold);
+int vm_fault_quick_hold_pages(vm_map_t map, vm_offset_t addr, vm_size_t len,
+ vm_prot_t prot, vm_page_t *ma, int max_count);
void vm_fault_unwire(vm_map_t, vm_offset_t, vm_offset_t, boolean_t);
int vm_fault_wire(vm_map_t, vm_offset_t, vm_offset_t, boolean_t);
int vm_forkproc(struct thread *, struct proc *, struct thread *, struct vmspace *, int);
Modified: head/sys/vm/vm_fault.c
==============================================================================
--- head/sys/vm/vm_fault.c Sat Dec 25 17:35:30 2010 (r216698)
+++ head/sys/vm/vm_fault.c Sat Dec 25 21:26:56 2010 (r216699)
@@ -1045,6 +1045,81 @@ vm_fault_prefault(pmap_t pmap, vm_offset
}
/*
+ * Hold each of the physical pages that are mapped by the specified range of
+ * virtual addresses, ["addr", "addr" + "len"), if those mappings are valid
+ * and allow the specified types of access, "prot". If all of the implied
+ * pages are successfully held, then the number of held pages is returned
+ * together with pointers to those pages in the array "ma". However, if any
+ * of the pages cannot be held, -1 is returned.
+ */
+int
+vm_fault_quick_hold_pages(vm_map_t map, vm_offset_t addr, vm_size_t len,
+ vm_prot_t prot, vm_page_t *ma, int max_count)
+{
+ vm_offset_t end, va;
+ vm_page_t *mp;
+ int count;
+ boolean_t pmap_failed;
+
+ end = round_page(addr + len);
+ addr = trunc_page(addr);
+
+ /*
+ * Check for illegal addresses.
+ */
+ if (addr < vm_map_min(map) || addr > end || end > vm_map_max(map))
+ return (-1);
+
+ count = howmany(end - addr, PAGE_SIZE);
+ if (count > max_count)
+ panic("vm_fault_quick_hold_pages: count > max_count");
+
+ /*
+ * Most likely, the physical pages are resident in the pmap, so it is
+ * faster to try pmap_extract_and_hold() first.
+ */
+ pmap_failed = FALSE;
+ for (mp = ma, va = addr; va < end; mp++, va += PAGE_SIZE) {
+ *mp = pmap_extract_and_hold(map->pmap, va, prot);
+ if (*mp == NULL)
+ pmap_failed = TRUE;
+ else if ((prot & VM_PROT_WRITE) != 0 &&
+ (*mp)->dirty != VM_PAGE_BITS_ALL) {
+ /*
+ * Explicitly dirty the physical page. Otherwise, the
+ * caller's changes may go unnoticed because they are
+ * performed through an unmanaged mapping or by a DMA
+ * operation.
+ */
+ vm_page_lock_queues();
+ vm_page_dirty(*mp);
+ vm_page_unlock_queues();
+ }
+ }
+ if (pmap_failed) {
+ /*
+ * One or more pages could not be held by the pmap. Either no
+ * page was mapped at the specified virtual address or that
+ * mapping had insufficient permissions. Attempt to fault in
+ * and hold these pages.
+ */
+ for (mp = ma, va = addr; va < end; mp++, va += PAGE_SIZE)
+ if (*mp == NULL && vm_fault_hold(map, va, prot,
+ VM_FAULT_NORMAL, mp) != KERN_SUCCESS)
+ goto error;
+ }
+ return (count);
+error:
+ for (mp = ma; mp < ma + count; mp++)
+ if (*mp != NULL) {
+ vm_page_lock(*mp);
+ vm_page_unhold(*mp);
+ vm_page_unlock(*mp);
+ }
+ return (-1);
+}
+
+/*
* vm_fault_quick:
*
* Ensure that the requested virtual address, which may be in userland,