svn commit: r248508 - in head/sys: amd64/amd64 arm/arm geom i386/i386 i386/xen ia64/ia64 kern mips/mips powerpc/aim powerpc/powerpc sparc64/sparc64 sys vm

Konstantin Belousov kib at FreeBSD.org
Tue Mar 19 14:13:15 UTC 2013


Author: kib
Date: Tue Mar 19 14:13:12 2013
New Revision: 248508
URL: http://svnweb.freebsd.org/changeset/base/248508

Log:
  Implement the concept of unmapped VMIO buffers, i.e. buffers which do
  not map their b_pages pages into buffer_map KVA.  The use of unmapped
  buffers eliminates the TLB shootdowns otherwise needed to establish
  mappings on buffer creation and reuse, greatly reducing the number of
  shootdown IPIs on big SMP machines and eliminating up to 25-30% of
  the system time on I/O-intensive workloads.
  
  An unmapped buffer must be explicitly requested by the consumer with
  the GB_UNMAPPED flag.  For an unmapped buffer, no KVA reservation is
  performed at all.  With the GB_KVAALLOC flag, the consumer may request
  an unmapped buffer that still carries a KVA reservation, so the
  consumer can map it manually without recursing into the buffer cache
  and blocking.
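  
  For illustration, a consumer that can work on the pages directly
  might request such a buffer roughly as follows (a minimal sketch;
  vp, lblkno and size are placeholders, not part of this change):
  
	struct buf *bp;

	/* Ask getblk() not to map b_pages into buffer_map KVA. */
	bp = getblk(vp, lblkno, size, 0, 0, GB_UNMAPPED);

	/*
	 * With GB_UNMAPPED alone, bp->b_data is not usable; the consumer
	 * operates on bp->b_pages[].  Requesting GB_UNMAPPED | GB_KVAALLOC
	 * instead also reserves KVA, so the consumer can map the pages
	 * itself without recursing into the buffer cache.
	 */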
  
  When a mapped buffer is requested and an unmapped buffer already
  exists, the cache performs an upgrade, possibly reusing the KVA
  reservation.
  
  An unmapped buffer is translated into an unmapped bio in
  g_vfs_strategy().  An unmapped bio carries a pointer to the vm_page_t
  array, an offset and a length instead of the data pointer.  The
  provider which processes the bio must explicitly declare its readiness
  to accept unmapped bios; otherwise the g_down geom thread performs a
  transient upgrade of the bio request by mapping the pages into the new
  bio_transient_map KVA submap.
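  
  For illustration, a GEOM class whose start routine can consume the
  page array directly would opt in roughly as follows (a minimal
  sketch; pp, bp and the surrounding driver code are placeholders):
  
	/* When creating the provider, advertise unmapped support. */
	pp->flags |= G_PF_ACCEPT_UNMAPPED;

	/* In the start routine. */
	if ((bp->bio_flags & BIO_UNMAPPED) != 0) {
		/*
		 * No usable bio_data; the payload is described by
		 * bio_ma[0 .. bio_ma_n - 1], starting at bio_ma_offset
		 * within the first page.  A busdma-based driver can keep
		 * calling bus_dmamap_load_bio(), which handles both
		 * cases (see the subr_bus_dma.c hunk below).
		 */
	} else {
		/* Mapped as before: bio_data points at the payload. */
	}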
  
  The bio_transient_map submap claims up to 10% of the buffer map, so
  the total buffer_map + bio_transient_map KVA usage stays the same.
  Still, it can be tuned manually with the kern.bio_transient_maxcnt
  tunable, in units of transient mappings.  Eventually, bio_transient_map
  could be removed once all geom classes and drivers accept unmapped
  I/O requests.
  
  Unmapped support can be turned off with the vfs.unmapped_buf_allowed
  tunable; disabling it makes buffer (or cluster) creation requests
  ignore the GB_UNMAPPED and GB_KVAALLOC flags.  Unmapped buffers are
  only enabled by default on the architectures where pmap_copy_page()
  has been implemented and tested.
  
  In the rework, filesystem metadata is no longer subject to the
  maxbufspace limit.  Since metadata buffers are always mapped, they
  still have to fit into the buffer map, which provides a reasonable
  (but practically unreachable) upper bound.  Non-metadata buffer
  allocations, both mapped and unmapped, are accounted against
  maxbufspace, as before.  Effectively, this means that maxbufspace is
  enforced on mapped and unmapped buffers separately.  The pre-patch
  bufspace limiting code did not work, because buffer_map fragmentation
  does not allow the limit to be reached.
  
  At Jeff Roberson's request, the getnewbuf() function was split into
  smaller single-purpose functions.
  
  Sponsored by:	The FreeBSD Foundation
  Discussed with:	jeff (previous version)
  Tested by:	pho, scottl (previous version), jhb, bf
  MFC after:	2 weeks

Modified:
  head/sys/amd64/amd64/pmap.c
  head/sys/arm/arm/pmap-v6.c
  head/sys/arm/arm/pmap.c
  head/sys/geom/geom.h
  head/sys/geom/geom_io.c
  head/sys/geom/geom_vfs.c
  head/sys/i386/i386/pmap.c
  head/sys/i386/xen/pmap.c
  head/sys/ia64/ia64/pmap.c
  head/sys/kern/subr_bus_dma.c
  head/sys/kern/subr_param.c
  head/sys/kern/vfs_bio.c
  head/sys/kern/vfs_cluster.c
  head/sys/mips/mips/pmap.c
  head/sys/powerpc/aim/mmu_oea64.c
  head/sys/powerpc/powerpc/pmap_dispatch.c
  head/sys/sparc64/sparc64/pmap.c
  head/sys/sys/bio.h
  head/sys/sys/buf.h
  head/sys/sys/systm.h
  head/sys/vm/vm.h
  head/sys/vm/vm_init.c
  head/sys/vm/vm_kern.c

Modified: head/sys/amd64/amd64/pmap.c
==============================================================================
--- head/sys/amd64/amd64/pmap.c	Tue Mar 19 13:43:55 2013	(r248507)
+++ head/sys/amd64/amd64/pmap.c	Tue Mar 19 14:13:12 2013	(r248508)
@@ -4235,6 +4235,8 @@ pmap_copy_page(vm_page_t msrc, vm_page_t
 	pagecopy((void *)src, (void *)dst);
 }
 
+int unmapped_buf_allowed = 1;
+
 void
 pmap_copy_pages(vm_page_t ma[], vm_offset_t a_offset, vm_page_t mb[],
     vm_offset_t b_offset, int xfersize)
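
Each architecture that enables unmapped buffers defines
unmapped_buf_allowed next to pmap_copy_pages(), the primitive that lets
the buffer code copy data between pages without the caller mapping them.
As an illustration of what such a primitive looks like on a direct-map
architecture (a sketch in the spirit of the amd64 version, not the
verbatim kernel code):

void
pmap_copy_pages(vm_page_t ma[], vm_offset_t a_offset, vm_page_t mb[],
    vm_offset_t b_offset, int xfersize)
{
	void *a_cp, *b_cp;
	vm_offset_t a_pg_offset, b_pg_offset;
	int cnt;

	while (xfersize > 0) {
		/* Copy at most to the end of either page per iteration. */
		a_pg_offset = a_offset & PAGE_MASK;
		cnt = min(xfersize, PAGE_SIZE - a_pg_offset);
		a_cp = (char *)PHYS_TO_DMAP(
		    VM_PAGE_TO_PHYS(ma[a_offset >> PAGE_SHIFT])) + a_pg_offset;
		b_pg_offset = b_offset & PAGE_MASK;
		cnt = min(cnt, PAGE_SIZE - b_pg_offset);
		b_cp = (char *)PHYS_TO_DMAP(
		    VM_PAGE_TO_PHYS(mb[b_offset >> PAGE_SHIFT])) + b_pg_offset;
		bcopy(a_cp, b_cp, cnt);
		a_offset += cnt;
		b_offset += cnt;
		xfersize -= cnt;
	}
}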

Modified: head/sys/arm/arm/pmap-v6.c
==============================================================================
--- head/sys/arm/arm/pmap-v6.c	Tue Mar 19 13:43:55 2013	(r248507)
+++ head/sys/arm/arm/pmap-v6.c	Tue Mar 19 14:13:12 2013	(r248508)
@@ -3312,6 +3312,8 @@ pmap_copy_page_generic(vm_paddr_t src, v
 	mtx_unlock(&cmtx);
 }
 
+int unmapped_buf_allowed = 1;
+
 void
 pmap_copy_pages(vm_page_t ma[], vm_offset_t a_offset, vm_page_t mb[],
     vm_offset_t b_offset, int xfersize)

Modified: head/sys/arm/arm/pmap.c
==============================================================================
--- head/sys/arm/arm/pmap.c	Tue Mar 19 13:43:55 2013	(r248507)
+++ head/sys/arm/arm/pmap.c	Tue Mar 19 14:13:12 2013	(r248508)
@@ -4428,6 +4428,8 @@ pmap_copy_page(vm_page_t src, vm_page_t 
 #endif
 }
 
+int unmapped_buf_allowed = 1;
+
 void
 pmap_copy_pages(vm_page_t ma[], vm_offset_t a_offset, vm_page_t mb[],
     vm_offset_t b_offset, int xfersize)

Modified: head/sys/geom/geom.h
==============================================================================
--- head/sys/geom/geom.h	Tue Mar 19 13:43:55 2013	(r248507)
+++ head/sys/geom/geom.h	Tue Mar 19 14:13:12 2013	(r248508)
@@ -205,6 +205,7 @@ struct g_provider {
 	u_int			flags;
 #define G_PF_WITHER		0x2
 #define G_PF_ORPHAN		0x4
+#define	G_PF_ACCEPT_UNMAPPED	0x8
 
 	/* Two fields for the implementing class to use */
 	void			*private;

Modified: head/sys/geom/geom_io.c
==============================================================================
--- head/sys/geom/geom_io.c	Tue Mar 19 13:43:55 2013	(r248507)
+++ head/sys/geom/geom_io.c	Tue Mar 19 14:13:12 2013	(r248508)
@@ -1,6 +1,7 @@
 /*-
  * Copyright (c) 2002 Poul-Henning Kamp
  * Copyright (c) 2002 Networks Associates Technology, Inc.
+ * Copyright (c) 2013 The FreeBSD Foundation
  * All rights reserved.
  *
  * This software was developed for the FreeBSD Project by Poul-Henning Kamp
@@ -8,6 +9,9 @@
  * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the
  * DARPA CHATS research program.
  *
+ * Portions of this software were developed by Konstantin Belousov
+ * under sponsorship from the FreeBSD Foundation.
+ *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -44,6 +48,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/ktr.h>
 #include <sys/proc.h>
 #include <sys/stack.h>
+#include <sys/sysctl.h>
 
 #include <sys/errno.h>
 #include <geom/geom.h>
@@ -51,6 +56,13 @@ __FBSDID("$FreeBSD$");
 #include <sys/devicestat.h>
 
 #include <vm/uma.h>
+#include <vm/vm.h>
+#include <vm/vm_param.h>
+#include <vm/vm_kern.h>
+#include <vm/vm_page.h>
+#include <vm/vm_object.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_map.h>
 
 static struct g_bioq g_bio_run_down;
 static struct g_bioq g_bio_run_up;
@@ -180,12 +192,17 @@ g_clone_bio(struct bio *bp)
 		/*
 		 *  BIO_ORDERED flag may be used by disk drivers to enforce
 		 *  ordering restrictions, so this flag needs to be cloned.
+		 *  BIO_UNMAPPED should be inherited, to properly indicate
+		 *  which way the buffer is passed.
 		 *  Other bio flags are not suitable for cloning.
 		 */
-		bp2->bio_flags = bp->bio_flags & BIO_ORDERED;
+		bp2->bio_flags = bp->bio_flags & (BIO_ORDERED | BIO_UNMAPPED);
 		bp2->bio_length = bp->bio_length;
 		bp2->bio_offset = bp->bio_offset;
 		bp2->bio_data = bp->bio_data;
+		bp2->bio_ma = bp->bio_ma;
+		bp2->bio_ma_n = bp->bio_ma_n;
+		bp2->bio_ma_offset = bp->bio_ma_offset;
 		bp2->bio_attribute = bp->bio_attribute;
 		/* Inherit classification info from the parent */
 		bp2->bio_classifier1 = bp->bio_classifier1;
@@ -210,11 +227,15 @@ g_duplicate_bio(struct bio *bp)
 	struct bio *bp2;
 
 	bp2 = uma_zalloc(biozone, M_WAITOK | M_ZERO);
+	bp2->bio_flags = bp->bio_flags & BIO_UNMAPPED;
 	bp2->bio_parent = bp;
 	bp2->bio_cmd = bp->bio_cmd;
 	bp2->bio_length = bp->bio_length;
 	bp2->bio_offset = bp->bio_offset;
 	bp2->bio_data = bp->bio_data;
+	bp2->bio_ma = bp->bio_ma;
+	bp2->bio_ma_n = bp->bio_ma_n;
+	bp2->bio_ma_offset = bp->bio_ma_offset;
 	bp2->bio_attribute = bp->bio_attribute;
 	bp->bio_children++;
 #ifdef KTR
@@ -575,6 +596,83 @@ g_io_deliver(struct bio *bp, int error)
 	return;
 }
 
+SYSCTL_DECL(_kern_geom);
+
+static long transient_maps;
+SYSCTL_LONG(_kern_geom, OID_AUTO, transient_maps, CTLFLAG_RD,
+    &transient_maps, 0,
+    "Total count of the transient mapping requests");
+u_int transient_map_retries = 10;
+SYSCTL_UINT(_kern_geom, OID_AUTO, transient_map_retries, CTLFLAG_RW,
+    &transient_map_retries, 0,
+    "Max count of retries used before giving up on creating transient map");
+int transient_map_hard_failures;
+SYSCTL_INT(_kern_geom, OID_AUTO, transient_map_hard_failures, CTLFLAG_RD,
+    &transient_map_hard_failures, 0,
+    "Failures to establish the transient mapping due to retry attempts "
+    "exhausted");
+int transient_map_soft_failures;
+SYSCTL_INT(_kern_geom, OID_AUTO, transient_map_soft_failures, CTLFLAG_RD,
+    &transient_map_soft_failures, 0,
+    "Count of retried failures to establish the transient mapping");
+int inflight_transient_maps;
+SYSCTL_INT(_kern_geom, OID_AUTO, inflight_transient_maps, CTLFLAG_RD,
+    &inflight_transient_maps, 0,
+    "Current count of the active transient maps");
+
+static int
+g_io_transient_map_bio(struct bio *bp)
+{
+	vm_offset_t addr;
+	long size;
+	u_int retried;
+	int rv;
+
+	size = round_page(bp->bio_ma_offset + bp->bio_length);
+	KASSERT(size / PAGE_SIZE == bp->bio_ma_n, ("Bio too short %p", bp));
+	addr = 0;
+	retried = 0;
+	atomic_add_long(&transient_maps, 1);
+retry:
+	vm_map_lock(bio_transient_map);
+	if (vm_map_findspace(bio_transient_map, vm_map_min(bio_transient_map),
+	    size, &addr)) {
+		vm_map_unlock(bio_transient_map);
+		if (transient_map_retries != 0 &&
+		    retried >= transient_map_retries) {
+			g_io_deliver(bp, EDEADLK/* XXXKIB */);
+			CTR2(KTR_GEOM, "g_down cannot map bp %p provider %s",
+			    bp, bp->bio_to->name);
+			atomic_add_int(&transient_map_hard_failures, 1);
+			return (1);
+		} else {
+			/*
+			 * Naive attempt to quiesce the I/O to get more
+			 * in-flight requests completed and defragment
+			 * the bio_transient_map.
+			 */
+			CTR3(KTR_GEOM, "g_down retrymap bp %p provider %s r %d",
+			    bp, bp->bio_to->name, retried);
+			pause("g_d_tra", hz / 10);
+			retried++;
+			atomic_add_int(&transient_map_soft_failures, 1);
+			goto retry;
+		}
+	}
+	rv = vm_map_insert(bio_transient_map, NULL, 0, addr, addr + size,
+	    VM_PROT_RW, VM_PROT_RW, MAP_NOFAULT);
+	KASSERT(rv == KERN_SUCCESS,
+	    ("vm_map_insert(bio_transient_map) rv %d %jx %lx",
+	    rv, (uintmax_t)addr, size));
+	vm_map_unlock(bio_transient_map);
+	atomic_add_int(&inflight_transient_maps, 1);
+	pmap_qenter((vm_offset_t)addr, bp->bio_ma, OFF_TO_IDX(size));
+	bp->bio_data = (caddr_t)addr + bp->bio_ma_offset;
+	bp->bio_flags |= BIO_TRANSIENT_MAPPING;
+	bp->bio_flags &= ~BIO_UNMAPPED;
+	return (0);
+}
+
 void
 g_io_schedule_down(struct thread *tp __unused)
 {
@@ -636,6 +734,12 @@ g_io_schedule_down(struct thread *tp __u
 		default:
 			break;
 		}
+		if ((bp->bio_flags & BIO_UNMAPPED) != 0 &&
+		    (bp->bio_to->flags & G_PF_ACCEPT_UNMAPPED) == 0 &&
+		    (bp->bio_cmd == BIO_READ || bp->bio_cmd == BIO_WRITE)) {
+			if (g_io_transient_map_bio(bp))
+				continue;
+		}
 		THREAD_NO_SLEEPING();
 		CTR4(KTR_GEOM, "g_down starting bp %p provider %s off %ld "
 		    "len %ld", bp, bp->bio_to->name, bp->bio_offset,

Modified: head/sys/geom/geom_vfs.c
==============================================================================
--- head/sys/geom/geom_vfs.c	Tue Mar 19 13:43:55 2013	(r248507)
+++ head/sys/geom/geom_vfs.c	Tue Mar 19 14:13:12 2013	(r248508)
@@ -188,14 +188,14 @@ g_vfs_strategy(struct bufobj *bo, struct
 	bip = g_alloc_bio();
 	bip->bio_cmd = bp->b_iocmd;
 	bip->bio_offset = bp->b_iooffset;
-	bip->bio_data = bp->b_data;
-	bip->bio_done = g_vfs_done;
-	bip->bio_caller2 = bp;
 	bip->bio_length = bp->b_bcount;
-	if (bp->b_flags & B_BARRIER) {
+	bdata2bio(bp, bip);
+	if ((bp->b_flags & B_BARRIER) != 0) {
 		bip->bio_flags |= BIO_ORDERED;
 		bp->b_flags &= ~B_BARRIER;
 	}
+	bip->bio_done = g_vfs_done;
+	bip->bio_caller2 = bp;
 	g_io_request(bip, cp);
 }
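
g_vfs_strategy() now fills in the bio payload through bdata2bio(); its
definition lands in sys/sys/buf.h, past the truncation point of this
mail.  A sketch of the expected logic, inferred from the buf and bio
fields used elsewhere in this diff (not the verbatim header change):

static inline void
bdata2bio(struct buf *bp, struct bio *bip)
{

	if ((bp->b_flags & B_UNMAPPED) != 0) {
		/* Hand GEOM the page array instead of a kernel mapping. */
		bip->bio_ma = bp->b_pages;
		bip->bio_ma_n = bp->b_npages;
		bip->bio_ma_offset = (vm_offset_t)bp->b_offset & PAGE_MASK;
		/* unmapped_buf is the sentinel KVA set up in bufinit(). */
		bip->bio_data = unmapped_buf;
		bip->bio_flags |= BIO_UNMAPPED;
	} else {
		bip->bio_data = bp->b_data;
		bip->bio_ma = NULL;
	}
}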
 

Modified: head/sys/i386/i386/pmap.c
==============================================================================
--- head/sys/i386/i386/pmap.c	Tue Mar 19 13:43:55 2013	(r248507)
+++ head/sys/i386/i386/pmap.c	Tue Mar 19 14:13:12 2013	(r248508)
@@ -4205,6 +4205,8 @@ pmap_copy_page(vm_page_t src, vm_page_t 
 	mtx_unlock(&sysmaps->lock);
 }
 
+int unmapped_buf_allowed = 1;
+
 void
 pmap_copy_pages(vm_page_t ma[], vm_offset_t a_offset, vm_page_t mb[],
     vm_offset_t b_offset, int xfersize)

Modified: head/sys/i386/xen/pmap.c
==============================================================================
--- head/sys/i386/xen/pmap.c	Tue Mar 19 13:43:55 2013	(r248507)
+++ head/sys/i386/xen/pmap.c	Tue Mar 19 14:13:12 2013	(r248508)
@@ -3448,6 +3448,8 @@ pmap_copy_page(vm_page_t src, vm_page_t 
 	mtx_unlock(&sysmaps->lock);
 }
 
+int unmapped_buf_allowed = 1;
+
 void
 pmap_copy_pages(vm_page_t ma[], vm_offset_t a_offset, vm_page_t mb[],
     vm_offset_t b_offset, int xfersize)

Modified: head/sys/ia64/ia64/pmap.c
==============================================================================
--- head/sys/ia64/ia64/pmap.c	Tue Mar 19 13:43:55 2013	(r248507)
+++ head/sys/ia64/ia64/pmap.c	Tue Mar 19 14:13:12 2013	(r248508)
@@ -2014,6 +2014,8 @@ pmap_copy_page(vm_page_t msrc, vm_page_t
 	bcopy(src, dst, PAGE_SIZE);
 }
 
+int unmapped_buf_allowed;
+
 void
 pmap_copy_pages(vm_page_t ma[], vm_offset_t a_offset, vm_page_t mb[],
     vm_offset_t b_offset, int xfersize)

Modified: head/sys/kern/subr_bus_dma.c
==============================================================================
--- head/sys/kern/subr_bus_dma.c	Tue Mar 19 13:43:55 2013	(r248507)
+++ head/sys/kern/subr_bus_dma.c	Tue Mar 19 14:13:12 2013	(r248508)
@@ -126,11 +126,28 @@ static int
 _bus_dmamap_load_bio(bus_dma_tag_t dmat, bus_dmamap_t map, struct bio *bio,
     int *nsegs, int flags)
 {
-	int error;
-
-	error = _bus_dmamap_load_buffer(dmat, map, bio->bio_data,
-	    bio->bio_bcount, kernel_pmap, flags, NULL, nsegs);
+	vm_paddr_t paddr;
+	bus_size_t len, tlen;
+	int error, i, ma_offs;
+
+	if ((bio->bio_flags & BIO_UNMAPPED) == 0) {
+		error = _bus_dmamap_load_buffer(dmat, map, bio->bio_data,
+		    bio->bio_bcount, kernel_pmap, flags, NULL, nsegs);
+		return (error);
+	}
 
+	error = 0;
+	tlen = bio->bio_bcount;
+	ma_offs = bio->bio_ma_offset;
+	for (i = 0; tlen > 0; i++, tlen -= len) {
+		len = min(PAGE_SIZE - ma_offs, tlen);
+		paddr = VM_PAGE_TO_PHYS(bio->bio_ma[i]) + ma_offs;
+		error = _bus_dmamap_load_phys(dmat, map, paddr, len,
+		    flags, NULL, nsegs);
+		if (error != 0)
+			break;
+		ma_offs = 0;
+	}
 	return (error);
 }
 

Modified: head/sys/kern/subr_param.c
==============================================================================
--- head/sys/kern/subr_param.c	Tue Mar 19 13:43:55 2013	(r248507)
+++ head/sys/kern/subr_param.c	Tue Mar 19 14:13:12 2013	(r248508)
@@ -92,6 +92,7 @@ int	maxfiles;			/* sys. wide open files 
 int	maxfilesperproc;		/* per-proc open files limit */
 int	msgbufsize;			/* size of kernel message buffer */
 int	nbuf;
+int	bio_transient_maxcnt;
 int	ngroups_max;			/* max # groups per process */
 int	nswbuf;
 pid_t	pid_max = PID_MAX;
@@ -118,6 +119,9 @@ SYSCTL_LONG(_kern, OID_AUTO, maxswzone, 
     "Maximum memory for swap metadata");
 SYSCTL_LONG(_kern, OID_AUTO, maxbcache, CTLFLAG_RDTUN, &maxbcache, 0,
     "Maximum value of vfs.maxbufspace");
+SYSCTL_INT(_kern, OID_AUTO, bio_transient_maxcnt, CTLFLAG_RDTUN,
+    &bio_transient_maxcnt, 0,
+    "Maximum number of transient BIO mappings");
 SYSCTL_ULONG(_kern, OID_AUTO, maxtsiz, CTLFLAG_RW | CTLFLAG_TUN, &maxtsiz, 0,
     "Maximum text size");
 SYSCTL_ULONG(_kern, OID_AUTO, dfldsiz, CTLFLAG_RW | CTLFLAG_TUN, &dfldsiz, 0,
@@ -266,6 +270,8 @@ init_param1(void)
 		pid_max = PID_MAX;
 	else if (pid_max < 300)
 		pid_max = 300;
+
+	TUNABLE_INT_FETCH("vfs.unmapped_buf_allowed", &unmapped_buf_allowed);
 }
 
 /*
@@ -322,6 +328,7 @@ init_param2(long physpages)
 	 */
 	nbuf = NBUF;
 	TUNABLE_INT_FETCH("kern.nbuf", &nbuf);
+	TUNABLE_INT_FETCH("kern.bio_transient_maxcnt", &bio_transient_maxcnt);
 
 	/*
 	 * The default for maxpipekva is min(1/64 of the kernel address space,

Modified: head/sys/kern/vfs_bio.c
==============================================================================
--- head/sys/kern/vfs_bio.c	Tue Mar 19 13:43:55 2013	(r248507)
+++ head/sys/kern/vfs_bio.c	Tue Mar 19 14:13:12 2013	(r248508)
@@ -1,8 +1,12 @@
 /*-
  * Copyright (c) 2004 Poul-Henning Kamp
  * Copyright (c) 1994,1997 John S. Dyson
+ * Copyright (c) 2013 The FreeBSD Foundation
  * All rights reserved.
  *
+ * Portions of this software were developed by Konstantin Belousov
+ * under sponsorship from the FreeBSD Foundation.
+ *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -92,6 +96,7 @@ struct	buf_ops buf_ops_bio = {
  * carnal knowledge of buffers.  This knowledge should be moved to vfs_bio.c.
  */
 struct buf *buf;		/* buffer header pool */
+caddr_t unmapped_buf;
 
 static struct proc *bufdaemonproc;
 
@@ -132,6 +137,10 @@ SYSCTL_PROC(_vfs, OID_AUTO, bufspace, CT
 SYSCTL_LONG(_vfs, OID_AUTO, bufspace, CTLFLAG_RD, &bufspace, 0,
     "Virtual memory used for buffers");
 #endif
+static long unmapped_bufspace;
+SYSCTL_LONG(_vfs, OID_AUTO, unmapped_bufspace, CTLFLAG_RD,
+    &unmapped_bufspace, 0,
+    "Amount of unmapped buffers, inclusive in the bufspace");
 static long maxbufspace;
 SYSCTL_LONG(_vfs, OID_AUTO, maxbufspace, CTLFLAG_RD, &maxbufspace, 0,
     "Maximum allowed value of bufspace (including buf_daemon)");
@@ -201,6 +210,10 @@ SYSCTL_INT(_vfs, OID_AUTO, getnewbufcall
 static int getnewbufrestarts;
 SYSCTL_INT(_vfs, OID_AUTO, getnewbufrestarts, CTLFLAG_RW, &getnewbufrestarts, 0,
     "Number of times getnewbuf has had to restart a buffer aquisition");
+static int mappingrestarts;
+SYSCTL_INT(_vfs, OID_AUTO, mappingrestarts, CTLFLAG_RW, &mappingrestarts, 0,
+    "Number of times getblk has had to restart a buffer mapping for "
+    "unmapped buffer");
 static int flushbufqtarget = 100;
 SYSCTL_INT(_vfs, OID_AUTO, flushbufqtarget, CTLFLAG_RW, &flushbufqtarget, 0,
     "Amount of work to do in flushbufqueues when helping bufdaemon");
@@ -210,6 +223,9 @@ SYSCTL_LONG(_vfs, OID_AUTO, notbufdflash
 static long barrierwrites;
 SYSCTL_LONG(_vfs, OID_AUTO, barrierwrites, CTLFLAG_RW, &barrierwrites, 0,
     "Number of barrier writes");
+SYSCTL_INT(_vfs, OID_AUTO, unmapped_buf_allowed, CTLFLAG_RD,
+    &unmapped_buf_allowed, 0,
+    "Permit the use of the unmapped i/o");
 
 /*
  * Wakeup point for bufdaemon, as well as indicator of whether it is already
@@ -281,6 +297,9 @@ static struct mtx nblock;
 
 /* Queues for free buffers with various properties */
 static TAILQ_HEAD(bqueues, buf) bufqueues[BUFFER_QUEUES] = { { 0 } };
+#ifdef INVARIANTS
+static int bq_len[BUFFER_QUEUES];
+#endif
 
 /* Lock for the bufqueues */
 static struct mtx bqlock;
@@ -511,7 +530,7 @@ caddr_t
 kern_vfs_bio_buffer_alloc(caddr_t v, long physmem_est)
 {
 	int tuned_nbuf;
-	long maxbuf;
+	long maxbuf, maxbuf_sz, buf_sz,	biotmap_sz;
 
 	/*
 	 * physmem_est is in pages.  Convert it to kilobytes (assumes
@@ -555,6 +574,52 @@ kern_vfs_bio_buffer_alloc(caddr_t v, lon
 	}
 
 	/*
+	 * Ideal allocation size for the transient bio submap is 10%
+	 * of the maximal space buffer map.  This roughly corresponds
+	 * to the amount of the buffer mapped for typical UFS load.
+	 *
+	 * Clip the buffer map to reserve space for the transient
+	 * BIOs, if its extent is bigger than 90% of the maximum
+	 * buffer map extent on the platform.
+	 *
+	 * The fall-back to the maxbuf in case of maxbcache unset,
+	 * allows to not trim the buffer KVA for the architectures
+	 * with ample KVA space.
+	 */
+	if (bio_transient_maxcnt == 0) {
+		maxbuf_sz = maxbcache != 0 ? maxbcache : maxbuf * BKVASIZE;
+		buf_sz = (long)nbuf * BKVASIZE;
+		if (buf_sz < maxbuf_sz / 10 * 9) {
+			/*
+			 * There is more KVA than memory.  Do not
+			 * adjust buffer map size, and assign the rest
+			 * of maxbuf to transient map.
+			 */
+			biotmap_sz = maxbuf_sz - buf_sz;
+		} else {
+			/*
+			 * Buffer map spans all KVA we could afford on
+			 * this platform.  Give 10% of the buffer map
+			 * to the transient bio map.
+			 */
+ 			biotmap_sz = buf_sz / 10;
+			buf_sz -= biotmap_sz;
+		}
+		if (biotmap_sz / INT_MAX > MAXPHYS)
+			bio_transient_maxcnt = INT_MAX;
+		else
+			bio_transient_maxcnt = biotmap_sz / MAXPHYS;
+		/*
+		 * Artificially limit to 1024 simultaneous in-flight I/Os
+		 * using the transient mapping.
+		 */
+		if (bio_transient_maxcnt > 1024)
+			bio_transient_maxcnt = 1024;
+		if (tuned_nbuf)
+			nbuf = buf_sz / BKVASIZE;
+	}
+
+	/*
 	 * swbufs are used as temporary holders for I/O, such as paging I/O.
 	 * We have no less then 16 and no more then 256.
 	 */
@@ -607,6 +672,9 @@ bufinit(void)
 		LIST_INIT(&bp->b_dep);
 		BUF_LOCKINIT(bp);
 		TAILQ_INSERT_TAIL(&bufqueues[QUEUE_EMPTY], bp, b_freelist);
+#ifdef INVARIANTS
+		bq_len[QUEUE_EMPTY]++;
+#endif
 	}
 
 	/*
@@ -675,6 +743,55 @@ bufinit(void)
 
 	bogus_page = vm_page_alloc(NULL, 0, VM_ALLOC_NOOBJ |
 	    VM_ALLOC_NORMAL | VM_ALLOC_WIRED);
+	unmapped_buf = (caddr_t)kmem_alloc_nofault(kernel_map, MAXPHYS);
+}
+
+#ifdef INVARIANTS
+static inline void
+vfs_buf_check_mapped(struct buf *bp)
+{
+
+	KASSERT((bp->b_flags & B_UNMAPPED) == 0,
+	    ("mapped buf %p %x", bp, bp->b_flags));
+	KASSERT(bp->b_kvabase != unmapped_buf,
+	    ("mapped buf: b_kvabase was not updated %p", bp));
+	KASSERT(bp->b_data != unmapped_buf,
+	    ("mapped buf: b_data was not updated %p", bp));
+}
+
+static inline void
+vfs_buf_check_unmapped(struct buf *bp)
+{
+
+	KASSERT((bp->b_flags & B_UNMAPPED) == B_UNMAPPED,
+	    ("unmapped buf %p %x", bp, bp->b_flags));
+	KASSERT(bp->b_kvabase == unmapped_buf,
+	    ("unmapped buf: corrupted b_kvabase %p", bp));
+	KASSERT(bp->b_data == unmapped_buf,
+	    ("unmapped buf: corrupted b_data %p", bp));
+}
+
+#define	BUF_CHECK_MAPPED(bp) vfs_buf_check_mapped(bp)
+#define	BUF_CHECK_UNMAPPED(bp) vfs_buf_check_unmapped(bp)
+#else
+#define	BUF_CHECK_MAPPED(bp) do {} while (0)
+#define	BUF_CHECK_UNMAPPED(bp) do {} while (0)
+#endif
+
+static void
+bpmap_qenter(struct buf *bp)
+{
+
+	BUF_CHECK_MAPPED(bp);
+
+	/*
+	 * bp->b_data is relative to bp->b_offset, but
+	 * bp->b_offset may be offset into the first page.
+	 */
+	bp->b_data = (caddr_t)trunc_page((vm_offset_t)bp->b_data);
+	pmap_qenter((vm_offset_t)bp->b_data, bp->b_pages, bp->b_npages);
+	bp->b_data = (caddr_t)((vm_offset_t)bp->b_data |
+	    (vm_offset_t)(bp->b_offset & PAGE_MASK));
 }
 
 /*
@@ -686,14 +803,26 @@ static void
 bfreekva(struct buf *bp)
 {
 
-	if (bp->b_kvasize) {
-		atomic_add_int(&buffreekvacnt, 1);
-		atomic_subtract_long(&bufspace, bp->b_kvasize);
-		vm_map_remove(buffer_map, (vm_offset_t) bp->b_kvabase,
-		    (vm_offset_t) bp->b_kvabase + bp->b_kvasize);
-		bp->b_kvasize = 0;
-		bufspacewakeup();
+	if (bp->b_kvasize == 0)
+		return;
+
+	atomic_add_int(&buffreekvacnt, 1);
+	atomic_subtract_long(&bufspace, bp->b_kvasize);
+	if ((bp->b_flags & B_UNMAPPED) == 0) {
+		BUF_CHECK_MAPPED(bp);
+		vm_map_remove(buffer_map, (vm_offset_t)bp->b_kvabase,
+		    (vm_offset_t)bp->b_kvabase + bp->b_kvasize);
+	} else {
+		BUF_CHECK_UNMAPPED(bp);
+		if ((bp->b_flags & B_KVAALLOC) != 0) {
+			vm_map_remove(buffer_map, (vm_offset_t)bp->b_kvaalloc,
+			    (vm_offset_t)bp->b_kvaalloc + bp->b_kvasize);
+		}
+		atomic_subtract_long(&unmapped_bufspace, bp->b_kvasize);
+		bp->b_flags &= ~(B_UNMAPPED | B_KVAALLOC);
 	}
+	bp->b_kvasize = 0;
+	bufspacewakeup();
 }
 
 /*
@@ -760,6 +889,11 @@ bremfreel(struct buf *bp)
 	mtx_assert(&bqlock, MA_OWNED);
 
 	TAILQ_REMOVE(&bufqueues[bp->b_qindex], bp, b_freelist);
+#ifdef INVARIANTS
+	KASSERT(bq_len[bp->b_qindex] >= 1, ("queue %d underflow",
+	    bp->b_qindex));
+	bq_len[bp->b_qindex]--;
+#endif
 	bp->b_qindex = QUEUE_NONE;
 	/*
 	 * If this was a delayed bremfree() we only need to remove the buffer
@@ -1414,7 +1548,8 @@ brelse(struct buf *bp)
 					}
 				}
 
-				if ((bp->b_flags & B_INVAL) == 0) {
+				if ((bp->b_flags & (B_INVAL | B_UNMAPPED)) == 0) {
+					BUF_CHECK_MAPPED(bp);
 					pmap_qenter(
 					    trunc_page((vm_offset_t)bp->b_data),
 					    bp->b_pages, bp->b_npages);
@@ -1509,11 +1644,17 @@ brelse(struct buf *bp)
 			bp->b_qindex = QUEUE_DIRTY;
 		else
 			bp->b_qindex = QUEUE_CLEAN;
-		if (bp->b_flags & B_AGE)
-			TAILQ_INSERT_HEAD(&bufqueues[bp->b_qindex], bp, b_freelist);
-		else
-			TAILQ_INSERT_TAIL(&bufqueues[bp->b_qindex], bp, b_freelist);
+		if (bp->b_flags & B_AGE) {
+			TAILQ_INSERT_HEAD(&bufqueues[bp->b_qindex], bp,
+			    b_freelist);
+		} else {
+			TAILQ_INSERT_TAIL(&bufqueues[bp->b_qindex], bp,
+			    b_freelist);
+		}
 	}
+#ifdef INVARIANTS
+	bq_len[bp->b_qindex]++;
+#endif
 	mtx_unlock(&bqlock);
 
 	/*
@@ -1604,6 +1745,9 @@ bqrelse(struct buf *bp)
 	if (bp->b_flags & B_DELWRI) {
 		bp->b_qindex = QUEUE_DIRTY;
 		TAILQ_INSERT_TAIL(&bufqueues[bp->b_qindex], bp, b_freelist);
+#ifdef INVARIANTS
+		bq_len[bp->b_qindex]++;
+#endif
 	} else {
 		/*
 		 * The locking of the BO_LOCK for checking of the
@@ -1616,6 +1760,9 @@ bqrelse(struct buf *bp)
 			bp->b_qindex = QUEUE_CLEAN;
 			TAILQ_INSERT_TAIL(&bufqueues[QUEUE_CLEAN], bp,
 			    b_freelist);
+#ifdef INVARIANTS
+			bq_len[QUEUE_CLEAN]++;
+#endif
 		} else {
 			/*
 			 * We are too low on memory, we have to try to free
@@ -1657,7 +1804,11 @@ vfs_vmio_release(struct buf *bp)
 	int i;
 	vm_page_t m;
 
-	pmap_qremove(trunc_page((vm_offset_t)bp->b_data), bp->b_npages);
+	if ((bp->b_flags & B_UNMAPPED) == 0) {
+		BUF_CHECK_MAPPED(bp);
+		pmap_qremove(trunc_page((vm_offset_t)bp->b_data), bp->b_npages);
+	} else
+		BUF_CHECK_UNMAPPED(bp);
 	VM_OBJECT_WLOCK(bp->b_bufobj->bo_object);
 	for (i = 0; i < bp->b_npages; i++) {
 		m = bp->b_pages[i];
@@ -1761,8 +1912,10 @@ vfs_bio_awrite(struct buf *bp)
 	int nwritten;
 	int size;
 	int maxcl;
+	int gbflags;
 
 	bo = &vp->v_bufobj;
+	gbflags = (bp->b_flags & B_UNMAPPED) != 0 ? GB_UNMAPPED : 0;
 	/*
 	 * right now we support clustered writing only to regular files.  If
 	 * we find a clusterable block we could be in the middle of a cluster
@@ -1794,7 +1947,7 @@ vfs_bio_awrite(struct buf *bp)
 		if (ncl != 1) {
 			BUF_UNLOCK(bp);
 			nwritten = cluster_wbuild(vp, size, lblkno - j, ncl,
-			    0);
+			    gbflags);
 			return (nwritten);
 		}
 	}
@@ -1811,46 +1964,207 @@ vfs_bio_awrite(struct buf *bp)
 	return (nwritten);
 }
 
+static void
+setbufkva(struct buf *bp, vm_offset_t addr, int maxsize, int gbflags)
+{
+
+	KASSERT((bp->b_flags & (B_UNMAPPED | B_KVAALLOC)) == 0 &&
+	    bp->b_kvasize == 0, ("call bfreekva(%p)", bp));
+	if ((gbflags & GB_UNMAPPED) == 0) {
+		bp->b_kvabase = (caddr_t)addr;
+	} else if ((gbflags & GB_KVAALLOC) != 0) {
+		KASSERT((gbflags & GB_UNMAPPED) != 0,
+		    ("GB_KVAALLOC without GB_UNMAPPED"));
+		bp->b_kvaalloc = (caddr_t)addr;
+		bp->b_flags |= B_UNMAPPED | B_KVAALLOC;
+		atomic_add_long(&unmapped_bufspace, bp->b_kvasize);
+	}
+	bp->b_kvasize = maxsize;
+}
+
 /*
- *	getnewbuf:
- *
- *	Find and initialize a new buffer header, freeing up existing buffers 
- *	in the bufqueues as necessary.  The new buffer is returned locked.
- *
- *	Important:  B_INVAL is not set.  If the caller wishes to throw the
- *	buffer away, the caller must set B_INVAL prior to calling brelse().
- *
- *	We block if:
- *		We have insufficient buffer headers
- *		We have insufficient buffer space
- *		buffer_map is too fragmented ( space reservation fails )
- *		If we have to flush dirty buffers ( but we try to avoid this )
- *
- *	To avoid VFS layer recursion we do not flush dirty buffers ourselves.
- *	Instead we ask the buf daemon to do it for us.  We attempt to
- *	avoid piecemeal wakeups of the pageout daemon.
+ * Allocate the buffer KVA and set b_kvasize. Also set b_kvabase if
+ * needed.
  */
+static int
+allocbufkva(struct buf *bp, int maxsize, int gbflags)
+{
+	vm_offset_t addr;
+	int rv;
 
-static struct buf *
-getnewbuf(struct vnode *vp, int slpflag, int slptimeo, int size, int maxsize,
-    int gbflags)
+	bfreekva(bp);
+	addr = 0;
+
+	vm_map_lock(buffer_map);
+	if (vm_map_findspace(buffer_map, vm_map_min(buffer_map), maxsize,
+	    &addr)) {
+		vm_map_unlock(buffer_map);
+		/*
+		 * Buffer map is too fragmented.  Request the caller
+		 * to defragment the map.
+		 */
+		atomic_add_int(&bufdefragcnt, 1);
+		return (1);
+	}
+	rv = vm_map_insert(buffer_map, NULL, 0, addr, addr + maxsize,
+	    VM_PROT_RW, VM_PROT_RW, MAP_NOFAULT);
+	KASSERT(rv == KERN_SUCCESS, ("vm_map_insert(buffer_map) rv %d", rv));
+	vm_map_unlock(buffer_map);
+	setbufkva(bp, addr, maxsize, gbflags);
+	atomic_add_long(&bufspace, bp->b_kvasize);
+	return (0);
+}
+
+/*
+ * Ask the bufdaemon for help, or act as bufdaemon itself, when a
+ * locked vnode is supplied.
+ */
+static void
+getnewbuf_bufd_help(struct vnode *vp, int gbflags, int slpflag, int slptimeo,
+    int defrag)
 {
 	struct thread *td;
-	struct buf *bp;
-	struct buf *nbp;
-	int defrag = 0;
-	int nqindex;
-	static int flushingbufs;
+	char *waitmsg;
+	int fl, flags, norunbuf;
+
+	mtx_assert(&bqlock, MA_OWNED);
+
+	if (defrag) {
+		flags = VFS_BIO_NEED_BUFSPACE;
+		waitmsg = "nbufkv";
+	} else if (bufspace >= hibufspace) {
+		waitmsg = "nbufbs";
+		flags = VFS_BIO_NEED_BUFSPACE;
+	} else {
+		waitmsg = "newbuf";
+		flags = VFS_BIO_NEED_ANY;
+	}
+	mtx_lock(&nblock);
+	needsbuffer |= flags;
+	mtx_unlock(&nblock);
+	mtx_unlock(&bqlock);
+
+	bd_speedup();	/* heeeelp */
+	if ((gbflags & GB_NOWAIT_BD) != 0)
+		return;
 
 	td = curthread;
+	mtx_lock(&nblock);
+	while (needsbuffer & flags) {
+		if (vp != NULL && (td->td_pflags & TDP_BUFNEED) == 0) {
+			mtx_unlock(&nblock);
+			/*
+			 * getblk() is called with a vnode locked, and
+			 * some majority of the dirty buffers may as
+			 * well belong to the vnode.  Flushing the
+			 * buffers there would make a progress that
+			 * cannot be achieved by the buf_daemon, that
+			 * cannot lock the vnode.
+			 */
+			norunbuf = ~(TDP_BUFNEED | TDP_NORUNNINGBUF) |
+			    (td->td_pflags & TDP_NORUNNINGBUF);
+			/* play bufdaemon */
+			td->td_pflags |= TDP_BUFNEED | TDP_NORUNNINGBUF;
+			fl = buf_do_flush(vp);
+			td->td_pflags &= norunbuf;
+			mtx_lock(&nblock);
+			if (fl != 0)
+				continue;
+			if ((needsbuffer & flags) == 0)
+				break;
+		}
+		if (msleep(&needsbuffer, &nblock, (PRIBIO + 4) | slpflag,
+		    waitmsg, slptimeo))
+			break;
+	}
+	mtx_unlock(&nblock);
+}
+
+static void
+getnewbuf_reuse_bp(struct buf *bp, int qindex)
+{
+
+	CTR6(KTR_BUF, "getnewbuf(%p) vp %p flags %X kvasize %d bufsize %d "
+	    "queue %d (recycling)", bp, bp->b_vp, bp->b_flags,
+	     bp->b_kvasize, bp->b_bufsize, qindex);
+	mtx_assert(&bqlock, MA_NOTOWNED);
+
 	/*
-	 * We can't afford to block since we might be holding a vnode lock,
-	 * which may prevent system daemons from running.  We deal with
-	 * low-memory situations by proactively returning memory and running
-	 * async I/O rather then sync I/O.
+	 * Note: we no longer distinguish between VMIO and non-VMIO
+	 * buffers.
 	 */
-	atomic_add_int(&getnewbufcalls, 1);
-	atomic_subtract_int(&getnewbufrestarts, 1);
+	KASSERT((bp->b_flags & B_DELWRI) == 0,
+	    ("delwri buffer %p found in queue %d", bp, qindex));
+
+	if (qindex == QUEUE_CLEAN) {
+		if (bp->b_flags & B_VMIO) {
+			bp->b_flags &= ~B_ASYNC;
+			vfs_vmio_release(bp);
+		}
+		if (bp->b_vp != NULL)
+			brelvp(bp);
+	}
+
+	/*
+	 * Get the rest of the buffer freed up.  b_kva* is still valid
+	 * after this operation.
+	 */
+
+	if (bp->b_rcred != NOCRED) {
+		crfree(bp->b_rcred);
+		bp->b_rcred = NOCRED;
+	}
+	if (bp->b_wcred != NOCRED) {
+		crfree(bp->b_wcred);
+		bp->b_wcred = NOCRED;
+	}
+	if (!LIST_EMPTY(&bp->b_dep))
+		buf_deallocate(bp);
+	if (bp->b_vflags & BV_BKGRDINPROG)
+		panic("losing buffer 3");
+	KASSERT(bp->b_vp == NULL, ("bp: %p still has vnode %p.  qindex: %d",
+	    bp, bp->b_vp, qindex));
+	KASSERT((bp->b_xflags & (BX_VNCLEAN|BX_VNDIRTY)) == 0,
+	    ("bp: %p still on a buffer list. xflags %X", bp, bp->b_xflags));
+
+	if (bp->b_bufsize)
+		allocbuf(bp, 0);
+
+	bp->b_flags &= B_UNMAPPED | B_KVAALLOC;
+	bp->b_ioflags = 0;
+	bp->b_xflags = 0;
+	KASSERT((bp->b_vflags & BV_INFREECNT) == 0,
+	    ("buf %p still counted as free?", bp));
+	bp->b_vflags = 0;
+	bp->b_vp = NULL;
+	bp->b_blkno = bp->b_lblkno = 0;
+	bp->b_offset = NOOFFSET;
+	bp->b_iodone = 0;
+	bp->b_error = 0;
+	bp->b_resid = 0;
+	bp->b_bcount = 0;
+	bp->b_npages = 0;
+	bp->b_dirtyoff = bp->b_dirtyend = 0;
+	bp->b_bufobj = NULL;
+	bp->b_pin_count = 0;
+	bp->b_fsprivate1 = NULL;
+	bp->b_fsprivate2 = NULL;
+	bp->b_fsprivate3 = NULL;
+
+	LIST_INIT(&bp->b_dep);
+}
+
+static int flushingbufs;
+
+static struct buf *
+getnewbuf_scan(int maxsize, int defrag, int unmapped, int metadata)
+{
+	struct buf *bp, *nbp;
+	int nqindex, qindex, pass;
+
+	KASSERT(!unmapped || !defrag, ("both unmapped and defrag"));
+
+	pass = 1;
 restart:
 	atomic_add_int(&getnewbufrestarts, 1);
 
@@ -1860,66 +2174,90 @@ restart:
 	 * that if we are specially marked process, we are allowed to
 	 * dip into our reserves.
 	 *
-	 * The scanning sequence is nominally:  EMPTY->EMPTYKVA->CLEAN
+	 * The scanning sequence is nominally: EMPTY->EMPTYKVA->CLEAN
+	 * for the allocation of the mapped buffer.  For unmapped, the
+	 * easiest is to start with EMPTY outright.
 	 *
 	 * We start with EMPTYKVA.  If the list is empty we backup to EMPTY.
 	 * However, there are a number of cases (defragging, reusing, ...)
 	 * where we cannot backup.
 	 */
+	nbp = NULL;
 	mtx_lock(&bqlock);
-	nqindex = QUEUE_EMPTYKVA;
-	nbp = TAILQ_FIRST(&bufqueues[QUEUE_EMPTYKVA]);
-
+	if (!defrag && unmapped) {
+		nqindex = QUEUE_EMPTY;
+		nbp = TAILQ_FIRST(&bufqueues[QUEUE_EMPTY]);
+	}
 	if (nbp == NULL) {
-		/*
-		 * If no EMPTYKVA buffers and we are either
-		 * defragging or reusing, locate a CLEAN buffer
-		 * to free or reuse.  If bufspace useage is low
-		 * skip this step so we can allocate a new buffer.
-		 */
-		if (defrag || bufspace >= lobufspace) {
-			nqindex = QUEUE_CLEAN;
-			nbp = TAILQ_FIRST(&bufqueues[QUEUE_CLEAN]);
-		}
+		nqindex = QUEUE_EMPTYKVA;
+		nbp = TAILQ_FIRST(&bufqueues[QUEUE_EMPTYKVA]);
+	}
 
-		/*
-		 * If we could not find or were not allowed to reuse a
-		 * CLEAN buffer, check to see if it is ok to use an EMPTY
-		 * buffer.  We can only use an EMPTY buffer if allocating
-		 * its KVA would not otherwise run us out of buffer space.
-		 */
-		if (nbp == NULL && defrag == 0 &&
-		    bufspace + maxsize < hibufspace) {
-			nqindex = QUEUE_EMPTY;
-			nbp = TAILQ_FIRST(&bufqueues[QUEUE_EMPTY]);
-		}
+	/*
+	 * If no EMPTYKVA buffers and we are either defragging or
+	 * reusing, locate a CLEAN buffer to free or reuse.  If
+	 * bufspace usage is low skip this step so we can allocate a
+	 * new buffer.
+	 */
+	if (nbp == NULL && (defrag || bufspace >= lobufspace)) {
+		nqindex = QUEUE_CLEAN;
+		nbp = TAILQ_FIRST(&bufqueues[QUEUE_CLEAN]);
+	}
+
+	/*
+	 * If we could not find or were not allowed to reuse a CLEAN
+	 * buffer, check to see if it is ok to use an EMPTY buffer.
+	 * We can only use an EMPTY buffer if allocating its KVA would
+	 * not otherwise run us out of buffer space.  No KVA is needed
+	 * for the unmapped allocation.
+	 */
+	if (nbp == NULL && defrag == 0 && (bufspace + maxsize < hibufspace ||
+	    metadata)) {
+		nqindex = QUEUE_EMPTY;
+		nbp = TAILQ_FIRST(&bufqueues[QUEUE_EMPTY]);
+	}
+
+	/*
+	 * All available buffers might be clean, retry ignoring the
+	 * lobufspace as the last resort.
+	 */
+	if (nbp == NULL && !TAILQ_EMPTY(&bufqueues[QUEUE_CLEAN])) {
+		nqindex = QUEUE_CLEAN;
+		nbp = TAILQ_FIRST(&bufqueues[QUEUE_CLEAN]);
 	}
 
 	/*
 	 * Run scan, possibly freeing data and/or kva mappings on the fly
 	 * depending.
 	 */
-
 	while ((bp = nbp) != NULL) {
-		int qindex = nqindex;
+		qindex = nqindex;
 
 		/*
-		 * Calculate next bp ( we can only use it if we do not block
-		 * or do other fancy things ).
+		 * Calculate next bp (we can only use it if we do not

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***

