svn commit: r319965 - in stable/10/sys: kern sys vm

Alan Cox alc at FreeBSD.org
Thu Jun 15 03:58:25 UTC 2017


Author: alc
Date: Thu Jun 15 03:58:23 2017
New Revision: 319965
URL: https://svnweb.freebsd.org/changeset/base/319965

Log:
  MFC r318995
    In r118390, the swap pager's approach to striping swap allocation over
    multiple devices was changed.  However, swapoff_one() was not fully and
    correctly converted.  In particular, with r118390's introduction of a per-
    device blist, the maximum swap block size, "dmmax", became irrelevant to
    swapoff_one()'s operation.  Moreover, swapoff_one() was performing out-of-
    range operations on the per-device blist that were silently ignored by
    blist_fill().
  
    This change corrects both of these problems with swapoff_one(), which will
    allow us to potentially increase MAX_PAGEOUT_CLUSTER.  Previously,
    swapoff_one() would panic inside of blist_fill() if you increased
    MAX_PAGEOUT_CLUSTER.
  
  MFC r319001
    After r118390, the variable "dmmax" was neither the correct strip size
    nor the correct maximum block size.  Moreover, after r318995, it serves
    no purpose except to provide information to user space through a read-
    only sysctl.
  
    This change eliminates the variable "dmmax" but retains the sysctl.  It
    also corrects the value returned by the sysctl.
  
  MFC r319604
    Halve the memory being internally allocated by the blist allocator.  In
    short, half of the memory that is allocated to implement the radix tree is
    wasted because we did not change "u_daddr_t" to be a 64-bit unsigned int
    when we changed "daddr_t" to be a 64-bit (signed) int.  (See r96849 and
    r96851.)
  
  MFC r319612
    When the function blist_fill() was added to the kernel in r107913, the swap
    pager used a different scheme for striping the allocation of swap space
    across multiple devices.  And, although blist_fill() was intended to support
    fill operations with large counts, the old striping scheme never performed a
    fill larger than the stripe size.  Consequently, the misplacement of a
    sanity check in blst_meta_fill() went undetected.  Now, moving forward in
    time to r118390, a new scheme for striping was introduced that maintained a
    blist allocator per device, but as noted in r318995, swapoff_one() was not
    fully and correctly converted to the new scheme.  This change completes what
    was started in r318995 by fixing the underlying bug in blst_meta_fill() that
    stops swapoff_one() from simply performing a single blist_fill() operation.
  
  MFC r319627
    Starting in r118390, swaponsomething() began to reserve the blocks at the
    beginning of a swap area for a disk label.  However, neither r118390 nor
    r118544, which increased the reservation from one to two blocks, correctly
    accounted for these blocks when updating the variable "swap_pager_avail".
    This change corrects that error.
  
  MFC r319655
    Originally, subr_blist.c could be compiled as a user-space application for
    testing purposes.  However, over the years, various changes to the kernel
    have broken this feature.  This revision applies some fixes to get user-
    space compilation working again.  There are no changes in this revision
    to code that is used by the kernel.

Modified:
  stable/10/sys/kern/subr_blist.c
  stable/10/sys/sys/blist.h
  stable/10/sys/vm/swap_pager.c
Directory Properties:
  stable/10/   (props changed)

Modified: stable/10/sys/kern/subr_blist.c
==============================================================================
--- stable/10/sys/kern/subr_blist.c	Thu Jun 15 02:45:43 2017	(r319964)
+++ stable/10/sys/kern/subr_blist.c	Thu Jun 15 03:58:23 2017	(r319965)
@@ -99,9 +99,8 @@ __FBSDID("$FreeBSD$");
 #define BLIST_DEBUG
 #endif
 
-#define SWAPBLK_NONE ((daddr_t)-1)
-
 #include <sys/types.h>
+#include <sys/malloc.h>
 #include <stdio.h>
 #include <string.h>
 #include <stdlib.h>
@@ -110,8 +109,6 @@ __FBSDID("$FreeBSD$");
 #define malloc(a,b,c)	calloc(a, 1)
 #define free(a,b)	free(a)
 
-typedef unsigned int u_daddr_t;
-
 #include <sys/blist.h>
 
 void panic(const char *ctl, ...);
@@ -366,7 +363,7 @@ blst_leaf_alloc(
 			j >>= 1;
 			mask >>= j;
 		}
-		scan->u.bmu_bitmap &= ~(1 << r);
+		scan->u.bmu_bitmap &= ~((u_daddr_t)1 << r);
 		return(blk + r);
 	}
 	if (count <= BLIST_BMAP_RADIX) {
@@ -658,7 +655,7 @@ static void blst_copy(
 			int i;
 
 			for (i = 0; i < BLIST_BMAP_RADIX && i < count; ++i) {
-				if (v & (1 << i))
+				if (v & ((u_daddr_t)1 << i))
 					blist_free(dest, blk + i, 1);
 			}
 		}
@@ -769,6 +766,8 @@ blst_meta_fill(
 	int next_skip = ((u_int)skip / BLIST_META_RADIX);
 	int nblks = 0;
 
+	if (count > radix)
+		panic("blist_meta_fill: allocation too large");
 	if (count == radix || scan->u.bmu_avail == 0)  {
 		/*
 		 * ALL-ALLOCATED special case
@@ -800,9 +799,6 @@ blst_meta_fill(
 		radix /= BLIST_META_RADIX;
 	}
 
-	if (count > radix)
-		panic("blist_meta_fill: allocation too large");
-
 	i = (allocBlk - blk) / radix;
 	blk += i * radix;
 	i = i * next_skip + 1;
@@ -922,7 +918,7 @@ blst_radix_print(blmeta_t *scan, daddr_t blk, daddr_t 
 
 	if (radix == BLIST_BMAP_RADIX) {
 		printf(
-		    "%*.*s(%08llx,%lld): bitmap %08llx big=%lld\n", 
+		    "%*.*s(%08llx,%lld): bitmap %016llx big=%lld\n", 
 		    tab, tab, "",
 		    (long long)blk, (long long)radix,
 		    (long long)scan->u.bmu_bitmap,
@@ -1016,10 +1012,9 @@ main(int ac, char **av)
 
 	for (;;) {
 		char buf[1024];
-		daddr_t da = 0;
-		daddr_t count = 0;
+		long long da = 0;
+		long long count = 0;
 
-
 		printf("%lld/%lld/%lld> ", (long long)bl->bl_free,
 		    (long long)size, (long long)bl->bl_radix);
 		fflush(stdout);
@@ -1028,7 +1023,7 @@ main(int ac, char **av)
 		switch(buf[0]) {
 		case 'r':
 			if (sscanf(buf + 1, "%lld", &count) == 1) {
-				blist_resize(&bl, count, 1);
+				blist_resize(&bl, count, 1, M_WAITOK);
 			} else {
 				printf("?\n");
 			}
@@ -1044,16 +1039,14 @@ main(int ac, char **av)
 			}
 			break;
 		case 'f':
-			if (sscanf(buf + 1, "%llx %lld",
-			    (long long *)&da, (long long *)&count) == 2) {
+			if (sscanf(buf + 1, "%llx %lld", &da, &count) == 2) {
 				blist_free(bl, da, count);
 			} else {
 				printf("?\n");
 			}
 			break;
 		case 'l':
-			if (sscanf(buf + 1, "%llx %lld",
-			    (long long *)&da, (long long *)&count) == 2) {
+			if (sscanf(buf + 1, "%llx %lld", &da, &count) == 2) {
 				printf("    n=%d\n",
 				    blist_fill(bl, da, count));
 			} else {

Modified: stable/10/sys/sys/blist.h
==============================================================================
--- stable/10/sys/sys/blist.h	Thu Jun 15 02:45:43 2017	(r319964)
+++ stable/10/sys/sys/blist.h	Thu Jun 15 03:58:23 2017	(r319965)
@@ -44,7 +44,7 @@
  *		ops.
  *
  *		SWAPBLK_NONE is returned on failure.  This module is typically
- *		capable of managing up to (2^31) blocks per blist, though
+ *		capable of managing up to (2^63) blocks per blist, though
  *		the memory utilization would be insane if you actually did
  *		that.  Managing something like 512MB worth of 4K blocks 
  *		eats around 32 KBytes of memory. 
@@ -56,7 +56,7 @@
 #ifndef _SYS_BLIST_H_
 #define _SYS_BLIST_H_
 
-typedef	u_int32_t	u_daddr_t;	/* unsigned disk address */
+typedef	uint64_t	u_daddr_t;	/* unsigned disk address */
 
 /*
  * note: currently use SWAPBLK_NONE as an absolute value rather then 

Modified: stable/10/sys/vm/swap_pager.c
==============================================================================
--- stable/10/sys/vm/swap_pager.c	Thu Jun 15 02:45:43 2017	(r319964)
+++ stable/10/sys/vm/swap_pager.c	Thu Jun 15 03:58:23 2017	(r319965)
@@ -115,9 +115,8 @@ __FBSDID("$FreeBSD$");
 #include <geom/geom.h>
 
 /*
- * SWB_NPAGES must be a power of 2.  It may be set to 1, 2, 4, 8, 16
- * or 32 pages per allocation.
- * The 32-page limit is due to the radix code (kern/subr_blist.c).
+ * MAX_PAGEOUT_CLUSTER must be a power of 2 between 1 and 64.
+ * The 64-page limit is due to the radix code (kern/subr_blist.c).
  */
 #ifndef MAX_PAGEOUT_CLUSTER
 #define MAX_PAGEOUT_CLUSTER 16
@@ -381,18 +380,14 @@ struct pagerops swappagerops = {
 };
 
 /*
- * dmmax is in page-sized chunks with the new swap system.  It was
- * dev-bsized chunks in the old.  dmmax is always a power of 2.
- *
  * swap_*() routines are externally accessible.  swp_*() routines are
  * internal.
  */
-static int dmmax;
 static int nswap_lowat = 128;	/* in pages, swap_pager_almost_full warn */
 static int nswap_hiwat = 512;	/* in pages, swap_pager_almost_full warn */
 
-SYSCTL_INT(_vm, OID_AUTO, dmmax,
-	CTLFLAG_RD, &dmmax, 0, "Maximum size of a swap block");
+SYSCTL_INT(_vm, OID_AUTO, dmmax, CTLFLAG_RD, &nsw_cluster_max, 0,
+    "Maximum size of a swap block in pages");
 
 static void	swp_sizecheck(void);
 static void	swp_pager_async_iodone(struct buf *bp);
@@ -499,11 +494,6 @@ swap_pager_init(void)
 	mtx_init(&sw_alloc_mtx, "swap_pager list", NULL, MTX_DEF);
 	mtx_init(&sw_dev_mtx, "swapdev", NULL, MTX_DEF);
 	sx_init(&sw_alloc_sx, "swspsx");
-
-	/*
-	 * Device Stripe, in PAGE_SIZE'd blocks
-	 */
-	dmmax = SWB_NPAGES * 2;
 }
 
 /*
@@ -2252,7 +2242,7 @@ swaponsomething(struct vnode *vp, void *id, u_long nbl
 	sp->sw_end = dvbase + nblks;
 	TAILQ_INSERT_TAIL(&swtailq, sp, sw_list);
 	nswapdev++;
-	swap_pager_avail += nblks;
+	swap_pager_avail += nblks - 2;
 	swap_total += (vm_ooffset_t)nblks * PAGE_SIZE;
 	swapon_check_swzone(swap_total / PAGE_SIZE);
 	swp_sizecheck();
@@ -2324,7 +2314,7 @@ done:
 static int
 swapoff_one(struct swdevt *sp, struct ucred *cred)
 {
-	u_long nblks, dvbase;
+	u_long nblks;
 #ifdef MAC
 	int error;
 #endif
@@ -2355,10 +2345,7 @@ swapoff_one(struct swdevt *sp, struct ucred *cred)
 	 */
 	mtx_lock(&sw_dev_mtx);
 	sp->sw_flags |= SW_CLOSING;
-	for (dvbase = 0; dvbase < sp->sw_end; dvbase += dmmax) {
-		swap_pager_avail -= blist_fill(sp->sw_blist,
-		     dvbase, dmmax);
-	}
+	swap_pager_avail -= blist_fill(sp->sw_blist, 0, nblks);
 	swap_total -= (vm_ooffset_t)nblks * PAGE_SIZE;
 	mtx_unlock(&sw_dev_mtx);
 


More information about the svn-src-all mailing list