svn commit: r339187 - in head/sys: kern sys vm

Matt Macy mmacy at FreeBSD.org
Fri Oct 5 05:50:58 UTC 2018


Author: mmacy
Date: Fri Oct  5 05:50:56 2018
New Revision: 339187
URL: https://svnweb.freebsd.org/changeset/base/339187

Log:
  eliminate locking surrounding ui_vmsize and swap reserve by using atomics
  
  Change swap_reserve and swap_total to be in units of pages so that
  swap reservations can be done using only atomics instead of using a single
  global mutex for swap_reserve and a single mutex for all processes running
  under the same uid for uid accounting.
  
  Results in an mmap speed-up and a 70% increase in brk calls / second.
  
  Reviewed by:	alc@, markj@, kib@
  Approved by:	re (delphij@)
  Differential Revision:	https://reviews.freebsd.org/D16273

Modified:
  head/sys/kern/kern_resource.c
  head/sys/sys/resourcevar.h
  head/sys/vm/swap_pager.c

Modified: head/sys/kern/kern_resource.c
==============================================================================
--- head/sys/kern/kern_resource.c	Thu Oct  4 23:55:03 2018	(r339186)
+++ head/sys/kern/kern_resource.c	Fri Oct  5 05:50:56 2018	(r339187)
@@ -1276,7 +1276,6 @@ uifind(uid_t uid)
 	racct_create(&new_uip->ui_racct);
 	refcount_init(&new_uip->ui_ref, 1);
 	new_uip->ui_uid = uid;
-	mtx_init(&new_uip->ui_vmsize_mtx, "ui_vmsize", NULL, MTX_DEF);
 
 	rw_wlock(&uihashtbl_lock);
 	/*
@@ -1291,7 +1290,6 @@ uifind(uid_t uid)
 	} else {
 		rw_wunlock(&uihashtbl_lock);
 		racct_destroy(&new_uip->ui_racct);
-		mtx_destroy(&new_uip->ui_vmsize_mtx);
 		free(new_uip, M_UIDINFO);
 	}
 	return (uip);
@@ -1352,7 +1350,6 @@ uifree(struct uidinfo *uip)
 	if (uip->ui_vmsize != 0)
 		printf("freeing uidinfo: uid = %d, swapuse = %lld\n",
 		    uip->ui_uid, (unsigned long long)uip->ui_vmsize);
-	mtx_destroy(&uip->ui_vmsize_mtx);
 	free(uip, M_UIDINFO);
 }
 

Modified: head/sys/sys/resourcevar.h
==============================================================================
--- head/sys/sys/resourcevar.h	Thu Oct  4 23:55:03 2018	(r339186)
+++ head/sys/sys/resourcevar.h	Fri Oct  5 05:50:56 2018	(r339187)
@@ -93,12 +93,10 @@ struct racct;
  * (a) Constant from inception
  * (b) Lockless, updated using atomics
  * (c) Locked by global uihashtbl_lock
- * (d) Locked by the ui_vmsize_mtx
  */
 struct uidinfo {
 	LIST_ENTRY(uidinfo) ui_hash;	/* (c) hash chain of uidinfos */
-	struct mtx ui_vmsize_mtx;
-	vm_ooffset_t ui_vmsize;		/* (d) swap reservation by uid */
+	u_long ui_vmsize;	/* (b) pages of swap reservation by uid */
 	long	ui_sbsize;		/* (b) socket buffer space consumed */
 	long	ui_proccnt;		/* (b) number of processes */
 	long	ui_ptscnt;		/* (b) number of pseudo-terminals */

Modified: head/sys/vm/swap_pager.c
==============================================================================
--- head/sys/vm/swap_pager.c	Thu Oct  4 23:55:03 2018	(r339186)
+++ head/sys/vm/swap_pager.c	Fri Oct  5 05:50:56 2018	(r339187)
@@ -151,12 +151,16 @@ static int nswapdev;		/* Number of swap devices */
 int swap_pager_avail;
 static struct sx swdev_syscall_lock;	/* serialize swap(on|off) */
 
-static vm_ooffset_t swap_total;
-SYSCTL_QUAD(_vm, OID_AUTO, swap_total, CTLFLAG_RD, &swap_total, 0,
-    "Total amount of available swap storage.");
-static vm_ooffset_t swap_reserved;
-SYSCTL_QUAD(_vm, OID_AUTO, swap_reserved, CTLFLAG_RD, &swap_reserved, 0,
+static u_long swap_reserved;
+static u_long swap_total;
+static int sysctl_page_shift(SYSCTL_HANDLER_ARGS);
+SYSCTL_PROC(_vm, OID_AUTO, swap_reserved, CTLTYPE_U64 | CTLFLAG_RD | CTLFLAG_MPSAFE,
+    &swap_reserved, 0, sysctl_page_shift, "A", 
     "Amount of swap storage needed to back all allocated anonymous memory.");
+SYSCTL_PROC(_vm, OID_AUTO, swap_total, CTLTYPE_U64 | CTLFLAG_RD | CTLFLAG_MPSAFE,
+    &swap_total, 0, sysctl_page_shift, "A", 
+    "Total amount of available swap storage.");
+
 static int overcommit = 0;
 SYSCTL_INT(_vm, VM_OVERCOMMIT, overcommit, CTLFLAG_RW, &overcommit, 0,
     "Configure virtual memory overcommit behavior. See tuning(7) "
@@ -173,6 +177,16 @@ SYSCTL_ULONG(_vm, OID_AUTO, swap_maxpages, CTLFLAG_RD,
 #define	SWAP_RESERVE_RLIMIT_ON		(1 << 1)
 #define	SWAP_RESERVE_ALLOW_NONWIRED	(1 << 2)
 
+static int
+sysctl_page_shift(SYSCTL_HANDLER_ARGS)
+{
+	uint64_t newval;
+	u_long value = *(u_long *)arg1;
+
+	newval = ((uint64_t)value) << PAGE_SHIFT;
+	return (sysctl_handle_64(oidp, &newval, 0, req));
+}
+
 int
 swap_reserve(vm_ooffset_t incr)
 {
@@ -183,7 +197,7 @@ swap_reserve(vm_ooffset_t incr)
 int
 swap_reserve_by_cred(vm_ooffset_t incr, struct ucred *cred)
 {
-	vm_ooffset_t r, s;
+	u_long r, s, prev, pincr;
 	int res, error;
 	static int curfail;
 	static struct timeval lastfail;
@@ -191,8 +205,8 @@ swap_reserve_by_cred(vm_ooffset_t incr, struct ucred *
 
 	uip = cred->cr_ruidinfo;
 
-	if (incr & PAGE_MASK)
-		panic("swap_reserve: & PAGE_MASK");
+	KASSERT((incr & PAGE_MASK) == 0, ("%s: incr: %ju & PAGE_MASK", __func__,
+	    (uintmax_t)incr));
 
 #ifdef RACCT
 	if (racct_enable) {
@@ -204,36 +218,33 @@ swap_reserve_by_cred(vm_ooffset_t incr, struct ucred *
 	}
 #endif
 
+	pincr = atop(incr);
 	res = 0;
-	mtx_lock(&sw_dev_mtx);
-	r = swap_reserved + incr;
+	prev = atomic_fetchadd_long(&swap_reserved, pincr);
+	r = prev + pincr;
 	if (overcommit & SWAP_RESERVE_ALLOW_NONWIRED) {
 		s = vm_cnt.v_page_count - vm_cnt.v_free_reserved -
 		    vm_wire_count();
-		s *= PAGE_SIZE;
 	} else
 		s = 0;
 	s += swap_total;
 	if ((overcommit & SWAP_RESERVE_FORCE_ON) == 0 || r <= s ||
 	    (error = priv_check(curthread, PRIV_VM_SWAP_NOQUOTA)) == 0) {
 		res = 1;
-		swap_reserved = r;
+	} else {
+		prev = atomic_fetchadd_long(&swap_reserved, -pincr);
+		if (prev < pincr)
+			panic("swap_reserved < incr on overcommit fail");
 	}
-	mtx_unlock(&sw_dev_mtx);
-
 	if (res) {
-		UIDINFO_VMSIZE_LOCK(uip);
+		prev = atomic_fetchadd_long(&uip->ui_vmsize, pincr);
 		if ((overcommit & SWAP_RESERVE_RLIMIT_ON) != 0 &&
-		    uip->ui_vmsize + incr > lim_cur(curthread, RLIMIT_SWAP) &&
-		    priv_check(curthread, PRIV_VM_SWAP_NORLIMIT))
+		    prev + pincr > lim_cur(curthread, RLIMIT_SWAP) &&
+		    priv_check(curthread, PRIV_VM_SWAP_NORLIMIT)) {
 			res = 0;
-		else
-			uip->ui_vmsize += incr;
-		UIDINFO_VMSIZE_UNLOCK(uip);
-		if (!res) {
-			mtx_lock(&sw_dev_mtx);
-			swap_reserved -= incr;
-			mtx_unlock(&sw_dev_mtx);
+			prev = atomic_fetchadd_long(&uip->ui_vmsize, -pincr);
+			if (prev < pincr)
+				panic("uip->ui_vmsize < incr on overcommit fail");
 		}
 	}
 	if (!res && ppsratecheck(&lastfail, &curfail, 1)) {
@@ -242,7 +253,7 @@ swap_reserve_by_cred(vm_ooffset_t incr, struct ucred *
 	}
 
 #ifdef RACCT
-	if (!res) {
+	if (racct_enable && !res) {
 		PROC_LOCK(curproc);
 		racct_sub(curproc, RACCT_SWAP, incr);
 		PROC_UNLOCK(curproc);
@@ -256,22 +267,20 @@ void
 swap_reserve_force(vm_ooffset_t incr)
 {
 	struct uidinfo *uip;
+	u_long pincr;
 
-	mtx_lock(&sw_dev_mtx);
-	swap_reserved += incr;
-	mtx_unlock(&sw_dev_mtx);
+	KASSERT((incr & PAGE_MASK) == 0, ("%s: incr: %ju & PAGE_MASK", __func__,
+	    (uintmax_t)incr));
 
-#ifdef RACCT
 	PROC_LOCK(curproc);
-	racct_add_force(curproc, RACCT_SWAP, incr);
-	PROC_UNLOCK(curproc);
+#ifdef RACCT
+	if (racct_enable)
+		racct_add_force(curproc, RACCT_SWAP, incr);
 #endif
-
-	uip = curthread->td_ucred->cr_ruidinfo;
-	PROC_LOCK(curproc);
-	UIDINFO_VMSIZE_LOCK(uip);
-	uip->ui_vmsize += incr;
-	UIDINFO_VMSIZE_UNLOCK(uip);
+	pincr = atop(incr);
+	atomic_add_long(&swap_reserved, pincr);
+	uip = curproc->p_ucred->cr_ruidinfo;
+	atomic_add_long(&uip->ui_vmsize, pincr);
 	PROC_UNLOCK(curproc);
 }
 
@@ -281,7 +290,7 @@ swap_release(vm_ooffset_t decr)
 	struct ucred *cred;
 
 	PROC_LOCK(curproc);
-	cred = curthread->td_ucred;
+	cred = curproc->p_ucred;
 	swap_release_by_cred(decr, cred);
 	PROC_UNLOCK(curproc);
 }
@@ -289,26 +298,26 @@ swap_release(vm_ooffset_t decr)
 void
 swap_release_by_cred(vm_ooffset_t decr, struct ucred *cred)
 {
+	u_long prev, pdecr;
  	struct uidinfo *uip;
 
 	uip = cred->cr_ruidinfo;
 
-	if (decr & PAGE_MASK)
-		panic("swap_release: & PAGE_MASK");
+	KASSERT((decr & PAGE_MASK) == 0, ("%s: decr: %ju & PAGE_MASK", __func__,
+	    (uintmax_t)decr));
 
-	mtx_lock(&sw_dev_mtx);
-	if (swap_reserved < decr)
+	pdecr = atop(decr);
+	prev = atomic_fetchadd_long(&swap_reserved, -pdecr);
+	if (prev < pdecr)
 		panic("swap_reserved < decr");
-	swap_reserved -= decr;
-	mtx_unlock(&sw_dev_mtx);
 
-	UIDINFO_VMSIZE_LOCK(uip);
-	if (uip->ui_vmsize < decr)
+	prev = atomic_fetchadd_long(&uip->ui_vmsize, -pdecr);
+	if (prev < pdecr)
 		printf("negative vmsize for uid = %d\n", uip->ui_uid);
-	uip->ui_vmsize -= decr;
-	UIDINFO_VMSIZE_UNLOCK(uip);
-
-	racct_sub_cred(cred, RACCT_SWAP, decr);
+#ifdef RACCT
+	if (racct_enable)
+		racct_sub_cred(cred, RACCT_SWAP, decr);
+#endif
 }
 
 #define SWM_POP		0x01	/* pop out			*/
@@ -2176,7 +2185,7 @@ swapon_check_swzone(void)
 {
 	unsigned long maxpages, npages;
 
-	npages = swap_total / PAGE_SIZE;
+	npages = swap_total;
 	/* absolute maximum we can handle assuming 100% efficiency */
 	maxpages = uma_zone_get_max(swblk_zone) * SWAP_META_PAGES;
 
@@ -2254,7 +2263,7 @@ swaponsomething(struct vnode *vp, void *id, u_long nbl
 	TAILQ_INSERT_TAIL(&swtailq, sp, sw_list);
 	nswapdev++;
 	swap_pager_avail += nblks - 2;
-	swap_total += (vm_ooffset_t)nblks * PAGE_SIZE;
+	swap_total += nblks;
 	swapon_check_swzone();
 	swp_sizecheck();
 	mtx_unlock(&sw_dev_mtx);
@@ -2351,7 +2360,7 @@ swapoff_one(struct swdevt *sp, struct ucred *cred)
 	mtx_lock(&sw_dev_mtx);
 	sp->sw_flags |= SW_CLOSING;
 	swap_pager_avail -= blist_fill(sp->sw_blist, 0, nblks);
-	swap_total -= (vm_ooffset_t)nblks * PAGE_SIZE;
+	swap_total -= nblks;
 	mtx_unlock(&sw_dev_mtx);
 
 	/*


More information about the svn-src-all mailing list