svn commit: r329616 - in user/jeff/numa/sys: kern sys vm
Jeff Roberson
jeff at FreeBSD.org
Tue Feb 20 02:30:52 UTC 2018
Author: jeff
Date: Tue Feb 20 02:30:51 2018
New Revision: 329616
URL: https://svnweb.freebsd.org/changeset/base/329616
Log:
PID Controlled page daemon
Differential Revision: https://reviews.freebsd.org/D14402
Added:
user/jeff/numa/sys/kern/subr_pidctrl.c (contents, props changed)
user/jeff/numa/sys/sys/pidctrl.h (contents, props changed)
Modified:
user/jeff/numa/sys/vm/vm_meter.c
user/jeff/numa/sys/vm/vm_page.c
user/jeff/numa/sys/vm/vm_pageout.c
user/jeff/numa/sys/vm/vm_pagequeue.h
Added: user/jeff/numa/sys/kern/subr_pidctrl.c
==============================================================================
--- /dev/null 00:00:00 1970 (empty, because file is newly added)
+++ user/jeff/numa/sys/kern/subr_pidctrl.c Tue Feb 20 02:30:51 2018 (r329616)
@@ -0,0 +1,157 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2017, Jeffrey Roberson <jeff at freebsd.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice unmodified, this list of conditions, and the following
+ * disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/systm.h>
+#include <sys/sysctl.h>
+#include <sys/pidctrl.h>
+
+/*
+ * Reset a controller to an all-zero state and load its configuration.
+ *
+ * pc       - controller to initialize; any previous state is discarded.
+ * interval - value stored as the update interval (pc_interval).
+ * setpoint - value stored as the desired level (pc_setpoint).
+ * bound    - multiplier used to derive the integral wind-up limit.
+ * Kpd, Kid, Kdd - proportional, integral and derivative gain divisors.
+ */
+void
+pidctrl_init(struct pidctrl *pc, int interval, int setpoint, int bound,
+    int Kpd, int Kid, int Kdd)
+{
+	int windup;
+
+	/* The wind-up limit is the bound factor scaled by setpoint and Kid. */
+	windup = bound * setpoint * Kid;
+
+	bzero(pc, sizeof(*pc));
+	pc->pc_Kpd = Kpd;
+	pc->pc_Kid = Kid;
+	pc->pc_Kdd = Kdd;
+	pc->pc_bound = windup;
+	pc->pc_interval = interval;
+	pc->pc_setpoint = setpoint;
+}
+
+/*
+ * Export the fields of a controller as integer sysctl nodes under parent.
+ *
+ * The saved control variables (error, olderror, integral, derivative,
+ * input, output, ticks) and the interval are read-only.  The setpoint,
+ * bound and the three gain divisors are read-write.
+ */
+void
+pidctrl_init_sysctl(struct pidctrl *pc, struct sysctl_oid_list *parent)
+{
+
+	/* Saved control variables, read-only. */
+	SYSCTL_ADD_INT(NULL, parent, OID_AUTO, "error", CTLFLAG_RD,
+	    &pc->pc_error, 0, "Current difference from setpoint value (P)");
+	SYSCTL_ADD_INT(NULL, parent, OID_AUTO, "olderror", CTLFLAG_RD,
+	    &pc->pc_olderror, 0, "Error value from last interval");
+	SYSCTL_ADD_INT(NULL, parent, OID_AUTO, "integral", CTLFLAG_RD,
+	    &pc->pc_integral, 0, "Accumulated error integral (I)");
+	SYSCTL_ADD_INT(NULL, parent, OID_AUTO, "derivative",
+	    CTLFLAG_RD, &pc->pc_derivative, 0, "Error derivative (D)");
+	SYSCTL_ADD_INT(NULL, parent, OID_AUTO, "input", CTLFLAG_RD,
+	    &pc->pc_input, 0, "Last controller process variable input");
+	SYSCTL_ADD_INT(NULL, parent, OID_AUTO, "output", CTLFLAG_RD,
+	    &pc->pc_output, 0, "Last controller output");
+	SYSCTL_ADD_INT(NULL, parent, OID_AUTO, "ticks", CTLFLAG_RD,
+	    &pc->pc_ticks, 0, "Last controller runtime");
+
+	/* Configuration. */
+	SYSCTL_ADD_INT(NULL, parent, OID_AUTO, "setpoint", CTLFLAG_RW,
+	    &pc->pc_setpoint, 0, "Desired level for process variable");
+	SYSCTL_ADD_INT(NULL, parent, OID_AUTO, "interval", CTLFLAG_RD,
+	    &pc->pc_interval, 0, "Interval between calculations (ticks)");
+	SYSCTL_ADD_INT(NULL, parent, OID_AUTO, "bound", CTLFLAG_RW,
+	    &pc->pc_bound, 0, "Integral wind-up limit");
+	SYSCTL_ADD_INT(NULL, parent, OID_AUTO, "kpd", CTLFLAG_RW,
+	    &pc->pc_Kpd, 0, "Inverse of proportional gain");
+	SYSCTL_ADD_INT(NULL, parent, OID_AUTO, "kid", CTLFLAG_RW,
+	    &pc->pc_Kid, 0, "Inverse of integral gain");
+	SYSCTL_ADD_INT(NULL, parent, OID_AUTO, "kdd", CTLFLAG_RW,
+	    &pc->pc_Kdd, 0, "Inverse of derivative gain");
+}
+
+/*
+ * Run one step of the classic PID controller.
+ *
+ * The error is pc_setpoint minus input.  The integral accumulates the
+ * error and is clamped to [-pc_bound, pc_bound]; the derivative is the
+ * change in error since the previous call.  Each term is divided by its
+ * gain divisor and the sum is returned; it may be negative.  The input,
+ * output and current ticks are saved in pc for reporting.
+ */
+int
+pidctrl_classic(struct pidctrl *pc, int input)
+{
+	int output, error;
+	int Kpd, Kid, Kdd;
+
+	error = pc->pc_setpoint - input;
+	pc->pc_ticks = ticks;
+	pc->pc_olderror = pc->pc_error;
+
+	/*
+	 * Fetch gains and prevent divide by zero.  The divisors are
+	 * writable via sysctl, so only these clamped copies may be used
+	 * as denominators below.
+	 */
+	Kpd = MAX(pc->pc_Kpd, 1);
+	Kid = MAX(pc->pc_Kid, 1);
+	Kdd = MAX(pc->pc_Kdd, 1);
+
+	/* Compute P (proportional error), I (integral), D (derivative) */
+	pc->pc_error = error;
+	pc->pc_integral =
+	    MAX(MIN(pc->pc_integral + error, pc->pc_bound), -pc->pc_bound);
+	pc->pc_derivative = error - pc->pc_olderror;
+
+	/* Divide by inverse gain values to produce output. */
+	output = ((pc->pc_error / Kpd) + (pc->pc_integral / Kid)) +
+	    (pc->pc_derivative / Kdd);
+
+	/* Save for sysctl. */
+	pc->pc_output = output;
+	pc->pc_input = input;
+
+	return (output);
+}
+
+/*
+ * Run one step of the one-directional (daemon) PID controller.
+ *
+ * The integral is clamped to [0, pc_bound] and the return value is never
+ * negative.  The function may be called several times within one
+ * interval: pc_output accumulates what has already been reported during
+ * the interval and only the amount beyond that is returned.
+ */
+int
+pidctrl_daemon(struct pidctrl *pc, int input)
+{
+	int output, error;
+	int Kpd, Kid, Kdd;
+
+	error = pc->pc_setpoint - input;
+	/*
+	 * When ticks expired we reset our variables and start a new
+	 * interval.  If we're called multiple times during one interval
+	 * we attempt to report a target as if the entire error came at
+	 * the interval boundary.
+	 */
+	if ((u_int)(ticks - pc->pc_ticks) >= pc->pc_interval) {
+		pc->pc_ticks = ticks;
+		pc->pc_olderror = pc->pc_error;
+		pc->pc_output = pc->pc_error = 0;
+	} else {
+		error = MAX(error + pc->pc_error, 0);
+	}
+
+	/*
+	 * Fetch gains and prevent divide by zero.  The divisors are
+	 * writable via sysctl, so only these clamped copies may be used
+	 * as denominators below.
+	 */
+	Kpd = MAX(pc->pc_Kpd, 1);
+	Kid = MAX(pc->pc_Kid, 1);
+	Kdd = MAX(pc->pc_Kdd, 1);
+
+	/* Compute P (proportional error), I (integral), D (derivative) */
+	pc->pc_error = error;
+	pc->pc_integral =
+	    MAX(MIN(pc->pc_integral + error, pc->pc_bound), 0);
+	pc->pc_derivative = error - pc->pc_olderror;
+
+	/* Divide by inverse gain values to produce output. */
+	output = ((error / Kpd) + (pc->pc_integral / Kid)) +
+	    (pc->pc_derivative / Kdd);
+
+	/* Report only the work not already handed out this interval. */
+	output = MAX(output - pc->pc_output, 0);
+	pc->pc_output += output;
+	pc->pc_input = input;
+
+	return (output);
+}
Added: user/jeff/numa/sys/sys/pidctrl.h
==============================================================================
--- /dev/null 00:00:00 1970 (empty, because file is newly added)
+++ user/jeff/numa/sys/sys/pidctrl.h Tue Feb 20 02:30:51 2018 (r329616)
@@ -0,0 +1,123 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2017, Jeffrey Roberson <jeff at freebsd.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice unmodified, this list of conditions, and the following
+ * disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _SYS_PIDCTRL_H_
+#define _SYS_PIDCTRL_H_
+
+/*
+ * Proportional Integral Derivative controller.
+ *
+ * This controller is intended to replace a multitude of threshold based
+ * daemon regulation systems. These systems produce sharp sawtooths of
+ * activity which can cause latency spikes and other undesirable bursty
+ * behavior. The PID controller adapts to changing load conditions and
+ * adjusts the work done by the daemon to keep a smoother output.
+ *
+ * The setpoint can be thought of as a single watermark that the controller
+ * is always trying to reach. Compared to a high water/low water type
+ * algorithm the pid controller is dynamically deciding the low water and
+ * regulating to the high water. The setpoint should be high enough that
+ * the controller and daemon have time to observe the rise in value and
+ * respond to it, else the resource may be exhausted. More frequent wakeups
+ * permit higher setpoints and less underutilized resources.
+ *
+ * The controller has been optimised for simplicity of math making it quite
+ * inexpensive to execute. There is no floating point and so the gains must
+ * be the inverse of whole integers.
+ *
+ * Failing to measure and tune the gain parameters can result in wild
+ * oscillations in output. It is strongly encouraged that controllers are
+ * tested and tuned under a wide variety of workloads before gain values are
+ * picked. Some reasonable defaults are provided below.
+ */
+
+/*
+ * State and configuration of one PID controller instance.  All fields
+ * are plain ints; pidctrl_init() zeroes the structure before loading the
+ * configuration values.
+ */
+struct pidctrl {
+ /* Saved control variables. */
+ int pc_error; /* Current error (setpoint - input). */
+ int pc_olderror; /* Saved error for derivative. */
+ int pc_integral; /* Integral accumulator, clamped by pc_bound. */
+ int pc_derivative; /* Change from last error. */
+ int pc_input; /* Last input. */
+ int pc_output; /* Last output. */
+ int pc_ticks; /* Last sampling time, in ticks. */
+ /* Configuration options; most are runtime tunable via sysctl. */
+ int pc_setpoint; /* Desired level */
+ int pc_interval; /* Update interval in ticks. */
+ int pc_bound; /* Integral wind-up limit. */
+ int pc_Kpd; /* Proportional gain divisor. */
+ int pc_Kid; /* Integral gain divisor. */
+ int pc_Kdd; /* Derivative gain divisor. */
+};
+
+/*
+ * Reasonable default divisors.
+ *
+ * Actual gains are 1/divisor. Gains interact in complex ways with the
+ * setpoint and interval. Measurement under multiple loads should be
+ * taken to ensure adequate stability and rise time.
+ */
+#define PIDCTRL_KPD 3 /* Default proportional divisor. */
+#define PIDCTRL_KID 4 /* Default integral divisor. */
+#define PIDCTRL_KDD 8 /* Default derivative divisor. */
+#define PIDCTRL_BOUND 4 /* Bound factor, setpoint multiple. */
+
+struct sysctl_oid_list;
+
+void pidctrl_init(struct pidctrl *pc, int interval, int setpoint,
+ int bound, int Kpd, int Kid, int Kdd);
+void pidctrl_init_sysctl(struct pidctrl *pc, struct sysctl_oid_list *parent);
+
+/*
+ * This is the classic PID controller where the integral is clamped to
+ * [-bound, bound] and the output may be negative. This should be used
+ * in continuous control loops that can adjust a process variable in
+ * either direction. This is a discrete time controller and should
+ * only be called once per-interval or the derivative term will be
+ * inaccurate.
+ */
+int pidctrl_classic(struct pidctrl *pc, int input);
+
+/*
+ * This controller is intended for consumer type daemons that can only
+ * regulate in a positive direction, that is to say, they can not exert
+ * positive pressure on the process variable or input. They can only
+ * reduce it by doing work. As such the integral is bound between [0, bound]
+ * and the output is similarly a positive value reflecting the units of
+ * work necessary to be completed in the current interval to eliminate error.
+ *
+ * It is a discrete time controller but can be invoked more than once in a
+ * given time interval for ease of client implementation. This should only
+ * be done in overload situations or the controller may not produce a stable
+ * output. Calling it less frequently when there is no work to be done will
+ * increase the rise time but should otherwise be harmless.
+ */
+int pidctrl_daemon(struct pidctrl *pc, int input);
+
+#endif /* !_SYS_PIDCTRL_H_ */
Modified: user/jeff/numa/sys/vm/vm_meter.c
==============================================================================
--- user/jeff/numa/sys/vm/vm_meter.c Tue Feb 20 02:18:30 2018 (r329615)
+++ user/jeff/numa/sys/vm/vm_meter.c Tue Feb 20 02:30:51 2018 (r329616)
@@ -473,3 +473,58 @@ vm_laundry_count(void)
return vm_pagequeue_count(PQ_LAUNDRY);
}
+/*
+ * Build the sysctl subtree for one VM domain: a node named vmd_name
+ * under parent (remembered in vmd_oid) and, beneath it, a "stats" node
+ * exporting the domain's free page count, page queue lengths and paging
+ * thresholds as read-only unsigned integers.
+ */
+static void
+vm_domain_stats_init(struct vm_domain *vmd, struct sysctl_oid *parent)
+{
+	struct sysctl_oid_list *stats;
+	struct sysctl_oid *oid;
+
+	vmd->vmd_oid = SYSCTL_ADD_NODE(NULL, SYSCTL_CHILDREN(parent),
+	    OID_AUTO, vmd->vmd_name, CTLFLAG_RD, NULL, "");
+	oid = SYSCTL_ADD_NODE(NULL, SYSCTL_CHILDREN(vmd->vmd_oid),
+	    OID_AUTO, "stats", CTLFLAG_RD, NULL, "");
+	stats = SYSCTL_CHILDREN(oid);
+
+	/* Free page count and per-queue page counts. */
+	SYSCTL_ADD_UINT(NULL, stats, OID_AUTO, "free_count", CTLFLAG_RD,
+	    &vmd->vmd_free_count, 0, "Free pages");
+	SYSCTL_ADD_UINT(NULL, stats, OID_AUTO, "active", CTLFLAG_RD,
+	    &vmd->vmd_pagequeues[PQ_ACTIVE].pq_cnt, 0, "Active pages");
+	SYSCTL_ADD_UINT(NULL, stats, OID_AUTO, "inactive", CTLFLAG_RD,
+	    &vmd->vmd_pagequeues[PQ_INACTIVE].pq_cnt, 0, "Inactive pages");
+	SYSCTL_ADD_UINT(NULL, stats, OID_AUTO, "laundry", CTLFLAG_RD,
+	    &vmd->vmd_pagequeues[PQ_LAUNDRY].pq_cnt, 0, "laundry pages");
+	SYSCTL_ADD_UINT(NULL, stats, OID_AUTO, "unswappable", CTLFLAG_RD,
+	    &vmd->vmd_pagequeues[PQ_UNSWAPPABLE].pq_cnt, 0,
+	    "Unswappable pages");
+
+	/* Paging targets and thresholds. */
+	SYSCTL_ADD_UINT(NULL, stats, OID_AUTO, "inactive_target", CTLFLAG_RD,
+	    &vmd->vmd_inactive_target, 0, "Target inactive pages");
+	SYSCTL_ADD_UINT(NULL, stats, OID_AUTO, "free_target", CTLFLAG_RD,
+	    &vmd->vmd_free_target, 0, "Target free pages");
+	SYSCTL_ADD_UINT(NULL, stats, OID_AUTO, "free_reserved", CTLFLAG_RD,
+	    &vmd->vmd_free_reserved, 0, "Reserved free pages");
+	SYSCTL_ADD_UINT(NULL, stats, OID_AUTO, "free_min", CTLFLAG_RD,
+	    &vmd->vmd_free_min, 0, "Minimum free pages");
+	SYSCTL_ADD_UINT(NULL, stats, OID_AUTO, "free_severe", CTLFLAG_RD,
+	    &vmd->vmd_free_severe, 0, "Severe free pages");
+}
+
+/*
+ * Create the "domain" sysctl node under the static vm tree and add a
+ * statistics subtree beneath it for each of the vm_ndomains domains.
+ */
+static void
+vm_stats_init(void *arg __unused)
+{
+	struct sysctl_oid *domains;
+	int domain;
+
+	domains = SYSCTL_ADD_NODE(NULL, SYSCTL_STATIC_CHILDREN(_vm),
+	    OID_AUTO, "domain", CTLFLAG_RD, NULL, "");
+
+	domain = 0;
+	while (domain < vm_ndomains) {
+		vm_domain_stats_init(VM_DOMAIN(domain), domains);
+		domain++;
+	}
+}
+
+SYSINIT(vmstats_init, SI_SUB_VM_CONF, SI_ORDER_FIRST, vm_stats_init, NULL);
Modified: user/jeff/numa/sys/vm/vm_page.c
==============================================================================
--- user/jeff/numa/sys/vm/vm_page.c Tue Feb 20 02:18:30 2018 (r329615)
+++ user/jeff/numa/sys/vm/vm_page.c Tue Feb 20 02:30:51 2018 (r329616)
@@ -476,6 +476,7 @@ vm_page_domain_init(int domain)
}
}
mtx_init(&vmd->vmd_free_mtx, "vm page free queue", NULL, MTX_DEF);
+ snprintf(vmd->vmd_name, sizeof(vmd->vmd_name), "%d", domain);
}
/*
@@ -2934,9 +2935,9 @@ vm_wait(void)
* consume all freed pages while old allocators wait.
*/
mtx_lock(&vm_domainset_lock);
- if (vm_page_count_min()) {
+ if (vm_page_count_severe()) {
vm_min_waiters++;
- msleep(&vm_min_domains, &vm_domainset_lock, PVM,
+ msleep(&vm_severe_domains, &vm_domainset_lock, PVM,
"vmwait", 0);
}
mtx_unlock(&vm_domainset_lock);
@@ -3204,7 +3205,7 @@ vm_domain_free_wakeup(struct vm_domain *vmd)
* high water mark. And wakeup scheduler process if we have
* lots of memory. this process will swapin processes.
*/
- if (vmd->vmd_pages_needed && !vm_paging_min(vmd)) {
+ if (vmd->vmd_pages_needed && !vm_paging_severe(vmd)) {
vmd->vmd_pages_needed = false;
wakeup(&vmd->vmd_free_count);
}
Modified: user/jeff/numa/sys/vm/vm_pageout.c
==============================================================================
--- user/jeff/numa/sys/vm/vm_pageout.c Tue Feb 20 02:18:30 2018 (r329615)
+++ user/jeff/numa/sys/vm/vm_pageout.c Tue Feb 20 02:30:51 2018 (r329616)
@@ -124,7 +124,7 @@ static void vm_pageout(void);
static void vm_pageout_init(void);
static int vm_pageout_clean(vm_page_t m, int *numpagedout);
static int vm_pageout_cluster(vm_page_t m);
-static bool vm_pageout_scan(struct vm_domain *vmd, int pass);
+static bool vm_pageout_scan(struct vm_domain *vmd, int pass, int shortage);
static void vm_pageout_mightbe_oom(struct vm_domain *vmd, int page_shortage,
int starting_page_shortage);
@@ -146,7 +146,7 @@ SDT_PROBE_DEFINE(vm, , , vm__lowmem_scan);
/* Pagedaemon activity rates, in subdivisions of one second. */
#define VM_LAUNDER_RATE 10
-#define VM_INACT_SCAN_RATE 2
+#define VM_INACT_SCAN_RATE 10
static int vm_pageout_oom_seq = 12;
@@ -1206,7 +1206,7 @@ out:
* queue scan to meet the target.
*/
static bool
-vm_pageout_scan(struct vm_domain *vmd, int pass)
+vm_pageout_scan(struct vm_domain *vmd, int pass, int shortage)
{
struct pgo_pglist pglist;
vm_page_t m, next;
@@ -1251,7 +1251,7 @@ vm_pageout_scan(struct vm_domain *vmd, int pass)
*/
if (pass > 0) {
deficit = atomic_readandclear_int(&vmd->vmd_pageout_deficit);
- page_shortage = vm_paging_target(vmd) + deficit;
+ page_shortage = shortage + deficit;
} else
page_shortage = deficit = 0;
starting_page_shortage = page_shortage;
@@ -1505,7 +1505,7 @@ lock_queue:
*/
inactq_shortage = vmd->vmd_inactive_target - (pq->pq_cnt +
vmd->vmd_pagequeues[PQ_LAUNDRY].pq_cnt / act_scan_laundry_weight) +
- vm_paging_target(vmd) + deficit + addl_page_shortage;
+ shortage + deficit + addl_page_shortage;
inactq_shortage *= act_scan_laundry_weight;
pq = &vmd->vmd_pagequeues[PQ_ACTIVE];
@@ -1875,12 +1875,13 @@ static void
vm_pageout_worker(void *arg)
{
struct vm_domain *vmd;
- int domain, pass;
+ int domain, pass, shortage;
bool target_met;
domain = (uintptr_t)arg;
vmd = VM_DOMAIN(domain);
pass = 0;
+ shortage = 0;
target_met = true;
/*
@@ -1918,54 +1919,40 @@ vm_pageout_worker(void *arg)
vmd->vmd_pages_needed = false;
wakeup(&vmd->vmd_free_count);
}
-
/*
- * Do not clear vmd_pageout_wanted until we reach our free page
- * target. Otherwise, we may be awakened over and over again,
- * wasting CPU time.
+ * Might the page daemon need to run again?
*/
- if (vmd->vmd_pageout_wanted && target_met)
- vmd->vmd_pageout_wanted = false;
-
- /*
- * Might the page daemon receive a wakeup call?
- */
- if (vmd->vmd_pageout_wanted) {
+ if (vm_paging_needed(vmd, vmd->vmd_free_count)) {
/*
- * No. Either vmd_pageout_wanted was set by another
- * thread during the previous scan, which must have
- * been a level 0 scan, or vmd_pageout_wanted was
- * already set and the scan failed to free enough
- * pages. If we haven't yet performed a level >= 1
- * (page reclamation) scan, then increase the level
- * and scan again now. Otherwise, sleep a bit and
- * try again later.
+ * Yes, the scan failed to free enough pages. If
+ * we have performed a level >= 1 (page reclamation)
+ * scan, then sleep a bit and try again.
*/
vm_domain_free_unlock(vmd);
- if (pass >= 1)
+ if (pass > 1)
pause("pwait", hz / VM_INACT_SCAN_RATE);
- pass++;
} else {
/*
- * Yes. If threads are still sleeping in VM_WAIT
- * then we immediately start a new scan. Otherwise,
- * sleep until the next wakeup or until pages need to
- * have their reference stats updated.
+ * No, sleep until the next wakeup or until pages
+ * need to have their reference stats updated.
*/
- if (vmd->vmd_pages_needed) {
- vm_domain_free_unlock(vmd);
- if (pass == 0)
- pass++;
- } else if (mtx_sleep(&vmd->vmd_pageout_wanted,
+ vmd->vmd_pageout_wanted = false;
+ if (mtx_sleep(&vmd->vmd_pageout_wanted,
vm_domain_free_lockptr(vmd), PDROP | PVM,
- "psleep", hz) == 0) {
+ "psleep", hz / VM_INACT_SCAN_RATE) == 0)
VM_CNT_INC(v_pdwakeups);
- pass = 1;
- } else
- pass = 0;
}
+ shortage = pidctrl_daemon(&vmd->vmd_pid, vmd->vmd_free_count);
+ if (shortage && pass == 0)
+ pass = 1;
- target_met = vm_pageout_scan(vmd, pass);
+ target_met = vm_pageout_scan(vmd, pass, shortage);
+ /*
+ * If the target was not met we must increase the pass to
+ * more aggressively reclaim.
+ */
+ if (!target_met)
+ pass++;
}
}
@@ -1976,6 +1963,7 @@ static void
vm_pageout_init_domain(int domain)
{
struct vm_domain *vmd;
+ struct sysctl_oid *oid;
int lim, i, j;
vmd = VM_DOMAIN(domain);
@@ -2003,10 +1991,10 @@ vm_pageout_init_domain(int domain)
vmd->vmd_inactive_target = vmd->vmd_free_count / 3;
/*
- * Set the default wakeup threshold to be 10% above the minimum
- * page limit. This keeps the steady state out of shortfall.
+ * Set the default wakeup threshold to be 10% below the paging
+ * target. This keeps the steady state out of shortfall.
*/
- vmd->vmd_pageout_wakeup_thresh = (vmd->vmd_free_min / 10) * 11;
+ vmd->vmd_pageout_wakeup_thresh = (vmd->vmd_free_target / 10) * 9;
/*
* Target amount of memory to move out of the laundry queue during a
@@ -2031,6 +2019,14 @@ vm_pageout_init_domain(int domain)
for (i = 0; i < PQ_COUNT; i++)
for (j = 0; j < BPQ_COUNT; j++)
vmd->vmd_pagequeues[i].pq_bpqs[j].bpq_lim = lim;
+
+ /* Initialize the pageout daemon pid controller. */
+ pidctrl_init(&vmd->vmd_pid, hz / VM_INACT_SCAN_RATE,
+ vmd->vmd_free_target, PIDCTRL_BOUND,
+ PIDCTRL_KPD, PIDCTRL_KID, PIDCTRL_KDD);
+ oid = SYSCTL_ADD_NODE(NULL, SYSCTL_CHILDREN(vmd->vmd_oid), OID_AUTO,
+ "pidctrl", CTLFLAG_RD, NULL, "");
+ pidctrl_init_sysctl(&vmd->vmd_pid, SYSCTL_CHILDREN(oid));
}
static void
Modified: user/jeff/numa/sys/vm/vm_pagequeue.h
==============================================================================
--- user/jeff/numa/sys/vm/vm_pagequeue.h Tue Feb 20 02:18:30 2018 (r329615)
+++ user/jeff/numa/sys/vm/vm_pagequeue.h Tue Feb 20 02:30:51 2018 (r329616)
@@ -86,7 +86,10 @@ struct vm_pagequeue {
} __aligned(CACHE_LINE_SIZE);
#include <vm/uma.h>
+#include <sys/pidctrl.h>
+struct sysctl_oid;
+
struct vm_domain {
struct vm_pagequeue vmd_pagequeues[PQ_COUNT];
struct mtx_padalign vmd_free_mtx;
@@ -97,6 +100,7 @@ struct vm_domain {
long vmd_segs; /* bitmask of the segments */
/* Paging control variables, locked by domain_free_mtx. */
+ struct pidctrl vmd_pid; /* Pageout controller. */
u_int vmd_free_count;
boolean_t vmd_oom;
int vmd_oom_seq;
@@ -129,6 +133,10 @@ struct vm_domain {
u_int vmd_pageout_wakeup_thresh;/* (c) min pages to wake pagedaemon */
u_int vmd_interrupt_free_min; /* (c) reserved pages for int code */
u_int vmd_free_severe; /* (c) severe page depletion point */
+
+ /* Name for sysctl etc. */
+ struct sysctl_oid *vmd_oid;
+ char vmd_name[sizeof(__XSTRING(MAXMEMDOM))];
} __aligned(CACHE_LINE_SIZE);
extern struct vm_domain vm_dom[MAXMEMDOM];
More information about the svn-src-user
mailing list