[TMPFS] patch for FreeBSD 8.2-RELEASE
Maurizio Vairani
maurizio.vairani at cloverinformatica.it
Mon Oct 17 08:24:59 UTC 2011
Hi list,
Gleb Kurtsou in this thread
http://lists.freebsd.org/pipermail/freebsd-fs/2011-October/012650.html
proposes a patch for solving the well-known TMPFS problem: the free
space drops down to zero when ZFS consumes the kernel memory and there
isn't enough free swap space.
Unfortunately the patch is not directly applicable to FreeBSD
8.2-RELEASE, so I have modified the source code using Gleb's patch as a
reference, then recompiled and installed the new driver. I have been
testing it for a week on my AMD64 server with 16G of RAM, reducing the
swap space from 28G to 8G, 4G or none, and it seems the problem is solved.
Regards
-Maurizio
/sys/fs/tmpfs/tmpfs.h
===================================================================
--- tmpfs.h.orig 2010-12-21 18:09:00.000000000 +0100 (v 1.17.2.2.2.1)
+++ tmpfs.h 2011-10-13 15:16:26.900043000 +0200 (working copy)
@@ -304,10 +304,30 @@
#define TMPFS_NODE_LOCK(node) mtx_lock(&(node)->tn_interlock)
#define TMPFS_NODE_UNLOCK(node) mtx_unlock(&(node)->tn_interlock)
-#define TMPFS_NODE_MTX(node) (&(node)->tn_interlock)
+#define TMPFS_NODE_MTX(node) (&(node)->tn_interlock)
+
+#ifdef INVARIANTS
+#define TMPFS_ASSERT_LOCKED(node) do { \
+ MPASS(node != NULL); \
+ MPASS(node->tn_vnode != NULL); \
+ if (!VOP_ISLOCKED(node->tn_vnode) && \
+ !mtx_owned(TMPFS_NODE_MTX(node))) \
+ panic("tmpfs: node is not locked: %p", node); \
+ } while (0)
+#define TMPFS_ASSERT_ELOCKED(node) do { \
+ MPASS((node) != NULL); \
+ MPASS((node)->tn_vnode != NULL); \
+ mtx_assert(TMPFS_NODE_MTX(node), MA_OWNED); \
+ ASSERT_VOP_LOCKED((node)->tn_vnode, "tmpfs"); \
+ } while (0)
+#else
+#define TMPFS_ASSERT_LOCKED(node) (void)0
+#define TMPFS_ASSERT_ELOCKED(node) (void)0
+#endif
#define TMPFS_VNODE_ALLOCATING 1
#define TMPFS_VNODE_WANT 2
+#define TMPFS_VNODE_DOOMED 4
/*
--------------------------------------------------------------------- */
/*
@@ -467,65 +487,30 @@
* Memory management stuff.
*/
-/* Amount of memory pages to reserve for the system (e.g., to not use by
- * tmpfs).
- * XXX: Should this be tunable through sysctl, for instance? */
-#define TMPFS_PAGES_RESERVED (4 * 1024 * 1024 / PAGE_SIZE)
-
/*
- * Returns information about the number of available memory pages,
- * including physical and virtual ones.
- *
- * If 'total' is TRUE, the value returned is the total amount of memory
- * pages configured for the system (either in use or free).
- * If it is FALSE, the value returned is the amount of free memory pages.
- *
- * Remember to remove TMPFS_PAGES_RESERVED from the returned value to avoid
- * excessive memory usage.
- *
+ * Number of reserved swap pages should not be lower than
+ * swap_pager_almost_full high water mark.
*/
+#define TMPFS_SWAP_MINRESERVED 1024
+
static __inline size_t
-tmpfs_mem_info(void)
+tmpfs_pages_max(struct tmpfs_mount *tmp)
{
- size_t size;
-
- size = swap_pager_avail + cnt.v_free_count + cnt.v_inactive_count;
- size -= size > cnt.v_wire_count ? cnt.v_wire_count : size;
- return size;
+ return (tmp->tm_pages_max);
}
-/* Returns the maximum size allowed for a tmpfs file system. This macro
- * must be used instead of directly retrieving the value from tm_pages_max.
- * The reason is that the size of a tmpfs file system is dynamic: it lets
- * the user store files as long as there is enough free memory (including
- * physical memory and swap space). Therefore, the amount of memory to be
- * used is either the limit imposed by the user during mount time or the
- * amount of available memory, whichever is lower. To avoid consuming all
- * the memory for a given mount point, the system will always reserve a
- * minimum of TMPFS_PAGES_RESERVED pages, which is also taken into account
- * by this macro (see above). */
static __inline size_t
-TMPFS_PAGES_MAX(struct tmpfs_mount *tmp)
+tmpfs_pages_used(struct tmpfs_mount *tmp)
{
- size_t freepages;
-
- freepages = tmpfs_mem_info();
- freepages -= freepages < TMPFS_PAGES_RESERVED ?
- freepages : TMPFS_PAGES_RESERVED;
-
- return MIN(tmp->tm_pages_max, freepages + tmp->tm_pages_used);
+ const size_t node_size = sizeof(struct tmpfs_node) +
+ sizeof(struct tmpfs_dirent);
+ size_t meta_pages;
+
+ meta_pages = howmany((uintmax_t)tmp->tm_nodes_inuse * node_size,
+ PAGE_SIZE);
+ return (meta_pages + tmp->tm_pages_used);
}
-/* Returns the available space for the given file system. */
-#define TMPFS_META_PAGES(tmp) (howmany((tmp)->tm_nodes_inuse *
(sizeof(struct tmpfs_node) \
- + sizeof(struct tmpfs_dirent)), PAGE_SIZE))
-#define TMPFS_FILE_PAGES(tmp) ((tmp)->tm_pages_used)
-
-#define TMPFS_PAGES_AVAIL(tmp) (TMPFS_PAGES_MAX(tmp) > \
- TMPFS_META_PAGES(tmp)+TMPFS_FILE_PAGES(tmp)? \
- TMPFS_PAGES_MAX(tmp) - TMPFS_META_PAGES(tmp) \
- - TMPFS_FILE_PAGES(tmp):0)
-
#endif
/*
--------------------------------------------------------------------- */
/sys/fs/tmpfs/tmpfs_subr.c
===================================================================
--- tmpfs_subr.c.orig 2010-12-21 18:09:00.000000000 +0100 (v
1.23.2.2.2.1)
+++ tmpfs_subr.c 2011-10-06 14:31:26.007163000 +0200 (working copy)
@@ -41,6 +41,7 @@
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/stat.h>
+#include <sys/sysctl.h>
#include <sys/systm.h>
#include <sys/vnode.h>
#include <sys/vmmeter.h>
@@ -55,6 +56,60 @@
#include <fs/tmpfs/tmpfs_fifoops.h>
#include <fs/tmpfs/tmpfs_vnops.h>
+static long tmpfs_swap_reserved = TMPFS_SWAP_MINRESERVED * 2;
+
+SYSCTL_NODE(_vfs, OID_AUTO, tmpfs, CTLFLAG_RW, 0, "tmpfs memory file
system");
+
+static int
+sysctl_swap_reserved(SYSCTL_HANDLER_ARGS)
+{
+ int error;
+ long pages, bytes;
+
+ pages = *(long *)arg1;
+ bytes = pages * PAGE_SIZE;
+
+ error = sysctl_handle_long(oidp, &bytes, 0, req);
+ if (error || !req->newptr)
+ return (error);
+
+ pages = bytes / PAGE_SIZE;
+ if (pages < TMPFS_SWAP_MINRESERVED)
+ return (EINVAL);
+
+ *(long *)arg1 = pages;
+ return (0);
+}
+
+SYSCTL_PROC(_vfs_tmpfs, OID_AUTO, swap_reserved, CTLTYPE_LONG|CTLFLAG_RW,
+ &tmpfs_swap_reserved, 0, sysctl_swap_reserved, "L", "reserved swap
space");
+
+static __inline size_t
+tmpfs_pages_avail(struct tmpfs_mount *tmp, size_t req_pages)
+{
+ vm_ooffset_t avail;
+
+ if (tmpfs_pages_max(tmp) < tmpfs_pages_used(tmp) + req_pages)
+ return (0);
+
+ if (!vm_page_count_target())
+ return (1);
+
+ /*
+ * Fail if pagedaemon wasn't able to free desired number of pages and
+ * we are running out of swap.
+ */
+ avail = swap_pager_avail - vm_paging_target() - req_pages;
+ if (avail < tmpfs_swap_reserved) { /* avail is signed */
+ printf("tmpfs: low memory: available %jd, "
+ "paging target %d, requested %zd\n",
+ (intmax_t)swap_pager_avail, vm_paging_target(), req_pages);
+ return (0);
+ }
+
+ return (1);
+}
+
/*
--------------------------------------------------------------------- */
/*
@@ -95,6 +150,8 @@
if (tmp->tm_nodes_inuse > tmp->tm_nodes_max)
return (ENOSPC);
+ if (tmpfs_pages_avail(tmp, 1) == 0)
+ return (ENOSPC);
nnode = (struct tmpfs_node *)uma_zalloc_arg(
tmp->tm_node_pool, tmp, M_WAITOK);
@@ -882,7 +939,7 @@
newpages = round_page(newsize) / PAGE_SIZE;
if (newpages > oldpages &&
- newpages - oldpages > TMPFS_PAGES_AVAIL(tmp)) {
+ tmpfs_pages_avail(tmp, newpages - oldpages) == 0) {
error = ENOSPC;
goto out;
}
/sys/fs/tmpfs/tmpfs_vfsops.c
===================================================================
--- tmpfs_vfsops.c.orig 2010-12-21 18:09:00.000000000 +0100 (v
1.21.2.1.6.1)
+++ tmpfs_vfsops.c 2011-10-07 14:10:15.137747000 +0200 (working copy)
@@ -85,53 +85,6 @@
#define SWI_MAXMIB 3
-static u_int
-get_swpgtotal(void)
-{
- struct xswdev xsd;
- char *sname = "vm.swap_info";
- int soid[SWI_MAXMIB], oid[2];
- u_int unswdev, total, dmmax, nswapdev;
- size_t mibi, len;
-
- total = 0;
-
- len = sizeof(dmmax);
- if (kernel_sysctlbyname(curthread, "vm.dmmax", &dmmax, &len,
- NULL, 0, NULL, 0) != 0)
- return total;
-
- len = sizeof(nswapdev);
- if (kernel_sysctlbyname(curthread, "vm.nswapdev",
- &nswapdev, &len,
- NULL, 0, NULL, 0) != 0)
- return total;
-
- mibi = (SWI_MAXMIB - 1) * sizeof(int);
- oid[0] = 0;
- oid[1] = 3;
-
- if (kernel_sysctl(curthread, oid, 2,
- soid, &mibi, (void *)sname, strlen(sname),
- NULL, 0) != 0)
- return total;
-
- mibi = (SWI_MAXMIB - 1);
- for (unswdev = 0; unswdev < nswapdev; ++unswdev) {
- soid[mibi] = unswdev;
- len = sizeof(struct xswdev);
- if (kernel_sysctl(curthread,
- soid, mibi + 1, &xsd, &len, NULL, 0,
- NULL, 0) != 0)
- return total;
- if (len == sizeof(struct xswdev))
- total += (xsd.xsw_nblks - dmmax);
- }
-
- /* Not Reached */
- return total;
-}
-
/*
--------------------------------------------------------------------- */
static int
tmpfs_node_ctor(void *mem, int size, void *arg, int flags)
@@ -179,14 +132,13 @@
static int
tmpfs_mount(struct mount *mp)
{
+ const size_t nodes_per_page = howmany(PAGE_SIZE,
+ sizeof(struct tmpfs_dirent) + sizeof(struct tmpfs_node));
struct tmpfs_mount *tmp;
struct tmpfs_node *root;
- size_t pages, mem_size;
- ino_t nodes;
+ u_quad_t pages;
+ u_quad_t nodes_max, size_max, maxfilesize;
int error;
- /* Size counters. */
- ino_t nodes_max;
- size_t size_max;
/* Root node attributes. */
uid_t root_uid;
@@ -223,42 +175,55 @@
if (mp->mnt_cred->cr_ruid != 0 ||
vfs_scanopt(mp->mnt_optnew, "mode", "%ho", &root_mode) != 1)
root_mode = va.va_mode;
- if (vfs_scanopt(mp->mnt_optnew, "inodes", "%d", &nodes_max) != 1)
+ if (vfs_scanopt(mp->mnt_optnew, "inodes", "%qu", &nodes_max) != 1)
nodes_max = 0;
if (vfs_scanopt(mp->mnt_optnew, "size", "%qu", &size_max) != 1)
size_max = 0;
-
- /* Do not allow mounts if we do not have enough memory to preserve
- * the minimum reserved pages. */
- mem_size = cnt.v_free_count + cnt.v_inactive_count + get_swpgtotal();
- mem_size -= mem_size > cnt.v_wire_count ? cnt.v_wire_count : mem_size;
- if (mem_size < TMPFS_PAGES_RESERVED)
+ if (vfs_scanopt(mp->mnt_optnew, "maxfilesize", "%qu", &maxfilesize)
!= 0)
+ maxfilesize = 0;
+ /*
+ * XXX Deny mounts if pagedaemon wasn't able to recovery desired
+ * number of pages.
+ */
+ if (vm_page_count_target())
return ENOSPC;
/* Get the maximum number of memory pages this file system is
* allowed to use, based on the maximum size the user passed in
- * the mount structure. A value of zero is treated as if the
- * maximum available space was requested. */
- if (size_max < PAGE_SIZE || size_max >= SIZE_MAX)
- pages = SIZE_MAX;
+ * the mount structure. Use half of RAM by default. */
+ if (size_max < PAGE_SIZE*4 || size_max > SIZE_MAX - PAGE_SIZE)
+ pages = cnt.v_page_count / 2;
else
pages = howmany(size_max, PAGE_SIZE);
MPASS(pages > 0);
+ MPASS(pages < SIZE_MAX);
- if (nodes_max <= 3)
- nodes = 3 + pages * PAGE_SIZE / 1024;
+ if (pages < SIZE_MAX / PAGE_SIZE)
+ size_max = pages * PAGE_SIZE;
else
- nodes = nodes_max;
- MPASS(nodes >= 3);
+ size_max = SIZE_MAX;
+
+ if (nodes_max <= 3) {
+ if (pages < UINT32_MAX / nodes_per_page)
+ nodes_max = pages * nodes_per_page;
+ else
+ nodes_max = UINT32_MAX;
+ }
+ if (nodes_max > UINT32_MAX)
+ nodes_max = UINT32_MAX;
+ MPASS(nodes_max >= 3);
+
+ if (maxfilesize < PAGE_SIZE || maxfilesize > size_max)
+ maxfilesize = size_max;
/* Allocate the tmpfs mount structure and fill it. */
tmp = (struct tmpfs_mount *)malloc(sizeof(struct tmpfs_mount),
M_TMPFSMNT, M_WAITOK | M_ZERO);
mtx_init(&tmp->allnode_lock, "tmpfs allnode lock", NULL, MTX_DEF);
- tmp->tm_nodes_max = nodes;
+ tmp->tm_nodes_max = nodes_max;
tmp->tm_nodes_inuse = 0;
- tmp->tm_maxfilesize = (u_int64_t)(cnt.v_page_count +
get_swpgtotal()) * PAGE_SIZE;
+ tmp->tm_maxfilesize = maxfilesize;
LIST_INIT(&tmp->tm_nodes_used);
tmp->tm_pages_max = pages;
@@ -427,22 +392,23 @@
static int
tmpfs_statfs(struct mount *mp, struct statfs *sbp)
{
- fsfilcnt_t freenodes;
struct tmpfs_mount *tmp;
+ size_t used;
tmp = VFS_TO_TMPFS(mp);
sbp->f_iosize = PAGE_SIZE;
sbp->f_bsize = PAGE_SIZE;
- sbp->f_blocks = TMPFS_PAGES_MAX(tmp);
- sbp->f_bavail = sbp->f_bfree = TMPFS_PAGES_AVAIL(tmp);
-
- freenodes = MIN(tmp->tm_nodes_max - tmp->tm_nodes_inuse,
- TMPFS_PAGES_AVAIL(tmp) * PAGE_SIZE / sizeof(struct tmpfs_node));
-
- sbp->f_files = freenodes + tmp->tm_nodes_inuse;
- sbp->f_ffree = freenodes;
+ sbp->f_blocks = tmpfs_pages_max(tmp);
+ used = tmpfs_pages_used(tmp);
+ if (tmpfs_pages_max(tmp) <= used)
+ sbp->f_bavail = 0;
+ else
+ sbp->f_bavail = tmpfs_pages_max(tmp) - used;
+ sbp->f_bfree = sbp->f_bavail;
+ sbp->f_files = tmp->tm_nodes_max;
+ sbp->f_ffree = tmp->tm_nodes_max - tmp->tm_nodes_inuse;
/* sbp->f_owner = tmp->tn_uid; */
return 0;
More information about the freebsd-fs
mailing list