pipe() resource exhaustion
Mateusz Guzik
mjguzik at gmail.com
Tue Apr 8 12:12:33 UTC 2014
On Tue, Apr 08, 2014 at 01:02:06PM +0200, Eduardo Morras wrote:
> On Mon, 7 Apr 2014 07:25:22 -0500
> Mark Felder <feld at freebsd.org> wrote:
>
> > On 2014-04-07 06:02, Ivan Voras wrote:
> > > Hello,
> > >
> > > Last time I mentioned this it didn't get any attention, so I'll try
> > > again. By accident (via a buggy synergy server process) I found
> > > that a simple userland process can exhaust kernel pipe memory
> > > (kern.ipc.pipekva
> > > sysctl) which as a consequence has that new processes which use pipe
> > > cannot be started, which includes "su", by which an administrator
> > > could kill such a process.
> > >
> >
> > That's a pretty painful local denial of service :(
>
> Yes it is. Perhaps there should be 8% fd reserved for root, su and setuid family syscalls like in filesystem space or postgresql reserved connections for db admin.
>
There is an fd reserve already. Report talks about problems with
creating a new *pipe*, not any fd in general.
That said, supporting a reserve for this one sounds like a good idea and
not that hard to implement - one can either play with atomics and double
check or just place a mutex-protected check in pipespace_new (before
vm_map_find).
I implemented the second one, which turned out to be surprisingly ugly.
For now it abuses PRIV_MAXPROC and has a reserve taken out of the blue.
Thoughts?
diff --git a/sys/kern/sys_pipe.c b/sys/kern/sys_pipe.c
index 6ba52e3..04c4559 100644
--- a/sys/kern/sys_pipe.c
+++ b/sys/kern/sys_pipe.c
@@ -102,6 +102,7 @@ __FBSDID("$FreeBSD$");
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
+#include <sys/priv.h>
#include <sys/ttycom.h>
#include <sys/stat.h>
#include <sys/malloc.h>
@@ -200,6 +201,7 @@ static struct filterops pipe_wfiltops = {
#define MAXPIPESIZE (2*PIPE_SIZE/3)
static long amountpipekva;
+static struct mtx amountpipekva_mtx;
static int pipefragretry;
static int pipeallocfail;
static int piperesizefail;
@@ -256,6 +258,8 @@ pipeinit(void *dummy __unused)
KASSERT(pipeino_unr != NULL, ("pipe fake inodes not initialized"));
pipedev_ino = devfs_alloc_cdp_inode();
KASSERT(pipedev_ino > 0, ("pipe dev inode not initialized"));
+
+ mtx_init(&amountpipekva_mtx, "pipe kva mutex", NULL, MTX_DEF);
}
static int
@@ -515,24 +519,29 @@ pipespace_new(cpipe, size)
KASSERT(!mtx_owned(PIPE_MTX(cpipe)), ("pipespace: pipe mutex locked"));
KASSERT(!(cpipe->pipe_state & PIPE_DIRECTW),
("pipespace: resize of direct writes not allowed"));
+
+ mtx_lock(&amountpipekva_mtx);
retry:
cnt = cpipe->pipe_buffer.cnt;
if (cnt > size)
size = cnt;
size = round_page(size);
- buffer = (caddr_t) vm_map_min(pipe_map);
- error = vm_map_find(pipe_map, NULL, 0,
- (vm_offset_t *) &buffer, size, 0, VMFS_ANY_SPACE,
- VM_PROT_ALL, VM_PROT_ALL, 0);
- if (error != KERN_SUCCESS) {
+ if (amountpipekva + size + 10 * PAGE_SIZE >= maxpipekva) {
+ if (priv_check_cred(curthread->td_ucred, PRIV_MAXPROC, 0) == 0 &&
+ amountpipekva + size <= maxpipekva)
+ goto alloc;
+recalc:
if ((cpipe->pipe_buffer.buffer == NULL) &&
(size > SMALL_PIPE_SIZE)) {
size = SMALL_PIPE_SIZE;
pipefragretry++;
goto retry;
}
+
+ mtx_unlock(&amountpipekva_mtx);
+
if (cpipe->pipe_buffer.buffer == NULL) {
pipeallocfail++;
if (ppsratecheck(&lastfail, &curfail, 1))
@@ -542,6 +551,20 @@ retry:
}
return (ENOMEM);
}
+alloc:
+ atomic_add_long(&amountpipekva, size);
+ mtx_unlock(&amountpipekva_mtx);
+
+ buffer = (caddr_t) vm_map_min(pipe_map);
+
+ error = vm_map_find(pipe_map, NULL, 0,
+ (vm_offset_t *) &buffer, size, 0, VMFS_ANY_SPACE,
+ VM_PROT_ALL, VM_PROT_ALL, 0);
+ if (error != KERN_SUCCESS) {
+ mtx_lock(&amountpipekva_mtx);
+ atomic_subtract_long(&amountpipekva, size);
+ goto recalc;
+ }
/* copy data, then free old resources if we're resizing */
if (cnt > 0) {
@@ -563,7 +586,6 @@ retry:
cpipe->pipe_buffer.in = cnt;
cpipe->pipe_buffer.out = 0;
cpipe->pipe_buffer.cnt = cnt;
- atomic_add_long(&amountpipekva, cpipe->pipe_buffer.size);
return (0);
}
More information about the freebsd-hackers
mailing list