svn commit: r185435 - in head: lib/libc/sys lib/libkvm share/man/man4 sys/compat/freebsd32 sys/kern sys/net sys/netinet sys/netinet6 sys/security/mac_bsdextended sys/sys usr.bin/cpuset usr.sbin/jai...

Bjoern A. Zeeb bz at FreeBSD.org
Sat Nov 29 06:32:15 PST 2008


Author: bz
Date: Sat Nov 29 14:32:14 2008
New Revision: 185435
URL: http://svn.freebsd.org/changeset/base/185435

Log:
  MFp4:
    Bring in updated jail support from bz_jail branch.
  
  This enhances the current jail implementation to permit multiple
  addresses per jail. In addition to IPv4, IPv6 is supported as well.
  Due to updated checks it is even possible to have jails without
  an IP address at all, which basically gives one a chroot with
  restricted process view, no networking, etc.
  
  SCTP support was updated and supports IPv6 in jails as well.
  
  Cpuset support permits jails to be bound to specific processor
  sets after creation.
  
  Jails can have an unrestricted (no duplicate protection, etc.) name
  in addition to the hostname. The jail name cannot be changed from
  within a jail and is considered to be used for management purposes
  or as audit-token in the future.
  
  DDB 'show jails' command was added to aid debugging.
  
  Proper compat support permits 32bit jail binaries to be used on 64bit
  systems to manage jails. Also backward compatibility was preserved where
  possible: for jail v1 syscalls, as well as with user space management
  utilities.
  
  Both jail as well as prison version were updated for the new features.
  A gap was intentionally left as the intermediate versions had been
  used by various patches floating around the last years.
  
  Bump __FreeBSD_version for the aforementioned and in-kernel changes.
  
  Special thanks to:
  - Pawel Jakub Dawidek (pjd) for his multi-IPv4 patches
    and Olivier Houchard (cognet) for initial single-IPv6 patches.
  - Jeff Roberson (jeff) and Randall Stewart (rrs) for their
    help, ideas and review on cpuset and SCTP support.
  - Robert Watson (rwatson) for lots and lots of help, discussions,
    suggestions and review of most of the patch at various stages.
  - John Baldwin (jhb) for his help.
  - Simon L. Nielsen (simon) as early adopter testing changes
    on cluster machines as well as all the testers and people
    who provided feedback the last months on freebsd-jail and
    other channels.
  - My employer, CK Software GmbH, for the support so I could work on this.
  
  Reviewed by:	(see above)
  MFC after:	3 months (this is just so that I get the mail)
  X-MFC Before:   7.2-RELEASE if possible

Modified:
  head/lib/libc/sys/cpuset_getaffinity.2
  head/lib/libc/sys/jail.2
  head/lib/libkvm/kvm_proc.c
  head/share/man/man4/ddb.4
  head/sys/compat/freebsd32/freebsd32.h
  head/sys/compat/freebsd32/freebsd32_misc.c
  head/sys/compat/freebsd32/syscalls.master
  head/sys/kern/kern_cpuset.c
  head/sys/kern/kern_exit.c
  head/sys/kern/kern_fork.c
  head/sys/kern/kern_jail.c
  head/sys/kern/uipc_socket.c
  head/sys/net/if.c
  head/sys/net/rtsock.c
  head/sys/netinet/in_pcb.c
  head/sys/netinet/raw_ip.c
  head/sys/netinet/sctp_pcb.c
  head/sys/netinet/sctp_usrreq.c
  head/sys/netinet/tcp_usrreq.c
  head/sys/netinet/udp_usrreq.c
  head/sys/netinet6/in6_pcb.c
  head/sys/netinet6/in6_src.c
  head/sys/netinet6/raw_ip6.c
  head/sys/netinet6/udp6_usrreq.c
  head/sys/security/mac_bsdextended/mac_bsdextended.c
  head/sys/sys/cpuset.h
  head/sys/sys/jail.h
  head/sys/sys/param.h
  head/usr.bin/cpuset/cpuset.1
  head/usr.bin/cpuset/cpuset.c
  head/usr.sbin/jail/Makefile
  head/usr.sbin/jail/jail.8
  head/usr.sbin/jail/jail.c
  head/usr.sbin/jexec/Makefile
  head/usr.sbin/jexec/jexec.8
  head/usr.sbin/jexec/jexec.c
  head/usr.sbin/jls/Makefile
  head/usr.sbin/jls/jls.8
  head/usr.sbin/jls/jls.c

Modified: head/lib/libc/sys/cpuset_getaffinity.2
==============================================================================
--- head/lib/libc/sys/cpuset_getaffinity.2	Sat Nov 29 14:26:22 2008	(r185434)
+++ head/lib/libc/sys/cpuset_getaffinity.2	Sat Nov 29 14:32:14 2008	(r185435)
@@ -25,7 +25,7 @@
 .\"
 .\" $FreeBSD$
 .\"
-.Dd March 29, 2008
+.Dd November 29, 2008
 .Dt CPUSET 2
 .Os
 .Sh NAME
@@ -46,7 +46,7 @@
 and
 .Fn cpuset_setaffinity
 allow the manipulation of sets of CPUs available to processes, threads, 
-interrupts and other resources.
+interrupts, jails and other resources.
 These functions may manipulate sets of CPUs that contain many processes
 or per-object anonymous masks that effect only a single object.
 .Pp

Modified: head/lib/libc/sys/jail.2
==============================================================================
--- head/lib/libc/sys/jail.2	Sat Nov 29 14:26:22 2008	(r185434)
+++ head/lib/libc/sys/jail.2	Sat Nov 29 14:32:14 2008	(r185435)
@@ -8,7 +8,7 @@
 .\"
 .\" $FreeBSD$
 .\"
-.Dd August 3, 2008
+.Dd November 29, 2008
 .Dt JAIL 2
 .Os
 .Sh NAME
@@ -32,15 +32,20 @@ The argument is a pointer to a structure
 .Bd -literal -offset indent
 struct jail {
 	u_int32_t	version;
-        char 		*path;
-        char 		*hostname;
-        u_int32_t	ip_number;
+	char		*path;
+	char		*hostname;
+	char		*jailname;
+	unsigned int	ip4s;
+	unsigned int	ip6s;
+	struct in_addr	*ip4;
+	struct in6_addr	*ip6;
 };
 .Ed
 .Pp
 .Dq Li version
 defines the version of the API in use.
-It should be set to zero at this time.
+.Dv JAIL_API_VERSION
+is defined for the current version.
 .Pp
 The
 .Dq Li path
@@ -54,8 +59,24 @@ This can be changed
 from the inside of the prison.
 .Pp
 The
-.Dq Li ip_number
-can be set to the IP number assigned to the prison.
+.Dq Li jailname
+pointer is an optional name that can be assigned to the jail
+for example for management purposes.
+.Pp
+The
+.Dq Li ip4s
+and
+.Dq Li ip6s
+give the numbers of IPv4 and IPv6 addresses that will be passed
+via their respective pointers.
+.Pp
+The
+.Dq Li ip4
+and
+.Dq Li ip6
+pointers can be set to arrays of IPv4 and IPv6 addresses to be assigned to
+the prison, or NULL if none.
+IPv4 addresses must be in network byte order.
 .Pp
 The
 .Fn jail_attach

Modified: head/lib/libkvm/kvm_proc.c
==============================================================================
--- head/lib/libkvm/kvm_proc.c	Sat Nov 29 14:26:22 2008	(r185434)
+++ head/lib/libkvm/kvm_proc.c	Sat Nov 29 14:32:14 2008	(r185435)
@@ -54,10 +54,11 @@ __FBSDID("$FreeBSD$");
 #include <sys/_lock.h>
 #include <sys/_mutex.h>
 #include <sys/_task.h>
-#define	_WANT_PRISON	/* make jail.h give us 'struct prison' */
-#include <sys/jail.h>
+#include <sys/cpuset.h>
 #include <sys/user.h>
 #include <sys/proc.h>
+#define	_WANT_PRISON	/* make jail.h give us 'struct prison' */
+#include <sys/jail.h>
 #include <sys/exec.h>
 #include <sys/stat.h>
 #include <sys/sysent.h>

Modified: head/share/man/man4/ddb.4
==============================================================================
--- head/share/man/man4/ddb.4	Sat Nov 29 14:26:22 2008	(r185434)
+++ head/share/man/man4/ddb.4	Sat Nov 29 14:32:14 2008	(r185435)
@@ -60,7 +60,7 @@
 .\"
 .\" $FreeBSD$
 .\"
-.Dd October 18, 2008
+.Dd November 29, 2008
 .Dt DDB 4
 .Os
 .Sh NAME
@@ -686,6 +686,15 @@ Dump the interrupt statistics.
 Show interrupt lines and their respective kernel threads.
 .\"
 .Pp
+.It Ic show Cm jails
+Show the list of
+.Xr jail 8
+instances.
+In addition to what
+.Xr jls 8
+shows, also list kernel internal details.
+.\"
+.Pp
 .It Ic show Cm lapic
 Show information from the local APIC registers for this CPU.
 .\"

Modified: head/sys/compat/freebsd32/freebsd32.h
==============================================================================
--- head/sys/compat/freebsd32/freebsd32.h	Sat Nov 29 14:26:22 2008	(r185434)
+++ head/sys/compat/freebsd32/freebsd32.h	Sat Nov 29 14:32:14 2008	(r185435)
@@ -153,6 +153,24 @@ struct stat32 {
 	unsigned int :(8 / 2) * (16 - (int)sizeof(struct timespec32));
 };
 
+struct jail32_v0 {
+	u_int32_t	version;
+	uint32_t	path;
+	uint32_t	hostname;
+	u_int32_t	ip_number;
+};
+
+struct jail32 {
+	uint32_t	version;
+	uint32_t	path;
+	uint32_t	hostname;
+	uint32_t	jailname;
+	uint32_t	ip4s;
+	uint32_t	ip6s;
+	uint32_t	ip4;
+	uint32_t	ip6;
+};
+
 struct sigaction32 {
 	u_int32_t	sa_u;
 	int		sa_flags;

Modified: head/sys/compat/freebsd32/freebsd32_misc.c
==============================================================================
--- head/sys/compat/freebsd32/freebsd32_misc.c	Sat Nov 29 14:26:22 2008	(r185434)
+++ head/sys/compat/freebsd32/freebsd32_misc.c	Sat Nov 29 14:32:14 2008	(r185435)
@@ -36,6 +36,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/fcntl.h>
 #include <sys/filedesc.h>
 #include <sys/imgact.h>
+#include <sys/jail.h>
 #include <sys/kernel.h>
 #include <sys/limits.h>
 #include <sys/lock.h>
@@ -2036,6 +2037,66 @@ done2:
 }
 
 int
+freebsd32_jail(struct thread *td, struct freebsd32_jail_args *uap)
+{
+	uint32_t version;
+	int error;
+	struct jail j;
+
+	error = copyin(uap->jail, &version, sizeof(uint32_t));
+	if (error)
+		return (error);
+	switch (version) {
+	case 0:	
+	{
+		/* FreeBSD single IPv4 jails. */
+		struct jail32_v0 j32_v0;
+
+		bzero(&j, sizeof(struct jail));
+		error = copyin(uap->jail, &j32_v0, sizeof(struct jail32_v0));
+		if (error)
+			return (error);
+		CP(j32_v0, j, version);
+		PTRIN_CP(j32_v0, j, path);
+		PTRIN_CP(j32_v0, j, hostname);
+		j.ip4s = j32_v0.ip_number;
+		break;
+	}
+
+	case 1:
+		/*
+		 * Version 1 was used by multi-IPv4 jail implementations
+		 * that never made it into the official kernel.
+		 */
+		return (EINVAL);
+
+	case 2:	/* JAIL_API_VERSION */
+	{
+		/* FreeBSD multi-IPv4/IPv6,noIP jails. */
+		struct jail32 j32;
+
+		error = copyin(uap->jail, &j32, sizeof(struct jail32));
+		if (error)
+			return (error);
+		CP(j32, j, version);
+		PTRIN_CP(j32, j, path);
+		PTRIN_CP(j32, j, hostname);
+		PTRIN_CP(j32, j, jailname);
+		CP(j32, j, ip4s);
+		CP(j32, j, ip6s);
+		PTRIN_CP(j32, j, ip4);
+		PTRIN_CP(j32, j, ip6);
+		break;
+	}
+
+	default:
+		/* Sci-Fi jails are not supported, sorry. */
+		return (EINVAL);
+	}
+	return (kern_jail(td, &j));
+}
+
+int
 freebsd32_sigaction(struct thread *td, struct freebsd32_sigaction_args *uap)
 {
 	struct sigaction32 s32;

Modified: head/sys/compat/freebsd32/syscalls.master
==============================================================================
--- head/sys/compat/freebsd32/syscalls.master	Sat Nov 29 14:26:22 2008	(r185434)
+++ head/sys/compat/freebsd32/syscalls.master	Sat Nov 29 14:32:14 2008	(r185435)
@@ -572,7 +572,7 @@
 				    off_t *sbytes, int flags); }
 337	AUE_NULL	NOPROTO	{ int kldsym(int fileid, int cmd, \
 				    void *data); }
-338	AUE_JAIL	NOPROTO	{ int jail(struct jail *jail); }
+338	AUE_JAIL	STD	{ int freebsd32_jail(struct jail32 *jail); }
 339	AUE_NULL	UNIMPL	pioctl
 340	AUE_SIGPROCMASK	NOPROTO	{ int sigprocmask(int how, \
 				    const sigset_t *set, sigset_t *oset); }

Modified: head/sys/kern/kern_cpuset.c
==============================================================================
--- head/sys/kern/kern_cpuset.c	Sat Nov 29 14:26:22 2008	(r185434)
+++ head/sys/kern/kern_cpuset.c	Sat Nov 29 14:32:14 2008	(r185435)
@@ -53,6 +53,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/limits.h>
 #include <sys/bus.h>
 #include <sys/interrupt.h>
+#include <sys/jail.h>		/* Must come after sys/proc.h */
 
 #include <vm/uma.h>
 
@@ -208,7 +209,7 @@ cpuset_rel_complete(struct cpuset *set)
  * Find a set based on an id.  Returns it with a ref.
  */
 static struct cpuset *
-cpuset_lookup(cpusetid_t setid)
+cpuset_lookup(cpusetid_t setid, struct thread *td)
 {
 	struct cpuset *set;
 
@@ -221,6 +222,28 @@ cpuset_lookup(cpusetid_t setid)
 	if (set)
 		cpuset_ref(set);
 	mtx_unlock_spin(&cpuset_lock);
+
+	KASSERT(td != NULL, ("[%s:%d] td is NULL", __func__, __LINE__));
+	if (set != NULL && jailed(td->td_ucred)) {
+		struct cpuset *rset, *jset;
+		struct prison *pr;
+
+		rset = cpuset_refroot(set);
+
+		pr = td->td_ucred->cr_prison;
+		mtx_lock(&pr->pr_mtx);
+		cpuset_ref(pr->pr_cpuset);
+		jset = pr->pr_cpuset;
+		mtx_unlock(&pr->pr_mtx);
+
+		if (jset->cs_id != rset->cs_id) {
+			cpuset_rel(set);
+			set = NULL;
+		}
+		cpuset_rel(jset);
+		cpuset_rel(rset);
+	}
+
 	return (set);
 }
 
@@ -412,12 +435,38 @@ cpuset_which(cpuwhich_t which, id_t id, 
 			set = cpuset_refbase(curthread->td_cpuset);
 			thread_unlock(curthread);
 		} else
-			set = cpuset_lookup(id);
+			set = cpuset_lookup(id, curthread);
 		if (set) {
 			*setp = set;
 			return (0);
 		}
 		return (ESRCH);
+	case CPU_WHICH_JAIL:
+	{
+		/* Find `set' for prison with given id. */
+		struct prison *pr;
+
+		sx_slock(&allprison_lock);
+		pr = prison_find(id);
+		sx_sunlock(&allprison_lock);
+		if (pr == NULL)
+			return (ESRCH);
+		if (jailed(curthread->td_ucred)) {
+			if (curthread->td_ucred->cr_prison == pr) {
+				cpuset_ref(pr->pr_cpuset);
+				set = pr->pr_cpuset;
+			}
+		} else {
+			cpuset_ref(pr->pr_cpuset);
+			set = pr->pr_cpuset;
+		}
+		mtx_unlock(&pr->pr_mtx);
+		if (set) {
+			*setp = set;
+			return (0);
+		}
+		return (ESRCH);
+	}
 	case CPU_WHICH_IRQ:
 		return (0);
 	default:
@@ -664,6 +713,59 @@ cpuset_thread0(void)
 }
 
 /*
+ * Create a cpuset, which would be cpuset_create() but
+ * mark the new 'set' as root.
+ *
+ * We are not going to reparent the td to it. Use cpuset_setproc_update_set() for that.
+ *
+ * In case of no error, returns the set in *setp locked with a reference.
+ */
+int
+cpuset_create_root(struct thread *td, struct cpuset **setp)
+{
+	struct cpuset *root;
+	struct cpuset *set;
+	int error;
+
+	KASSERT(td != NULL, ("[%s:%d] invalid td", __func__, __LINE__));
+	KASSERT(setp != NULL, ("[%s:%d] invalid setp", __func__, __LINE__));
+
+	thread_lock(td);
+	root = cpuset_refroot(td->td_cpuset);
+	thread_unlock(td);
+
+	error = cpuset_create(setp, td->td_cpuset, &root->cs_mask);
+	cpuset_rel(root);
+	if (error)
+		return (error);
+
+	KASSERT(*setp != NULL, ("[%s:%d] cpuset_create returned invalid data",
+	    __func__, __LINE__));
+
+	/* Mark the set as root. */
+	set = *setp;
+	set->cs_flags |= CPU_SET_ROOT;
+
+	return (0);
+}
+
+int
+cpuset_setproc_update_set(struct proc *p, struct cpuset *set)
+{
+	int error;
+
+	KASSERT(p != NULL, ("[%s:%d] invalid proc", __func__, __LINE__));
+	KASSERT(set != NULL, ("[%s:%d] invalid set", __func__, __LINE__));
+
+	cpuset_ref(set);
+	error = cpuset_setproc(p->p_pid, set, NULL);
+	if (error)
+		return (error);
+	cpuset_rel(set);
+	return (0);
+}
+
+/*
  * This is called once the final set of system cpus is known.  Modifies
  * the root set and all children and mark the root readonly.  
  */
@@ -728,7 +830,7 @@ cpuset_setid(struct thread *td, struct c
 	 */
 	if (uap->which != CPU_WHICH_PID)
 		return (EINVAL);
-	set = cpuset_lookup(uap->setid);
+	set = cpuset_lookup(uap->setid, td);
 	if (set == NULL)
 		return (ESRCH);
 	error = cpuset_setproc(uap->id, set, NULL);
@@ -767,6 +869,7 @@ cpuset_getid(struct thread *td, struct c
 		PROC_UNLOCK(p);
 		break;
 	case CPU_WHICH_CPUSET:
+	case CPU_WHICH_JAIL:
 		break;
 	case CPU_WHICH_IRQ:
 		return (EINVAL);
@@ -829,6 +932,7 @@ cpuset_getaffinity(struct thread *td, st
 			thread_unlock(ttd);
 			break;
 		case CPU_WHICH_CPUSET:
+		case CPU_WHICH_JAIL:
 			break;
 		case CPU_WHICH_IRQ:
 			error = EINVAL;
@@ -856,6 +960,7 @@ cpuset_getaffinity(struct thread *td, st
 			}
 			break;
 		case CPU_WHICH_CPUSET:
+		case CPU_WHICH_JAIL:
 			CPU_COPY(&set->cs_mask, mask);
 			break;
 		case CPU_WHICH_IRQ:
@@ -936,6 +1041,7 @@ cpuset_setaffinity(struct thread *td, st
 			PROC_UNLOCK(p);
 			break;
 		case CPU_WHICH_CPUSET:
+		case CPU_WHICH_JAIL:
 			break;
 		case CPU_WHICH_IRQ:
 			error = EINVAL;
@@ -958,7 +1064,8 @@ cpuset_setaffinity(struct thread *td, st
 			error = cpuset_setproc(uap->id, NULL, mask);
 			break;
 		case CPU_WHICH_CPUSET:
-			error = cpuset_which(CPU_WHICH_CPUSET, uap->id, &p,
+		case CPU_WHICH_JAIL:
+			error = cpuset_which(uap->which, uap->id, &p,
 			    &ttd, &set);
 			if (error == 0) {
 				error = cpuset_modify(set, mask);

Modified: head/sys/kern/kern_exit.c
==============================================================================
--- head/sys/kern/kern_exit.c	Sat Nov 29 14:26:22 2008	(r185434)
+++ head/sys/kern/kern_exit.c	Sat Nov 29 14:32:14 2008	(r185435)
@@ -52,6 +52,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/mutex.h>
 #include <sys/proc.h>
 #include <sys/pioctl.h>
+#include <sys/jail.h>
 #include <sys/tty.h>
 #include <sys/wait.h>
 #include <sys/vmmeter.h>
@@ -453,6 +454,10 @@ exit1(struct thread *td, int rv)
 	p->p_xstat = rv;
 	p->p_xthread = td;
 
+	/* In case we are jailed tell the prison that we are gone. */
+	if (jailed(p->p_ucred))
+		prison_proc_free(p->p_ucred->cr_prison);
+
 #ifdef KDTRACE_HOOKS
 	/*
 	 * Tell the DTrace fasttrap provider about the exit if it

Modified: head/sys/kern/kern_fork.c
==============================================================================
--- head/sys/kern/kern_fork.c	Sat Nov 29 14:26:22 2008	(r185434)
+++ head/sys/kern/kern_fork.c	Sat Nov 29 14:32:14 2008	(r185435)
@@ -54,6 +54,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/mutex.h>
 #include <sys/priv.h>
 #include <sys/proc.h>
+#include <sys/jail.h>
 #include <sys/pioctl.h>
 #include <sys/resourcevar.h>
 #include <sys/sched.h>
@@ -452,6 +453,11 @@ again:
 	    __rangeof(struct proc, p_startzero, p_endzero));
 
 	p2->p_ucred = crhold(td->td_ucred);
+
+	/* In case we are jailed tell the prison that we exist. */
+	if (jailed(p2->p_ucred))
+		prison_proc_hold(p2->p_ucred->cr_prison);
+
 	PROC_UNLOCK(p2);
 
 	/*

Modified: head/sys/kern/kern_jail.c
==============================================================================
--- head/sys/kern/kern_jail.c	Sat Nov 29 14:26:22 2008	(r185434)
+++ head/sys/kern/kern_jail.c	Sat Nov 29 14:32:14 2008	(r185435)
@@ -1,5 +1,7 @@
 /*-
- * Copyright (c) 1999 Poul-Henning Kamp. All rights reserved.
+ * Copyright (c) 1999 Poul-Henning Kamp.
+ * Copyright (c) 2008 Bjoern A. Zeeb.
+ * All rights reserved.
  * 
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -26,6 +28,9 @@
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
+#include "opt_ddb.h"
+#include "opt_inet.h"
+#include "opt_inet6.h"
 #include "opt_mac.h"
 
 #include <sys/param.h>
@@ -54,6 +59,12 @@ __FBSDID("$FreeBSD$");
 #include <sys/osd.h>
 #include <net/if.h>
 #include <netinet/in.h>
+#ifdef DDB
+#include <ddb/ddb.h>
+#ifdef INET6
+#include <netinet6/in6_var.h>
+#endif /* INET6 */
+#endif /* DDB */
 
 #include <security/mac/mac_framework.h>
 
@@ -70,7 +81,7 @@ SYSCTL_INT(_security_jail, OID_AUTO, set
 int	jail_socket_unixiproute_only = 1;
 SYSCTL_INT(_security_jail, OID_AUTO, socket_unixiproute_only, CTLFLAG_RW,
     &jail_socket_unixiproute_only, 0,
-    "Processes in jail are limited to creating UNIX/IPv4/route sockets only");
+    "Processes in jail are limited to creating UNIX/IP/route sockets only");
 
 int	jail_sysvipc_allowed = 0;
 SYSCTL_INT(_security_jail, OID_AUTO, sysvipc_allowed, CTLFLAG_RW,
@@ -97,6 +108,11 @@ SYSCTL_INT(_security_jail, OID_AUTO, mou
     &jail_mount_allowed, 0,
     "Processes in jail can mount/unmount jail-friendly file systems");
 
+int	jail_max_af_ips = 255;
+SYSCTL_INT(_security_jail, OID_AUTO, jail_max_af_ips, CTLFLAG_RW,
+    &jail_max_af_ips, 0,
+    "Number of IP addresses a jail may have at most per address family");
+
 /* allprison, lastprid, and prisoncount are protected by allprison_lock. */
 struct	prisonlist allprison;
 struct	sx allprison_lock;
@@ -106,6 +122,12 @@ int	prisoncount = 0;
 static void		 init_prison(void *);
 static void		 prison_complete(void *context, int pending);
 static int		 sysctl_jail_list(SYSCTL_HANDLER_ARGS);
+#ifdef INET
+static int		_prison_check_ip4(struct prison *, struct in_addr *);
+#endif
+#ifdef INET6
+static int		_prison_check_ip6(struct prison *, struct in6_addr *);
+#endif
 
 static void
 init_prison(void *data __unused)
@@ -117,6 +139,276 @@ init_prison(void *data __unused)
 
 SYSINIT(prison, SI_SUB_INTRINSIC, SI_ORDER_ANY, init_prison, NULL);
 
+#ifdef INET
+static int
+qcmp_v4(const void *ip1, const void *ip2)
+{
+	in_addr_t iaa, iab;
+
+	/*
+	 * We need to compare in HBO here to get the list sorted as expected
+	 * by the result of the code.  Sorting NBO addresses gives you
+	 * interesting results.  If you do not understand, do not try.
+	 */
+	iaa = ntohl(((const struct in_addr *)ip1)->s_addr);
+	iab = ntohl(((const struct in_addr *)ip2)->s_addr);
+
+	/*
+	 * Do not simply return the difference of the two numbers, the int is
+	 * not wide enough.
+	 */
+	if (iaa > iab)
+		return (1);
+	else if (iaa < iab)
+		return (-1);
+	else
+		return (0);
+}
+#endif
+
+#ifdef INET6
+static int
+qcmp_v6(const void *ip1, const void *ip2)
+{
+	const struct in6_addr *ia6a, *ia6b;
+	int i, rc;
+
+	ia6a = (const struct in6_addr *)ip1;
+	ia6b = (const struct in6_addr *)ip2;
+
+	rc = 0;
+	for (i=0; rc == 0 && i < sizeof(struct in6_addr); i++) {
+		if (ia6a->s6_addr[i] > ia6b->s6_addr[i])
+			rc = 1;
+		else if (ia6a->s6_addr[i] < ia6b->s6_addr[i])
+			rc = -1;
+	}
+	return (rc);
+}
+#endif
+
+#if defined(INET) || defined(INET6)
+static int
+prison_check_conflicting_ips(struct prison *p)
+{
+	struct prison *pr;
+	int i;
+
+	sx_assert(&allprison_lock, SX_LOCKED);
+
+	if (p->pr_ip4s == 0 && p->pr_ip6s == 0)
+		return (0);
+
+	LIST_FOREACH(pr, &allprison, pr_list) {
+		/*
+		 * Skip 'dying' prisons to avoid problems when
+		 * restarting multi-IP jails.
+		 */
+		if (pr->pr_state == PRISON_STATE_DYING)
+			continue;
+
+		/*
+		 * We permit conflicting IPs if there is no
+		 * more than 1 IP on each jail.
+		 * In case there is one duplicate on a jail with
+		 * more than one IP stop checking and return error.
+		 */
+#ifdef INET
+		if ((p->pr_ip4s >= 1 && pr->pr_ip4s > 1) ||
+		    (p->pr_ip4s > 1 && pr->pr_ip4s >= 1)) {
+			for (i = 0; i < p->pr_ip4s; i++) {
+				if (_prison_check_ip4(pr, &p->pr_ip4[i]))
+					return (EINVAL);
+			}
+		}
+#endif
+#ifdef INET6
+		if ((p->pr_ip6s >= 1 && pr->pr_ip6s > 1) ||
+		    (p->pr_ip6s > 1 && pr->pr_ip6s >= 1)) {
+			for (i = 0; i < p->pr_ip6s; i++) {
+				if (_prison_check_ip6(pr, &p->pr_ip6[i]))
+					return (EINVAL);
+			}
+		}
+#endif
+	}
+
+	return (0);
+}
+
+static int
+jail_copyin_ips(struct jail *j)
+{
+#ifdef INET
+	struct in_addr  *ip4;
+#endif
+#ifdef INET6
+	struct in6_addr *ip6;
+#endif
+	int error, i;
+
+	/*
+	 * Copy in addresses, check for duplicate addresses and do some
+	 * simple 0 and broadcast checks. If users give other bogus addresses
+	 * it is their problem.
+	 *
+	 * IP addresses are all sorted but ip[0] to preserve the primary IP
+	 * address as given from userland.  This special IP is used for
+	 * unbound outgoing connections as well as for "loopback" traffic.
+	 */
+#ifdef INET
+	ip4 = NULL;
+#endif
+#ifdef INET6
+	ip6 = NULL;
+#endif
+#ifdef INET
+	if (j->ip4s > 0) {
+		ip4 = (struct in_addr *)malloc(j->ip4s * sizeof(struct in_addr),
+		    M_PRISON, M_WAITOK | M_ZERO);
+		error = copyin(j->ip4, ip4, j->ip4s * sizeof(struct in_addr));
+		if (error)
+			goto e_free_ip;
+		/* Sort all but the first IPv4 address. */
+		if (j->ip4s > 1)
+			qsort((ip4 + 1), j->ip4s - 1,
+			    sizeof(struct in_addr), qcmp_v4);
+
+		/*
+		 * We do not have to care about byte order for these checks
+		 * so we will do them in NBO.
+		 */
+		for (i=0; i<j->ip4s; i++) {
+			if (ip4[i].s_addr == htonl(INADDR_ANY) ||
+			    ip4[i].s_addr == htonl(INADDR_BROADCAST)) {
+				error = EINVAL;
+				goto e_free_ip;
+			}
+			if ((i+1) < j->ip4s &&
+			    (ip4[0].s_addr == ip4[i+1].s_addr ||
+			    ip4[i].s_addr == ip4[i+1].s_addr)) {
+				error = EINVAL;
+				goto e_free_ip;
+			}
+		}
+
+		j->ip4 = ip4;
+	}
+#endif
+#ifdef INET6
+	if (j->ip6s > 0) {
+		ip6 = (struct in6_addr *)malloc(j->ip6s * sizeof(struct in6_addr),
+		    M_PRISON, M_WAITOK | M_ZERO);
+		error = copyin(j->ip6, ip6, j->ip6s * sizeof(struct in6_addr));
+		if (error)
+			goto e_free_ip;
+		/* Sort all but the first IPv6 address. */
+		if (j->ip6s > 1)
+			qsort((ip6 + 1), j->ip6s - 1,
+			    sizeof(struct in6_addr), qcmp_v6);
+		for (i=0; i<j->ip6s; i++) {
+			if (IN6_IS_ADDR_UNSPECIFIED(&ip6[i])) {
+				error = EINVAL;
+				goto e_free_ip;
+			}
+			if ((i+1) < j->ip6s &&
+			    (IN6_ARE_ADDR_EQUAL(&ip6[0], &ip6[i+1]) ||
+			    IN6_ARE_ADDR_EQUAL(&ip6[i], &ip6[i+1]))) {
+				error = EINVAL;
+				goto e_free_ip;
+			}
+		}
+
+		j->ip6 = ip6;
+	}
+#endif
+	return (0);
+
+e_free_ip:
+#ifdef INET6
+	free(ip6, M_PRISON);
+#endif
+#ifdef INET
+	free(ip4, M_PRISON);
+#endif
+	return (error);
+}
+#endif /* INET || INET6 */
+
+static int
+jail_handle_ips(struct jail *j)
+{
+#if defined(INET) || defined(INET6)
+	int error;
+#endif
+
+	/*
+	 * Finish conversion for older versions, copyin and setup IPs.
+	 */
+	switch (j->version) {
+	case 0:	
+	{
+#ifdef INET
+		/* FreeBSD single IPv4 jails. */
+		struct in_addr *ip4;
+
+		if (j->ip4s == INADDR_ANY || j->ip4s == INADDR_BROADCAST)
+			return (EINVAL);
+		ip4 = (struct in_addr *)malloc(sizeof(struct in_addr),
+		    M_PRISON, M_WAITOK | M_ZERO);
+
+		/*
+		 * Jail version 0 still used HBO for the IPv4 address.
+		 */
+		ip4->s_addr = htonl(j->ip4s);
+		j->ip4s = 1;
+		j->ip4 = ip4;
+		break;
+#else
+		return (EINVAL);
+#endif
+	}
+
+	case 1:
+		/*
+		 * Version 1 was used by multi-IPv4 jail implementations
+		 * that never made it into the official kernel.
+		 * We should never hit this here; jail() should catch it.
+		 */
+		return (EINVAL);
+
+	case 2:	/* JAIL_API_VERSION */
+		/* FreeBSD multi-IPv4/IPv6,noIP jails. */
+#if defined(INET) || defined(INET6)
+#ifdef INET
+		if (j->ip4s > jail_max_af_ips)
+			return (EINVAL);
+#else
+		if (j->ip4s != 0)
+			return (EINVAL);
+#endif
+#ifdef INET6
+		if (j->ip6s > jail_max_af_ips)
+			return (EINVAL);
+#else
+		if (j->ip6s != 0)
+			return (EINVAL);
+#endif
+		error = jail_copyin_ips(j);
+		if (error)
+			return (error);
+#endif
+		break;
+
+	default:
+		/* Sci-Fi jails are not supported, sorry. */
+		return (EINVAL);
+	}
+
+	return (0);
+}
+
+
 /*
  * struct jail_args {
  *	struct jail *jail;
@@ -125,22 +417,72 @@ SYSINIT(prison, SI_SUB_INTRINSIC, SI_ORD
 int
 jail(struct thread *td, struct jail_args *uap)
 {
+	uint32_t version;
+	int error;
+	struct jail j;
+
+	error = copyin(uap->jail, &version, sizeof(uint32_t));
+	if (error)
+		return (error);
+
+	switch (version) {
+	case 0:	
+		/* FreeBSD single IPv4 jails. */
+	{
+		struct jail_v0 j0;
+
+		bzero(&j, sizeof(struct jail));
+		error = copyin(uap->jail, &j0, sizeof(struct jail_v0));
+		if (error)
+			return (error);
+		j.version = j0.version;
+		j.path = j0.path;
+		j.hostname = j0.hostname;
+		j.ip4s = j0.ip_number;
+		break;
+	}
+
+	case 1:
+		/*
+		 * Version 1 was used by multi-IPv4 jail implementations
+		 * that never made it into the official kernel.
+		 */
+		return (EINVAL);
+
+	case 2:	/* JAIL_API_VERSION */
+		/* FreeBSD multi-IPv4/IPv6,noIP jails. */
+		error = copyin(uap->jail, &j, sizeof(struct jail));
+		if (error)
+			return (error);
+		break;
+
+	default:
+		/* Sci-Fi jails are not supported, sorry. */
+		return (EINVAL);
+	}
+	return (kern_jail(td, &j));
+}
+
+int
+kern_jail(struct thread *td, struct jail *j)
+{
 	struct nameidata nd;
 	struct prison *pr, *tpr;
-	struct jail j;
 	struct jail_attach_args jaa;
 	int vfslocked, error, tryprid;
 
-	error = copyin(uap->jail, &j, sizeof(j));
+	KASSERT(j != NULL, ("%s: j is NULL", __func__));
+
+	/* Handle addresses - convert old structs, copyin, check IPs. */
+	error = jail_handle_ips(j);
 	if (error)
 		return (error);
-	if (j.version != 0)
-		return (EINVAL);
 
+	/* Allocate struct prison and fill it with life. */
 	pr = malloc(sizeof(*pr), M_PRISON, M_WAITOK | M_ZERO);
 	mtx_init(&pr->pr_mtx, "jail mutex", NULL, MTX_DEF);
 	pr->pr_ref = 1;
-	error = copyinstr(j.path, &pr->pr_path, sizeof(pr->pr_path), 0);
+	error = copyinstr(j->path, &pr->pr_path, sizeof(pr->pr_path), NULL);
 	if (error)
 		goto e_killmtx;
 	NDINIT(&nd, LOOKUP, MPSAFE | FOLLOW | LOCKLEAF, UIO_SYSSPACE,
@@ -153,16 +495,50 @@ jail(struct thread *td, struct jail_args
 	VOP_UNLOCK(nd.ni_vp, 0);
 	NDFREE(&nd, NDF_ONLY_PNBUF);
 	VFS_UNLOCK_GIANT(vfslocked);
-	error = copyinstr(j.hostname, &pr->pr_host, sizeof(pr->pr_host), 0);
+	error = copyinstr(j->hostname, &pr->pr_host, sizeof(pr->pr_host), NULL);
 	if (error)
 		goto e_dropvnref;
-	pr->pr_ip = j.ip_number;
+	if (j->jailname != NULL) {
+		error = copyinstr(j->jailname, &pr->pr_name,
+		    sizeof(pr->pr_name), NULL);
+		if (error)
+			goto e_dropvnref;
+	}
+	if (j->ip4s > 0) {
+		pr->pr_ip4 = j->ip4;
+		pr->pr_ip4s = j->ip4s;
+	}
+#ifdef INET6
+	if (j->ip6s > 0) {
+		pr->pr_ip6 = j->ip6;
+		pr->pr_ip6s = j->ip6s;
+	}
+#endif
 	pr->pr_linux = NULL;
 	pr->pr_securelevel = securelevel;
 	bzero(&pr->pr_osd, sizeof(pr->pr_osd));
 
-	/* Determine next pr_id and add prison to allprison list. */
+	/*
+	 * Pre-set prison state to ALIVE upon creation.  This is needed so we
+	 * can later attach the process to it, etc (avoiding another extra
+	 * state for the process of creation, complicating things).
+	 */
+	pr->pr_state = PRISON_STATE_ALIVE;
+
+	/* Allocate a dedicated cpuset for each jail. */
+	error = cpuset_create_root(td, &pr->pr_cpuset);
+	if (error)
+		goto e_dropvnref;
+
 	sx_xlock(&allprison_lock);
+	/* Make sure we cannot run into problems with ambiguous bind()ings. */
+	error = prison_check_conflicting_ips(pr);
+	if (error) {
+		sx_xunlock(&allprison_lock);
+		goto e_dropcpuset;
+	}
+
+	/* Determine next pr_id and add prison to allprison list. */
 	tryprid = lastprid + 1;
 	if (tryprid == JAIL_MAX)
 		tryprid = 1;
@@ -173,7 +549,7 @@ next:
 			if (tryprid == JAIL_MAX) {
 				sx_xunlock(&allprison_lock);
 				error = EAGAIN;
-				goto e_dropvnref;
+				goto e_dropcpuset;
 			}
 			goto next;
 		}
@@ -196,6 +572,8 @@ e_dropprref:
 	LIST_REMOVE(pr, pr_list);
 	prisoncount--;
 	sx_xunlock(&allprison_lock);
+e_dropcpuset:
+	cpuset_rel(pr->pr_cpuset);
 e_dropvnref:
 	vfslocked = VFS_LOCK_GIANT(pr->pr_root->v_mount);
 	vrele(pr->pr_root);
@@ -203,6 +581,12 @@ e_dropvnref:
 e_killmtx:
 	mtx_destroy(&pr->pr_mtx);
 	free(pr, M_PRISON);
+#ifdef INET6
+	free(j->ip6, M_PRISON);
+#endif
+#ifdef INET
+	free(j->ip4, M_PRISON);
+#endif
 	return (error);
 }
 
@@ -238,10 +622,27 @@ jail_attach(struct thread *td, struct ja
 		sx_sunlock(&allprison_lock);
 		return (EINVAL);
 	}
+
+	/*
+	 * Do not allow a process to attach to a prison that is not
+	 * considered to be "ALIVE".
+	 */

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***


More information about the svn-src-all mailing list