git: 2efbd480f1d3 - main - rc: add service jails framework

From: Alexander Leidinger <netchild_at_FreeBSD.org>
Date: Wed, 22 May 2024 13:41:54 UTC
The branch main has been updated by netchild:

URL: https://cgit.FreeBSD.org/src/commit/?id=2efbd480f1d37cbadc5af4476a87e5336c493910

commit 2efbd480f1d37cbadc5af4476a87e5336c493910
Author:     Alexander Leidinger <netchild@FreeBSD.org>
AuthorDate: 2024-05-22 13:28:11 +0000
Commit:     Alexander Leidinger <netchild@FreeBSD.org>
CommitDate: 2024-05-22 13:41:48 +0000

    rc: add service jails framework
    
    This takes a rc.d-service and starts it in a jail which shares the same
    root-path as the host (or parent jail) and may inherit the network from
    the host (or parent jail). Per service there is the possibility to
    specify some arguments which give more permissions (e.g. netv4, netv6,
    sysvipc...).
    Reviewed by:    bcr (man page)
    Relnotes:       yes
    Differential Revision:  https://reviews.freebsd.org/D40370
---
 libexec/rc/rc.subr       | 155 +++++++++++++++++++++++++++++++++++++++++++----
 share/man/man5/rc.conf.5 | 112 +++++++++++++++++++++++++++++++++-
 2 files changed, 255 insertions(+), 12 deletions(-)

diff --git a/libexec/rc/rc.subr b/libexec/rc/rc.subr
index 6ed9d24fdf5c..23651cbf8e58 100644
--- a/libexec/rc/rc.subr
+++ b/libexec/rc/rc.subr
@@ -50,6 +50,9 @@ PROTECT="/usr/bin/protect"
 ID="/usr/bin/id"
 IDCMD="if [ -x $ID ]; then $ID -un; fi"
 PS="/bin/ps -ww"
+SERVICE=/usr/sbin/service
+JAIL_CMD=/usr/sbin/jail
+_svcj_generic_params="path=/ mount.nodevfs host=inherit"
 JID=0
 CPUSET="/bin/cpuset"
 
@@ -521,6 +524,16 @@ _find_processes()
 		    $_procname|$_procnamebn|${_procnamebn}:|"(${_procnamebn})"|"[${_procnamebn}]")'
 	fi
 
+	if checkyesno ${name}_svcj && [ "${_rc_svcj}" != jailing ]; then
+		JID=$(/usr/sbin/jls -j svcj-${name} jid 2>/dev/null)
+
+		case ${JID} in
+		''|*[!0-9]*)
+			# svcj-jail doesn't exist, fallback to host-check
+			JID=0
+			;;
+		esac
+	fi
 	_proccheck="\
 		$PS 2>/dev/null -o pid= -o jid= -o command= $_psargs"' |
 		while read _npid _jid '"$_fp_args"'; do
@@ -1138,6 +1151,18 @@ run_rc_command()
 		_cpusetcmd="$CPUSET -l $_cpuset"
 	fi
 
+	# If a specific jail has a specific svcj request, honor it (YES/NO).
+	# If not (variable empty), evaluate the global svcj catch-all.
+	# A global YES can be overridden by a specific NO, and a global NO is overridden
+	# by a specific YES.
+	eval _svcj=\$${name}_svcj
+	if [ -z "$_svcj" ]; then
+		_svcj=${svcj_all_enable}
+		if [ -z "$_svcj" ]; then
+			eval ${name}_svcj=NO
+		fi
+	fi
+
 					# setup pid check command
 	if [ -n "$_procname" ]; then
 		if [ -n "$pidfile" ]; then
@@ -1174,7 +1199,7 @@ run_rc_command()
 	    _prepend=\$${name}_prepend	_login_class=\${${name}_login_class:-daemon} \
 	    _limits=\$${name}_limits	_oomprotect=\$${name}_oomprotect \
 	    _setup=\$${name}_setup	_env_file=\$${name}_env_file \
-	    _umask=\$${name}_umask
+	    _umask=\$${name}_umask	_svcj_options=\$${name}_svcj_options
 
 	if [ -n "$_env_file" ] && [ -r "${_env_file}" ]; then	# load env from file
 		set -a
@@ -1188,6 +1213,45 @@ run_rc_command()
 		fi
 	fi
 
+	if [ -n "$_svcj_options" ]; then	# translate service jail options
+		_svcj_cmd_options=""
+
+		for _svcj_option in $_svcj_options; do
+			case "$_svcj_option" in
+				mlock)
+					_svcj_cmd_options="allow.mlock ${_svcj_cmd_options}"
+					;;
+				netv4)
+					_svcj_cmd_options="ip4=inherit allow.reserved_ports ${_svcj_cmd_options}"
+					;;
+				netv6)
+					_svcj_cmd_options="ip6=inherit allow.reserved_ports ${_svcj_cmd_options}"
+					;;
+				net_basic)
+					_svcj_cmd_options="ip4=inherit ip6=inherit allow.reserved_ports ${_svcj_cmd_options}"
+					;;
+				net_raw)
+					_svcj_cmd_options="allow.raw_sockets ${_svcj_cmd_options}"
+					;;
+				net_all)
+					_svcj_cmd_options="allow.socket_af allow.raw_sockets allow.reserved_ports ip4=inherit ip6=inherit ${_svcj_cmd_options}"
+					;;
+				nfsd)
+					_svcj_cmd_options="allow.nfsd enforce_statfs=1 ${_svcj_cmd_options}"
+					;;
+				sysvipc)
+					_svcj_cmd_options="sysvmsg=inherit sysvsem=inherit sysvshm=inherit  ${_svcj_cmd_options}"
+					;;
+				vmm)
+					_svcj_cmd_options="allow.vmm ${_svcj_cmd_options}"
+					;;
+				*)
+					echo ${name}: unknown service jail option: $_svcj_option
+					;;
+			esac
+		done
+	fi
+
 	[ -z "$autoboot" ] && eval $_pidcmd	# determine the pid if necessary
 
 	for _elem in $_keywords; do
@@ -1235,9 +1299,49 @@ run_rc_command()
 			if [ -n "$_env" ]; then
 				eval "export -- $_env"
 			fi
-			_run_rc_precmd || return 1
-			_run_rc_doit "$_cpusetcmd $_cmd $rc_extra_args" || return 1
-			_run_rc_postcmd
+
+			if [ "${_rc_svcj}" != jailing ]; then
+				_run_rc_precmd || return 1
+			fi
+			if ! checkyesno ${name}_svcj; then
+				_run_rc_doit "$_cpusetcmd $_cmd $rc_extra_args" || return 1
+			else
+				case "$rc_arg" in
+				start)
+					if [ "${_rc_svcj}" != jailing ]; then
+						_return=1
+						$JAIL_CMD -c $_svcj_generic_params $_svcj_cmd_options \
+						    exec.start="${SERVICE} -E _rc_svcj=jailing ${name} ${_rc_prefix}start $rc_extra_args" \
+						    exec.stop="${SERVICE} -E _rc_svcj=jailing ${name} ${_rc_prefix}stop $rc_extra_args" \
+						    exec.consolelog="/var/log/svcj_${name}_console.log" \
+						    name=svcj-${name} && _return=0
+					else
+						_run_rc_doit "$_cpusetcmd $_cmd $rc_extra_args" || _return=1
+					fi
+					;;
+				stop)
+					if [ "${_rc_svcj}" != jailing ]; then
+						$SERVICE -E _rc_svcj=jailing -j svcj-${name} ${name} ${_rc_prefix}stop $rc_extra_args || _return=1
+						$JAIL_CMD -r svcj-${name} 2>/dev/null
+					else
+						_run_rc_doit "$_cpusetcmd $_cmd $rc_extra_args" || _return=1
+					fi
+					;;
+				restart|status) ;; # no special case needed for svcj or handled somewhere else
+				*)
+					eval _rc_svcj_extra_cmd=\$${name}_${rc_arg}_svcj_enable
+					: ${_rc_svcj_extra_cmd:=NO}
+					if checkyesno _rc_svcj_extra_cmd && [ "${_rc_svcj}" != jailing ]; then
+						$SERVICE -v -E _rc_svcj=jailing -j svcj-${name} ${name} ${_rc_prefix}${rc_arg} $rc_extra_args || _return=1
+					else
+						_run_rc_doit "$_cpusetcmd $_cmd $rc_extra_args" || _return=1
+					fi
+					;;
+				esac
+			fi
+			if [ "${_rc_svcj}" != jailing ]; then
+				_run_rc_postcmd
+			fi
 			return $_return
 		fi
 
@@ -1295,9 +1399,21 @@ run_rc_command()
 				return 1
 			fi
 
-			if ! _run_rc_precmd; then
-				warn "failed precmd routine for ${name}"
-				return 1
+			if [ "${_rc_svcj}" != jailing ]; then
+				if ! _run_rc_precmd; then
+					warn "failed precmd routine for ${name}"
+					return 1
+				fi
+			fi
+
+			if checkyesno ${name}_svcj; then
+				if [ "${_rc_svcj}" != jailing ]; then
+					$JAIL_CMD -c $_svcj_generic_params $_svcj_cmd_options\
+					    exec.start="${SERVICE} -E _rc_svcj=jailing ${name} ${_rc_prefix}start $rc_extra_args" \
+					    exec.stop="${SERVICE} -E _rc_svcj=jailing ${name} ${_rc_prefix}stop $rc_extra_args" \
+					    exec.consolelog="/var/log/svcj_${name}_console.log" \
+					    name=svcj-${name} || return 1
+				fi
 			fi
 
 					# setup the full command to run
@@ -1341,16 +1457,28 @@ $_cpusetcmd $command $rc_flags $command_args"
 					# Prepend default limits
 			_doit="$_cd limits -C $_login_class $_limits $_doit"
 
+
+			local _really_run_it=true
+			if checkyesno ${name}_svcj; then
+				if [ "${_rc_svcj}" != jailing ]; then
+					_really_run_it=false
+				fi
+			fi
+
+			if [ "$_really_run_it" = true ]; then
 					# run the full command
 					#
-			if ! _run_rc_doit "$_doit"; then
-				warn "failed to start ${name}"
-				return 1
+				if ! _run_rc_doit "$_doit"; then
+					warn "failed to start ${name}"
+					return 1
+				fi
 			fi
 
+			if [ "${_rc_svcj}" != jailing ]; then
 					# finally, run postcmd
 					#
-			_run_rc_postcmd
+				_run_rc_postcmd
+			fi
 			;;
 
 		stop)
@@ -1372,6 +1500,11 @@ $_cpusetcmd $command $rc_flags $command_args"
 					# and run postcmd.
 			wait_for_pids $rc_pid
 
+			if checkyesno ${name}_svcj; then
+				# remove service jail
+				$JAIL_CMD -r svcj-${name} 2>/dev/null
+			fi
+
 			_run_rc_postcmd
 			;;
 
diff --git a/share/man/man5/rc.conf.5 b/share/man/man5/rc.conf.5
index 51a48341be60..9f32cfa5ab82 100644
--- a/share/man/man5/rc.conf.5
+++ b/share/man/man5/rc.conf.5
@@ -22,7 +22,7 @@
 .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 .\" SUCH DAMAGE.
 .\"
-.Dd May 18, 2024
+.Dd May 22, 2024
 .Dt RC.CONF 5
 .Os
 .Sh NAME
@@ -263,6 +263,19 @@ value.
 .It Ao Ar name Ac Ns Va _user
 .Pq Vt str
 Run the service under this user account.
+.It Ao Ar name Ac Ns Va _svcj
+.Pq Vt bool
+If set to
+.Dq Li YES ,
+auto-jail the service with inherited filesystem and other
+jail properties depending on
+.Ao Ar name Ac Ns Va _svcj_options .
+.It Ao Ar name Ac Ns Va _svcj_options
+.Pq Vt str
+A list of jail properties for the service.
+See
+.Sx SERVICE JAILS
+for a list of valid properties.
 .It Va apm_enable
 .Pq Vt bool
 If set to
@@ -392,6 +405,12 @@ is set to
 these are the flags to pass to the
 .Xr powerd 8
 daemon.
+.It Va svcj_all_enable
+Enable auto-jailing of all services which are not explicitly
+excluded.
+See
+.Sx SERVICE JAILS
+for more info.
 .It Va tmpmfs
 Controls the creation of a
 .Pa /tmp
@@ -4917,6 +4936,97 @@ daemon.
 these are the flags to pass to the
 .Xr sendmail 8
 .El
+.Sh SERVICE JAILS
+The service jails part of the rc system automatically puts a service
+into a jail.
+This jail inherits the filesystem and various other parts of the
+parent (if you allow child-jails in your jails, service jails
+can be used in jails) depending on the content of the
+.Ao Ar name Ac Ns Va _svcj_options
+variable.
+Typically this variable is set inside rc scripts, but it can be
+overridden in the rc config.
+Valid options for
+.Ao Ar name Ac Ns Va _svcj_options
+are:
+.Bl -tag -width indent-two
+.It mlock
+Allows locking memory pages into physical memory.
+.It netv4
+Inherits the IPv4 address and allows opening reserved ports.
+This can not be combined with
+.Pa netv6 .
+.It netv6
+Inherits the IPv6 address and allows opening reserved ports.
+This can not be combined with
+.Pa netv4 .
+.It net_basic
+Inherits the IPv4 and IPv6 addresses and allows opening
+reserved ports.
+.It net_raw
+Allows opening raw sockets.
+This option can be combined with
+.Pa netv4 ,
+.Pa netv6 ,
+.Pa net_basic .
+.It net_all
+Inherits the IPv4 and IPv6 addresses, allows opening reserved
+ports, allows opening raw sockets, and allows opening sockets
+of protocol stacks that have not had jail functionality added
+to them.
+.It nfsd
+Allows running nfsd and affiliated daemons.
+.It sysvipc
+Allows access to SysV semaphores, SysV shared memory and
+SysV messages.
+.It vmm
+Allows access to
+.Xr vmm 4 .
+This option is only available when
+.Xr vmm 4
+is enabled in the kernel.
+.El
+.Pp
+All non-network options can be combined with all other options.
+.Pp
+If the
+.Ao Ar name Ac Ns Va _svcj
+variable is set to
+.Dq Li YES ,
+this particular service is started in a
+service jail named
+.Va svcj- Ns Ao Ar name Ac .
+.Pp
+The
+.Va svcj_all_enable
+variable enables service jails for all services of the
+system at once.
+Services which have
+.Ao Ar name Ac Ns Va _svcj
+set to
+.Dq Li NO
+are excluded.
+Some services may set
+.Ao Ar name Ac Ns Va _svcj
+to
+.Dq Li NO
+in the script to either prevent service jails for this
+service at all, or may set it to
+.Dq Li NO
+if it is not set in the
+rc config, to exclude it from
+.Va svcj_all_enable
+but allow it to be explicitly enabled.
+The sshd service, for example, would not see other jails if
+it ran as a service jail.
+This may or may not be what is needed, and as such it is
+excluded from
+.Va svcj_all_enable
+but can be enabled via setting
+.Va sshd_svcj
+to
+.Dq Li YES .
+.El
 .Sh FILES
 .Bl -tag -width "/etc/defaults/rc.conf" -compact
 .It Pa /etc/defaults/rc.conf