git: 5b194a4ae319 - main - MAC/do: Sequential consistency for configuration retrieval
- Go to: [ bottom of page ] [ top of archives ] [ this month ]
Date: Fri, 29 May 2026 16:01:55 UTC
The branch main has been updated by olce:
URL: https://cgit.FreeBSD.org/src/commit/?id=5b194a4ae3190a9954544058dfc0790fd9a16172
commit 5b194a4ae3190a9954544058dfc0790fd9a16172
Author: Olivier Certner <olce@FreeBSD.org>
AuthorDate: 2026-04-29 17:14:13 +0000
Commit: Olivier Certner <olce@FreeBSD.org>
CommitDate: 2026-05-29 15:34:04 +0000
MAC/do: Sequential consistency for configuration retrieval
Since the inception of mac_do(4), find_conf(), used to retrieve the
applicable configuration, has been weakly consistent with respect to
concurrent modifications to configuration inheritance that influence its
result (and it has been sequentially consistent with respect to other
configuration modifications, which the initial executable paths feature
and introduction of implicit parameters broke and which will be fixed in
a subsequent commit).
Indeed, find_conf() climbs the jail tree to find an applicable
configuration, which is not an atomic operation. It examines the
current jail's configuration pointer for each browsed jail, which does
not prevent concurrent modifications of the configuration pointer for
jails below or above it. Modifications above the current jail are not
a problem, since if climbing needs to continue (i.e., the current jail
inherits), these modifications will be seen if performed before that
check (and may or may not be seen if performed after that check).
However, modifications below the current jail impair sequential
consistency, because they could be done before other modifications at
the current jail or higher up, and the latter could still be picked up
by the rest of the climb, effectively ignoring that the former should
have blocked the climb earlier, making them look as if they had happened
after for the climbing thread.
As a concrete example of this situation, let's examine a scenario where
some jail A is the parent of some jail B, and B inherits its
configuration from A. An administrator may want to relax the rules only
for jail A but not B. To this end, he first copies the current rules on
B over to A and then relaxes the rules on A. He can intuitively and
reasonably expect that changing B's rules first will prevent A's relaxed
rules to leak to threads in B. Unfortunately, that is not the case: As
explained in the previous paragraph, there can be a time window where
threads from B can still pick up A's new configuration just after it has
been installed. This arguably makes changing inheritance in mac_do(4)
in a fully secure fashion almost impossible.
If preserving fine-grained locking of prisons, we could prevent this
problem by having find_conf(), once it has climbed to a non-NULL pointer
(actual, non-inherited configuration), do another climb to check that it
can reach the same configuration on the same jail again. If the new
climb gives another pointer or jail, it could make it the new candidate
and do a climb check again until the situation stabilizes. A climb
check detects whether changes in jails below that of the candidate
configuration object happened, catching in particular such changes that
happened before changes to the candidate object. However, that process
alone would still be subject to ABA problems, and we would additionally
need to tag each prison with some modification timestamp (global, or
local but necessitating allocating memory during the check) to fix them.
In the end, we considered this direction to be unnecessarily complex,
given that configuration changes are to be rare events and most uses
will just be configuration determination.
Consequently, switch protecting jail configurations with a single
read-mostly lock.
While here, adapt set_conf() to accept NULL as the new configuration
object, and have remove_conf() call it, which removes duplicated code.
While here, add a comment explaining why we do not need to take any more
locks when climbing the jail tree.
Reviewed by: bapt
MFC after: 1 month
Sponsored by: The FreeBSD Foundation
Pull Request: https://ron-dev.freebsd.org/FreeBSD/src/pulls/38
---
sys/security/mac_do/mac_do.c | 112 ++++++++++++++++++++++++-------------------
1 file changed, 64 insertions(+), 48 deletions(-)
diff --git a/sys/security/mac_do/mac_do.c b/sys/security/mac_do/mac_do.c
index 4e7a65ae2cae..3775466326f4 100644
--- a/sys/security/mac_do/mac_do.c
+++ b/sys/security/mac_do/mac_do.c
@@ -24,6 +24,7 @@
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/refcount.h>
+#include <sys/rmlock.h>
#include <sys/socket.h>
#include <sys/stdarg.h>
#include <sys/sx.h>
@@ -74,6 +75,8 @@ _Static_assert(MAX_RULE_STRING_SIZE > 0,
_Static_assert(MAX_EXEC_PATHS_SIZE > 0,
"MAX_EXEC_PATHS_SIZE: No space for the NUL terminator!");
+struct rmlock mac_do_rml;
+
static unsigned osd_jail_slot;
static unsigned osd_thread_slot;
@@ -1228,28 +1231,46 @@ drop_conf(struct conf *const conf)
*
* If 'hpr' is not NULL, it is used to return a pointer to the (unlocked) prison
* holding the applicable configuration.
+ *
+ * The find_conf_unlocked() variant needs 'mac_do_rml' to be (read- or write-)
+ * locked. The find_conf() variant will take a read lock for the duration of
+ * the search.
+ *
+ * The configuration returned by this function is sequentially consistent with
+ * other concurrent reads and configuration modifications, even in the presence
+ * of concurrent changes of configurations higher up in the jail tree (whether
+ * they "change" the value of some parameters, install a new configuration where
+ * there wasn't any, breaking inheritance from higher up, or remove an existing
+ * one, establishing inheritance from higher up).
*/
static struct conf *
find_conf(struct prison *const pr, struct prison **const hpr)
{
- struct prison *cpr, *ppr;
+ struct prison *cpr, *ppr; /* Current and parent. */
struct conf *conf;
+ struct rm_priotracker rmpt;
+ rm_rlock(&mac_do_rml, &rmpt);
+ /*
+ * We do not need to take any locks here to climb the prison tree as
+ * either the start prison ('pr') is that of the current thread (and our
+ * ancestors are necessarily stable), or it is a prison passed by the jail
+ * machinery to an OSD method, in which case the prison tree lock is
+ * already being held.
+ */
cpr = pr;
for (;;) {
- prison_lock(cpr);
- conf = osd_jail_get(cpr, osd_jail_slot);
+ conf = osd_jail_get_unlocked(cpr, osd_jail_slot);
if (conf != NULL)
break;
- prison_unlock(cpr);
ppr = cpr->pr_parent;
/*
- * 'prison0' normally always have a mac_do(4) configuration
- * because we installed one on module load/activation and
- * nothing can destroy it as 'prison0' is not a regular jail and
- * the 'mac.do' parameter cannot be set to 'inherit' on it,
- * which is the only way to clear an existing configuration.
+ * 'prison0' always has a mac_do(4) configuration because we
+ * installed one on module load/activation and nothing can
+ * destroy it as 'prison0' is not a regular jail and the
+ * 'mac.do' parameter cannot be set to 'inherit' on it, which is
+ * the only way to clear an existing configuration.
*/
KASSERT(ppr != NULL,
("MAC/do: 'prison0' must always have a configuration."));
@@ -1257,10 +1278,11 @@ find_conf(struct prison *const pr, struct prison **const hpr)
}
hold_conf(conf);
- prison_unlock(cpr);
+ rm_runlock(&mac_do_rml, &rmpt);
if (hpr != NULL)
*hpr = cpr;
+
return (conf);
}
@@ -1308,59 +1330,50 @@ dealloc_jail_osd(void *const value)
}
/*
- * Remove the rules specifically associated to a prison.
- *
- * In practice, this means that the rules become inherited (from the closest
- * ancestor that has some).
+ * Assign an already-built configuration to a jail.
*
- * Destroys the 'osd_jail_slot' slot of the passed jail.
+ * Takes care of write-locking 'mac_do_rm', which should be unlocked on entry
+ * and will be unlocked on exit.
*/
static void
-remove_conf(struct prison *const pr)
+set_conf(struct prison *const pr, struct conf *const conf)
{
+ void **const rsv = conf != NULL ? osd_reserve(osd_jail_slot) : NULL;
struct conf *old_conf;
- int error __unused;
+ int error __diagused;
- prison_lock(pr);
- /*
- * We burden ourselves with extracting rules first instead of just
- * letting osd_jail_del() call dealloc_jail_osd() as we want to
- * decrement their use count, and possibly free them, outside of the
- * prison lock.
- */
- old_conf = osd_jail_get(pr, osd_jail_slot);
- error = osd_jail_set(pr, osd_jail_slot, NULL);
- /* osd_set() never allocates memory when 'value' is NULL, nor fails. */
- MPASS(error == 0);
- /*
- * This completely frees the OSD slot, but doesn't call the destructor
- * since we've just put NULL in the slot.
- */
- osd_jail_del(pr, osd_jail_slot);
- prison_unlock(pr);
+ if (conf != NULL)
+ hold_conf(conf);
+
+ rm_wlock(&mac_do_rml);
+ old_conf = osd_jail_get_unlocked(pr, osd_jail_slot);
+ error = osd_jail_set_reserved(pr, osd_jail_slot, rsv, conf);
+ KASSERT(error == 0, ("MAC/do: osd_jail_set_reserved() failed "
+ "with 'conf' = %p and 'rsv' = %p", conf, rsv));
+ if (conf == NULL)
+ /*
+ * This completely frees the OSD slot, but doesn't call the
+ * destructor since we've just put NULL into the slot.
+ */
+ osd_jail_del(pr, osd_jail_slot);
+ rm_wunlock(&mac_do_rml);
if (old_conf != NULL)
drop_conf(old_conf);
}
/*
- * Assign an already-built configuration to a jail.
+ * Remove the rules specifically associated to a prison.
+ *
+ * In practice, this means that the rules become inherited (from the closest
+ * ancestor that has some).
+ *
+ * Destroys the 'osd_jail_slot' slot of the passed jail.
*/
static void
-set_conf(struct prison *const pr, struct conf *const conf)
+remove_conf(struct prison *const pr)
{
- struct conf *old_conf;
- void **rsv;
-
- hold_conf(conf);
- rsv = osd_reserve(osd_jail_slot);
-
- prison_lock(pr);
- old_conf = osd_jail_get(pr, osd_jail_slot);
- osd_jail_set_reserved(pr, osd_jail_slot, rsv, conf);
- prison_unlock(pr);
- if (old_conf != NULL)
- drop_conf(old_conf);
+ set_conf(pr, NULL);
}
static struct conf *
@@ -2527,6 +2540,8 @@ mac_do_init(struct mac_policy_conf *mpc)
struct conf *const default_conf = new_default_conf();
struct prison *pr;
+ rm_init(&mac_do_rml, "mac_do(4)");
+
osd_jail_slot = osd_jail_register(dealloc_jail_osd, osd_methods);
set_conf(&prison0, default_conf);
sx_slock(&allprison_lock);
@@ -2547,6 +2562,7 @@ mac_do_destroy(struct mac_policy_conf *mpc)
*/
osd_thread_deregister(osd_thread_slot);
osd_jail_deregister(osd_jail_slot);
+ rm_destroy(&mac_do_rml);
}
static struct mac_policy_ops do_ops = {