svn commit: r357614 - in head/sys: kern sys

Pawel Biernacki kaktus at FreeBSD.org
Thu Feb 6 12:45:59 UTC 2020


Author: kaktus
Date: Thu Feb  6 12:45:58 2020
New Revision: 357614
URL: https://svnweb.freebsd.org/changeset/base/357614

Log:
  sysctl(9): add CTLFLAG_NEEDGIANT flag
  
  Add CTLFLAG_NEEDGIANT flag (modelled after D_NEEDGIANT) that will be used to
  mark sysctls that still require locking Giant.
  
  Rewrite sysctl_handle_string() to use internal locking instead of locking
  Giant.
  
  Mark SYSCTL_STRING, SYSCTL_OPAQUE and their variants as MPSAFE.
  
  Add infrastructure support for enforcing proper use of CTLFLAG_NEEDGIANT
  and CTLFLAG_MPSAFE flags with SYSCTL_PROC and SYSCTL_NODE, not enabled yet.
  
  Reviewed by:	kib (mentor)
  Approved by:	kib (mentor)
  Differential Revision:	https://reviews.freebsd.org/D23378

Modified:
  head/sys/kern/kern_sysctl.c
  head/sys/sys/sysctl.h

Modified: head/sys/kern/kern_sysctl.c
==============================================================================
--- head/sys/kern/kern_sysctl.c	Thu Feb  6 10:11:41 2020	(r357613)
+++ head/sys/kern/kern_sysctl.c	Thu Feb  6 12:45:58 2020	(r357614)
@@ -96,9 +96,13 @@ static MALLOC_DEFINE(M_SYSCTLTMP, "sysctltmp", "sysctl
  * The sysctlmemlock is used to limit the amount of user memory wired for
  * sysctl requests.  This is implemented by serializing any userland
  * sysctl requests larger than a single page via an exclusive lock.
+ *
+ * The sysctlstringlock is used to protect concurrent access to writable
+ * string nodes in sysctl_handle_string().
  */
 static struct rmlock sysctllock;
 static struct sx __exclusive_cache_line sysctlmemlock;
+static struct sx sysctlstringlock;
 
 #define	SYSCTL_WLOCK()		rm_wlock(&sysctllock)
 #define	SYSCTL_WUNLOCK()	rm_wunlock(&sysctllock)
@@ -170,10 +174,16 @@ sysctl_root_handler_locked(struct sysctl_oid *oid, voi
 	else
 		SYSCTL_WUNLOCK();
 
-	if (!(oid->oid_kind & CTLFLAG_MPSAFE))
+	/*
+	 * Treat set CTLFLAG_NEEDGIANT and unset CTLFLAG_MPSAFE flags the same,
+	 * untill we're ready to remove all traces of Giant from sysctl(9).
+	 */
+	if ((oid->oid_kind & CTLFLAG_NEEDGIANT) ||
+	    (!(oid->oid_kind & CTLFLAG_MPSAFE)))
 		mtx_lock(&Giant);
 	error = oid->oid_handler(oid, arg1, arg2, req);
-	if (!(oid->oid_kind & CTLFLAG_MPSAFE))
+	if ((oid->oid_kind & CTLFLAG_NEEDGIANT) ||
+	    (!(oid->oid_kind & CTLFLAG_MPSAFE)))
 		mtx_unlock(&Giant);
 
 	KFAIL_POINT_ERROR(_debug_fail_point, sysctl_running, error);
@@ -917,6 +927,7 @@ sysctl_register_all(void *arg)
 	struct sysctl_oid **oidp;
 
 	sx_init(&sysctlmemlock, "sysctl mem");
+	sx_init(&sysctlstringlock, "sysctl string handler");
 	SYSCTL_INIT();
 	SYSCTL_WLOCK();
 	SET_FOREACH(oidp, sysctl_set)
@@ -1632,48 +1643,69 @@ sysctl_handle_64(SYSCTL_HANDLER_ARGS)
 int
 sysctl_handle_string(SYSCTL_HANDLER_ARGS)
 {
+	char *tmparg;
 	size_t outlen;
 	int error = 0, ro_string = 0;
 
 	/*
+	 * If the sysctl isn't writable, microoptimise and treat it as a
+	 * const string.
 	 * A zero-length buffer indicates a fixed size read-only
 	 * string.  In ddb, don't worry about trying to make a malloced
 	 * snapshot.
 	 */
-	if (arg2 == 0 || kdb_active) {
+	if (!(oidp->oid_kind & CTLFLAG_WR) || arg2 == 0 || kdb_active) {
 		arg2 = strlen((char *)arg1) + 1;
 		ro_string = 1;
 	}
 
 	if (req->oldptr != NULL) {
-		char *tmparg;
-
 		if (ro_string) {
 			tmparg = arg1;
+			outlen = strlen(tmparg) + 1;
 		} else {
-			/* try to make a coherent snapshot of the string */
 			tmparg = malloc(arg2, M_SYSCTLTMP, M_WAITOK);
+			sx_slock(&sysctlstringlock);
 			memcpy(tmparg, arg1, arg2);
+			sx_sunlock(&sysctlstringlock);
+			outlen = strlen(tmparg) + 1;
 		}
 
-		outlen = strnlen(tmparg, arg2 - 1) + 1;
 		error = SYSCTL_OUT(req, tmparg, outlen);
 
 		if (!ro_string)
 			free(tmparg, M_SYSCTLTMP);
 	} else {
-		outlen = strnlen((char *)arg1, arg2 - 1) + 1;
+		if (!ro_string)
+			sx_slock(&sysctlstringlock);
+		outlen = strlen((char *)arg1) + 1;
+		if (!ro_string)
+			sx_sunlock(&sysctlstringlock);
 		error = SYSCTL_OUT(req, NULL, outlen);
 	}
 	if (error || !req->newptr)
 		return (error);
 
-	if ((req->newlen - req->newidx) >= arg2) {
+	if (req->newlen - req->newidx >= arg2 ||
+	    req->newlen - req->newidx <= 0) {
 		error = EINVAL;
 	} else {
-		arg2 = (req->newlen - req->newidx);
-		error = SYSCTL_IN(req, arg1, arg2);
+		arg2 = req->newlen - req->newidx;
+		tmparg = malloc(arg2, M_SYSCTLTMP, M_WAITOK);
+
+		error = copyin((const char *)req->newptr + req->newidx,
+		    tmparg, arg2);
+		if (error) {
+			free(tmparg, M_SYSCTLTMP);
+			return (error);
+		}
+
+		sx_xlock(&sysctlstringlock);
+		memcpy(arg1, tmparg, arg2);
 		((char *)arg1)[arg2] = '\0';
+		sx_xunlock(&sysctlstringlock);
+		free(tmparg, M_SYSCTLTMP);
+		req->newidx += arg2;
 	}
 	return (error);
 }

Modified: head/sys/sys/sysctl.h
==============================================================================
--- head/sys/sys/sysctl.h	Thu Feb  6 10:11:41 2020	(r357613)
+++ head/sys/sys/sysctl.h	Thu Feb  6 12:45:58 2020	(r357614)
@@ -105,6 +105,13 @@ struct ctlname {
 #define	CTLFLAG_STATS	0x00002000	/* Statistics, not a tuneable */
 #define	CTLFLAG_NOFETCH	0x00001000	/* Don't fetch tunable from getenv() */
 #define	CTLFLAG_CAPRW	(CTLFLAG_CAPRD|CTLFLAG_CAPWR)
+/*
+ * This is transient flag to be used until all sysctl handlers are converted
+ * to not lock Giant.
+ * One, and only one of CTLFLAG_MPSAFE or CTLFLAG_NEEDGIANT is required
+ * for SYSCTL_PROC and SYSCTL_NODE.
+ */
+#define	CTLFLAG_NEEDGIANT 0x00000800	/* Handler require Giant */
 
 /*
  * Secure level.   Note that CTLFLAG_SECURE == CTLFLAG_SECURE1.
@@ -263,6 +270,14 @@ TAILQ_HEAD(sysctl_ctx_list, sysctl_ctx_entry);
 #define	__DESCR(d) ""
 #endif
 
+#ifdef notyet
+#define	SYSCTL_ENFORCE_FLAGS(x)						\
+    _Static_assert(((CTLFLAG_MPSAFE ^ CTLFLAG_NEEDGIANT) & (x)),	\
+        "Has to be either CTLFLAG_MPSAFE or CTLFLAG_NEEDGIANT")
+#else
+#define	SYSCTL_ENFORCE_FLAGS(x)
+#endif
+
 /* This macro is only for internal use */
 #define	SYSCTL_OID_RAW(id, parent_child_head, nbr, name, kind, a1, a2, handler, fmt, descr, label) \
 	struct sysctl_oid id = {					\
@@ -278,7 +293,8 @@ TAILQ_HEAD(sysctl_ctx_list, sysctl_ctx_entry);
 		.oid_descr = __DESCR(descr),				\
 		.oid_label = (label),					\
 	};								\
-	DATA_SET(sysctl_set, id)
+	DATA_SET(sysctl_set, id);					\
+	SYSCTL_ENFORCE_FLAGS(kind)
 
 /* This constructs a static "raw" MIB oid. */
 #define	SYSCTL_OID(parent, nbr, name, kind, a1, a2, handler, fmt, descr) \
@@ -297,7 +313,11 @@ TAILQ_HEAD(sysctl_ctx_list, sysctl_ctx_entry);
 	nbr, #name, kind, a1, a2, handler, fmt, descr, label)
 
 #define	SYSCTL_ADD_OID(ctx, parent, nbr, name, kind, a1, a2, handler, fmt, descr) \
-	sysctl_add_oid(ctx, parent, nbr, name, kind, a1, a2, handler, fmt, __DESCR(descr), NULL)
+({									\
+	SYSCTL_ENFORCE_FLAGS(kind);					\
+	sysctl_add_oid(ctx, parent, nbr, name, kind, a1, a2,handler,	\
+	    fmt, __DESCR(descr), NULL);					\
+})
 
 /* This constructs a root node from which other nodes can hang. */
 #define	SYSCTL_ROOT_NODE(nbr, name, access, handler, descr)	\
@@ -325,6 +345,7 @@ TAILQ_HEAD(sysctl_ctx_list, sysctl_ctx_entry);
 ({									\
 	CTASSERT(((access) & CTLTYPE) == 0 ||				\
 	    ((access) & SYSCTL_CT_ASSERT_MASK) == CTLTYPE_NODE);	\
+	SYSCTL_ENFORCE_FLAGS(access);					\
 	sysctl_add_oid(ctx, parent, nbr, name, CTLTYPE_NODE|(access),	\
 	    NULL, 0, handler, "N", __DESCR(descr), label);		\
 })
@@ -333,6 +354,7 @@ TAILQ_HEAD(sysctl_ctx_list, sysctl_ctx_entry);
 ({									\
 	CTASSERT(((access) & CTLTYPE) == 0 ||				\
 	    ((access) & SYSCTL_CT_ASSERT_MASK) == CTLTYPE_NODE);	\
+	SYSCTL_ENFORCE_FLAGS(access);					\
 	sysctl_add_oid(ctx, &sysctl__children, nbr, name,		\
 	    CTLTYPE_NODE|(access),					\
 	    NULL, 0, handler, "N", __DESCR(descr), NULL);		\
@@ -340,7 +362,8 @@ TAILQ_HEAD(sysctl_ctx_list, sysctl_ctx_entry);
 
 /* Oid for a string.  len can be 0 to indicate '\0' termination. */
 #define	SYSCTL_STRING(parent, nbr, name, access, arg, len, descr)	\
-	SYSCTL_OID(parent, nbr, name, CTLTYPE_STRING|(access),		\
+	SYSCTL_OID(parent, nbr, name,					\
+	    CTLTYPE_STRING | CTLFLAG_MPSAFE | (access),			\
 	    arg, len, sysctl_handle_string, "A", descr);		\
 	CTASSERT(((access) & CTLTYPE) == 0 ||				\
 	    ((access) & SYSCTL_CT_ASSERT_MASK) == CTLTYPE_STRING)
@@ -350,7 +373,8 @@ TAILQ_HEAD(sysctl_ctx_list, sysctl_ctx_entry);
 	char *__arg = (arg);						\
 	CTASSERT(((access) & CTLTYPE) == 0 ||				\
 	    ((access) & SYSCTL_CT_ASSERT_MASK) == CTLTYPE_STRING);	\
-	sysctl_add_oid(ctx, parent, nbr, name, CTLTYPE_STRING|(access),	\
+	sysctl_add_oid(ctx, parent, nbr, name,				\
+	    CTLTYPE_STRING | CTLFLAG_MPSAFE | (access),			\
 	    __arg, len, sysctl_handle_string, "A", __DESCR(descr),	\
 	    NULL); \
 })
@@ -741,7 +765,8 @@ TAILQ_HEAD(sysctl_ctx_list, sysctl_ctx_entry);
 
 /* Oid for an opaque object.  Specified by a pointer and a length. */
 #define	SYSCTL_OPAQUE(parent, nbr, name, access, ptr, len, fmt, descr)	\
-	SYSCTL_OID(parent, nbr, name, CTLTYPE_OPAQUE|(access),		\
+	SYSCTL_OID(parent, nbr, name,					\
+	    CTLTYPE_OPAQUE | CTLFLAG_MPSAFE | (access),			\
 	    ptr, len, sysctl_handle_opaque, fmt, descr);		\
 	CTASSERT(((access) & CTLTYPE) == 0 ||				\
 	    ((access) & SYSCTL_CT_ASSERT_MASK) == CTLTYPE_OPAQUE)
@@ -750,13 +775,15 @@ TAILQ_HEAD(sysctl_ctx_list, sysctl_ctx_entry);
 ({									\
 	CTASSERT(((access) & CTLTYPE) == 0 ||				\
 	    ((access) & SYSCTL_CT_ASSERT_MASK) == CTLTYPE_OPAQUE);	\
-	sysctl_add_oid(ctx, parent, nbr, name, CTLTYPE_OPAQUE|(access),	\
+	sysctl_add_oid(ctx, parent, nbr, name,				\
+	    CTLTYPE_OPAQUE | CTLFLAG_MPSAFE | (access),			\
 	    ptr, len, sysctl_handle_opaque, fmt, __DESCR(descr), NULL);	\
 })
 
 /* Oid for a struct.  Specified by a pointer and a type. */
 #define	SYSCTL_STRUCT(parent, nbr, name, access, ptr, type, descr)	\
-	SYSCTL_OID(parent, nbr, name, CTLTYPE_OPAQUE|(access),		\
+	SYSCTL_OID(parent, nbr, name,					\
+	    CTLTYPE_OPAQUE | CTLFLAG_MPSAFE | (access),			\
 	    ptr, sizeof(struct type), sysctl_handle_opaque,		\
 	    "S," #type, descr);						\
 	CTASSERT(((access) & CTLTYPE) == 0 ||				\
@@ -766,7 +793,8 @@ TAILQ_HEAD(sysctl_ctx_list, sysctl_ctx_entry);
 ({									\
 	CTASSERT(((access) & CTLTYPE) == 0 ||				\
 	    ((access) & SYSCTL_CT_ASSERT_MASK) == CTLTYPE_OPAQUE);	\
-	sysctl_add_oid(ctx, parent, nbr, name, CTLTYPE_OPAQUE|(access),	\
+	sysctl_add_oid(ctx, parent, nbr, name,				\
+	    CTLTYPE_OPAQUE | CTLFLAG_MPSAFE | (access),			\
 	    (ptr), sizeof(struct type),					\
 	    sysctl_handle_opaque, "S," #type, __DESCR(descr), NULL);	\
 })
@@ -780,6 +808,7 @@ TAILQ_HEAD(sysctl_ctx_list, sysctl_ctx_entry);
 #define	SYSCTL_ADD_PROC(ctx, parent, nbr, name, access, ptr, arg, handler, fmt, descr) \
 ({									\
 	CTASSERT(((access) & CTLTYPE) != 0);				\
+	SYSCTL_ENFORCE_FLAGS(access);					\
 	sysctl_add_oid(ctx, parent, nbr, name, (access),		\
 	    (ptr), (arg), (handler), (fmt), __DESCR(descr), NULL);	\
 })


More information about the svn-src-all mailing list