svn commit: r361993 - in head/sys: kern sys

Mateusz Guzik mjg at FreeBSD.org
Tue Jun 9 23:03:50 UTC 2020


Author: mjg
Date: Tue Jun  9 23:03:48 2020
New Revision: 361993
URL: https://svnweb.freebsd.org/changeset/base/361993

Log:
  cred: distribute reference count per thread
  
  This avoids dirtying creds in the common case, see the comment in kern_prot.c
  for details.
  
  Reviewed by:	kib
  Differential Revision:	https://reviews.freebsd.org/D24007

Modified:
  head/sys/kern/init_main.c
  head/sys/kern/kern_fork.c
  head/sys/kern/kern_prot.c
  head/sys/kern/kern_thread.c
  head/sys/sys/proc.h
  head/sys/sys/ucred.h

Modified: head/sys/kern/init_main.c
==============================================================================
--- head/sys/kern/init_main.c	Tue Jun  9 22:42:54 2020	(r361992)
+++ head/sys/kern/init_main.c	Tue Jun  9 23:03:48 2020	(r361993)
@@ -541,7 +541,10 @@ proc0_init(void *dummy __unused)
 	/* End hack. creds get properly set later with thread_cow_get_proc */
 	curthread->td_ucred = NULL;
 	newcred->cr_prison = &prison0;
+	newcred->cr_users++; /* avoid assertion failure */
 	proc_set_cred_init(p, newcred);
+	newcred->cr_users--;
+	crfree(newcred);
 #ifdef AUDIT
 	audit_cred_kproc0(newcred);
 #endif
@@ -810,8 +813,9 @@ create_init(const void *udata __unused)
 #endif
 	proc_set_cred(initproc, newcred);
 	td = FIRST_THREAD_IN_PROC(initproc);
-	crfree(td->td_ucred);
-	td->td_ucred = crhold(initproc->p_ucred);
+	crcowfree(td);
+	td->td_realucred = crcowget(initproc->p_ucred);
+	td->td_ucred = td->td_realucred;
 	PROC_UNLOCK(initproc);
 	sx_xunlock(&proctree_lock);
 	crfree(oldcred);

Modified: head/sys/kern/kern_fork.c
==============================================================================
--- head/sys/kern/kern_fork.c	Tue Jun  9 22:42:54 2020	(r361992)
+++ head/sys/kern/kern_fork.c	Tue Jun  9 23:03:48 2020	(r361993)
@@ -961,7 +961,7 @@ fork1(struct thread *td, struct fork_req *fr)
 	 * XXX: This is ugly; when we copy resource usage, we need to bump
 	 *      per-cred resource counters.
 	 */
-	proc_set_cred_init(newproc, crhold(td->td_ucred));
+	proc_set_cred_init(newproc, td->td_ucred);
 
 	/*
 	 * Initialize resource accounting for the child process.
@@ -998,8 +998,7 @@ fail0:
 #endif
 	racct_proc_exit(newproc);
 fail1:
-	crfree(newproc->p_ucred);
-	newproc->p_ucred = NULL;
+	proc_unset_cred(newproc);
 fail2:
 	if (vm2 != NULL)
 		vmspace_free(vm2);

Modified: head/sys/kern/kern_prot.c
==============================================================================
--- head/sys/kern/kern_prot.c	Tue Jun  9 22:42:54 2020	(r361992)
+++ head/sys/kern/kern_prot.c	Tue Jun  9 23:03:48 2020	(r361993)
@@ -1841,6 +1841,98 @@ p_canwait(struct thread *td, struct proc *p)
 }
 
 /*
+ * Credential management.
+ *
+ * struct ucred objects are rarely allocated but gain and lose references all
+ * the time (e.g., on struct file alloc/dealloc) turning refcount updates into
+ * a significant source of cache-line ping ponging. Common cases are worked
+ * around by modifying thread-local counter instead if the cred to operate on
+ * matches td_realucred.
+ *
+ * The counter is split into 2 parts:
+ * - cr_users -- total count of all struct proc and struct thread objects
+ *   which have given cred in p_ucred and td_ucred respectively
+ * - cr_ref -- the actual ref count, only valid if cr_users == 0
+ *
+ * If users == 0 then cr_ref behaves similarly to refcount(9), in particular if
+ * the count reaches 0 the object is freeable.
+ * If users > 0 and curthread->td_realucred == cred, then updates are performed
+ * against td_ucredref.
+ * In other cases updates are performed against cr_ref.
+ *
+ * Changing td_realucred into something else decrements cr_users and transfers
+ * accumulated updates.
+ */
+struct ucred *
+crcowget(struct ucred *cr)
+{
+
+	mtx_lock(&cr->cr_mtx);
+	KASSERT(cr->cr_users > 0, ("%s: users %d not > 0 on cred %p",
+	    __func__, cr->cr_users, cr));
+	cr->cr_users++;
+	cr->cr_ref++;
+	mtx_unlock(&cr->cr_mtx);
+	return (cr);
+}
+
+static struct ucred *
+crunuse(struct thread *td)
+{
+	struct ucred *cr, *crold;
+
+	cr = td->td_ucred;
+	mtx_lock(&cr->cr_mtx);
+	cr->cr_ref += td->td_ucredref;
+	td->td_ucredref = 0;
+	KASSERT(cr->cr_users > 0, ("%s: users %d not > 0 on cred %p",
+	    __func__, cr->cr_users, cr));
+	cr->cr_users--;
+	if (cr->cr_users == 0) {
+		KASSERT(cr->cr_ref > 0, ("%s: ref %d not > 0 on cred %p",
+		    __func__, cr->cr_ref, cr));
+		crold = cr;
+	} else {
+		cr->cr_ref--;
+		crold = NULL;
+	}
+	mtx_unlock(&cr->cr_mtx);
+	return (crold);
+}
+
+void
+crcowfree(struct thread *td)
+{
+	struct ucred *cr;
+
+	cr = crunuse(td);
+	if (cr != NULL)
+		crfree(cr);
+}
+
+struct ucred *
+crcowsync(void)
+{
+	struct thread *td;
+	struct proc *p;
+	struct ucred *crnew, *crold;
+
+	td = curthread;
+	p = td->td_proc;
+	PROC_LOCK_ASSERT(p, MA_OWNED);
+
+	MPASS(td->td_realucred == td->td_ucred);
+	if (td->td_realucred == p->p_ucred)
+		return (NULL);
+
+	crnew = crcowget(p->p_ucred);
+	crold = crunuse(td);
+	td->td_realucred = crnew;
+	td->td_ucred = td->td_realucred;
+	return (crold);
+}
+
+/*
  * Allocate a zeroed cred structure.
  */
 struct ucred *
@@ -1849,7 +1941,8 @@ crget(void)
 	struct ucred *cr;
 
 	cr = malloc(sizeof(*cr), M_CRED, M_WAITOK | M_ZERO);
-	refcount_init(&cr->cr_ref, 1);
+	mtx_init(&cr->cr_mtx, "cred", NULL, MTX_DEF);
+	cr->cr_ref = 1;
 #ifdef AUDIT
 	audit_cred_init(cr);
 #endif
@@ -1868,8 +1961,18 @@ crget(void)
 struct ucred *
 crhold(struct ucred *cr)
 {
+	struct thread *td;
 
-	refcount_acquire(&cr->cr_ref);
+	td = curthread;
+	if (__predict_true(td->td_realucred == cr)) {
+		KASSERT(cr->cr_users > 0, ("%s: users %d not > 0 on cred %p",
+		    __func__, cr->cr_users, cr));
+		td->td_ucredref++;
+		return (cr);
+	}
+	mtx_lock(&cr->cr_mtx);
+	cr->cr_ref++;
+	mtx_unlock(&cr->cr_mtx);
 	return (cr);
 }
 
@@ -1879,36 +1982,51 @@ crhold(struct ucred *cr)
 void
 crfree(struct ucred *cr)
 {
+	struct thread *td;
 
-	KASSERT(cr->cr_ref > 0, ("bad ucred refcount: %d", cr->cr_ref));
-	KASSERT(cr->cr_ref != 0xdeadc0de, ("dangling reference to ucred"));
-	if (refcount_release(&cr->cr_ref)) {
-		/*
-		 * Some callers of crget(), such as nfs_statfs(),
-		 * allocate a temporary credential, but don't
-		 * allocate a uidinfo structure.
-		 */
-		if (cr->cr_uidinfo != NULL)
-			uifree(cr->cr_uidinfo);
-		if (cr->cr_ruidinfo != NULL)
-			uifree(cr->cr_ruidinfo);
-		/*
-		 * Free a prison, if any.
-		 */
-		if (cr->cr_prison != NULL)
-			prison_free(cr->cr_prison);
-		if (cr->cr_loginclass != NULL)
-			loginclass_free(cr->cr_loginclass);
+	td = curthread;
+	if (td->td_realucred == cr) {
+		KASSERT(cr->cr_users > 0, ("%s: users %d not > 0 on cred %p",
+		    __func__, cr->cr_users, cr));
+		td->td_ucredref--;
+		return;
+	}
+	mtx_lock(&cr->cr_mtx);
+	KASSERT(cr->cr_users >= 0, ("%s: users %d not >= 0 on cred %p",
+	    __func__, cr->cr_users, cr));
+	cr->cr_ref--;
+	if (cr->cr_users > 0) {
+		mtx_unlock(&cr->cr_mtx);
+		return;
+	}
+	KASSERT(cr->cr_ref >= 0, ("%s: ref %d not >= 0 on cred %p",
+	    __func__, cr->cr_ref, cr));
+	if (cr->cr_ref > 0) {
+		mtx_unlock(&cr->cr_mtx);
+		return;
+	}
+	/*
+	 * Some callers of crget(), such as nfs_statfs(), allocate a temporary
+	 * credential, but don't allocate a uidinfo structure.
+	 */
+	if (cr->cr_uidinfo != NULL)
+		uifree(cr->cr_uidinfo);
+	if (cr->cr_ruidinfo != NULL)
+		uifree(cr->cr_ruidinfo);
+	if (cr->cr_prison != NULL)
+		prison_free(cr->cr_prison);
+	if (cr->cr_loginclass != NULL)
+		loginclass_free(cr->cr_loginclass);
 #ifdef AUDIT
-		audit_cred_destroy(cr);
+	audit_cred_destroy(cr);
 #endif
 #ifdef MAC
-		mac_cred_destroy(cr);
+	mac_cred_destroy(cr);
 #endif
-		if (cr->cr_groups != cr->cr_smallgroups)
-			free(cr->cr_groups, M_CRED);
-		free(cr, M_CRED);
-	}
+	mtx_destroy(&cr->cr_mtx);
+	if (cr->cr_groups != cr->cr_smallgroups)
+		free(cr->cr_groups, M_CRED);
+	free(cr, M_CRED);
 }
 
 /*
@@ -1982,7 +2100,7 @@ void
 proc_set_cred_init(struct proc *p, struct ucred *newcred)
 {
 
-	p->p_ucred = newcred;
+	p->p_ucred = crcowget(newcred);
 }
 
 /*
@@ -1998,10 +2116,20 @@ proc_set_cred_init(struct proc *p, struct ucred *newcr
 void
 proc_set_cred(struct proc *p, struct ucred *newcred)
 {
+	struct ucred *cr;
 
-	MPASS(p->p_ucred != NULL);
+	cr = p->p_ucred;
+	MPASS(cr != NULL);
 	PROC_LOCK_ASSERT(p, MA_OWNED);
+	KASSERT(newcred->cr_users == 0, ("%s: users %d not 0 on cred %p",
+	    __func__, newcred->cr_users, newcred));
+	mtx_lock(&cr->cr_mtx);
+	KASSERT(cr->cr_users > 0, ("%s: users %d not > 0 on cred %p",
+	    __func__, cr->cr_users, cr));
+	cr->cr_users--;
+	mtx_unlock(&cr->cr_mtx);
 	p->p_ucred = newcred;
+	newcred->cr_users = 1;
 	PROC_UPDATE_COW(p);
 }
 
@@ -2010,9 +2138,17 @@ proc_unset_cred(struct proc *p)
 {
 	struct ucred *cr;
 
-	MPASS(p->p_state == PRS_ZOMBIE);
+	MPASS(p->p_state == PRS_ZOMBIE || p->p_state == PRS_NEW);
 	cr = p->p_ucred;
 	p->p_ucred = NULL;
+	KASSERT(cr->cr_users > 0, ("%s: users %d not > 0 on cred %p",
+	    __func__, cr->cr_users, cr));
+	mtx_lock(&cr->cr_mtx);
+	cr->cr_users--;
+	if (cr->cr_users == 0)
+		KASSERT(cr->cr_ref > 0, ("%s: ref %d not > 0 on cred %p",
+		    __func__, cr->cr_ref, cr));
+	mtx_unlock(&cr->cr_mtx);
 	crfree(cr);
 }
 

Modified: head/sys/kern/kern_thread.c
==============================================================================
--- head/sys/kern/kern_thread.c	Tue Jun  9 22:42:54 2020	(r361992)
+++ head/sys/kern/kern_thread.c	Tue Jun  9 23:03:48 2020	(r361993)
@@ -82,9 +82,9 @@ _Static_assert(offsetof(struct thread, td_flags) == 0x
     "struct thread KBI td_flags");
 _Static_assert(offsetof(struct thread, td_pflags) == 0x104,
     "struct thread KBI td_pflags");
-_Static_assert(offsetof(struct thread, td_frame) == 0x498,
+_Static_assert(offsetof(struct thread, td_frame) == 0x4a8,
     "struct thread KBI td_frame");
-_Static_assert(offsetof(struct thread, td_emuldata) == 0x6a0,
+_Static_assert(offsetof(struct thread, td_emuldata) == 0x6b0,
     "struct thread KBI td_emuldata");
 _Static_assert(offsetof(struct proc, p_flag) == 0xb0,
     "struct proc KBI p_flag");
@@ -102,9 +102,9 @@ _Static_assert(offsetof(struct thread, td_flags) == 0x
     "struct thread KBI td_flags");
 _Static_assert(offsetof(struct thread, td_pflags) == 0xa0,
     "struct thread KBI td_pflags");
-_Static_assert(offsetof(struct thread, td_frame) == 0x2fc,
+_Static_assert(offsetof(struct thread, td_frame) == 0x304,
     "struct thread KBI td_frame");
-_Static_assert(offsetof(struct thread, td_emuldata) == 0x340,
+_Static_assert(offsetof(struct thread, td_emuldata) == 0x348,
     "struct thread KBI td_emuldata");
 _Static_assert(offsetof(struct proc, p_flag) == 0x68,
     "struct proc KBI p_flag");
@@ -463,7 +463,8 @@ thread_cow_get_proc(struct thread *newtd, struct proc 
 {
 
 	PROC_LOCK_ASSERT(p, MA_OWNED);
-	newtd->td_ucred = crhold(p->p_ucred);
+	newtd->td_realucred = crcowget(p->p_ucred);
+	newtd->td_ucred = newtd->td_realucred;
 	newtd->td_limit = lim_hold(p->p_limit);
 	newtd->td_cowgen = p->p_cowgen;
 }
@@ -472,7 +473,9 @@ void
 thread_cow_get(struct thread *newtd, struct thread *td)
 {
 
-	newtd->td_ucred = crhold(td->td_ucred);
+	MPASS(td->td_realucred == td->td_ucred);
+	newtd->td_realucred = crcowget(td->td_realucred);
+	newtd->td_ucred = newtd->td_realucred;
 	newtd->td_limit = lim_hold(td->td_limit);
 	newtd->td_cowgen = td->td_cowgen;
 }
@@ -481,8 +484,8 @@ void
 thread_cow_free(struct thread *td)
 {
 
-	if (td->td_ucred != NULL)
-		crfree(td->td_ucred);
+	if (td->td_realucred != NULL)
+		crcowfree(td);
 	if (td->td_limit != NULL)
 		lim_free(td->td_limit);
 }
@@ -495,13 +498,9 @@ thread_cow_update(struct thread *td)
 	struct plimit *oldlimit;
 
 	p = td->td_proc;
-	oldcred = NULL;
 	oldlimit = NULL;
 	PROC_LOCK(p);
-	if (td->td_ucred != p->p_ucred) {
-		oldcred = td->td_ucred;
-		td->td_ucred = crhold(p->p_ucred);
-	}
+	oldcred = crcowsync();
 	if (td->td_limit != p->p_limit) {
 		oldlimit = td->td_limit;
 		td->td_limit = lim_hold(p->p_limit);

Modified: head/sys/sys/proc.h
==============================================================================
--- head/sys/sys/proc.h	Tue Jun  9 22:42:54 2020	(r361992)
+++ head/sys/sys/proc.h	Tue Jun  9 23:03:48 2020	(r361993)
@@ -267,7 +267,8 @@ struct thread {
 	struct lock_list_entry *td_sleeplocks; /* (k) Held sleep locks. */
 	int		td_intr_nesting_level; /* (k) Interrupt recursion. */
 	int		td_pinned;	/* (k) Temporary cpu pin count. */
-	struct ucred	*td_ucred;	/* (k) Reference to credentials. */
+	struct ucred	*td_realucred;	/* (k) Reference to credentials. */
+	struct ucred	*td_ucred;	/* (k) Used credentials, temporarily switchable. */
 	struct plimit	*td_limit;	/* (k) Resource limits. */
 	int		td_slptick;	/* (t) Time at sleep. */
 	int		td_blktick;	/* (t) Time spent blocked. */
@@ -305,6 +306,7 @@ struct thread {
 	int		td_errno;	/* (k) Error from last syscall. */
 	size_t		td_vslock_sz;	/* (k) amount of vslock-ed space */
 	struct kcov_info *td_kcov_info;	/* (*) Kernel code coverage data */
+	u_int		td_ucredref;	/* (k) references on td_realucred */
 #define	td_endzero td_sigmask
 
 /* Copied during fork1() or create_thread(). */

Modified: head/sys/sys/ucred.h
==============================================================================
--- head/sys/sys/ucred.h	Tue Jun  9 22:42:54 2020	(r361992)
+++ head/sys/sys/ucred.h	Tue Jun  9 23:03:48 2020	(r361993)
@@ -35,6 +35,10 @@
 #ifndef _SYS_UCRED_H_
 #define	_SYS_UCRED_H_
 
+#if defined(_KERNEL) || defined(_WANT_UCRED)
+#include <sys/_lock.h>
+#include <sys/_mutex.h>
+#endif
 #include <bsm/audit.h>
 
 struct loginclass;
@@ -46,10 +50,19 @@ struct loginclass;
  *
  * Please do not inspect cr_uid directly to determine superuserness.  The
  * priv(9) interface should be used to check for privilege.
+ *
+ * Lock reference:
+ *      c - cr_mtx
+ *
+ * Unmarked fields are constant after creation.
+ *
+ * See "Credential management" comment in kern_prot.c for more information.
  */
 #if defined(_KERNEL) || defined(_WANT_UCRED)
 struct ucred {
-	u_int	cr_ref;			/* reference count */
+	struct mtx cr_mtx;
+	u_int	cr_ref;			/* (c) reference count */
+	u_int	cr_users;		/* (c) proc + thread using this cred */
 #define	cr_startcopy cr_uid
 	uid_t	cr_uid;			/* effective user id */
 	uid_t	cr_ruid;		/* real user id */
@@ -115,8 +128,11 @@ void	proc_set_cred_init(struct proc *p, struct ucred *
 void	proc_set_cred(struct proc *p, struct ucred *cr);
 void	proc_unset_cred(struct proc *p);
 void	crfree(struct ucred *cr);
+struct ucred	*crcowsync(void);
 struct ucred	*crget(void);
 struct ucred	*crhold(struct ucred *cr);
+struct ucred	*crcowget(struct ucred *cr);
+void	crcowfree(struct thread *td);
 void	cru2x(struct ucred *cr, struct xucred *xcr);
 void	cru2xt(struct thread *td, struct xucred *xcr);
 void	crsetgroups(struct ucred *cr, int n, gid_t *groups);


More information about the svn-src-all mailing list