Page MenuHomeFreeBSD

D24007.diff
No OneTemporary

D24007.diff

Index: head/sys/kern/init_main.c
===================================================================
--- head/sys/kern/init_main.c
+++ head/sys/kern/init_main.c
@@ -541,7 +541,10 @@
/* End hack. creds get properly set later with thread_cow_get_proc */
curthread->td_ucred = NULL;
newcred->cr_prison = &prison0;
+ newcred->cr_users++; /* avoid assertion failure */
proc_set_cred_init(p, newcred);
+ newcred->cr_users--;
+ crfree(newcred);
#ifdef AUDIT
audit_cred_kproc0(newcred);
#endif
@@ -810,8 +813,9 @@
#endif
proc_set_cred(initproc, newcred);
td = FIRST_THREAD_IN_PROC(initproc);
- crfree(td->td_ucred);
- td->td_ucred = crhold(initproc->p_ucred);
+ crcowfree(td);
+ td->td_realucred = crcowget(initproc->p_ucred);
+ td->td_ucred = td->td_realucred;
PROC_UNLOCK(initproc);
sx_xunlock(&proctree_lock);
crfree(oldcred);
Index: head/sys/kern/kern_fork.c
===================================================================
--- head/sys/kern/kern_fork.c
+++ head/sys/kern/kern_fork.c
@@ -961,7 +961,7 @@
* XXX: This is ugly; when we copy resource usage, we need to bump
* per-cred resource counters.
*/
- proc_set_cred_init(newproc, crhold(td->td_ucred));
+ proc_set_cred_init(newproc, td->td_ucred);
/*
* Initialize resource accounting for the child process.
@@ -998,8 +998,7 @@
#endif
racct_proc_exit(newproc);
fail1:
- crfree(newproc->p_ucred);
- newproc->p_ucred = NULL;
+ proc_unset_cred(newproc);
fail2:
if (vm2 != NULL)
vmspace_free(vm2);
Index: head/sys/kern/kern_prot.c
===================================================================
--- head/sys/kern/kern_prot.c
+++ head/sys/kern/kern_prot.c
@@ -1841,6 +1841,98 @@
}
/*
+ * Credential management.
+ *
+ * struct ucred objects are rarely allocated but gain and lose references all
+ * the time (e.g., on struct file alloc/dealloc) turning refcount updates into
+ * a significant source of cache-line ping ponging. Common cases are worked
+ * around by modifying thread-local counter instead if the cred to operate on
+ * matches td_realucred.
+ *
+ * The counter is split into 2 parts:
+ * - cr_users -- total count of all struct proc and struct thread objects
+ * which have given cred in p_ucred and td_ucred respectively
+ * - cr_ref -- the actual ref count, only valid if cr_users == 0
+ *
+ * If users == 0 then cr_ref behaves similarly to refcount(9), in particular if
+ * the count reaches 0 the object is freeable.
+ * If users > 0 and curthread->td_realucred == cred, then updates are performed
+ * against td_ucredref.
+ * In other cases updates are performed against cr_ref.
+ *
+ * Changing td_realucred into something else decrements cr_users and transfers
+ * accumulated updates.
+ */
+struct ucred *
+crcowget(struct ucred *cr)
+{
+
+ mtx_lock(&cr->cr_mtx);
+ KASSERT(cr->cr_users > 0, ("%s: users %d not > 0 on cred %p",
+ __func__, cr->cr_users, cr));
+ cr->cr_users++;
+ cr->cr_ref++;
+ mtx_unlock(&cr->cr_mtx);
+ return (cr);
+}
+
+static struct ucred *
+crunuse(struct thread *td)
+{
+ struct ucred *cr, *crold;
+
+ cr = td->td_ucred;
+ mtx_lock(&cr->cr_mtx);
+ cr->cr_ref += td->td_ucredref;
+ td->td_ucredref = 0;
+ KASSERT(cr->cr_users > 0, ("%s: users %d not > 0 on cred %p",
+ __func__, cr->cr_users, cr));
+ cr->cr_users--;
+ if (cr->cr_users == 0) {
+ KASSERT(cr->cr_ref > 0, ("%s: ref %d not > 0 on cred %p",
+ __func__, cr->cr_ref, cr));
+ crold = cr;
+ } else {
+ cr->cr_ref--;
+ crold = NULL;
+ }
+ mtx_unlock(&cr->cr_mtx);
+ return (crold);
+}
+
+void
+crcowfree(struct thread *td)
+{
+ struct ucred *cr;
+
+ cr = crunuse(td);
+ if (cr != NULL)
+ crfree(cr);
+}
+
+struct ucred *
+crcowsync(void)
+{
+ struct thread *td;
+ struct proc *p;
+ struct ucred *crnew, *crold;
+
+ td = curthread;
+ p = td->td_proc;
+ PROC_LOCK_ASSERT(p, MA_OWNED);
+
+ MPASS(td->td_realucred == td->td_ucred);
+ if (td->td_realucred == p->p_ucred)
+ return (NULL);
+
+ crnew = crcowget(p->p_ucred);
+ crold = crunuse(td);
+ td->td_realucred = crnew;
+ td->td_ucred = td->td_realucred;
+ return (crold);
+}
+
+/*
* Allocate a zeroed cred structure.
*/
struct ucred *
@@ -1849,7 +1941,8 @@
struct ucred *cr;
cr = malloc(sizeof(*cr), M_CRED, M_WAITOK | M_ZERO);
- refcount_init(&cr->cr_ref, 1);
+ mtx_init(&cr->cr_mtx, "cred", NULL, MTX_DEF);
+ cr->cr_ref = 1;
#ifdef AUDIT
audit_cred_init(cr);
#endif
@@ -1868,8 +1961,18 @@
struct ucred *
crhold(struct ucred *cr)
{
+ struct thread *td;
- refcount_acquire(&cr->cr_ref);
+ td = curthread;
+ if (__predict_true(td->td_realucred == cr)) {
+ KASSERT(cr->cr_users > 0, ("%s: users %d not > 0 on cred %p",
+ __func__, cr->cr_users, cr));
+ td->td_ucredref++;
+ return (cr);
+ }
+ mtx_lock(&cr->cr_mtx);
+ cr->cr_ref++;
+ mtx_unlock(&cr->cr_mtx);
return (cr);
}
@@ -1879,36 +1982,51 @@
void
crfree(struct ucred *cr)
{
+ struct thread *td;
- KASSERT(cr->cr_ref > 0, ("bad ucred refcount: %d", cr->cr_ref));
- KASSERT(cr->cr_ref != 0xdeadc0de, ("dangling reference to ucred"));
- if (refcount_release(&cr->cr_ref)) {
- /*
- * Some callers of crget(), such as nfs_statfs(),
- * allocate a temporary credential, but don't
- * allocate a uidinfo structure.
- */
- if (cr->cr_uidinfo != NULL)
- uifree(cr->cr_uidinfo);
- if (cr->cr_ruidinfo != NULL)
- uifree(cr->cr_ruidinfo);
- /*
- * Free a prison, if any.
- */
- if (cr->cr_prison != NULL)
- prison_free(cr->cr_prison);
- if (cr->cr_loginclass != NULL)
- loginclass_free(cr->cr_loginclass);
+ td = curthread;
+ if (td->td_realucred == cr) {
+ KASSERT(cr->cr_users > 0, ("%s: users %d not > 0 on cred %p",
+ __func__, cr->cr_users, cr));
+ td->td_ucredref--;
+ return;
+ }
+ mtx_lock(&cr->cr_mtx);
+ KASSERT(cr->cr_users >= 0, ("%s: users %d not >= 0 on cred %p",
+ __func__, cr->cr_users, cr));
+ cr->cr_ref--;
+ if (cr->cr_users > 0) {
+ mtx_unlock(&cr->cr_mtx);
+ return;
+ }
+ KASSERT(cr->cr_ref >= 0, ("%s: ref %d not >= 0 on cred %p",
+ __func__, cr->cr_ref, cr));
+ if (cr->cr_ref > 0) {
+ mtx_unlock(&cr->cr_mtx);
+ return;
+ }
+ /*
+ * Some callers of crget(), such as nfs_statfs(), allocate a temporary
+ * credential, but don't allocate a uidinfo structure.
+ */
+ if (cr->cr_uidinfo != NULL)
+ uifree(cr->cr_uidinfo);
+ if (cr->cr_ruidinfo != NULL)
+ uifree(cr->cr_ruidinfo);
+ if (cr->cr_prison != NULL)
+ prison_free(cr->cr_prison);
+ if (cr->cr_loginclass != NULL)
+ loginclass_free(cr->cr_loginclass);
#ifdef AUDIT
- audit_cred_destroy(cr);
+ audit_cred_destroy(cr);
#endif
#ifdef MAC
- mac_cred_destroy(cr);
+ mac_cred_destroy(cr);
#endif
- if (cr->cr_groups != cr->cr_smallgroups)
- free(cr->cr_groups, M_CRED);
- free(cr, M_CRED);
- }
+ mtx_destroy(&cr->cr_mtx);
+ if (cr->cr_groups != cr->cr_smallgroups)
+ free(cr->cr_groups, M_CRED);
+ free(cr, M_CRED);
}
/*
@@ -1982,7 +2100,7 @@
proc_set_cred_init(struct proc *p, struct ucred *newcred)
{
- p->p_ucred = newcred;
+ p->p_ucred = crcowget(newcred);
}
/*
@@ -1998,10 +2116,20 @@
void
proc_set_cred(struct proc *p, struct ucred *newcred)
{
+ struct ucred *cr;
- MPASS(p->p_ucred != NULL);
+ cr = p->p_ucred;
+ MPASS(cr != NULL);
PROC_LOCK_ASSERT(p, MA_OWNED);
+ KASSERT(newcred->cr_users == 0, ("%s: users %d not 0 on cred %p",
+ __func__, newcred->cr_users, newcred));
+ mtx_lock(&cr->cr_mtx);
+ KASSERT(cr->cr_users > 0, ("%s: users %d not > 0 on cred %p",
+ __func__, cr->cr_users, cr));
+ cr->cr_users--;
+ mtx_unlock(&cr->cr_mtx);
p->p_ucred = newcred;
+ newcred->cr_users = 1;
PROC_UPDATE_COW(p);
}
@@ -2010,9 +2138,17 @@
{
struct ucred *cr;
- MPASS(p->p_state == PRS_ZOMBIE);
+ MPASS(p->p_state == PRS_ZOMBIE || p->p_state == PRS_NEW);
cr = p->p_ucred;
p->p_ucred = NULL;
+ KASSERT(cr->cr_users > 0, ("%s: users %d not > 0 on cred %p",
+ __func__, cr->cr_users, cr));
+ mtx_lock(&cr->cr_mtx);
+ cr->cr_users--;
+ if (cr->cr_users == 0)
+ KASSERT(cr->cr_ref > 0, ("%s: ref %d not > 0 on cred %p",
+ __func__, cr->cr_ref, cr));
+ mtx_unlock(&cr->cr_mtx);
crfree(cr);
}
Index: head/sys/kern/kern_thread.c
===================================================================
--- head/sys/kern/kern_thread.c
+++ head/sys/kern/kern_thread.c
@@ -82,9 +82,9 @@
"struct thread KBI td_flags");
_Static_assert(offsetof(struct thread, td_pflags) == 0x104,
"struct thread KBI td_pflags");
-_Static_assert(offsetof(struct thread, td_frame) == 0x498,
+_Static_assert(offsetof(struct thread, td_frame) == 0x4a8,
"struct thread KBI td_frame");
-_Static_assert(offsetof(struct thread, td_emuldata) == 0x6a0,
+_Static_assert(offsetof(struct thread, td_emuldata) == 0x6b0,
"struct thread KBI td_emuldata");
_Static_assert(offsetof(struct proc, p_flag) == 0xb0,
"struct proc KBI p_flag");
@@ -102,9 +102,9 @@
"struct thread KBI td_flags");
_Static_assert(offsetof(struct thread, td_pflags) == 0xa0,
"struct thread KBI td_pflags");
-_Static_assert(offsetof(struct thread, td_frame) == 0x2fc,
+_Static_assert(offsetof(struct thread, td_frame) == 0x304,
"struct thread KBI td_frame");
-_Static_assert(offsetof(struct thread, td_emuldata) == 0x340,
+_Static_assert(offsetof(struct thread, td_emuldata) == 0x348,
"struct thread KBI td_emuldata");
_Static_assert(offsetof(struct proc, p_flag) == 0x68,
"struct proc KBI p_flag");
@@ -463,7 +463,8 @@
{
PROC_LOCK_ASSERT(p, MA_OWNED);
- newtd->td_ucred = crhold(p->p_ucred);
+ newtd->td_realucred = crcowget(p->p_ucred);
+ newtd->td_ucred = newtd->td_realucred;
newtd->td_limit = lim_hold(p->p_limit);
newtd->td_cowgen = p->p_cowgen;
}
@@ -472,7 +473,9 @@
thread_cow_get(struct thread *newtd, struct thread *td)
{
- newtd->td_ucred = crhold(td->td_ucred);
+ MPASS(td->td_realucred == td->td_ucred);
+ newtd->td_realucred = crcowget(td->td_realucred);
+ newtd->td_ucred = newtd->td_realucred;
newtd->td_limit = lim_hold(td->td_limit);
newtd->td_cowgen = td->td_cowgen;
}
@@ -481,8 +484,8 @@
thread_cow_free(struct thread *td)
{
- if (td->td_ucred != NULL)
- crfree(td->td_ucred);
+ if (td->td_realucred != NULL)
+ crcowfree(td);
if (td->td_limit != NULL)
lim_free(td->td_limit);
}
@@ -495,13 +498,9 @@
struct plimit *oldlimit;
p = td->td_proc;
- oldcred = NULL;
oldlimit = NULL;
PROC_LOCK(p);
- if (td->td_ucred != p->p_ucred) {
- oldcred = td->td_ucred;
- td->td_ucred = crhold(p->p_ucred);
- }
+ oldcred = crcowsync();
if (td->td_limit != p->p_limit) {
oldlimit = td->td_limit;
td->td_limit = lim_hold(p->p_limit);
Index: head/sys/sys/proc.h
===================================================================
--- head/sys/sys/proc.h
+++ head/sys/sys/proc.h
@@ -267,7 +267,8 @@
struct lock_list_entry *td_sleeplocks; /* (k) Held sleep locks. */
int td_intr_nesting_level; /* (k) Interrupt recursion. */
int td_pinned; /* (k) Temporary cpu pin count. */
- struct ucred *td_ucred; /* (k) Reference to credentials. */
+ struct ucred *td_realucred; /* (k) Reference to credentials. */
+ struct ucred *td_ucred; /* (k) Used credentials, temporarily switchable. */
struct plimit *td_limit; /* (k) Resource limits. */
int td_slptick; /* (t) Time at sleep. */
int td_blktick; /* (t) Time spent blocked. */
@@ -305,6 +306,7 @@
int td_errno; /* (k) Error from last syscall. */
size_t td_vslock_sz; /* (k) amount of vslock-ed space */
struct kcov_info *td_kcov_info; /* (*) Kernel code coverage data */
+ u_int td_ucredref; /* (k) references on td_realucred */
#define td_endzero td_sigmask
/* Copied during fork1() or create_thread(). */
Index: head/sys/sys/ucred.h
===================================================================
--- head/sys/sys/ucred.h
+++ head/sys/sys/ucred.h
@@ -35,6 +35,10 @@
#ifndef _SYS_UCRED_H_
#define _SYS_UCRED_H_
+#if defined(_KERNEL) || defined(_WANT_UCRED)
+#include <sys/_lock.h>
+#include <sys/_mutex.h>
+#endif
#include <bsm/audit.h>
struct loginclass;
@@ -46,10 +50,19 @@
*
* Please do not inspect cr_uid directly to determine superuserness. The
* priv(9) interface should be used to check for privilege.
+ *
+ * Lock reference:
+ * c - cr_mtx
+ *
+ * Unmarked fields are constant after creation.
+ *
+ * See "Credential management" comment in kern_prot.c for more information.
*/
#if defined(_KERNEL) || defined(_WANT_UCRED)
struct ucred {
- u_int cr_ref; /* reference count */
+ struct mtx cr_mtx;
+ u_int cr_ref; /* (c) reference count */
+ u_int cr_users; /* (c) proc + thread using this cred */
#define cr_startcopy cr_uid
uid_t cr_uid; /* effective user id */
uid_t cr_ruid; /* real user id */
@@ -115,8 +128,11 @@
void proc_set_cred(struct proc *p, struct ucred *cr);
void proc_unset_cred(struct proc *p);
void crfree(struct ucred *cr);
+struct ucred *crcowsync(void);
struct ucred *crget(void);
struct ucred *crhold(struct ucred *cr);
+struct ucred *crcowget(struct ucred *cr);
+void crcowfree(struct thread *td);
void cru2x(struct ucred *cr, struct xucred *xcr);
void cru2xt(struct thread *td, struct xucred *xcr);
void crsetgroups(struct ucred *cr, int n, gid_t *groups);

File Metadata

Mime Type
text/plain
Expires
Wed, Feb 5, 4:45 PM (22 h, 10 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
16477361
Default Alt Text
D24007.diff (12 KB)

Event Timeline