Call for testers: New PID allocator patch for -CURRENT
Tim Robbins
tjr at freebsd.org
Thu Jan 29 17:43:48 PST 2004
On Thu, Jan 29, 2004 at 12:04:42PM -0800, David Schultz wrote:
> On Thu, Jan 29, 2004, Xin LI wrote:
> > Greetings, everyone
> >
> > A friend of mine has ported NetBSD's new PID allocation[1] code to FreeBSD,
> > you can obtain the patch here:
> >
> > http://www.arbornet.org/~junsu/pid.diff
> >
> > Some of you may already be aware of this, for he has posted[2] this to both
> > current@ and hackers@ last September.
> >
> > Currently the patchset has been updated to make it applicable to -HEAD.
> >
> > A number of problems were found and fixed after the first post with many
> > from the FreeBSD community, and we think it's time to post it again and,
> > if you are interested, please give it a chance to run on your own (test)
> > box.
>
> Thanks for the reminder. Your patch uses a very clever idea. I
> messed around with the original patch in your PR a few months ago.
> It would take more work to prove that your proposed patch improves
> performance[1] and is a good approach, and to review and clean up
> the implementation.
[...]
> [1] That shouldn't be hard, given that the present algorithm takes
> O(N) amortized time in the worst case, and can examine as many
> as PID_MAX^2 pids if the number of processes in the system is
> close to PID_MAX.
[...]
In my testing, the current pid allocator turned out to be more efficient
than I had expected. Even after reducing PID_MAX to 10,000 and
fragmenting the pid-space by having sleeping processes at every N'th
pid, it didn't run much slower than the hash-based alternative I was
testing it against.
This is the hash-based pid allocator, if anyone's interested:
Change 43361 by tjr at tjr_dev2 on 2003/12/03 01:30:24
Improve scalability of process ID allocation:
- Use hashtables to check whether a pid is in use, to avoid traversing
the entire allproc and zombproc lists and acquiring each item's proc
lock in fork1().
- Add a hashtable for session IDs, sesshashtbl, similar to pidhashtbl
and pgrphashtbl.
- Keep zombies on pidhash chains. Weed them out in pfind(). Rewrite
zpfind() to use pidhash.
PID allocation now scales as a function of the number of used pids
between lastpid+1 and the first free one, instead of the number of
processes in the system. This new allocator is expected to be slightly
slower than the existing one's best-case performance, but should
easily outperform it when the PID space becomes fragmented.
Affected files ...
... //depot/user/tjr/freebsd-tjr/src/sys/kern/kern_exit.c#11 edit
... //depot/user/tjr/freebsd-tjr/src/sys/kern/kern_fork.c#14 edit
... //depot/user/tjr/freebsd-tjr/src/sys/kern/kern_proc.c#21 edit
... //depot/user/tjr/freebsd-tjr/src/sys/sys/proc.h#27 edit
Differences ...
==== //depot/user/tjr/freebsd-tjr/src/sys/kern/kern_exit.c#11 (text+ko) ====
@@ -391,13 +391,12 @@
crfree(td->td_ucred);
/*
- * Remove proc from allproc queue and pidhash chain.
+ * Remove proc from allproc queue.
* Place onto zombproc. Unlink from parent's child list.
*/
sx_xlock(&allproc_lock);
LIST_REMOVE(p, p_list);
LIST_INSERT_HEAD(&zombproc, p, p_list);
- LIST_REMOVE(p, p_hash);
sx_xunlock(&allproc_lock);
sx_xlock(&proctree_lock);
@@ -653,6 +652,7 @@
*/
sx_xlock(&allproc_lock);
LIST_REMOVE(p, p_list); /* off zombproc */
+ LIST_REMOVE(p, p_hash); /* off pidhash chain */
sx_xunlock(&allproc_lock);
LIST_REMOVE(p, p_sibling);
leavepgrp(p);
==== //depot/user/tjr/freebsd-tjr/src/sys/kern/kern_fork.c#14 (text+ko) ====
@@ -158,9 +158,7 @@
* Random component to lastpid generation. We mix in a random factor to make
* it a little harder to predict. We sanity check the modulus value to avoid
* doing it in critical paths. Don't let it be too small or we pointlessly
- * waste randomness entropy, and don't let it be impossibly large. Using a
- * modulus that is too big causes a LOT more process table scans and slows
- * down fork processing as the pidchecked caching is defeated.
+ * waste randomness entropy, and don't let it be impossibly large.
*/
static int randompid = 0;
@@ -199,8 +197,8 @@
struct proc *p1, *p2, *pptr;
uid_t uid;
struct proc *newproc;
- int ok, trypid;
- static int curfail, pidchecked = 0;
+ int ok, trypid, wrapped;
+ static int curfail;
static struct timeval lastfail;
struct filedesc *fd;
struct filedesc_to_leader *fdtol;
@@ -208,6 +206,9 @@
struct kse *ke2;
struct ksegrp *kg2;
struct sigacts *newsigacts;
+ struct proc *pi;
+ struct pgrp *pgi;
+ struct session *si;
int error;
/* Can't copy and clear. */
@@ -323,8 +324,7 @@
nprocs++;
/*
- * Find an unused process ID. We remember a range of unused IDs
- * ready to use (from lastpid+1 through pidchecked-1).
+ * Find an unused process ID.
*
* If RFHIGHPID is set (used during system boot), do not allocate
* low-numbered pids.
@@ -337,63 +337,44 @@
if (randompid)
trypid += arc4random() % randompid;
}
-retry:
- /*
- * If the process ID prototype has wrapped around,
- * restart somewhat above 0, as the low-numbered procs
- * tend to include daemons that don't exit.
- */
- if (trypid >= PID_MAX) {
- trypid = trypid % PID_MAX;
- if (trypid < 100)
- trypid += 100;
- pidchecked = 0;
- }
- if (trypid >= pidchecked) {
- int doingzomb = 0;
-
- pidchecked = PID_MAX;
+ wrapped = 0;
+ while (!wrapped || trypid < lastpid + 1) {
+ /*
+ * If the process ID prototype has wrapped around,
+ * restart somewhat above 0, as the low-numbered procs
+ * tend to include daemons that don't exit.
+ */
+ if (trypid >= PID_MAX) {
+ trypid = trypid % PID_MAX;
+ if (trypid < 100)
+ trypid += 100;
+ wrapped = 1;
+ }
/*
- * Scan the active and zombie procs to check whether this pid
- * is in use. Remember the lowest pid that's greater
- * than trypid, so we can avoid checking for a while.
+ * Check that the prospective ID hasn't already been used.
+ * Use the hash tables to avoid scanning allproc.
*/
- p2 = LIST_FIRST(&allproc);
-again:
- for (; p2 != NULL; p2 = LIST_NEXT(p2, p_list)) {
- PROC_LOCK(p2);
- while (p2->p_pid == trypid ||
- p2->p_pgrp->pg_id == trypid ||
- p2->p_session->s_sid == trypid) {
- trypid++;
- if (trypid >= pidchecked) {
- PROC_UNLOCK(p2);
- goto retry;
- }
- }
- if (p2->p_pid > trypid && pidchecked > p2->p_pid)
- pidchecked = p2->p_pid;
- if (p2->p_pgrp->pg_id > trypid &&
- pidchecked > p2->p_pgrp->pg_id)
- pidchecked = p2->p_pgrp->pg_id;
- if (p2->p_session->s_sid > trypid &&
- pidchecked > p2->p_session->s_sid)
- pidchecked = p2->p_session->s_sid;
- PROC_UNLOCK(p2);
+ LIST_FOREACH(pi, PIDHASH(trypid), p_hash) {
+ if (pi->p_pid == trypid)
+ goto trynext;
+ }
+ LIST_FOREACH(pgi, PGRPHASH(trypid), pg_hash) {
+ if (pgi->pg_id == trypid)
+ goto trynext;
}
- if (!doingzomb) {
- doingzomb = 1;
- p2 = LIST_FIRST(&zombproc);
- goto again;
+ LIST_FOREACH(si, SESSHASH(trypid), s_hash) {
+ if (si->s_sid == trypid)
+ goto trynext;
}
+ break;
+trynext:
+ trypid++;
}
/*
* RFHIGHPID does not mess with the lastpid counter during boot.
*/
- if (flags & RFHIGHPID)
- pidchecked = 0;
- else
+ if (!(flags & RFHIGHPID))
lastpid = trypid;
p2 = newproc;
==== //depot/user/tjr/freebsd-tjr/src/sys/kern/kern_proc.c#21 (text+ko) ====
@@ -90,6 +90,8 @@
u_long pidhash;
struct pgrphashhead *pgrphashtbl;
u_long pgrphash;
+struct sesshashhead *sesshashtbl;
+u_long sesshash;
struct proclist allproc;
struct proclist zombproc;
struct sx allproc_lock;
@@ -123,6 +125,7 @@
LIST_INIT(&zombproc);
pidhashtbl = hashinit(maxproc / 4, M_PROC, &pidhash);
pgrphashtbl = hashinit(maxproc / 4, M_PROC, &pgrphash);
+ sesshashtbl = hashinit(maxproc / 4, M_PROC, &sesshash);
proc_zone = uma_zcreate("PROC", sched_sizeof_proc(),
proc_ctor, proc_dtor, proc_init, proc_fini,
UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
@@ -254,7 +257,7 @@
sx_slock(&allproc_lock);
LIST_FOREACH(p, PIDHASH(pid), p_hash)
- if (p->p_pid == pid) {
+ if (p->p_pid == pid && p->p_state != PRS_ZOMBIE) {
PROC_LOCK(p);
break;
}
@@ -328,6 +331,7 @@
sess->s_ttyp = NULL;
bcopy(p->p_session->s_login, sess->s_login,
sizeof(sess->s_login));
+ LIST_INSERT_HEAD(SESSHASH(sess->s_sid), sess, s_hash);
pgrp->pg_session = sess;
KASSERT(p == curproc,
("enterpgrp: mksession and p != curproc"));
@@ -473,8 +477,9 @@
SESS_UNLOCK(savesess);
PGRP_UNLOCK(pgrp);
if (savesess->s_count == 0) {
+ LIST_REMOVE(savesess, s_hash);
mtx_destroy(&savesess->s_mtx);
- FREE(pgrp->pg_session, M_SESSION);
+ FREE(savesess, M_SESSION);
}
mtx_destroy(&pgrp->pg_mtx);
FREE(pgrp, M_PGRP);
@@ -829,8 +834,8 @@
struct proc *p;
sx_slock(&allproc_lock);
- LIST_FOREACH(p, &zombproc, p_list)
- if (p->p_pid == pid) {
+ LIST_FOREACH(p, PIDHASH(pid), p_hash)
+ if (p->p_pid == pid && p->p_state == PRS_ZOMBIE) {
PROC_LOCK(p);
break;
}
==== //depot/user/tjr/freebsd-tjr/src/sys/sys/proc.h#27 (text+ko) ====
@@ -73,6 +73,7 @@
* (c) const until freeing
*/
struct session {
+ LIST_ENTRY(session) s_hash; /* (e) Hash chain. */
int s_count; /* (m) Ref cnt; pgrps in session. */
struct proc *s_leader; /* (m + e) Session leader. */
struct vnode *s_ttyvp; /* (m) Vnode of controlling tty. */
@@ -799,6 +800,10 @@
extern LIST_HEAD(pgrphashhead, pgrp) *pgrphashtbl;
extern u_long pgrphash;
+#define SESSHASH(sid) (&sesshashtbl[(sid) & sesshash])
+extern LIST_HEAD(sesshashhead, session) *sesshashtbl;
+extern u_long sesshash;
+
extern struct sx allproc_lock;
extern struct sx proctree_lock;
extern struct mtx pargs_ref_lock;
More information about the freebsd-hackers
mailing list