cvs commit: src/sys/kern kern_proc.c
Don Lewis
truckman at FreeBSD.org
Sun Aug 15 17:51:56 PDT 2004
On 14 Aug, Julian Elischer wrote:
> Robert Watson wrote:
>> rwatson 2004-08-14 17:15:16 UTC
>>
>> FreeBSD src repository
>>
>> Modified files:
>> sys/kern kern_proc.c
>> Log:
>> Cause pfind() not to return processes in the PRS_NEW state. As a result,
>> threads consuming the result of pfind() will not need to check for a NULL
>> credential pointer or other signs of an incompletely created process.
>> However, this also means that pfind() cannot be used to test for the
>> existence or find such a process. Annotate pfind() to indicate that this
>> is the case. A review of curent consumers seems to indicate that this is
>> not a problem for any of them. This closes a number of race conditions
>> that could result in NULL pointer dereferences and related failure modes.
>> Other related races continue to exist, especially during iteration of the
>> allproc list without due caution.
>
> possibly part of the answer would be to not put the proc on any queues until it
> is more set up..
That's been my opinion for quite a while.
I spent some time this morning rototilling fork1() to move pid selection
and adding the child process to the allproc and other lists to one
location in the code. I also gathered all the scheduler stuff in one
spot.
I think there are more things that should be done, such as moving the
block of code projected by Giant that beings with vm_forkproc() to a
spot above where we grab proctree_lock the final time. I think the
_PHOLD() and _PRELE() calls could be moved with it. Maybe also move the
EVENTHANDLER_INVOKE() call a bit later, near the KNOTE_LOCKED() call.
This patch has survived a "make -j10 buildworld" and building some ports
on a UP machine. Unfortunately this is not a good time for making
extensive changes to the fork code.
Index: sys/kern/kern_fork.c
===================================================================
RCS file: /home/ncvs/src/sys/kern/kern_fork.c,v
retrieving revision 1.234
diff -u -r1.234 kern_fork.c
--- sys/kern/kern_fork.c 15 Aug 2004 06:24:41 -0000 1.234
+++ sys/kern/kern_fork.c 15 Aug 2004 21:17:15 -0000
@@ -194,9 +194,8 @@
int pages;
struct proc **procp;
{
- struct proc *p1, *p2, *pptr;
+ struct proc *p1, *p2, *p3, *pptr;
uid_t uid;
- struct proc *newproc;
int ok, trypid;
static int curfail, pidchecked = 0;
static struct timeval lastfail;
@@ -283,14 +282,11 @@
}
/* Allocate new proc. */
- newproc = uma_zalloc(proc_zone, M_WAITOK);
+ p2 = uma_zalloc(proc_zone, M_WAITOK);
#ifdef MAC
- mac_init_proc(newproc);
+ mac_init_proc(p2);
#endif
- knlist_init(&newproc->p_klist, &newproc->p_mtx);
-
- /* We have to lock the process tree while we look for a pid. */
- sx_slock(&proctree_lock);
+ knlist_init(&p2->p_klist, &p2->p_mtx);
/*
* Although process entries are dynamically created, we still keep
@@ -326,92 +322,6 @@
* are hard-limits as to the number of processes that can run.
*/
nprocs++;
-
- /*
- * Find an unused process ID. We remember a range of unused IDs
- * ready to use (from lastpid+1 through pidchecked-1).
- *
- * If RFHIGHPID is set (used during system boot), do not allocate
- * low-numbered pids.
- */
- trypid = lastpid + 1;
- if (flags & RFHIGHPID) {
- if (trypid < 10)
- trypid = 10;
- } else {
- if (randompid)
- trypid += arc4random() % randompid;
- }
-retry:
- /*
- * If the process ID prototype has wrapped around,
- * restart somewhat above 0, as the low-numbered procs
- * tend to include daemons that don't exit.
- */
- if (trypid >= PID_MAX) {
- trypid = trypid % PID_MAX;
- if (trypid < 100)
- trypid += 100;
- pidchecked = 0;
- }
- if (trypid >= pidchecked) {
- int doingzomb = 0;
-
- pidchecked = PID_MAX;
- /*
- * Scan the active and zombie procs to check whether this pid
- * is in use. Remember the lowest pid that's greater
- * than trypid, so we can avoid checking for a while.
- */
- p2 = LIST_FIRST(&allproc);
-again:
- for (; p2 != NULL; p2 = LIST_NEXT(p2, p_list)) {
- PROC_LOCK(p2);
- while (p2->p_pid == trypid ||
- (p2->p_pgrp != NULL &&
- (p2->p_pgrp->pg_id == trypid ||
- (p2->p_session != NULL &&
- p2->p_session->s_sid == trypid)))) {
- trypid++;
- if (trypid >= pidchecked) {
- PROC_UNLOCK(p2);
- goto retry;
- }
- }
- if (p2->p_pid > trypid && pidchecked > p2->p_pid)
- pidchecked = p2->p_pid;
- if (p2->p_pgrp != NULL) {
- if (p2->p_pgrp->pg_id > trypid &&
- pidchecked > p2->p_pgrp->pg_id)
- pidchecked = p2->p_pgrp->pg_id;
- if (p2->p_session != NULL &&
- p2->p_session->s_sid > trypid &&
- pidchecked > p2->p_session->s_sid)
- pidchecked = p2->p_session->s_sid;
- }
- PROC_UNLOCK(p2);
- }
- if (!doingzomb) {
- doingzomb = 1;
- p2 = LIST_FIRST(&zombproc);
- goto again;
- }
- }
- sx_sunlock(&proctree_lock);
-
- /*
- * RFHIGHPID does not mess with the lastpid counter during boot.
- */
- if (flags & RFHIGHPID)
- pidchecked = 0;
- else
- lastpid = trypid;
-
- p2 = newproc;
- p2->p_state = PRS_NEW; /* protect against others */
- p2->p_pid = trypid;
- LIST_INSERT_HEAD(&allproc, p2, p_list);
- LIST_INSERT_HEAD(PIDHASH(p2->p_pid), p2, p_hash);
sx_xunlock(&allproc_lock);
/*
@@ -511,15 +421,7 @@
p2->p_flag = 0;
if (p1->p_flag & P_PROFIL)
startprofclock(p2);
- mtx_lock_spin(&sched_lock);
- p2->p_sflag = PS_INMEM;
- /*
- * Allow the scheduler to adjust the priority of the child and
- * parent while we hold the sched_lock.
- */
- sched_fork(td, p2);
- mtx_unlock_spin(&sched_lock);
p2->p_ucred = crhold(td->td_ucred);
td2->td_ucred = crhold(p2->p_ucred); /* XXXKSE */
@@ -551,42 +453,109 @@
if (p2->p_textvp)
vref(p2->p_textvp);
+ sx_xlock(&proctree_lock);
+ /* We have to lock the allproc list while we look for a pid. */
+ sx_xlock(&allproc_lock);
+
/*
- * Set up linkage for kernel based threading.
+ * Find an unused process ID. We remember a range of unused IDs
+ * ready to use (from lastpid+1 through pidchecked-1).
+ *
+ * If RFHIGHPID is set (used during system boot), do not allocate
+ * low-numbered pids.
*/
- if ((flags & RFTHREAD) != 0) {
- mtx_lock(&ppeers_lock);
- p2->p_peers = p1->p_peers;
- p1->p_peers = p2;
- p2->p_leader = p1->p_leader;
- mtx_unlock(&ppeers_lock);
- PROC_LOCK(p1->p_leader);
- if ((p1->p_leader->p_flag & P_WEXIT) != 0) {
- PROC_UNLOCK(p1->p_leader);
- /*
- * The task leader is exiting, so process p1 is
- * going to be killed shortly. Since p1 obviously
- * isn't dead yet, we know that the leader is either
- * sending SIGKILL's to all the processes in this
- * task or is sleeping waiting for all the peers to
- * exit. We let p1 complete the fork, but we need
- * to go ahead and kill the new process p2 since
- * the task leader may not get a chance to send
- * SIGKILL to it. We leave it on the list so that
- * the task leader will wait for this new process
- * to commit suicide.
- */
- PROC_LOCK(p2);
- psignal(p2, SIGKILL);
- PROC_UNLOCK(p2);
- } else
- PROC_UNLOCK(p1->p_leader);
+ trypid = lastpid + 1;
+ if (flags & RFHIGHPID) {
+ if (trypid < 10)
+ trypid = 10;
} else {
- p2->p_peers = NULL;
- p2->p_leader = p2;
+ if (randompid)
+ trypid += arc4random() % randompid;
+ }
+retry:
+ /*
+ * If the process ID prototype has wrapped around,
+ * restart somewhat above 0, as the low-numbered procs
+ * tend to include daemons that don't exit.
+ */
+ if (trypid >= PID_MAX) {
+ trypid = trypid % PID_MAX;
+ if (trypid < 100)
+ trypid += 100;
+ pidchecked = 0;
}
+ if (trypid >= pidchecked) {
+ int doingzomb = 0;
+
+ pidchecked = PID_MAX;
+ /*
+ * Scan the active and zombie procs to check whether this pid
+ * is in use. Remember the lowest pid that's greater
+ * than trypid, so we can avoid checking for a while.
+ */
+ p3 = LIST_FIRST(&allproc);
+again:
+ for (; p3 != NULL; p3 = LIST_NEXT(p3, p_list)) {
+ PROC_LOCK(p3);
+ while (p3->p_pid == trypid ||
+ (p3->p_pgrp != NULL &&
+ (p3->p_pgrp->pg_id == trypid ||
+ (p3->p_session != NULL &&
+ p3->p_session->s_sid == trypid)))) {
+ trypid++;
+ if (trypid >= pidchecked) {
+ PROC_UNLOCK(p3);
+ goto retry;
+ }
+ }
+ if (p3->p_pid > trypid && pidchecked > p3->p_pid)
+ pidchecked = p3->p_pid;
+ if (p3->p_pgrp != NULL) {
+ if (p3->p_pgrp->pg_id > trypid &&
+ pidchecked > p3->p_pgrp->pg_id)
+ pidchecked = p3->p_pgrp->pg_id;
+ if (p3->p_session != NULL &&
+ p3->p_session->s_sid > trypid &&
+ pidchecked > p3->p_session->s_sid)
+ pidchecked = p3->p_session->s_sid;
+ }
+ PROC_UNLOCK(p3);
+ }
+ if (!doingzomb) {
+ doingzomb = 1;
+ p3 = LIST_FIRST(&zombproc);
+ goto again;
+ }
+ }
+
+ /*
+ * RFHIGHPID does not mess with the lastpid counter during boot.
+ */
+ if (flags & RFHIGHPID)
+ pidchecked = 0;
+ else
+ lastpid = trypid;
+
+ p2->p_state = PRS_NEW; /* protect against others */
+ p2->p_pid = trypid;
+ LIST_INSERT_HEAD(&allproc, p2, p_list);
+ LIST_INSERT_HEAD(PIDHASH(p2->p_pid), p2, p_hash);
+ sx_xunlock(&allproc_lock);
+
+ /*
+ * Attach the new process to its parent.
+ *
+ * If RFNOWAIT is set, the newly created process becomes a child
+ * of init. This effectively disassociates the child from the
+ * parent.
+ */
+ if (flags & RFNOWAIT)
+ pptr = initproc;
+ else
+ pptr = p1;
+ p2->p_pptr = pptr;
+ LIST_INSERT_HEAD(&pptr->p_children, p2, p_sibling);
- sx_xlock(&proctree_lock);
PGRP_LOCK(p1->p_pgrp);
PROC_LOCK(p2);
PROC_LOCK(p1);
@@ -645,20 +614,42 @@
_PHOLD(p1);
PROC_UNLOCK(p1);
+ sx_xunlock(&proctree_lock);
+
/*
- * Attach the new process to its parent.
- *
- * If RFNOWAIT is set, the newly created process becomes a child
- * of init. This effectively disassociates the child from the
- * parent.
+ * Set up linkage for kernel based threading.
*/
- if (flags & RFNOWAIT)
- pptr = initproc;
- else
- pptr = p1;
- p2->p_pptr = pptr;
- LIST_INSERT_HEAD(&pptr->p_children, p2, p_sibling);
- sx_xunlock(&proctree_lock);
+ if ((flags & RFTHREAD) != 0) {
+ mtx_lock(&ppeers_lock);
+ p2->p_peers = p1->p_peers;
+ p1->p_peers = p2;
+ p2->p_leader = p1->p_leader;
+ mtx_unlock(&ppeers_lock);
+ PROC_LOCK(p1->p_leader);
+ if ((p1->p_leader->p_flag & P_WEXIT) != 0) {
+ PROC_UNLOCK(p1->p_leader);
+ /*
+ * The task leader is exiting, so process p1 is
+ * going to be killed shortly. Since p1 obviously
+ * isn't dead yet, we know that the leader is either
+ * sending SIGKILL's to all the processes in this
+ * task or is sleeping waiting for all the peers to
+ * exit. We let p1 complete the fork, but we need
+ * to go ahead and kill the new process p2 since
+ * the task leader may not get a chance to send
+ * SIGKILL to it. We leave it on the list so that
+ * the task leader will wait for this new process
+ * to commit suicide.
+ */
+ PROC_LOCK(p2);
+ psignal(p2, SIGKILL);
+ PROC_UNLOCK(p2);
+ } else
+ PROC_UNLOCK(p1->p_leader);
+ } else {
+ p2->p_peers = NULL;
+ p2->p_leader = p2;
+ }
/* Inform accounting that we have forked. */
p2->p_acflag = AFORK;
@@ -700,10 +691,18 @@
/*
* Set the child start time and mark the process as being complete.
*/
+ PROC_LOCK(p2);
+ PROC_LOCK(p1);
microuptime(&p2->p_stats->p_start);
mtx_lock_spin(&sched_lock);
+ p2->p_sflag = PS_INMEM;
p2->p_state = PRS_NORMAL;
-
+ /*
+ * Allow the scheduler to adjust the priority of the child and
+ * parent while we hold the sched_lock.
+ */
+ sched_fork(td, p2);
+ PROC_UNLOCK(p2);
/*
* If RFSTOPPED not requested, make child runnable and add to
* run queue.
@@ -717,7 +716,6 @@
/*
* Now can be swapped.
*/
- PROC_LOCK(p1);
_PRELE(p1);
/*
@@ -752,15 +750,14 @@
*procp = p2;
return (0);
fail:
- sx_sunlock(&proctree_lock);
if (ppsratecheck(&lastfail, &curfail, 1))
printf("maxproc limit exceeded by uid %i, please see tuning(7) and login.conf(5).\n",
uid);
sx_xunlock(&allproc_lock);
#ifdef MAC
- mac_destroy_proc(newproc);
+ mac_destroy_proc(p2);
#endif
- uma_zfree(proc_zone, newproc);
+ uma_zfree(proc_zone, p2);
if (p1->p_flag & P_SA) {
PROC_LOCK(p1);
thread_single_end();
More information about the cvs-src
mailing list