PERFORCE change 80196 for review

Thu Jul 14 19:36:27 GMT 2005

Ouch!

I had though that I had covered that, but maybe it got uncovered in
a later revision.

John Baldwin wrote:

>http://perforce.freebsd.org/chv.cgi?CH=80196
>
>Change 80196 by jhb at jhb_slimer on 2005/07/14 18:09:51
>
>	Try to close a race between wait() free'ing the vmspace out from
>	under the last thread that is still trying to exit.
>	
>	Reported by:   ps
>
>Affected files ...
>
>.. //depot/projects/smpng/sys/kern/kern_exit.c#97 edit
>
>Differences ...
>
>==== //depot/projects/smpng/sys/kern/kern_exit.c#97 (text+ko) ====
>
>@@ -487,6 +487,9 @@
> 	 */
> 	cpu_exit(td);
> 
>+	WITNESS_WARN(WARN_PANIC, &proctree_lock.sx_object,
>+	    "process (pid %d) exiting", p->p_pid);
>+
> 	PROC_LOCK(p);
> 	PROC_LOCK(p->p_pptr);
> 	sx_xunlock(&proctree_lock);
>@@ -495,20 +498,16 @@
> 	 * We have to wait until after acquiring all locks before
> 	 * changing p_state.  We need to avoid all possible context
> 	 * switches (including ones from blocking on a mutex) while
>-	 * marked as a zombie.
>+	 * marked as a zombie.  We also have to set the zombie state
>+	 * before we release the parent process' proc lock to avoid
>+	 * a lost wakeup.  So, we first call wakeup, then we grab the
>+	 * sched lock, update the state, and release the parent process'
>+	 * proc lock.
> 	 */
>+	wakeup(p->p_pptr);
> 	mtx_lock_spin(&sched_lock);
> 	p->p_state = PRS_ZOMBIE;
>-
>-	critical_enter();
>-	mtx_unlock_spin(&sched_lock);
>-	wakeup(p->p_pptr);
>-	
> 	PROC_UNLOCK(p->p_pptr);
>-	WITNESS_WARN(WARN_PANIC, &p->p_mtx.mtx_object,
>-	    "process (pid %d) exiting", p->p_pid);
>-	mtx_lock_spin(&sched_lock);
>-	critical_exit();
> 
> 	/* Do the same timestamp bookkeeping that mi_switch() would do. */
> 	binuptime(&new_switchtime);
>@@ -626,6 +625,20 @@
> 
> 		nfound++;
> 		if (p->p_state == PRS_ZOMBIE) {
>+
>+			/*
>+			 * It is possible that the last thread of this
>+			 * process is still running on another CPU
>+			 * in thread_exit() after having dropped the process
>+			 * lock via PROC_UNLOCK() but before it has completed
>+			 * cpu_throw().  In that case, the other thread must
>+			 * still hold sched_lock, so simply by acquiring
>+			 * sched_lock once we will wait long enough for the
>+			 * thread to exit in that case.
>+			 */
>+			mtx_lock_spin(&sched_lock);
>+			mtx_unlock_spin(&sched_lock);
>+			
> 			td->td_retval[0] = p->p_pid;
> 			if (status)
> 				*status = p->p_xstat;	/* convert to int */
>  
>