proposed smp_rendezvous change
John Baldwin
jhb at freebsd.org
Tue May 17 20:35:19 UTC 2011
On Tuesday, May 17, 2011 3:16:45 pm Max Laier wrote:
> On 05/17/2011 09:56 AM, John Baldwin wrote:
> > On Tuesday, May 17, 2011 12:20:40 pm Max Laier wrote:
> >> On 05/17/2011 05:16 AM, John Baldwin wrote:
> >> ...
> >>> Index: kern/kern_switch.c
> >>> ===================================================================
> >>> --- kern/kern_switch.c    (revision 221536)
> >>> +++ kern/kern_switch.c    (working copy)
> >>> @@ -192,15 +192,22 @@
> >>>  critical_exit(void)
> >>>  {
> >>>          struct thread *td;
> >>> -        int flags;
> >>> +        int flags, owepreempt;
> >>>
> >>>          td = curthread;
> >>>          KASSERT(td->td_critnest != 0,
> >>>              ("critical_exit: td_critnest == 0"));
> >>>
> >>>          if (td->td_critnest == 1) {
> >>> +                owepreempt = td->td_owepreempt;
> >>> +                td->td_owepreempt = 0;
> >>> +                /*
> >>> +                 * XXX: Should move compiler_memory_barrier() from
> >>> +                 * rmlock to a header.
> >>> +                 */
> >>
> >> XXX: If we get an interrupt at this point and td_owepreempt was zero,
> >> the new interrupt will re-set it, because td_critnest is still non-zero.
> >>
> >> So we still end up with a thread that is leaking an owepreempt *and*
> >> lose a preemption.
> >
> > I don't see how this can still leak owepreempt. The nested interrupt
> > should do nothing (except for possibly set owepreempt) until
> > td_critnest is 0.
>
> Exactly. The interrupt sets owepreempt and after we return here, we set
> td_critnest to 0 and exit without clearing owepreempt. Hence we leak
> the owepreempt.
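
To make that window concrete, here is a minimal user-space sketch of the
sequence (illustration only, not the kernel code; the struct below stands
in for the two thread fields, and the interrupt handler's action is shown
as a comment):

        struct thread_sketch {
                volatile int critnest;          /* critical section depth */
                volatile int owepreempt;        /* deferred preemption */
        } td = { 1, 0 };

        void
        critical_exit_sketch(void)
        {
                int owepreempt;

                owepreempt = td.owepreempt;     /* reads 0 */
                td.owepreempt = 0;
                /*
                 * Interrupt fires here: td.critnest is still 1, so the
                 * handler only does "td.owepreempt = 1" and returns.
                 */
                td.critnest = 0;
                if (owepreempt) {
                        /* would switch here, but our copy is 0 */
                }
                /* td.owepreempt is left at 1 (leaked), and no switch happened. */
        }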
>
> > However, we can certainly lose preemptions.
> >
> > I wonder if we can abuse the high bit of td_critnest for the owepreempt
> > flag so it is all stored in one cookie. We only set owepreempt while
> > holding thread_lock() (so interrupts are disabled), so I think we would
> > be ok and not need atomic ops.
> >
> > Hmm, actually, the top-half code would have to use atomic ops. Nuts.
> > Let me think some more.
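
(Concretely: the top-half increment and decrement are plain
read-modify-write sequences, so once the flag lives in the same word an
interrupt can be lost. Illustration only, with made-up names:

        unsigned int critnest = 2;      /* nested critical section, flag clear */

        /* "td->td_critnest--" compiles to a load/modify/store: */
        unsigned int tmp = critnest;    /* load: tmp == 2 */
        /*
         * Interrupt fires here and defers a preemption by setting the
         * high bit: critnest |= 0x80000000.
         */
        critnest = tmp - 1;             /* store: critnest == 1, flag wiped out */

An atomic op makes the load/modify/store indivisible, so the flag set by
the interrupt handler survives.)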
>
> I think these two really belong into one single variable. Setting the
> owepreempt flag can be a normal RMW. Increasing and decreasing critnest
> must be atomic (otherwise we could lose the flag) and dropping the final
> reference would work like this:
>
> if ((curthread->td_critnest & TD_CRITNEST_MASK) == 1) {
>         unsigned int owe;
>
>         owe = atomic_readandclear(&curthread->td_critnest);
>         if (owe & TD_OWEPREEMPT_FLAG) {
>                 /* do the switch */
>         }
> }
>
> That should do it ... I can put that into a patch, if we agree that's
> the right thing to do.
Yeah, I already have a patch to do that, but hadn't added atomic ops to
critical_enter() and critical_exit(). But it also wasn't as fancy in the
critical_exit() case. Here is what I have and I think it might actually
be ok (it doesn't use an atomic read and clear, but I think it is safe).
Hmm, actually, it will need to use the read and clear:
Index: kern/sched_ule.c
===================================================================
--- kern/sched_ule.c    (revision 222024)
+++ kern/sched_ule.c    (working copy)
@@ -1785,7 +1785,6 @@
         td->td_oncpu = NOCPU;
         if (!(flags & SW_PREEMPT))
                 td->td_flags &= ~TDF_NEEDRESCHED;
-        td->td_owepreempt = 0;
         tdq->tdq_switchcnt++;
         /*
          * The lock pointer in an idle thread should never change.  Reset it
@@ -2066,7 +2065,7 @@

                 flags = SW_INVOL | SW_PREEMPT;
                 if (td->td_critnest > 1)
-                        td->td_owepreempt = 1;
+                        td->td_critnest |= TD_OWEPREEMPT;
                 else if (TD_IS_IDLETHREAD(td))
                         mi_switch(flags | SWT_REMOTEWAKEIDLE, NULL);
                 else
@@ -2261,7 +2260,7 @@
                 return;
         if (!sched_shouldpreempt(pri, cpri, 0))
                 return;
-        ctd->td_owepreempt = 1;
+        ctd->td_critnest |= TD_OWEPREEMPT;
 }

 /*
Index: kern/kern_switch.c
===================================================================
--- kern/kern_switch.c    (revision 222024)
+++ kern/kern_switch.c    (working copy)
@@ -183,7 +183,7 @@
         struct thread *td;

         td = curthread;
-        td->td_critnest++;
+        atomic_add_int(&td->td_critnest, 1);
         CTR4(KTR_CRITICAL, "critical_enter by thread %p (%ld, %s) to %d", td,
             (long)td->td_proc->p_pid, td->td_name, td->td_critnest);
 }
@@ -192,18 +192,16 @@
 critical_exit(void)
 {
         struct thread *td;
-        int flags;
+        int flags, owepreempt;

         td = curthread;
         KASSERT(td->td_critnest != 0,
             ("critical_exit: td_critnest == 0"));

-        if (td->td_critnest == 1) {
-                td->td_critnest = 0;
-                if (td->td_owepreempt) {
-                        td->td_critnest = 1;
+        if ((td->td_critnest & ~TD_OWEPREEMPT) == 1) {
+                owepreempt = atomic_readandclear_int(&td->td_critnest);
+                if (owepreempt & TD_OWEPREEMPT) {
                         thread_lock(td);
-                        td->td_critnest--;
                         flags = SW_INVOL | SW_PREEMPT;
                         if (TD_IS_IDLETHREAD(td))
                                 flags |= SWT_IDLE;
@@ -213,7 +211,7 @@
                         thread_unlock(td);
                 }
         } else
-                td->td_critnest--;
+                atomic_subtract_int(&td->td_critnest, 1);

         CTR4(KTR_CRITICAL, "critical_exit by thread %p (%ld, %s) to %d", td,
             (long)td->td_proc->p_pid, td->td_name, td->td_critnest);
Index: kern/kern_synch.c
===================================================================
--- kern/kern_synch.c    (revision 222024)
+++ kern/kern_synch.c    (working copy)
@@ -400,9 +400,7 @@
         if (!TD_ON_LOCK(td) && !TD_IS_RUNNING(td))
                 mtx_assert(&Giant, MA_NOTOWNED);
 #endif
-        KASSERT(td->td_critnest == 1 || (td->td_critnest == 2 &&
-            (td->td_owepreempt) && (flags & SW_INVOL) != 0 &&
-            newtd == NULL) || panicstr,
+        KASSERT(td->td_critnest == 1 || panicstr,
             ("mi_switch: switch in a critical section"));
         KASSERT((flags & (SW_INVOL | SW_VOL)) != 0,
             ("mi_switch: switch must be voluntary or involuntary"));
Index: kern/sched_4bsd.c
===================================================================
--- kern/sched_4bsd.c    (revision 222024)
+++ kern/sched_4bsd.c    (working copy)
@@ -335,7 +335,7 @@
         if (ctd->td_critnest > 1) {
                 CTR1(KTR_PROC, "maybe_preempt: in critical section %d",
                     ctd->td_critnest);
-                ctd->td_owepreempt = 1;
+                ctd->td_critnest |= TD_OWEPREEMPT;
                 return (0);
         }
         /*
@@ -949,7 +949,6 @@
         td->td_lastcpu = td->td_oncpu;
         if (!(flags & SW_PREEMPT))
                 td->td_flags &= ~TDF_NEEDRESCHED;
-        td->td_owepreempt = 0;
         td->td_oncpu = NOCPU;

         /*
@@ -1437,7 +1436,7 @@
 {
         thread_lock(td);
         if (td->td_critnest > 1)
-                td->td_owepreempt = 1;
+                td->td_critnest |= TD_OWEPREEMPT;
         else
                 mi_switch(SW_INVOL | SW_PREEMPT | SWT_PREEMPT, NULL);
         thread_unlock(td);
Index: kern/kern_rmlock.c
===================================================================
--- kern/kern_rmlock.c    (revision 222024)
+++ kern/kern_rmlock.c    (working copy)
@@ -366,7 +366,8 @@
          * Fast path to combine two common conditions into a single
          * conditional jump.
          */
-        if (0 == (td->td_owepreempt | (rm->rm_writecpus & pc->pc_cpumask)))
+        if (0 == ((td->td_critnest & TD_OWEPREEMPT) |
+            (rm->rm_writecpus & pc->pc_cpumask)))
                 return (1);

         /* We do not have a read token and need to acquire one. */
@@ -377,7 +378,7 @@
 _rm_unlock_hard(struct thread *td,struct rm_priotracker *tracker)
 {

-        if (td->td_owepreempt) {
+        if (td->td_critnest & TD_OWEPREEMPT) {
                 td->td_critnest++;
                 critical_exit();
         }
@@ -418,7 +419,7 @@
         td->td_critnest--;
         sched_unpin();

-        if (0 == (td->td_owepreempt | tracker->rmp_flags))
+        if (0 == ((td->td_critnest & TD_OWEPREEMPT) | tracker->rmp_flags))
                 return;

         _rm_unlock_hard(td, tracker);
Index: sys/proc.h
===================================================================
--- sys/proc.h    (revision 222024)
+++ sys/proc.h    (working copy)
@@ -228,7 +228,6 @@
         const char      *td_wmesg;      /* (t) Reason for sleep. */
         u_char          td_lastcpu;     /* (t) Last cpu we were on. */
         u_char          td_oncpu;       /* (t) Which cpu we are on. */
-        volatile u_char td_owepreempt;  /* (k*) Preempt on last critical_exit */
         u_char          td_tsqueue;     /* (t) Turnstile queue blocked on. */
         short           td_locks;       /* (k) Count of non-spin locks. */
         short           td_rw_rlocks;   /* (k) Count of rwlock read locks. */
@@ -465,6 +464,12 @@
 #define TD_SET_CAN_RUN(td)      (td)->td_state = TDS_CAN_RUN

 /*
+ * The high bit of td_critnest is used to determine if a preemption is
+ * pending.
+ */
+#define TD_OWEPREEMPT   ((u_int)INT_MAX + 1)
+
+/*
  * Process structure.
  */
 struct proc {
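
FWIW, a quick user-space sanity check of the TD_OWEPREEMPT bit arithmetic
(illustration only, not part of the patch; assumes a 32-bit int, as on all
platforms we support):

        #include <assert.h>
        #include <limits.h>

        typedef unsigned int u_int;
        #define TD_OWEPREEMPT   ((u_int)INT_MAX + 1)

        int
        main(void)
        {
                u_int critnest;

                assert(TD_OWEPREEMPT == 0x80000000U);   /* the high bit */
                critnest = 2 | TD_OWEPREEMPT;   /* depth 2, preemption owed */
                assert((critnest & ~TD_OWEPREEMPT) == 2);
                assert((critnest & TD_OWEPREEMPT) != 0);
                return (0);
        }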
--
John Baldwin