svn commit: r238907 - projects/calloutng/sys/kern
Davide Italiano
davide at freebsd.org
Mon Jul 30 14:17:22 UTC 2012
On Mon, Jul 30, 2012 at 4:02 PM, Attilio Rao <attilio at freebsd.org> wrote:
> On 7/30/12, Davide Italiano <davide at freebsd.org> wrote:
>> Author: davide
>> Date: Mon Jul 30 13:50:37 2012
>> New Revision: 238907
>> URL: http://svn.freebsd.org/changeset/base/238907
>>
>> Log:
>> - Fix a LOR deadlock by dropping the callout lock while executing the
>> handler directly from hardware interrupt context.
>>
>> - Add migration support for callouts which run from hw interrupt
>> context.
>>
>> - Refactor a couple of comments to reflect the new world order.
>>
>> TODO: implement proper stats for direct callouts.
>>
>> Just a note: I'd like to thank flo@ for his invaluable help in testing
>> and reporting issues, mav@ who helped me in tackling the problem, and
>> Yahoo! which provided access to the machines on which tests were run.
>>
>> Reported by: avg, flo [1]
>> Discussed with: mav
>>
>> Modified:
>> projects/calloutng/sys/kern/kern_timeout.c
>>
>> Modified: projects/calloutng/sys/kern/kern_timeout.c
>> ==============================================================================
>> --- projects/calloutng/sys/kern/kern_timeout.c Mon Jul 30 12:25:20 2012 (r238906)
>> +++ projects/calloutng/sys/kern/kern_timeout.c Mon Jul 30 13:50:37 2012 (r238907)
>> @@ -91,45 +91,62 @@ SYSCTL_INT(_debug, OID_AUTO, to_avg_mpca
>> int callwheelsize, callwheelmask;
>>
>> /*
>> - * The callout cpu migration entity represents informations necessary for
>> - * describing the migrating callout to the new callout cpu.
>> + * The callout cpu exec entities represent the information necessary for
>> + * describing the state of callouts currently running on the CPU and the
>> + * information necessary for migrating callouts to a new callout cpu. In
>> + * particular, the first entry of the cc_exec_entity array holds the state
>> + * for callouts running in SWI thread context, while the second one holds
>> + * the state for callouts running directly from hardware interrupt context.
>> * The cached information is very important for deferring migration when
>> * the migrating callout is already running.
>> */
>> -struct cc_mig_ent {
>> +struct cc_exec {
>> + struct callout *cc_next;
>> + struct callout *cc_curr;
>> #ifdef SMP
>> void (*ce_migration_func)(void *);
>> void *ce_migration_arg;
>> int ce_migration_cpu;
>> struct bintime ce_migration_time;
>> #endif
>> + int cc_cancel;
>> + int cc_waiting;
>> };
>>
>> /*
>> - * There is one struct callout_cpu per cpu, holding all relevant
>> + * There is one struct callout_cpu per cpu, holding all relevant
>> * state for the callout processing thread on the individual CPU.
>> */
>> struct callout_cpu {
>> - struct cc_mig_ent cc_migrating_entity;
>> + struct cc_exec cc_exec_entity[2];
>> struct mtx cc_lock;
>> struct callout *cc_callout;
>> struct callout_tailq *cc_callwheel;
>> struct callout_tailq cc_expireq;
>> struct callout_list cc_callfree;
>> - struct callout *cc_next;
>> - struct callout *cc_curr;
>> struct bintime cc_firstevent;
>> struct bintime cc_lastscan;
>> void *cc_cookie;
>> - int cc_cancel;
>> - int cc_waiting;
>> };
>>
>> +#define cc_exec_curr cc_exec_entity[0].cc_curr
>> +#define cc_exec_next cc_exec_entity[0].cc_next
>> +#define cc_exec_cancel cc_exec_entity[0].cc_cancel
>> +#define cc_exec_waiting cc_exec_entity[0].cc_waiting
>> +#define cc_exec_curr_dir cc_exec_entity[1].cc_curr
>> +#define cc_exec_next_dir cc_exec_entity[1].cc_next
>> +#define cc_exec_cancel_dir cc_exec_entity[1].cc_cancel
>> +#define cc_exec_waiting_dir cc_exec_entity[1].cc_waiting
>> +
>> #ifdef SMP
>> -#define cc_migration_func cc_migrating_entity.ce_migration_func
>> -#define cc_migration_arg cc_migrating_entity.ce_migration_arg
>> -#define cc_migration_cpu cc_migrating_entity.ce_migration_cpu
>> -#define cc_migration_time cc_migrating_entity.ce_migration_time
>> +#define cc_migration_func cc_exec_entity[0].ce_migration_func
>> +#define cc_migration_arg cc_exec_entity[0].ce_migration_arg
>> +#define cc_migration_cpu cc_exec_entity[0].ce_migration_cpu
>> +#define cc_migration_time cc_exec_entity[0].ce_migration_time
>> +#define cc_migration_func_dir cc_exec_entity[1].ce_migration_func
>> +#define cc_migration_arg_dir cc_exec_entity[1].ce_migration_arg
>> +#define cc_migration_cpu_dir cc_exec_entity[1].ce_migration_cpu
>> +#define cc_migration_time_dir cc_exec_entity[1].ce_migration_time
>>
>> struct callout_cpu cc_cpu[MAXCPU];
>> #define CPUBLOCK MAXCPU
>> @@ -156,6 +173,9 @@ struct callout_cpu cc_cpu;
>>
>> static int timeout_cpu;
>> void (*callout_new_inserted)(int cpu, struct bintime bt) = NULL;
>> +static struct callout *
>> +softclock_call_cc(struct callout *c, struct callout_cpu *cc, int *mpcalls,
>> + int *lockcalls, int *gcalls, int direct);
>>
>> static MALLOC_DEFINE(M_CALLOUT, "callout", "Callout datastructures");
>>
>> @@ -180,15 +200,18 @@ static MALLOC_DEFINE(M_CALLOUT, "callout
>> * Resets the migration entity tied to a specific callout cpu.
>> */
>> static void
>> -cc_cme_cleanup(struct callout_cpu *cc)
>> +cc_cme_cleanup(struct callout_cpu *cc, int direct)
>> {
>> -
>> +
>> + cc->cc_exec_entity[direct].cc_curr = NULL;
>> + cc->cc_exec_entity[direct].cc_next = NULL;
>> + cc->cc_exec_entity[direct].cc_cancel = 0;
>> + cc->cc_exec_entity[direct].cc_waiting = 0;
>> #ifdef SMP
>> - cc->cc_migration_cpu = CPUBLOCK;
>> - cc->cc_migration_time.sec = 0;
>> - cc->cc_migration_time.frac = 0;
>> - cc->cc_migration_func = NULL;
>> - cc->cc_migration_arg = NULL;
>> + cc->cc_exec_entity[direct].ce_migration_cpu = CPUBLOCK;
>> + bintime_clear(&cc->cc_exec_entity[direct].ce_migration_time);
>> + cc->cc_exec_entity[direct].ce_migration_func = NULL;
>> + cc->cc_exec_entity[direct].ce_migration_arg = NULL;
>> #endif
>> }
>>
>> @@ -196,11 +219,12 @@ cc_cme_cleanup(struct callout_cpu *cc)
>> * Checks if migration is requested by a specific callout cpu.
>> */
>> static int
>> -cc_cme_migrating(struct callout_cpu *cc)
>> +cc_cme_migrating(struct callout_cpu *cc, int direct)
>> {
>>
>> #ifdef SMP
>> - return (cc->cc_migration_cpu != CPUBLOCK);
>> +
>> + return (cc->cc_exec_entity[direct].ce_migration_cpu != CPUBLOCK);
>> #else
>> return (0);
>> #endif
>> @@ -246,7 +270,8 @@ callout_cpu_init(struct callout_cpu *cc)
>> TAILQ_INIT(&cc->cc_callwheel[i]);
>> }
>> TAILQ_INIT(&cc->cc_expireq);
>> - cc_cme_cleanup(cc);
>> + for (i = 0; i < 2; i++)
>> + cc_cme_cleanup(cc, i);
>> if (cc->cc_callout == NULL)
>> return;
>> for (i = 0; i < ncallout; i++) {
>> @@ -355,7 +380,8 @@ callout_process(struct bintime *now)
>> struct callout *tmp;
>> struct callout_cpu *cc;
>> struct callout_tailq *sc;
>> - int cpu, first, future, last, need_softclock;
>> + int cpu, first, future, gcalls, mpcalls, last, lockcalls,
>> + need_softclock;
>>
>> /*
>> * Process callouts at a very low cpu priority, so we don't keep the
>> @@ -376,7 +402,8 @@ callout_process(struct bintime *now)
>> first &= callwheelmask;
>> for (;;) {
>> sc = &cc->cc_callwheel[first];
>> - TAILQ_FOREACH(tmp, sc, c_links.tqe) {
>> + tmp = TAILQ_FIRST(sc);
>> + while (tmp != NULL) {
>> next = tmp->c_time;
>> bintime_sub(&next, &tmp->c_precision);
>> if (bintime_cmp(&next, now, <=)) {
>> @@ -385,17 +412,20 @@ callout_process(struct bintime *now)
>> * directly from hardware interrupt context.
>> */
>> if (tmp->c_flags & CALLOUT_DIRECT) {
>> - tmp->c_func(tmp->c_arg);
>> TAILQ_REMOVE(sc, tmp, c_links.tqe);
>> - tmp->c_flags &= ~CALLOUT_PENDING;
>> + tmp = softclock_call_cc(tmp, cc,
>> + &mpcalls, &lockcalls, &gcalls, 1);
>> } else {
>> TAILQ_INSERT_TAIL(&cc->cc_expireq,
>> tmp, c_staiter);
>> TAILQ_REMOVE(sc, tmp, c_links.tqe);
>> tmp->c_flags |= CALLOUT_PROCESSED;
>> need_softclock = 1;
>> + tmp = TAILQ_NEXT(tmp, c_links.tqe);
>> }
>> }
>> + else
>> + tmp = TAILQ_NEXT(tmp, c_links.tqe);
>> }
>> if (first == last)
>> break;
>> @@ -561,11 +591,12 @@ callout_cc_add(struct callout *c, struct
>> }
>>
>> static void
>> -callout_cc_del(struct callout *c, struct callout_cpu *cc)
>> +callout_cc_del(struct callout *c, struct callout_cpu *cc, int direct)
>> {
>> -
>> - if (cc->cc_next == c)
>> - cc->cc_next = TAILQ_NEXT(c, c_staiter);
>> + if (direct && cc->cc_exec_next_dir == c)
>> + cc->cc_exec_next_dir = TAILQ_NEXT(c, c_links.tqe);
>> + else if (!direct && cc->cc_exec_next == c)
>> + cc->cc_exec_next = TAILQ_NEXT(c, c_staiter);
>> if (c->c_flags & CALLOUT_LOCAL_ALLOC) {
>> c->c_func = NULL;
>> SLIST_INSERT_HEAD(&cc->cc_callfree, c, c_links.sle);
>> @@ -574,13 +605,13 @@ callout_cc_del(struct callout *c, struct
>>
>> static struct callout *
>> softclock_call_cc(struct callout *c, struct callout_cpu *cc, int *mpcalls,
>> - int *lockcalls, int *gcalls)
>> + int *lockcalls, int *gcalls, int direct)
>> {
>> void (*c_func)(void *);
>> void *c_arg;
>> struct lock_class *class;
>> struct lock_object *c_lock;
>> - int c_flags, sharedlock;
>> + int c_flags, flags, sharedlock;
>> #ifdef SMP
>> struct callout_cpu *new_cc;
>> void (*new_func)(void *);
>> @@ -595,7 +626,14 @@ softclock_call_cc(struct callout *c, str
>> static timeout_t *lastfunc;
>> #endif
>>
>> - cc->cc_next = TAILQ_NEXT(c, c_staiter);
>> + if (direct)
>> + cc->cc_exec_next_dir = TAILQ_NEXT(c, c_links.tqe);
>> + else {
>> + if ((c->c_flags & CALLOUT_PROCESSED) == 0)
>> + cc->cc_exec_next = TAILQ_NEXT(c, c_links.tqe);
>> + else
>> + cc->cc_exec_next = TAILQ_NEXT(c, c_staiter);
>> + }
>> class = (c->c_lock != NULL) ? LOCK_CLASS(c->c_lock) : NULL;
>> sharedlock = (c->c_flags & CALLOUT_SHAREDLOCK) ? 0 : 1;
>> c_lock = c->c_lock;
>> @@ -606,8 +644,8 @@ softclock_call_cc(struct callout *c, str
>> c->c_flags = CALLOUT_LOCAL_ALLOC;
>> else
>> c->c_flags &= ~CALLOUT_PENDING;
>> - cc->cc_curr = c;
>> - cc->cc_cancel = 0;
>> + cc->cc_exec_entity[direct].cc_curr = c;
>> + cc->cc_exec_entity[direct].cc_cancel = 0;
>> CC_UNLOCK(cc);
>> if (c_lock != NULL) {
>> class->lc_lock(c_lock, sharedlock);
>> @@ -615,13 +653,12 @@ softclock_call_cc(struct callout *c, str
>> * The callout may have been cancelled
>> * while we switched locks.
>> */
>> - if (cc->cc_cancel) {
>> + if (cc->cc_exec_entity[direct].cc_cancel) {
>> class->lc_unlock(c_lock);
>> goto skip;
>> }
>> /* The callout cannot be stopped now. */
>> - cc->cc_cancel = 1;
>> -
>> + cc->cc_exec_entity[direct].cc_cancel = 1;
>> if (c_lock == &Giant.lock_object) {
>> (*gcalls)++;
>> CTR3(KTR_CALLOUT, "callout %p func %p arg %p",
>> @@ -639,11 +676,13 @@ softclock_call_cc(struct callout *c, str
>> #ifdef DIAGNOSTIC
>> binuptime(&bt1);
>> #endif
>> - THREAD_NO_SLEEPING();
>> + if (!direct)
>> + THREAD_NO_SLEEPING();
>> SDT_PROBE(callout_execute, kernel, , callout_start, c, 0, 0, 0, 0);
>> c_func(c_arg);
>> SDT_PROBE(callout_execute, kernel, , callout_end, c, 0, 0, 0, 0);
>> - THREAD_SLEEPING_OK();
>> + if (!direct)
>> + THREAD_SLEEPING_OK();
>
> I didn't look into the full patch, however I have a question about
> this: was it necessary because this now runs in interrupt context, so
> it can trip the nested sleeping check? Or is there something else?
> I think it would be a good thing to keep THREAD_NO_SLEEPING() in the
> direct dispatch case as well.
>
> Attilio
>
>
> --
> Peace can only be achieved by understanding - A. Einstein
Thanks for the comment, Attilio.
Yes, it's exactly what you thought. If the direct flag is equal to one,
you're sure you're processing a callout which runs directly from
hardware interrupt context. In this case, the running thread cannot
sleep, and it's likely the interrupted thread already has the
TDP_NOSLEEPING flag set, which would trip the KASSERT() in
THREAD_NO_SLEEPING() and lead to a panic if the kernel is compiled
with INVARIANTS.
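For reference, the check in question looks roughly like this
(paraphrasing sys/proc.h from memory, exact wording may differ):

/*
 * Rough paraphrase of the sys/proc.h macro (not verbatim): the
 * "enter" macro asserts the flag is not already set before setting it.
 */
#define THREAD_NO_SLEEPING() do {                               \
        KASSERT(!(curthread->td_pflags & TDP_NOSLEEPING),       \
            ("nested no sleeping"));                            \
        curthread->td_pflags |= TDP_NOSLEEPING;                 \
} while (0)

A direct callout borrows the context of whatever thread the hardware
interrupt preempted; if that thread had already set TDP_NOSLEEPING,
the KASSERT above fires.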
In case you're running from SWI context (direct equal to zero), the
code remains the same as before.
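To make the convention concrete: direct is simply used as the index
into cc_exec_entity[], as in the sketch below (condensed from
softclock_call_cc() in the patch above; cc_exec_begin is just an
illustrative name, the patch open-codes these assignments):

/* Illustrative only; the patch open-codes these assignments. */
static void
cc_exec_begin(struct callout_cpu *cc, struct callout *c, int direct)
{

	/* direct == 0: SWI thread context; direct == 1: hw interrupt. */
	cc->cc_exec_entity[direct].cc_curr = c;
	cc->cc_exec_entity[direct].cc_cancel = 0;
}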
I think what I'm doing works due to the assumption that the running
thread never sleeps. Do you suggest some other way to handle this?
Davide