svn commit: r228969 - head/sys/netinet
John Baldwin
jhb at freebsd.org
Mon Apr 2 14:31:50 UTC 2012
On Sunday, April 01, 2012 8:05:00 am Mikolaj Golub wrote:
> Hi,
>
> On 12/29/11, John Baldwin <jhb at freebsd.org> wrote:
> > Author: jhb
> > Date: Thu Dec 29 20:41:16 2011
> > New Revision: 228969
> > URL: http://svn.freebsd.org/changeset/base/228969
> >
> > Log:
> > Defer the work of freeing IPv4 multicast options from a socket to an
> > asychronous task. This avoids tearing down multicast state including
> > sending IGMP leave messages and reprogramming MAC filters while holding
> > the per-protocol global pcbinfo lock that is used in the receive path of
> > packet processing.
> >
> > Reviewed by: rwatson
> > MFC after: 1 month
> >
> > Modified:
> > head/sys/netinet/in_mcast.c
> > head/sys/netinet/ip_var.h
> >
> > Modified: head/sys/netinet/in_mcast.c
> > ==============================================================================
> > --- head/sys/netinet/in_mcast.c Thu Dec 29 19:01:29 2011 (r228968)
> > +++ head/sys/netinet/in_mcast.c Thu Dec 29 20:41:16 2011 (r228969)
> > @@ -46,6 +46,7 @@ __FBSDID("$FreeBSD$");
> > #include <sys/protosw.h>
> > #include <sys/sysctl.h>
> > #include <sys/ktr.h>
> > +#include <sys/taskqueue.h>
> > #include <sys/tree.h>
> >
> > #include <net/if.h>
> > @@ -144,6 +145,8 @@ static void inm_purge(struct in_multi *)
> > static void inm_reap(struct in_multi *);
> > static struct ip_moptions *
> > inp_findmoptions(struct inpcb *);
> > +static void inp_freemoptions_internal(struct ip_moptions *);
> > +static void inp_gcmoptions(void *, int);
> > static int inp_get_source_filters(struct inpcb *, struct sockopt *);
> > static int inp_join_group(struct inpcb *, struct sockopt *);
> > static int inp_leave_group(struct inpcb *, struct sockopt *);
> > @@ -179,6 +182,10 @@ static SYSCTL_NODE(_net_inet_ip_mcast, O
> > CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_ip_mcast_filters,
> > "Per-interface stack-wide source filters");
> >
> > +static STAILQ_HEAD(, ip_moptions) imo_gc_list =
> > + STAILQ_HEAD_INITIALIZER(imo_gc_list);
> > +static struct task imo_gc_task = TASK_INITIALIZER(0, inp_gcmoptions, NULL);
> > +
> > /*
> > * Inline function which wraps assertions for a valid ifp.
> > * The ifnet layer will set the ifma's ifp pointer to NULL if the ifp
> > @@ -1518,17 +1525,29 @@ inp_findmoptions(struct inpcb *inp)
> > }
> >
> > /*
> > - * Discard the IP multicast options (and source filters).
> > + * Discard the IP multicast options (and source filters). To minimize
> > + * the amount of work done while holding locks such as the INP's
> > + * pcbinfo lock (which is used in the receive path), the free
> > + * operation is performed asynchronously in a separate task.
> > *
> > * SMPng: NOTE: assumes INP write lock is held.
> > */
> > void
> > inp_freemoptions(struct ip_moptions *imo)
> > {
> > - struct in_mfilter *imf;
> > - size_t idx, nmships;
> >
> > KASSERT(imo != NULL, ("%s: ip_moptions is NULL", __func__));
> > + IN_MULTI_LOCK();
> > + STAILQ_INSERT_TAIL(&imo_gc_list, imo, imo_link);
> > + IN_MULTI_UNLOCK();
> > + taskqueue_enqueue(taskqueue_thread, &imo_gc_task);
> > +}
> > +
> > +static void
> > +inp_freemoptions_internal(struct ip_moptions *imo)
> > +{
> > + struct in_mfilter *imf;
> > + size_t idx, nmships;
> >
> > nmships = imo->imo_num_memberships;
> > for (idx = 0; idx < nmships; ++idx) {
> > @@ -1546,6 +1565,22 @@ inp_freemoptions(struct ip_moptions *imo
> > free(imo, M_IPMOPTS);
> > }
> >
> > +static void
> > +inp_gcmoptions(void *context, int pending)
> > +{
> > + struct ip_moptions *imo;
> > +
> > + IN_MULTI_LOCK();
> > + while (!STAILQ_EMPTY(&imo_gc_list)) {
> > + imo = STAILQ_FIRST(&imo_gc_list);
> > + STAILQ_REMOVE_HEAD(&imo_gc_list, imo_link);
> > + IN_MULTI_UNLOCK();
> > + inp_freemoptions_internal(imo);
> > + IN_MULTI_LOCK();
> > + }
> > + IN_MULTI_UNLOCK();
> > +}
> > +
> > /*
> > * Atomically get source filters on a socket for an IPv4 multicast group.
> > * Called with INP lock held; returns with lock released.
> >
> > Modified: head/sys/netinet/ip_var.h
> > ==============================================================================
> > --- head/sys/netinet/ip_var.h Thu Dec 29 19:01:29 2011 (r228968)
> > +++ head/sys/netinet/ip_var.h Thu Dec 29 20:41:16 2011 (r228969)
> > @@ -93,6 +93,7 @@ struct ip_moptions {
> > u_short imo_max_memberships; /* max memberships this socket */
> > struct in_multi **imo_membership; /* group memberships */
> > struct in_mfilter *imo_mfilters; /* source filters */
> > + STAILQ_ENTRY(ip_moptions) imo_link;
> > };
> >
> > struct ipstat {
> >
>
> I have been observing panics like below after recent upgrade on VIMAGE kernel:
>
> #0 doadump (textdump=-2022567936) at pcpu.h:244
> #1 0x8051b739 in db_fncall (dummy1=1, dummy2=0, dummy3=-2127531040,
> dummy4=0x872b2920 "")
> at /home/golub/freebsd/base/head/sys/ddb/db_command.c:573
> #2 0x8051bb31 in db_command (last_cmdp=0x8112eefc, cmd_table=0x0, dopager=1)
> at /home/golub/freebsd/base/head/sys/ddb/db_command.c:449
> #3 0x8051bc8a in db_command_loop () at
> /home/golub/freebsd/base/head/sys/ddb/db_command.c:502
> #4 0x8051dc7d in db_trap (type=12, code=0)
> at /home/golub/freebsd/base/head/sys/ddb/db_main.c:229
> #5 0x80a82566 in kdb_trap (type=12, code=0, tf=0x872b2bbc)
> at /home/golub/freebsd/base/head/sys/kern/subr_kdb.c:629
> #6 0x80ddd26f in trap_fatal (frame=0x872b2bbc, eva=24)
> at /home/golub/freebsd/base/head/sys/i386/i386/trap.c:1014
> #7 0x80ddd347 in trap_pfault (frame=0x872b2bbc, usermode=0, eva=24)
> at /home/golub/freebsd/base/head/sys/i386/i386/trap.c:835
> #8 0x80dde411 in trap (frame=0x872b2bbc)
> at /home/golub/freebsd/base/head/sys/i386/i386/trap.c:547
> #9 0x80dc7c6c in calltrap () at
> /home/golub/freebsd/base/head/sys/i386/i386/exception.s:169
> #10 0x80b6f1fd in igmp_change_state (inm=0x8ae70480)
> at /home/golub/freebsd/base/head/sys/netinet/igmp.c:2595
> #11 0x80b76f68 in in_leavegroup_locked (inm=0x8ae70480, imf=0x8a655a00)
> at /home/golub/freebsd/base/head/sys/netinet/in_mcast.c:1239
> #12 0x80b76fbd in in_leavegroup (inm=0x8ae70480, imf=0x8a655a00)
> at /home/golub/freebsd/base/head/sys/netinet/in_mcast.c:1184
> #13 0x80b770b4 in inp_gcmoptions (context=0x0, pending=1)
> at /home/golub/freebsd/base/head/sys/netinet/in_mcast.c:1554
> #14 0x80a8ff2b in taskqueue_run_locked (queue=0x87594880)
> at /home/golub/freebsd/base/head/sys/kern/subr_taskqueue.c:308
> #15 0x80a90987 in taskqueue_thread_loop (arg=0x81186bcc)
> at /home/golub/freebsd/base/head/sys/kern/subr_taskqueue.c:497
> #16 0x80a1b2d8 in fork_exit (callout=0x80a90920
> <taskqueue_thread_loop>, arg=0x81186bcc,
> frame=0x872b2d28) at /home/golub/freebsd/base/head/sys/kern/kern_fork.c:992
> #17 0x80dc7d14 in fork_trampoline ()
> at /home/golub/freebsd/base/head/sys/i386/i386/exception.s:276
> (kgdb) fr 10
> #10 0x80b6f1fd in igmp_change_state (inm=0x8ae70480)
> at /home/golub/freebsd/base/head/sys/netinet/igmp.c:2595
> 2595 V_state_change_timers_running = 1;
>
> (kgdb) l
> 2590 ("%s: enqueue record =
> %d", __func__,
> 2591 retval));
> 2592
> 2593 inm->inm_state = IGMP_LEAVING_MEMBER;
> 2594 inm->inm_sctimer = 1;
> 2595 V_state_change_timers_running = 1;
> 2596 syncstates = 0;
> 2597 }
> 2598 break;
> 2599 }
>
> VNET context is not set at that point.
>
> The attached patch fixes the issue for me. Not sure about inm->inm_ifp
> != NULL assumption but I need interface to get vnet :-).
I will to defer to bz@ (cc'd) on how best to fix this. Another option would
be to save the current vnet in the 'ip_moptions' struct (would have to add a
new field) when queueing this imo to be free'd via the task. You could
then do the curvnet set/restore at a higher level without any locks held, etc.
> BTW, in igmp_change_state() this looks for me a bit strange:
>
> if (ifp != NULL) {
> /*
> * Sanity check that netinet's notion of ifp is the
> * same as net's.
> */
> KASSERT(inm->inm_ifp == ifp, ("%s: bad ifp", __func__));
> }
>
> IGMP_LOCK();
>
> igi = ((struct in_ifinfo *)ifp->if_afdata[AF_INET])->ii_igmp;
>
> The check ifp != NULL suggests that ifp may be NULL, but then it will
> panic at the last shown line.
Yes, it seems that the check should be removed and the KASSERT() should
just always fire.
--
John Baldwin
More information about the svn-src-head
mailing list