svn commit: r228969 - head/sys/netinet

John Baldwin jhb at freebsd.org
Wed Oct 16 21:22:59 UTC 2013


On Sunday, July 01, 2012 1:48:23 pm Mikolaj Golub wrote:
> 
> On Mon, 2 Apr 2012 08:48:04 -0400 John Baldwin wrote:
> 
>  JB> On Sunday, April 01, 2012 8:05:00 am Mikolaj Golub wrote:
>  >> Hi,
>  >> 
>  >> On 12/29/11, John Baldwin <jhb at freebsd.org> wrote:
>  >> > Author: jhb
>  >> > Date: Thu Dec 29 20:41:16 2011
>  >> > New Revision: 228969
>  >> > URL: http://svn.freebsd.org/changeset/base/228969
>  >> >
>  >> > Log:
>  >> >   Defer the work of freeing IPv4 multicast options from a socket to an
>  >> >   asychronous task.  This avoids tearing down multicast state including
>  >> >   sending IGMP leave messages and reprogramming MAC filters while holding
>  >> >   the per-protocol global pcbinfo lock that is used in the receive path of
>  >> >   packet processing.
>  >> >
>  >> >   Reviewed by:        rwatson
>  >> >   MFC after:        1 month
>  >> >
>  >> > Modified:
>  >> >   head/sys/netinet/in_mcast.c
>  >> >   head/sys/netinet/ip_var.h
>  >> >
>  >> > Modified: head/sys/netinet/in_mcast.c
>  >> > ==============================================================================
>  >> > --- head/sys/netinet/in_mcast.c        Thu Dec 29 19:01:29 2011        (r228968)
>  >> > +++ head/sys/netinet/in_mcast.c        Thu Dec 29 20:41:16 2011        (r228969)
>  >> > @@ -46,6 +46,7 @@ __FBSDID("$FreeBSD$");
>  >> >  #include <sys/protosw.h>
>  >> >  #include <sys/sysctl.h>
>  >> >  #include <sys/ktr.h>
>  >> > +#include <sys/taskqueue.h>
>  >> >  #include <sys/tree.h>
>  >> >
>  >> >  #include <net/if.h>
>  >> > @@ -144,6 +145,8 @@ static void        inm_purge(struct in_multi *)
>  >> >  static void        inm_reap(struct in_multi *);
>  >> >  static struct ip_moptions *
>  >> >                  inp_findmoptions(struct inpcb *);
>  >> > +static void        inp_freemoptions_internal(struct ip_moptions *);
>  >> > +static void        inp_gcmoptions(void *, int);
>  >> >  static int        inp_get_source_filters(struct inpcb *, struct sockopt *);
>  >> >  static int        inp_join_group(struct inpcb *, struct sockopt *);
>  >> >  static int        inp_leave_group(struct inpcb *, struct sockopt *);
>  >> > @@ -179,6 +182,10 @@ static SYSCTL_NODE(_net_inet_ip_mcast, O
>  >> >      CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_ip_mcast_filters,
>  >> >      "Per-interface stack-wide source filters");
>  >> >
>  >> > +static STAILQ_HEAD(, ip_moptions) imo_gc_list =
>  >> > +    STAILQ_HEAD_INITIALIZER(imo_gc_list);
>  >> > +static struct task imo_gc_task = TASK_INITIALIZER(0, inp_gcmoptions, NULL);
>  >> > +
>  >> >  /*
>  >> >   * Inline function which wraps assertions for a valid ifp.
>  >> >   * The ifnet layer will set the ifma's ifp pointer to NULL if the ifp
>  >> > @@ -1518,17 +1525,29 @@ inp_findmoptions(struct inpcb *inp)
>  >> >  }
>  >> >
>  >> >  /*
>  >> > - * Discard the IP multicast options (and source filters).
>  >> > + * Discard the IP multicast options (and source filters).  To minimize
>  >> > + * the amount of work done while holding locks such as the INP's
>  >> > + * pcbinfo lock (which is used in the receive path), the free
>  >> > + * operation is performed asynchronously in a separate task.
>  >> >   *
>  >> >   * SMPng: NOTE: assumes INP write lock is held.
>  >> >   */
>  >> >  void
>  >> >  inp_freemoptions(struct ip_moptions *imo)
>  >> >  {
>  >> > -        struct in_mfilter        *imf;
>  >> > -        size_t                         idx, nmships;
>  >> >
>  >> >          KASSERT(imo != NULL, ("%s: ip_moptions is NULL", __func__));
>  >> > +        IN_MULTI_LOCK();
>  >> > +        STAILQ_INSERT_TAIL(&imo_gc_list, imo, imo_link);
>  >> > +        IN_MULTI_UNLOCK();
>  >> > +        taskqueue_enqueue(taskqueue_thread, &imo_gc_task);
>  >> > +}
>  >> > +
>  >> > +static void
>  >> > +inp_freemoptions_internal(struct ip_moptions *imo)
>  >> > +{
>  >> > +        struct in_mfilter        *imf;
>  >> > +        size_t                         idx, nmships;
>  >> >
>  >> >          nmships = imo->imo_num_memberships;
>  >> >          for (idx = 0; idx < nmships; ++idx) {
>  >> > @@ -1546,6 +1565,22 @@ inp_freemoptions(struct ip_moptions *imo
>  >> >          free(imo, M_IPMOPTS);
>  >> >  }
>  >> >
>  >> > +static void
>  >> > +inp_gcmoptions(void *context, int pending)
>  >> > +{
>  >> > +        struct ip_moptions *imo;
>  >> > +
>  >> > +        IN_MULTI_LOCK();
>  >> > +        while (!STAILQ_EMPTY(&imo_gc_list)) {
>  >> > +                imo = STAILQ_FIRST(&imo_gc_list);
>  >> > +                STAILQ_REMOVE_HEAD(&imo_gc_list, imo_link);
>  >> > +                IN_MULTI_UNLOCK();
>  >> > +                inp_freemoptions_internal(imo);
>  >> > +                IN_MULTI_LOCK();
>  >> > +        }
>  >> > +        IN_MULTI_UNLOCK();
>  >> > +}
>  >> > +
>  >> >  /*
>  >> >   * Atomically get source filters on a socket for an IPv4 multicast group.
>  >> >   * Called with INP lock held; returns with lock released.
>  >> >
>  >> > Modified: head/sys/netinet/ip_var.h
>  >> > ==============================================================================
>  >> > --- head/sys/netinet/ip_var.h        Thu Dec 29 19:01:29 2011        (r228968)
>  >> > +++ head/sys/netinet/ip_var.h        Thu Dec 29 20:41:16 2011        (r228969)
>  >> > @@ -93,6 +93,7 @@ struct ip_moptions {
>  >> >          u_short        imo_max_memberships;        /* max memberships this socket */
>  >> >          struct        in_multi **imo_membership;        /* group memberships */
>  >> >          struct        in_mfilter *imo_mfilters;        /* source filters */
>  >> > +        STAILQ_ENTRY(ip_moptions) imo_link;
>  >> >  };
>  >> >
>  >> >  struct        ipstat {
>  >> >
>  >> 
>  >> I have been observing panics like below after recent upgrade on VIMAGE kernel:
>  >> 
>  >> #0  doadump (textdump=-2022567936) at pcpu.h:244
>  >> #1  0x8051b739 in db_fncall (dummy1=1, dummy2=0, dummy3=-2127531040,
>  >> dummy4=0x872b2920 "")
>  >>     at /home/golub/freebsd/base/head/sys/ddb/db_command.c:573
>  >> #2  0x8051bb31 in db_command (last_cmdp=0x8112eefc, cmd_table=0x0, dopager=1)
>  >>     at /home/golub/freebsd/base/head/sys/ddb/db_command.c:449
>  >> #3  0x8051bc8a in db_command_loop () at
>  >> /home/golub/freebsd/base/head/sys/ddb/db_command.c:502
>  >> #4  0x8051dc7d in db_trap (type=12, code=0)
>  >>     at /home/golub/freebsd/base/head/sys/ddb/db_main.c:229
>  >> #5  0x80a82566 in kdb_trap (type=12, code=0, tf=0x872b2bbc)
>  >>     at /home/golub/freebsd/base/head/sys/kern/subr_kdb.c:629
>  >> #6  0x80ddd26f in trap_fatal (frame=0x872b2bbc, eva=24)
>  >>     at /home/golub/freebsd/base/head/sys/i386/i386/trap.c:1014
>  >> #7  0x80ddd347 in trap_pfault (frame=0x872b2bbc, usermode=0, eva=24)
>  >>     at /home/golub/freebsd/base/head/sys/i386/i386/trap.c:835
>  >> #8  0x80dde411 in trap (frame=0x872b2bbc)
>  >>     at /home/golub/freebsd/base/head/sys/i386/i386/trap.c:547
>  >> #9  0x80dc7c6c in calltrap () at
>  >> /home/golub/freebsd/base/head/sys/i386/i386/exception.s:169
>  >> #10 0x80b6f1fd in igmp_change_state (inm=0x8ae70480)
>  >>     at /home/golub/freebsd/base/head/sys/netinet/igmp.c:2595
>  >> #11 0x80b76f68 in in_leavegroup_locked (inm=0x8ae70480, imf=0x8a655a00)
>  >>     at /home/golub/freebsd/base/head/sys/netinet/in_mcast.c:1239
>  >> #12 0x80b76fbd in in_leavegroup (inm=0x8ae70480, imf=0x8a655a00)
>  >>     at /home/golub/freebsd/base/head/sys/netinet/in_mcast.c:1184
>  >> #13 0x80b770b4 in inp_gcmoptions (context=0x0, pending=1)
>  >>     at /home/golub/freebsd/base/head/sys/netinet/in_mcast.c:1554
>  >> #14 0x80a8ff2b in taskqueue_run_locked (queue=0x87594880)
>  >>     at /home/golub/freebsd/base/head/sys/kern/subr_taskqueue.c:308
>  >> #15 0x80a90987 in taskqueue_thread_loop (arg=0x81186bcc)
>  >>     at /home/golub/freebsd/base/head/sys/kern/subr_taskqueue.c:497
>  >> #16 0x80a1b2d8 in fork_exit (callout=0x80a90920
>  >> <taskqueue_thread_loop>, arg=0x81186bcc,
>  >>     frame=0x872b2d28) at /home/golub/freebsd/base/head/sys/kern/kern_fork.c:992
>  >> #17 0x80dc7d14 in fork_trampoline ()
>  >>     at /home/golub/freebsd/base/head/sys/i386/i386/exception.s:276
>  >> (kgdb) fr 10
>  >> #10 0x80b6f1fd in igmp_change_state (inm=0x8ae70480)
>  >>     at /home/golub/freebsd/base/head/sys/netinet/igmp.c:2595
>  >> 2595                                    V_state_change_timers_running = 1;
>  >> 
>  >> (kgdb) l
>  >> 2590                                        ("%s: enqueue record =
>  >> %d", __func__,
>  >> 2591                                         retval));
>  >> 2592
>  >> 2593                                    inm->inm_state = IGMP_LEAVING_MEMBER;
>  >> 2594                                    inm->inm_sctimer = 1;
>  >> 2595                                    V_state_change_timers_running = 1;
>  >> 2596                                    syncstates = 0;
>  >> 2597                            }
>  >> 2598                            break;
>  >> 2599                    }
>  >> 
>  >> VNET context is not set at that point.
>  >> 
>  >> The attached patch fixes the issue for me. Not sure about inm->inm_ifp
>  >> != NULL assumption but I need interface to get vnet :-).
> 
>  JB> I will to defer to bz@ (cc'd) on how best to fix this.  Another option would
>  JB> be to save the current vnet in the 'ip_moptions' struct (would have to add a
>  JB> new field) when queueing this imo to be free'd via the task.  You could
>  JB> then do the curvnet set/restore at a higher level without any locks held, etc.
> 
> Hi, do you have any news here? I would really appreciate to have this fixed in
> any way, as currently to avoid the crash I always have to remember to apply
> the patch when compiling VIMAGE kernels.
> 
> Here is another version of the patch. It fixes it in the way suggested above
> (storing vnet in ip_moptions).
> 
> The thing that worries me though is the case when vnet is destroyed and we
> still have options that refer it in the queue. I expect panic in this case.
> 
> BTW, isn't the same problem with stale pointer dereferencing possible when
> removing interface?

I think this was just fixed by glebius@ in r256587:

Author: glebius
Date: Wed Oct 16 05:02:01 2013
New Revision: 256587
URL: http://svnweb.freebsd.org/changeset/base/256587

Log:
  For VIMAGE kernels store vnet in the struct task, and set vnet context
  during task processing.
  
  Reported & tested by: mm

-- 
John Baldwin


More information about the svn-src-head mailing list