svn commit: r238990 - in head/sys: net netinet netinet6

Kenneth D. Merry ken at FreeBSD.org
Wed Aug 22 16:09:20 UTC 2012


On Wed, Aug 22, 2012 at 14:17:05 +0400, Gleb Smirnoff wrote:
> On Tue, Aug 21, 2012 at 12:50:14PM -0600, Kenneth D. Merry wrote:
> K> On Thu, Aug 02, 2012 at 13:57:50 +0000, Gleb Smirnoff wrote:
> K> > Author: glebius
> K> > Date: Thu Aug  2 13:57:49 2012
> K> > New Revision: 238990
> K> > URL: http://svn.freebsd.org/changeset/base/238990
> K> > 
> K> > Log:
> K> >   Fix races between in_lltable_prefix_free(), lla_lookup(),
> K> >   llentry_free() and arptimer():
> K> >   
> K> >   o Use callout_init_rw() for lle timeout, this allows us to safely
> K> >     disestablish them.
> K> >     - This allows us to simplify the arptimer() and make it
> K> >       race safe.
> K> >   o Consistently use ifp->if_afdata_lock to lock access to
> K> >     linked lists in the lle hashes.
> K> >   o Introduce new lle flag LLE_LINKED, which marks an entry that
> K> >     is attached to the hash.
> K> >     - Use LLE_LINKED to avoid double unlinking via subsequent
> K> >       calls to llentry_free().
> K> >     - Mark lle with LLE_DELETED via |= operation instead of =,
> K> >       so that other flags won't be lost.
> K> >   o Make LLE_ADDREF(), LLE_REMREF() and LLE_FREE_LOCKED() more
> K> >     consistent and provide more informative KASSERTs.
> K> >   
> K> >   The patch is a collaborative work of all submitters and myself.
> K> >   
> K> >   PR:		kern/165863
> K> >   Submitted by:	Andrey Zonov <andrey zonov.org>
> K> >   Submitted by:	Ryan Stone <rysto32 gmail.com>
> K> >   Submitted by:	Eric van Gyzen <eric_van_gyzen dell.com>
> K> 
> K> I'm running into this on stable/9, any plan on when this will get MFCed?
> 
> I'm sorry, but only after 9.1-RELEASE. It is too large a change to MFC
> prior to the release.

I understand.

> I'd appreciate if you patch your stable/9 system manually and thus
> perform some testing prior to merge.

I'm running stable/9 from late March (we're working on merging a newer
version of stable/9), and have merged in these changes from head:
237571, 238222, 238945, 238967, 238990

At the moment I'm getting a panic inside arptimer:

Fatal trap 12: page fault while in kernel mode
cpuid = 0; apic id = 00
fault virtual address   = 0x0
fault code              = supervisor read instruction, page not present
instruction pointer     = 0x20:0x0
stack pointer           = 0x28:0xffffff800027da40
frame pointer           = 0x28:0xffffff800027da80
code segment            = base 0x0, limit 0xfffff, type 0x1b
                        = DPL 0, pres 1, long 1, def32 0, gran 1
processor eflags        = interrupt enabled, resume, IOPL = 0
current process         = 12 (swi4: clock)
[ thread pid 12 tid 100010 ]
Stopped at      0:      *** error reading from address 0 ***
db> bt
Tracing pid 12 tid 100010 td 0xfffffe00072158e0
uart_sab82532_class() at 0
arptimer() at arptimer+0xd0
softclock() at softclock+0x2ba
intr_event_execute_handlers() at intr_event_execute_handlers+0x66
ithread_loop() at ithread_loop+0xb2
fork_exit() at fork_exit+0x135
fork_trampoline() at fork_trampoline+0xe
--- trap 0, rip = 0, rsp = 0xffffff800027dcf0, rbp = 0 ---
db> 

It looks like it's inside llentry_free():

(kgdb) up 15
#15 0xffffffff8056b420 in arptimer (arg=Variable "arg" is not available.
)   
    at /usr/home/kenm/perforce7/sys/netinet/if_ether.c:189
189             pkts_dropped = llentry_free(lle);
(kgdb) list
184             /* XXX: LOR avoidance. We still have ref on lle. */
185             LLE_WUNLOCK(lle);
186             IF_AFDATA_LOCK(ifp);
187             LLE_WLOCK(lle);
188             LLE_REMREF(lle);
189             pkts_dropped = llentry_free(lle);
190             IF_AFDATA_UNLOCK(ifp);
191             ARPSTAT_ADD(dropped, pkts_dropped);
192             ARPSTAT_INC(timeouts);
193             CURVNET_RESTORE();
(kgdb) print lle
$1 = (struct llentry *) 0xfffffe000aea8600
(kgdb) print *lle
$2 = {lle_next = {le_next = 0x0, le_prev = 0xfffffe000a36dcd0}, lle_lock = {
    lock_object = {lo_name = 0xffffffff8090cc65 "lle", lo_flags = 73596928,
      lo_data = 0, lo_witness = 0x0}, rw_lock = 18446741874805922016},
  lle_tbl = 0xfffffe000a36dc00, lle_head = 0xfffffe000a36dcd0, lle_free = 0,
  la_hold = 0x0, la_numheld = 0, la_expire = 33571, la_flags = 8192,
  la_asked = 0, la_preempt = 5, ln_byhint = 0, ln_state = 0, ln_router = 0,
  ln_ntick = 0, lle_refcnt = 1, ll_addr = {mac_aligned = 55295740969106,
    mac16 = {32914, 35583, 12874}}, lle_timer = {ln_timer_ch = {c_links = {
        sle = {sle_next = 0x0}, tqe = {tqe_next = 0x0,
          tqe_prev = 0xffffff81ed7e4760}}, c_time = 3357036,
      c_arg = 0xfffffe000aea8600, c_func = 0xffffffff8056b350 <arptimer>,
      c_lock = 0xfffffe000aea8610, c_flags = 16, c_cpu = 0}, la_timer = {
      c_links = {sle = {sle_next = 0x0}, tqe = {tqe_next = 0x0,
          tqe_prev = 0xffffff81ed7e4760}}, c_time = 3357036,
      c_arg = 0xfffffe000aea8600, c_func = 0xffffffff8056b350 <arptimer>,
      c_lock = 0xfffffe000aea8610, c_flags = 16, c_cpu = 0}}}
(kgdb) down
#14 0xffffffff80554950 in llentry_free (lle=0xfffffe000aea8600)
    at /usr/home/kenm/perforce7/sys/net/if_llatbl.c:137
137             LLE_FREE_LOCKED(lle);
(kgdb) list
132
133             KASSERT(lle->la_numheld == 0,
134                     ("%s: la_numheld %d > 0, pkts_droped %zd", __func__,
135                      lle->la_numheld, pkts_dropped));
136
137             LLE_FREE_LOCKED(lle);
138
139             return (pkts_dropped);
140     }
141
(kgdb) print lle->lle_free
$3 = (void (*)(struct lltable *, struct llentry *)) 0

Looks like I'm missing SVN rev 232054.  I'll merge that and try again.

Ken
-- 
Kenneth Merry
ken at FreeBSD.ORG


More information about the svn-src-all mailing list