Races on alias deletion

Mikolaj Golub to.my.trociny at gmail.com
Sun May 2 17:39:31 UTC 2010


I have submitted a PR about this issue: kern/146250.

On Wed, 21 Apr 2010 08:28:48 +0300 Mikolaj Golub wrote:

 MG> Hi,

 MG> Due to a misconfiguration of our tools, we accidentally ran two simultaneous
 MG> deletions of the same interface alias and crashed the box (FreeBSD-7.1).

 MG> So I did some experiments on my 8-STABLE (I have CURRENT in virtualbox only)
 MG> to investigate this, concurrently running two scripts that were adding and
 MG> deleting the same address:

 MG> while true; do
 MG>         ifconfig $IFACE  alias $IP
 MG>         ifconfig $IFACE -alias $IP
 MG> done

 MG> The box crashed just after I started the second script. The crash was in
 MG> in_control() on removing ia->ia_ifa from ifp->if_addrhead list, because there
 MG> was no check if the address is still in the list before removing.

 MG> panic: Bad link elm 0xcd2f3b00 prev->next != elm

 MG> #0  doadump () at pcpu.h:246
 MG> #1  0xc04ec829 in db_fncall (dummy1=-1064461270, dummy2=0, dummy3=-1, dummy4=0xe9a737fc "\0208╖И")
 MG>     at /usr/src/sys/ddb/db_command.c:548
 MG> #2  0xc04ecc5f in db_command (last_cmdp=0xc0e0ab9c, cmd_table=0x0, dopager=0)
 MG>     at /usr/src/sys/ddb/db_command.c:445
 MG> #3  0xc04ecd14 in db_command_script (command=0xc0e0bac4 "call doadump") at /usr/src/sys/ddb/db_command.c:516
 MG> #4  0xc04f0e50 in db_script_exec (scriptname=0xe9a73908 "kdb.enter.panic", warnifnotfound=Variable "warnifnotfound" is not available.
 MG> )
 MG>     at /usr/src/sys/ddb/db_script.c:302
 MG> #5  0xc04f0f37 in db_script_kdbenter (eventname=0xc0cc760a "panic") at /usr/src/sys/ddb/db_script.c:324
 MG> #6  0xc04eec18 in db_trap (type=3, code=0) at /usr/src/sys/ddb/db_main.c:228
 MG> #7  0xc08d9aa6 in kdb_trap (type=3, code=0, tf=0xe9a73a44) at /usr/src/sys/kern/subr_kdb.c:535
 MG> #8  0xc0befbeb in trap (frame=0xe9a73a44) at /usr/src/sys/i386/i386/trap.c:690
 MG> #9  0xc0bd130b in calltrap () at /usr/src/sys/i386/i386/exception.s:165
 MG> #10 0xc08d9c2a in kdb_enter (why=0xc0cc760a "panic", msg=0xc0cc760a "panic") at cpufunc.h:71
 MG> #11 0xc08a95b6 in panic (fmt=0xc0c61bc0 "Bad link elm %p prev->next != elm")
 MG>     at /usr/src/sys/kern/kern_shutdown.c:562
 MG> #12 0xc09ba87f in in_control (so=0xcdbd519c, cmd=2149607705, data=0xcd3db120 "fxp0", ifp=0xc5b94c00, 
 MG>     td=0xc92ddb90) at /usr/src/sys/netinet/in.c:604
 MG> #13 0xc095d400 in ifioctl (so=0xcdbd519c, cmd=2149607705, data=0xcd3db120 "fxp0", td=0xc92ddb90)
 MG>     at /usr/src/sys/net/if.c:2516
 MG> #14 0xc08f69d5 in soo_ioctl (fp=0xcdc90af0, cmd=2149607705, data=0xcd3db120, active_cred=0xc9d78400, 
 MG>     td=0xc92ddb90) at /usr/src/sys/kern/sys_socket.c:212
 MG> #15 0xc08f0a2d in kern_ioctl (td=0xc92ddb90, fd=3, com=2149607705, data=0xcd3db120 "fxp0") at file.h:262
 MG> #16 0xc08f0bb4 in ioctl (td=0xc92ddb90, uap=0xe9a73cf8) at /usr/src/sys/kern/sys_generic.c:678
 MG> #17 0xc0bef320 in syscall (frame=0xe9a73d38) at /usr/src/sys/i386/i386/trap.c:1111
 MG> #18 0xc0bd13a0 in Xint0x80_syscall () at /usr/src/sys/i386/i386/exception.s:261
 MG> #19 0x00000033 in ?? ()
 MG> Previous frame inner to this frame (corrupt stack?)
 MG> (kgdb) fr 12
 MG> #12 0xc09ba87f in in_control (so=0xcdbd519c, cmd=2149607705, data=0xcd3db120 "fxp0", ifp=0xc5b94c00, 
 MG>     td=0xc92ddb90) at /usr/src/sys/netinet/in.c:604
 MG> 604             TAILQ_REMOVE(&ifp->if_addrhead, &ia->ia_ifa, ifa_link);
 MG> (kgdb) list
 MG> 599             default:
 MG> 600                     panic("in_control: unsupported ioctl");
 MG> 601             }
 MG> 602
 MG> 603             IF_ADDR_LOCK(ifp);
 MG> 604             TAILQ_REMOVE(&ifp->if_addrhead, &ia->ia_ifa, ifa_link);
 MG> 605             IF_ADDR_UNLOCK(ifp);
 MG> 606             ifa_free(&ia->ia_ifa);                          /* if_addrhead */
 MG> 607
 MG> 608             IN_IFADDR_WLOCK();

 MG> The first patch in the attachments fixed this type of crash for me, but the
 MG> box started to crash in in_lltable_prefix_free (now the scripts had to run
 MG> for a few seconds before the crash occurred).

 MG> (kgdb) bt
 MG> #0  doadump () at pcpu.h:246
 MG> #1  0xc04ec829 in db_fncall (dummy1=1, dummy2=0, dummy3=-1056922880, dummy4=0xe8636760 "")
 MG>     at /usr/src/sys/ddb/db_command.c:548
 MG> #2  0xc04ecc21 in db_command (last_cmdp=0xc0e0ac1c, cmd_table=0x0, dopager=1)
 MG>     at /usr/src/sys/ddb/db_command.c:445
 MG> #3  0xc04ecd7a in db_command_loop () at /usr/src/sys/ddb/db_command.c:498
 MG> #4  0xc04eec1d in db_trap (type=12, code=0) at /usr/src/sys/ddb/db_main.c:229
 MG> #5  0xc08d9aa6 in kdb_trap (type=12, code=0, tf=0xe863694c) at /usr/src/sys/kern/subr_kdb.c:535
 MG> #6  0xc0beeedf in trap_fatal (frame=0xe863694c, eva=420) at /usr/src/sys/i386/i386/trap.c:929
 MG> #7  0xc0bef800 in trap (frame=0xe863694c) at /usr/src/sys/i386/i386/trap.c:328
 MG> #8  0xc0bd139b in calltrap () at /usr/src/sys/i386/i386/exception.s:165
 MG> #9  0xc08a6a8b in _rw_wlock_hard (rw=0xc79e1508, tid=3334964384, file=0xc0ce01e4 "/usr/src/sys/netinet/in.c", 
 MG>     line=1370) at /usr/src/sys/kern/kern_rwlock.c:677
 MG> #10 0xc08a75d6 in _rw_wlock (rw=0xc79e1508, file=0xc0ce01e4 "/usr/src/sys/netinet/in.c", line=1370)
 MG>     at /usr/src/sys/kern/kern_rwlock.c:237
 MG> #11 0xc09bb17e in in_lltable_prefix_free (llt=0xc5dabc00, prefix=0xe8636a94, mask=0xe8636a84)
 MG>     at /usr/src/sys/netinet/in.c:1370
 MG> #12 0xc09631d1 in lltable_prefix_free (af=2, prefix=0xe8636a94, mask=0xe8636a84)
 MG>     at /usr/src/sys/net/if_llatbl.c:217
 MG> #13 0xc09b8d77 in in_ifscrub (ifp=0xc5b94c00, ia=0xc6ec0500) at /usr/src/sys/netinet/in.c:1197
 MG> #14 0xc09ba6dc in in_control (so=0xc79d2338, cmd=2149607705, data=0xc629b0c0 "fxp0", ifp=0xc5b94c00, 
 MG>     td=0xc6c784a0) at /usr/src/sys/netinet/in.c:586
 MG> #15 0xc095d400 in ifioctl (so=0xc79d2338, cmd=2149607705, data=0xc629b0c0 "fxp0", td=0xc6c784a0)
 MG>     at /usr/src/sys/net/if.c:2516
 MG> #16 0xc08f69d5 in soo_ioctl (fp=0xc6304738, cmd=2149607705, data=0xc629b0c0, active_cred=0xc79d8d80, 
 MG>     td=0xc6c784a0) at /usr/src/sys/kern/sys_socket.c:212
 MG> #17 0xc08f0a2d in kern_ioctl (td=0xc6c784a0, fd=3, com=2149607705, data=0xc629b0c0 "fxp0") at file.h:262
 MG> #18 0xc08f0bb4 in ioctl (td=0xc6c784a0, uap=0xe8636cf8) at /usr/src/sys/kern/sys_generic.c:678
 MG> #19 0xc0bef3b0 in syscall (frame=0xe8636d38) at /usr/src/sys/i386/i386/trap.c:1111
 MG> #20 0xc0bd1430 in Xint0x80_syscall () at /usr/src/sys/i386/i386/exception.s:261
 MG> #21 0x00000033 in ?? ()
 MG> Previous frame inner to this frame (corrupt stack?)
 MG> (kgdb) fr 11
 MG> #11 0xc09bb17e in in_lltable_prefix_free (llt=0xc5dabc00, prefix=0xe8636a94, mask=0xe8636a84)
 MG>     at /usr/src/sys/netinet/in.c:1370
 MG> 1370                                    LLE_WLOCK(lle);
 MG> (kgdb) list
 MG> 1365                    LIST_FOREACH_SAFE(lle, &llt->lle_head[i], lle_next, next) {
 MG> 1366
 MG> 1367                            if (IN_ARE_MASKED_ADDR_EQUAL((struct sockaddr_in *)L3_ADDR(lle), 
 MG> 1368                                                         pfx, msk)) {
 MG> 1369                                    callout_drain(&lle->la_timer);
 MG> 1370                                    LLE_WLOCK(lle);
 MG> 1371                                    llentry_free(lle);
 MG> 1372                            }
 MG> 1373                    }
 MG> 1374            }
 MG> (kgdb) p *lle
 MG> $1 = {lle_next = {le_next = 0xdeadc0de, le_prev = 0xdeadc0de}, lle_lock = {lock_object = {
 MG>       lo_name = 0xdeadc0de <Address 0xdeadc0de out of bounds>, lo_flags = 3735929054, lo_data = 3735929054, 
 MG>       lo_witness = 0xdeadc0de}, rw_lock = 3735929054}, lle_tbl = 0xdeadc0de, lle_head = 0xdeadc0de, 
 MG>   la_hold = 0xdeadc0de, la_expire = -559038242, la_flags = 49374, la_asked = 57005, la_preempt = 49374, 
 MG>   ln_byhint = 57005, ln_state = -16162, ln_router = 57005, ln_ntick = -559038242, lle_refcnt = -559038242, 
 MG>   ll_addr = {mac_aligned = 16045693110842147038, mac16 = {49374, 57005, 49374}}, lle_timer = {ln_timer_ch = {
 MG>       c_links = {sle = {sle_next = 0xdeadc0de}, tqe = {tqe_next = 0xdeadc0de, tqe_prev = 0xdeadc0de}}, 
 MG>       c_time = -559038242, c_arg = 0xdeadc0de, c_func = 0xdeadc0de, c_lock = 0xdeadc0de, 
 MG>       c_flags = -559038242, c_cpu = -559038242}, la_timer = {c_links = {sle = {sle_next = 0xdeadc0de}, tqe = {
 MG>           tqe_next = 0xdeadc0de, tqe_prev = 0xdeadc0de}}, c_time = -559038242, c_arg = 0xdeadc0de, 
 MG>       c_func = 0xdeadc0de, c_lock = 0xdeadc0de, c_flags = -559038242, c_cpu = -559038242}}}
 MG> (kgdb) fr 12
 MG> #12 0xc09631d1 in lltable_prefix_free (af=2, prefix=0xe8636a94, mask=0xe8636a84)
 MG>     at /usr/src/sys/net/if_llatbl.c:217
 MG> 217                     llt->llt_prefix_free(llt, prefix, mask);
 MG> (kgdb) list
 MG> 212             LLTABLE_RLOCK();
 MG> 213             SLIST_FOREACH(llt, &V_lltables, llt_link) {
 MG> 214                     if (llt->llt_af != af)
 MG> 215                             continue;
 MG> 216
 MG> 217                     llt->llt_prefix_free(llt, prefix, mask);
 MG> 218             }
 MG> 219             LLTABLE_RUNLOCK();
 MG> 220     }
 MG> 221

 MG> So the lltable is only RLOCKed while entries are deleted from the table. While
 MG> one thread is in callout_drain(), another thread has time to destroy the lle.

 MG> Is LLTABLE_RLOCK (and not LLTABLE_WLOCK) used in lltable_prefix_free
 MG> intentionally? I tried a patch (the second in the attachments) with WLOCK
 MG> instead of RLOCK, and this fixed this type of crash for me.

 MG> After this the box was able to survive for some time with the two test scripts
 MG> running, but then crashed in sysctl_iflist() while processing an ifa that had
 MG> been destroyed by another thread:
 MG>  
 MG> (kgdb) bt
 MG> #0  doadump () at pcpu.h:246
 MG> #1  0xc08a92fe in boot (howto=260) at /usr/src/sys/kern/kern_shutdown.c:416
 MG> #2  0xc08a95d2 in panic (fmt=Variable "fmt" is not available.
 MG> ) at /usr/src/sys/kern/kern_shutdown.c:579
 MG> #3  0xc0beef23 in trap_fatal (frame=0xe8456a64, eva=3735929054) at /usr/src/sys/i386/i386/trap.c:938
 MG> #4  0xc0bef113 in trap_pfault (frame=0xe8456a64, usermode=0, eva=3735929054)
 MG>     at /usr/src/sys/i386/i386/trap.c:851
 MG> #5  0xc0befb05 in trap (frame=0xe8456a64) at /usr/src/sys/i386/i386/trap.c:533
 MG> #6  0xc0bd139b in calltrap () at /usr/src/sys/i386/i386/exception.s:165
 MG> #7  0xc096f85e in rt_msg2 (type=12, rtinfo=0xe8456b08, cp=0x0, w=0xe8456b38) at /usr/src/sys/net/rtsock.c:1022
 MG> #8  0xc096ff7b in sysctl_rtsock (oidp=0xc0dcb900, arg1=0xe8456c18, arg2=4, req=0xe8456ba4)
 MG>     at /usr/src/sys/net/rtsock.c:1408
 MG> #9  0xc08b4598 in sysctl_root (oidp=Variable "oidp" is not available.
 MG> ) at /usr/src/sys/kern/kern_sysctl.c:1418
 MG> #10 0xc08b475c in userland_sysctl (td=0xc636b250, name=0xe8456c10, namelen=6, old=0x0, oldlenp=0xbfbfe048, 
 MG>     inkernel=0, new=0x0, newlen=0, retval=0xe8456c70, flags=0) at /usr/src/sys/kern/kern_sysctl.c:1522
 MG> #11 0xc08b4b14 in __sysctl (td=0xc636b250, uap=0xe8456cf8) at /usr/src/sys/kern/kern_sysctl.c:1448
 MG> #12 0xc0bef3b0 in syscall (frame=0xe8456d38) at /usr/src/sys/i386/i386/trap.c:1111
 MG> #13 0xc0bd1430 in Xint0x80_syscall () at /usr/src/sys/i386/i386/exception.s:261
 MG> #14 0x00000033 in ?? ()
 MG> Previous frame inner to this frame (corrupt stack?)
 MG> (kgdb) fr 8
 MG> #8  0xc096ff7b in sysctl_rtsock (oidp=0xc0dcb900, arg1=0xe8456c18, arg2=4, req=0xe8456ba4)
 MG>     at /usr/src/sys/net/rtsock.c:1408
 MG> 1408                            len = rt_msg2(RTM_NEWADDR, &info, NULL, w);
 MG> (kgdb) list 1370,1410
 MG> 1370    static int
 MG> 1371    sysctl_iflist(int af, struct walkarg *w)
 MG> 1372    {
 MG> 1373            struct ifnet *ifp;
 MG> 1374            struct ifaddr *ifa;
 MG> 1375            struct rt_addrinfo info;
 MG> 1376            int len, error = 0;
 MG> 1377
 MG> 1378            bzero((caddr_t)&info, sizeof(info));
 MG> 1379            IFNET_RLOCK();
 MG> 1380            TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
 MG> 1381                    if (w->w_arg && w->w_arg != ifp->if_index)
 MG> 1382                            continue;
 MG> 1383                    ifa = ifp->if_addr;
 MG> 1384                    info.rti_info[RTAX_IFP] = ifa->ifa_addr;
 MG> 1385                    len = rt_msg2(RTM_IFINFO, &info, NULL, w);
 MG> 1386                    info.rti_info[RTAX_IFP] = NULL;
 MG> 1387                    if (w->w_req && w->w_tmem) {
 MG> 1388                            struct if_msghdr *ifm;
 MG> 1389
 MG> 1390                            ifm = (struct if_msghdr *)w->w_tmem;
 MG> 1391                            ifm->ifm_index = ifp->if_index;
 MG> 1392                            ifm->ifm_flags = ifp->if_flags | ifp->if_drv_flags;
 MG> 1393                            ifm->ifm_data = ifp->if_data;
 MG> 1394                            ifm->ifm_addrs = info.rti_addrs;
 MG> 1395                            error = SYSCTL_OUT(w->w_req,(caddr_t)ifm, len);
 MG> 1396                            if (error)
 MG> 1397                                    goto done;
 MG> 1398                    }
 MG> 1399                    while ((ifa = TAILQ_NEXT(ifa, ifa_link)) != NULL) {
 MG> 1400                            if (af && af != ifa->ifa_addr->sa_family)
 MG> 1401                                    continue;
 MG> 1402                            if (prison_if(w->w_req->td->td_ucred,
 MG> 1403                                ifa->ifa_addr) != 0)
 MG> 1404                                    continue;
 MG> 1405                            info.rti_info[RTAX_IFA] = ifa->ifa_addr;
 MG> 1406                            info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;
 MG> 1407                            info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr;
 MG> 1408                            len = rt_msg2(RTM_NEWADDR, &info, NULL, w);
 MG> 1409                            if (w->w_req && w->w_tmem) {
 MG> 1410                                    struct ifa_msghdr *ifam;
 MG> (kgdb) fr 7  
 MG> #7  0xc096f85e in rt_msg2 (type=12, rtinfo=0xe8456b08, cp=0x0, w=0xe8456b38) at /usr/src/sys/net/rtsock.c:1022
 MG> 1022                    rtinfo->rti_addrs |= (1 << i);
 MG> (kgdb) p *rtinfo
 MG> $2 = {rti_addrs = 4, rti_info = {0x0, 0x0, 0xdeadc0de, 0x0, 0x0, 0xdeadc0de, 0x0, 0xdeadc0de}, rti_flags = 0, 
 MG>   rti_ifa = 0x0, rti_ifp = 0x0}

 MG> The third patch fixed this type of crashes. But the crashes were still possible:

 MG> panic: Bad link elm 0xc876ea00 prev->next != elm

 MG> #0  doadump () at pcpu.h:246
 MG> #1  0xc04ec829 in db_fncall (dummy1=-1064461270, dummy2=0, dummy3=-1, dummy4=0xe880d784 "\230в\200Х")
 MG>     at /usr/src/sys/ddb/db_command.c:548
 MG> #2  0xc04ecc5f in db_command (last_cmdp=0xc0e0ac9c, cmd_table=0x0, dopager=0)
 MG>     at /usr/src/sys/ddb/db_command.c:445
 MG> #3  0xc04ecd14 in db_command_script (command=0xc0e0bbc4 "call doadump") at /usr/src/sys/ddb/db_command.c:516
 MG> #4  0xc04f0e50 in db_script_exec (scriptname=0xe880d890 "kdb.enter.panic", warnifnotfound=Variable "warnifnotfound" is not available.
 MG> )
 MG>     at /usr/src/sys/ddb/db_script.c:302
 MG> #5  0xc04f0f37 in db_script_kdbenter (eventname=0xc0cc770a "panic") at /usr/src/sys/ddb/db_script.c:324
 MG> #6  0xc04eec18 in db_trap (type=3, code=0) at /usr/src/sys/ddb/db_main.c:228
 MG> #7  0xc08d9aa6 in kdb_trap (type=3, code=0, tf=0xe880d9cc) at /usr/src/sys/kern/subr_kdb.c:535
 MG> #8  0xc0befcfb in trap (frame=0xe880d9cc) at /usr/src/sys/i386/i386/trap.c:690
 MG> #9  0xc0bd141b in calltrap () at /usr/src/sys/i386/i386/exception.s:165
 MG> #10 0xc08d9c2a in kdb_enter (why=0xc0cc770a "panic", msg=0xc0cc770a "panic") at cpufunc.h:71
 MG> #11 0xc08a95b6 in panic (fmt=0xc0c61cc0 "Bad link elm %p prev->next != elm")
 MG>     at /usr/src/sys/kern/kern_shutdown.c:562
 MG> #12 0xc09b8efc in in_ifinit (ifp=0xc5b94c00, ia=0xc876ea00, sin=0xc185fcf6, scrub=0)
 MG>     at /usr/src/sys/netinet/in.c:844
 MG> #13 0xc09ba58b in in_control (so=0xc7b13ce0, cmd=2151704858, data=0xc7841bc0 "fxp0", ifp=0xc5b94c00, 
 MG>     td=0xc818db90) at /usr/src/sys/netinet/in.c:564
 MG> #14 0xc095d400 in ifioctl (so=0xc7b13ce0, cmd=2151704858, data=0xc7841bc0 "fxp0", td=0xc818db90)
 MG>     at /usr/src/sys/net/if.c:2516
 MG> #15 0xc08f69d5 in soo_ioctl (fp=0xc70a84d0, cmd=2151704858, data=0xc7841bc0, active_cred=0xc78dc400, 
 MG>     td=0xc818db90) at /usr/src/sys/kern/sys_socket.c:212
 MG> #16 0xc08f0a2d in kern_ioctl (td=0xc818db90, fd=3, com=2151704858, data=0xc7841bc0 "fxp0") at file.h:262
 MG> #17 0xc08f0bb4 in ioctl (td=0xc818db90, uap=0xe880dcf8) at /usr/src/sys/kern/sys_generic.c:678
 MG> #18 0xc0bef430 in syscall (frame=0xe880dd38) at /usr/src/sys/i386/i386/trap.c:1111
 MG> #19 0xc0bd14b0 in Xint0x80_syscall () at /usr/src/sys/i386/i386/exception.s:261
 MG> #20 0x00000033 in ?? ()
 MG> Previous frame inner to this frame (corrupt stack?)
 MG> (kgdb) fr 12
 MG> #12 0xc09b8efc in in_ifinit (ifp=0xc5b94c00, ia=0xc876ea00, sin=0xc185fcf6, scrub=0)
 MG>     at /usr/src/sys/netinet/in.c:844
 MG> 844                     LIST_REMOVE(ia, ia_hash);
 MG> (kgdb) list in_ifinit
 MG> 832      * and routing table entry.
 MG> 833      */
 MG> 834     static int
 MG> 835     in_ifinit(struct ifnet *ifp, struct in_ifaddr *ia, struct sockaddr_in *sin,
 MG> 836         int scrub)
 MG> 837     {
 MG> 838             register u_long i = ntohl(sin->sin_addr.s_addr);
 MG> 839             struct sockaddr_in oldaddr;
 MG> 840             int s = splimp(), flags = RTF_UP, error = 0;
 MG> 841     
 MG> (kgdb) 
 MG> 842             oldaddr = ia->ia_addr;
 MG> 843             if (oldaddr.sin_family == AF_INET)
 MG> 844                     LIST_REMOVE(ia, ia_hash);
 MG> 845             ia->ia_addr = *sin;
 MG> 846             if (ia->ia_addr.sin_family == AF_INET) {
 MG> 847                     IN_IFADDR_WLOCK();
 MG> 848                     LIST_INSERT_HEAD(INADDR_HASH(ia->ia_addr.sin_addr.s_addr),
 MG> 849                         ia, ia_hash);
 MG> 850                     IN_IFADDR_WUNLOCK();
 MG> 851             }

 MG> Applying the fourth patch fixed this. But it is still possible to crash the
 MG> box:

 MG> #0  doadump () at pcpu.h:246
 MG> #1  0xc04ec829 in db_fncall (dummy1=1, dummy2=0, dummy3=-1056922624, dummy4=0xe847c890 "")
 MG>     at /usr/src/sys/ddb/db_command.c:548
 MG> #2  0xc04ecc21 in db_command (last_cmdp=0xc0e0ad1c, cmd_table=0x0, dopager=1)
 MG>     at /usr/src/sys/ddb/db_command.c:445
 MG> #3  0xc04ecd7a in db_command_loop () at /usr/src/sys/ddb/db_command.c:498
 MG> #4  0xc04eec1d in db_trap (type=12, code=0) at /usr/src/sys/ddb/db_main.c:229
 MG> #5  0xc08d9aa6 in kdb_trap (type=12, code=0, tf=0xe847ca7c) at /usr/src/sys/kern/subr_kdb.c:535
 MG> #6  0xc0beefbf in trap_fatal (frame=0xe847ca7c, eva=3735929146) at /usr/src/sys/i386/i386/trap.c:929
 MG> #7  0xc0bef8e0 in trap (frame=0xe847ca7c) at /usr/src/sys/i386/i386/trap.c:328
 MG> #8  0xc0bd147b in calltrap () at /usr/src/sys/i386/i386/exception.s:165
 MG> #9  0xc09b9c24 in in_control (so=0xc6e29670, cmd=2149607705, data=0xc6246ba0 "fxp0", ifp=0xc5b94c00, 
 MG>     td=0xc6a59940) at /usr/src/sys/netinet/in.c:331
 MG> #10 0xc095d400 in ifioctl (so=0xc6e29670, cmd=2149607705, data=0xc6246ba0 "fxp0", td=0xc6a59940)
 MG>     at /usr/src/sys/net/if.c:2516
 MG> #11 0xc08f69d5 in soo_ioctl (fp=0xc6374700, cmd=2149607705, data=0xc6246ba0, active_cred=0xc7131280, 
 MG>     td=0xc6a59940) at /usr/src/sys/kern/sys_socket.c:212
 MG> #12 0xc08f0a2d in kern_ioctl (td=0xc6a59940, fd=3, com=2149607705, data=0xc6246ba0 "fxp0") at file.h:262
 MG> #13 0xc08f0bb4 in ioctl (td=0xc6a59940, uap=0xe847ccf8) at /usr/src/sys/kern/sys_generic.c:678
 MG> #14 0xc0bef490 in syscall (frame=0xe847cd38) at /usr/src/sys/i386/i386/trap.c:1111
 MG> #15 0xc0bd1510 in Xint0x80_syscall () at /usr/src/sys/i386/i386/exception.s:261
 MG> #16 0x00000033 in ?? ()
 MG> Previous frame inner to this frame (corrupt stack?)
 MG> (kgdb) fr 9
 MG> #9  0xc09b9c24 in in_control (so=0xc6e29670, cmd=2149607705, data=0xc6246ba0 "fxp0", ifp=0xc5b94c00, 
 MG>     td=0xc6a59940) at /usr/src/sys/netinet/in.c:331
 MG> 331                     if (iap->ia_ifp == ifp &&
 MG> (kgdb) list
 MG> 326              * first one on the interface, if possible.
 MG> 327              */
 MG> 328             dst = ((struct sockaddr_in *)&ifr->ifr_addr)->sin_addr;
 MG> 329             IN_IFADDR_RLOCK();
 MG> 330             LIST_FOREACH(iap, INADDR_HASH(dst.s_addr), ia_hash) {
 MG> 331                     if (iap->ia_ifp == ifp &&
 MG> 332                         iap->ia_addr.sin_addr.s_addr == dst.s_addr) {
 MG> 333                             if (td == NULL || prison_check_ip4(td->td_ucred,
 MG> 334                                 &dst) == 0)
 MG> 335                                     ia = iap;
 MG> (kgdb) p iap
 MG> $1 = (struct in_ifaddr *) 0xdeadc0de

 MG> But I don't have the patch for this yet :-).

 MG> Also, I have noticed that after running my tests long enough (but not so long
 MG> as to crash the box), an error message starts to appear on every attempt to
 MG> add the tested alias IP (although the alias is created):

 MG> ifconfig: ioctl (SIOCAIFADDR): File exists

 MG> This is because the route is not deleted on alias removal (some reference
 MG> leak?). After removing the route manually the error does not appear.

 MG> -- 
 MG> Mikolaj Golub

 MG> --- sys/netinet/in.c.orig        2010-04-16 15:15:07.000000000 +0300
 MG> +++ sys/netinet/in.c        2010-04-18 17:22:57.000000000 +0300
 MG> @@ -601,8 +601,17 @@ in_control(struct socket *so, u_long cmd
 MG>          }
 MG>  
 MG>          IF_ADDR_LOCK(ifp);
 MG> -        TAILQ_REMOVE(&ifp->if_addrhead, &ia->ia_ifa, ifa_link);
 MG> +        TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
 MG> +                if (&ia->ia_ifa == ifa) {
 MG> +                        TAILQ_REMOVE(&ifp->if_addrhead, &ia->ia_ifa, ifa_link);
 MG> +                        break;
 MG> +                }
 MG> +        }
 MG>          IF_ADDR_UNLOCK(ifp);
 MG> +        if (ifa == NULL) {
 MG> +                error = EADDRNOTAVAIL;
 MG> +                goto out;
 MG> +        }
 MG>          ifa_free(&ia->ia_ifa);                                /* if_addrhead */
 MG>  
 MG>          IN_IFADDR_WLOCK();
 MG> --- sys/net/if_llatbl.c.orig        2010-04-18 22:38:58.000000000 +0300
 MG> +++ sys/net/if_llatbl.c        2010-04-18 22:39:13.000000000 +0300
 MG> @@ -209,14 +209,14 @@ lltable_prefix_free(int af, struct socka
 MG>  {
 MG>          struct lltable *llt;
 MG>  
 MG> -        LLTABLE_RLOCK();
 MG> +        LLTABLE_WLOCK();
 MG>          SLIST_FOREACH(llt, &V_lltables, llt_link) {
 MG>                  if (llt->llt_af != af)
 MG>                          continue;
 MG>  
 MG>                  llt->llt_prefix_free(llt, prefix, mask);
 MG>          }
 MG> -        LLTABLE_RUNLOCK();
 MG> +        LLTABLE_WUNLOCK();
 MG>  }
 MG>  
 MG>  
 MG> --- sys/net/rtsock.c.orig        2010-04-19 08:19:48.000000000 +0300
 MG> +++ sys/net/rtsock.c        2010-04-19 08:26:02.000000000 +0300
 MG> @@ -1380,6 +1380,7 @@ sysctl_iflist(int af, struct walkarg *w)
 MG>          TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
 MG>                  if (w->w_arg && w->w_arg != ifp->if_index)
 MG>                          continue;
 MG> +                IF_ADDR_LOCK(ifp);
 MG>                  ifa = ifp->if_addr;
 MG>                  info.rti_info[RTAX_IFP] = ifa->ifa_addr;
 MG>                  len = rt_msg2(RTM_IFINFO, &info, NULL, w);
 MG> @@ -1419,10 +1420,13 @@ sysctl_iflist(int af, struct walkarg *w)
 MG>                                          goto done;
 MG>                          }
 MG>                  }
 MG> +                IF_ADDR_UNLOCK(ifp);
 MG>                  info.rti_info[RTAX_IFA] = info.rti_info[RTAX_NETMASK] =
 MG>                          info.rti_info[RTAX_BRD] = NULL;
 MG>          }
 MG>  done:
 MG> +        if (ifp)
 MG> +                IF_ADDR_UNLOCK(ifp);
 MG>          IFNET_RUNLOCK();
 MG>          return (error);
 MG>  }
 MG> --- sys/netinet/in.c.in_control        2010-04-18 21:00:37.000000000 +0300
 MG> +++ sys/netinet/in.c        2010-04-20 13:08:41.000000000 +0300
 MG> @@ -836,19 +836,25 @@ in_ifinit(struct ifnet *ifp, struct in_i
 MG>      int scrub)
 MG>  {
 MG>          register u_long i = ntohl(sin->sin_addr.s_addr);
 MG> +        register struct in_ifaddr *iap;
 MG>          struct sockaddr_in oldaddr;
 MG>          int s = splimp(), flags = RTF_UP, error = 0;
 MG>  
 MG>          oldaddr = ia->ia_addr;
 MG> -        if (oldaddr.sin_family == AF_INET)
 MG> -                LIST_REMOVE(ia, ia_hash);
 MG> +        IN_IFADDR_WLOCK();
 MG> +        if (oldaddr.sin_family == AF_INET) {
 MG> +                LIST_FOREACH(iap, INADDR_HASH(oldaddr.sin_addr.s_addr), ia_hash) {
 MG> +                        if (iap == ia) {
 MG> +                                LIST_REMOVE(ia, ia_hash);
 MG> +                                break;
 MG> +                        }
 MG> +                }
 MG> +        }
 MG>          ia->ia_addr = *sin;
 MG> -        if (ia->ia_addr.sin_family == AF_INET) {
 MG> -                IN_IFADDR_WLOCK();
 MG> +        if (ia->ia_addr.sin_family == AF_INET) 
 MG>                  LIST_INSERT_HEAD(INADDR_HASH(ia->ia_addr.sin_addr.s_addr),
 MG>                      ia, ia_hash);
 MG> -                IN_IFADDR_WUNLOCK();
 MG> -        }
 MG> +        IN_IFADDR_WUNLOCK();
 MG>          /*
 MG>           * Give the interface a chance to initialize
 MG>           * if this is its first address,

-- 
Mikolaj Golub


More information about the freebsd-net mailing list