kmem leakage on tun/tap device removal

Mikolaj Golub to.my.trociny at gmail.com
Sun Feb 28 11:31:12 UTC 2010


On Sat, 27 Feb 2010 17:16:26 +0200 Kostik Belousov wrote:

> Please test the patch below.

Tested. It works as expected: running in loop

ifconfig tun0 create
ifconfig tun0 destroy

(and in parallel the same for tun1) no kmem leakage is observed:

 Time          Type InUse MemUse HighUse Requests  Size(s)
20:20        DEVFS1   104    26K       -    12481  256
20:25        DEVFS1   104    26K       -   110695  256
20:30        DEVFS1   103    26K       -   228557  256
20:35        DEVFS1   102    26K       -   345257  256
20:40        DEVFS1   113    29K       -   460939  256
20:45        DEVFS1   102    26K       -   575485  256
20:50        DEVFS1   109    28K       -   687967  256
20:55        DEVFS1   103    26K       -   800969  256
21:00        DEVFS1   102    26K       -   909753  256
21:05        DEVFS1   101    26K       -  1022029  256
21:10        DEVFS1   103    26K       -  1132231  256
21:15        DEVFS1   105    27K       -  1239151  256
21:20        DEVFS1   106    27K       -  1349211  256

But I have faced with another issue (not related to your patch, as it is
observed with unpatched kernel too). When I try to run concurrently two
create/destroy scripts with the same interface the system panics:

Bad link elm <ifp address> next->prev != elm

Here are details about panic:

Unread portion of the kernel message buffer:
panic: Bad link elm 0xc5f1a800 next->prev != elm
cpuid = 2
KDB: enter: panic
exclusive sleep mutex if_clone lock (if_clone lock) r = 0 (0xc0da1cf0) locked @ /usr/src/sys/net/if_clone.c:248
exclusive sleep mutex if_clone lock (if_clone lock) r = 0 (0xc0da1cf0) locked @ /usr/src/sys/net/if_clone.c:248
exclusive sx so_rcv_sx (so_rcv_sx) r = 0 (0xc6cd3560) locked @ /usr/src/sys/kern/uipc_sockbuf.c:148
exclusive sx so_rcv_sx (so_rcv_sx) r = 0 (0xc6b4dbd0) locked @ /usr/src/sys/kern/uipc_sockbuf.c:148
Physical memory: 2019 MB
Dumping 160 MB: 145 129 113 97 81 65 49 33 17 1

#0  doadump () at pcpu.h:246
246             __asm __volatile("movl %%fs:0,%0" : "=r" (td));
(kgdb) bt
#0  doadump () at pcpu.h:246
#1  0xc04e8bb9 in db_fncall (dummy1=-1064515926, dummy2=0, dummy3=-1, dummy4=0xe83f4834 "HH?Х")
    at /usr/src/sys/ddb/db_command.c:548
#2  0xc04e8fef in db_command (last_cmdp=0xc0de14dc, cmd_table=0x0, dopager=0)
    at /usr/src/sys/ddb/db_command.c:445
#3  0xc04e90a4 in db_command_script (command=0xc0de2404 "call doadump")
    at /usr/src/sys/ddb/db_command.c:516
#4  0xc04ed1d0 in db_script_exec (scriptname=0xe83f4940 "kdb.enter.panic", warnifnotfound=Variable "warnifnotfound" is not available.
)
    at /usr/src/sys/ddb/db_script.c:302
#5  0xc04ed2b7 in db_script_kdbenter (eventname=0xc0ca1948 "panic") at /usr/src/sys/ddb/db_script.c:324
#6  0xc04eaf98 in db_trap (type=3, code=0) at /usr/src/sys/ddb/db_main.c:228
#7  0xc08cc526 in kdb_trap (type=3, code=0, tf=0xe83f4a7c) at /usr/src/sys/kern/subr_kdb.c:535
#8  0xc0bdd38b in trap (frame=0xe83f4a7c) at /usr/src/sys/i386/i386/trap.c:690
#9  0xc0bbef1b in calltrap () at /usr/src/sys/i386/i386/exception.s:165
#10 0xc08cc6aa in kdb_enter (why=0xc0ca1948 "panic", msg=0xc0ca1948 "panic") at cpufunc.h:71
#11 0xc089d716 in panic (fmt=0xc0c3c80c "Bad link elm %p next->prev != elm")
    at /usr/src/sys/kern/kern_shutdown.c:562
#12 0xc094e7fb in if_clone_destroyif (ifc=0xc0da1cc0, ifp=0xc5f1a800) at /usr/src/sys/net/if_clone.c:249
#13 0xc094eb52 in if_clone_destroy (name=0xc664ac20 "tun0") at /usr/src/sys/net/if_clone.c:227
#14 0xc094c8a6 in ifioctl (so=0xc6e0a9a8, cmd=2149607801, data=0xc664ac20 "tun0", td=0xc66c0d80)
    at /usr/src/sys/net/if.c:2412
#15 0xc08e8b25 in soo_ioctl (fp=0xc6d46af0, cmd=2149607801, data=0xc664ac20, active_cred=0xc5f62280, 
    td=0xc66c0d80) at /usr/src/sys/kern/sys_socket.c:212
#16 0xc08e31bd in kern_ioctl (td=0xc66c0d80, fd=3, com=2149607801, data=0xc664ac20 "tun0") at file.h:262
#17 0xc08e3344 in ioctl (td=0xc66c0d80, uap=0xe83f4cf8) at /usr/src/sys/kern/sys_generic.c:678
#18 0xc0bdca33 in syscall (frame=0xe83f4d38) at /usr/src/sys/i386/i386/trap.c:1078
#19 0xc0bbefb0 in Xint0x80_syscall () at /usr/src/sys/i386/i386/exception.s:261
#20 0x00000033 in ?? ()
Previous frame inner to this frame (corrupt stack?)
(kgdb) fr 12
#12 0xc094e7fb in if_clone_destroyif (ifc=0xc0da1cc0, ifp=0xc5f1a800) at /usr/src/sys/net/if_clone.c:249
249             IFC_IFLIST_REMOVE(ifc, ifp);
(kgdb) list
244              * switch to the vnet context of the target vnet.
245              */
246             CURVNET_SET_QUIET(ifp->if_vnet);
247
248             IF_CLONE_LOCK(ifc);
249             IFC_IFLIST_REMOVE(ifc, ifp);
250             IF_CLONE_UNLOCK(ifc);
251
252             if_delgroup(ifp, ifc->ifc_name);
253

The concurrent ifconfig destroy thread:

#0  sched_switch (td=0xc6c51900, newtd=0xc597b6c0, flags=1547) at /usr/src/sys/kern/sched_ule.c:1864
#1  0xc08a65a0 in mi_switch (flags=1547, newtd=0x0) at /usr/src/sys/kern/kern_synch.c:449
#2  0xc08bf165 in sched_preempt (td=0xc6c51900) at /usr/src/sys/kern/sched_ule.c:2078
#3  0xc0bce954 in ipi_bitmap_handler (frame=
      {tf_fs = 8, tf_es = -1060437976, tf_ds = 40, tf_edi = -956607168, tf_esi = -956607232, tf_ebp = -397423980, tf_isp = -397424072, tf_ebx = -959068160, tf_edx = -974018560, tf_ecx = 1, tf_eax = -960161535, tf_trapno = 0, tf_err = 0, tf_eip = -1064007759, tf_cs = 32, tf_eflags = 658, tf_esp = -397423980, tf_ss = -1063994479}) at /usr/src/sys/i386/i386/mp_machdep.c:1266
#4  0xc0bbf63e in Xipi_intr_bitmap_handler () at apic_vector.s:285
#5  0xc09487b1 in if_purgemaddrs (ifp=0xc5f1a800) at /usr/src/sys/net/if.c:780
#6  0xc09545b9 in tun_destroy (tp=0xc6fb5500) at /usr/src/sys/net/if_tun.c:259
#7  0xc09548e8 in tun_clone_destroy (ifp=0xc5f1a800) at /usr/src/sys/net/if_tun.c:277
#8  0xc094e207 in ifc_simple_destroy (ifc=0xc0da1cc0, ifp=0xc5f1a800) at /usr/src/sys/net/if_clone.c:595
#9  0xc094e871 in if_clone_destroyif (ifc=0xc0da1cc0, ifp=0xc5f1a800) at /usr/src/sys/net/if_clone.c:254
#10 0xc094eb52 in if_clone_destroy (name=0xc6f855a0 "tun0") at /usr/src/sys/net/if_clone.c:227
#11 0xc094c8a6 in ifioctl (so=0xc6e04670, cmd=2149607801, data=0xc6f855a0 "tun0", td=0xc6c51900)
    at /usr/src/sys/net/if.c:2412
#12 0xc08e8b25 in soo_ioctl (fp=0xc6d3b0a8, cmd=2149607801, data=0xc6f855a0, active_cred=0xc6cce800, 
    td=0xc6c51900) at /usr/src/sys/kern/sys_socket.c:212
#13 0xc08e31bd in kern_ioctl (td=0xc6c51900, fd=3, com=2149607801, data=0xc6f855a0 "tun0") at file.h:262
#14 0xc08e3344 in ioctl (td=0xc6c51900, uap=0xe84fccf8) at /usr/src/sys/kern/sys_generic.c:678
#15 0xc0bdca33 in syscall (frame=0xe84fcd38) at /usr/src/sys/i386/i386/trap.c:1078
#16 0xc0bbefb0 in Xint0x80_syscall () at /usr/src/sys/i386/i386/exception.s:261
#17 0x00000033 in ?? ()
Previous frame inner to this frame (corrupt stack?)
(kgdb) fr 9
#9  0xc094e871 in if_clone_destroyif (ifc=0xc0da1cc0, ifp=0xc5f1a800) at /usr/src/sys/net/if_clone.c:254
254             err =  (*ifc->ifc_destroy)(ifc, ifp);
(kgdb) list
249             IFC_IFLIST_REMOVE(ifc, ifp);
250             IF_CLONE_UNLOCK(ifc);
251
252             if_delgroup(ifp, ifc->ifc_name);
253
254             err =  (*ifc->ifc_destroy)(ifc, ifp);
255
256             if (err != 0) {
257                     if_addgroup(ifp, ifc->ifc_name);
258

So, it is a race but for me some things look a bit strange (may be because my
knowledge in this field is rather casual).

We have:

#define IFC_IFLIST_REMOVE(_ifc, _ifp)                                   \
        LIST_REMOVE(_ifp, if_clones)

and the message "Bad link elm 0xc5f1a800 next->prev != elm" should mean that 

*(0xc5f1a800)->if_clones.le_prev != (0xc5f1a800)

But actually it is not observed:

(kgdb) p ifp
$1 = (struct ifnet *) 0xc5f1a800
(kgdb) p *(ifp)->if_clones.le_prev
$2 = (struct ifnet *) 0xc5f1c800

Also it looks like IFC_IFLIST_REMOVE() was properly locked. At the first
glance the code below IF_CLONE_UNLOCK() does not work with if_clones list.

And doesn't this look strange that we have two messages about the same lock?

exclusive sleep mutex if_clone lock (if_clone lock) r = 0 (0xc0da1cf0) locked @ /usr/src/sys/net/if_clone.c:248
exclusive sleep mutex if_clone lock (if_clone lock) r = 0 (0xc0da1cf0) locked @ /usr/src/sys/net/if_clone.c:248

My kernel config is:

include         GENERIC

ident           DTRACE

options         INVARIANTS
options         INVARIANT_SUPPORT
options         WITNESS
options         DEBUG_LOCKS
options         DEBUG_VFS_LOCKS
options         DIAGNOSTIC
options         KDB
options         DDB

options         KDTRACE_HOOKS
options         DDB_CTF

I am running i386 8.0-STABLE (but rather old, from Dec 1, I can run tests on
newer sources if this makes difference).

Hardware:

CPU: Intel(R) Core(TM)2 Quad  CPU   Q8200  @ 2.33GHz (2335.63-MHz 686-class CPU)
  Origin = "GenuineIntel"  Id = 0x10677  Stepping = 7
  Features=0xbfebfbff<FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CLFLUSH,DTS,ACPI,MMX,FXSR,SSE,SSE2,SS,HTT,TM,PBE>
  Features2=0x8e39d<SSE3,DTES64,MON,DS_CPL,EST,TM2,SSSE3,CX16,xTPR,PDCM,SSE4.1>
  AMD Features=0x20100000<NX,LM>
  AMD Features2=0x1<LAHF>
  TSC: P-state invariant
real memory  = 2147483648 (2048 MB)
avail memory = 2074243072 (1978 MB)
ACPI APIC Table: <INTEL DG31GL>
FreeBSD/SMP: Multiprocessor System Detected: 4 CPUs
FreeBSD/SMP: 1 package(s) x 4 core(s)
 cpu0 (BSP): APIC ID:  0
 cpu1 (AP): APIC ID:  1
 cpu2 (AP): APIC ID:  2
 cpu3 (AP): APIC ID:  3

I tried running the test on GENERIC (without debugging options) and got "page
fault" panic.

Unread portion of the kernel message buffer:
bpfdetach: tun0 was not attached

Fatal trap 12: page fault while in kernel mode
cpuid = 0; apic id = 00
fault virtual address   = 0x1a0
fault code              = supervisor read, page not present
instruction pointer     = 0x20:0xc087233f
stack pointer           = 0x28:0xe8495a2c
frame pointer           = 0x28:0xe8495a44
code segment            = base 0x0, limit 0xfffff, type 0x1b
                        = DPL 0, pres 1, def32 1, gran 1
processor eflags        = interrupt enabled, resume, IOPL = 0
current process         = 53523 (ifconfig)
trap number             = 12
panic: page fault
cpuid = 0
Uptime: 1m15s
Physical memory: 2019 MB
Dumping 109 MB: 94 78 62 46 30 14


  137 Thread 100216 (PID=53523: ifconfig)  doadump () at pcpu.h:246

#0  doadump () at pcpu.h:246
#1  0xc0881c97 in boot (howto=260) at /usr/src/sys/kern/kern_shutdown.c:416
#2  0xc0881f89 in panic (fmt=Variable "fmt" is not available.
) at /usr/src/sys/kern/kern_shutdown.c:579
#3  0xc0bb39ec in trap_fatal (frame=0xe84959ec, eva=416) at /usr/src/sys/i386/i386/trap.c:938
#4  0xc0bb3c70 in trap_pfault (frame=0xe84959ec, usermode=0, eva=416) at /usr/src/sys/i386/i386/trap.c:851
#5  0xc0bb4675 in trap (frame=0xe84959ec) at /usr/src/sys/i386/i386/trap.c:533
#6  0xc0b96e0b in calltrap () at /usr/src/sys/i386/i386/exception.s:165
#7  0xc087233f in _mtx_lock_sleep (m=0xc5c4b22c, tid=3330476288, opts=0, file=0x0, line=0)
    at /usr/src/sys/kern/kern_mutex.c:369
#8  0xc0926471 in if_detach (ifp=0xc5c4b000) at /usr/src/sys/net/if.c:1188
#9  0xc0930879 in tun_destroy (tp=0xc6860d80) at /usr/src/sys/net/if_tun.c:259
#10 0xc0931927 in tun_clone_destroy (ifp=0xc5c4b000) at /usr/src/sys/net/if_tun.c:277
#11 0xc092a407 in ifc_simple_destroy (ifc=0xc0d496e0, ifp=0xc5c4b000) at /usr/src/sys/net/if_clone.c:595
#12 0xc092a62c in if_clone_destroyif (ifc=0xc0d496e0, ifp=0xc5c4b000) at /usr/src/sys/net/if_clone.c:254
#13 0xc092a9e2 in if_clone_destroy (name=0xc5f201e0 "tun0") at /usr/src/sys/net/if_clone.c:227
#14 0xc0928a26 in ifioctl (so=0xc6806000, cmd=2149607801, data=0xc5f201e0 "tun0", td=0xc6830900)
    at /usr/src/sys/net/if.c:2412
#15 0xc08c4c32 in soo_ioctl (fp=0xc6743968, cmd=2149607801, data=0xc5f201e0, active_cred=0xc66fa680, 
    td=0xc6830900) at /usr/src/sys/kern/sys_socket.c:212
#16 0xc08bdb00 in kern_ioctl (td=0xc6830900, fd=3, com=2149607801, data=0xc5f201e0 "tun0") at file.h:262
#17 0xc08bdc74 in ioctl (td=0xc6830900, uap=0xe8495cf8) at /usr/src/sys/kern/sys_generic.c:678
#18 0xc0bb3fb5 in syscall (frame=0xe8495d38) at /usr/src/sys/i386/i386/trap.c:1078
#19 0xc0b96e70 in Xint0x80_syscall () at /usr/src/sys/i386/i386/exception.s:261
#20 0x00000033 in ?? ()
Previous frame inner to this frame (corrupt stack?)

  138 Thread 100211 (PID=53526: ifconfig)  sched_switch (td=0xc6832480, newtd=0xc5951b40, flags=259)
    at /usr/src/sys/kern/sched_ule.c:1864

#0  sched_switch (td=0xc6832480, newtd=0xc5951b40, flags=259) at /usr/src/sys/kern/sched_ule.c:1864
#1  0xc088a15a in mi_switch (flags=259, newtd=0x0) at /usr/src/sys/kern/kern_synch.c:449
#2  0xc08bc02b in turnstile_wait (ts=0xc5ec1700, owner=0xc6830900, queue=Variable "queue" is not available.
)
    at /usr/src/sys/kern/subr_turnstile.c:745
#3  0xc088073f in _rw_rlock (rw=0xc0db6024, file=0x0, line=0) at /usr/src/sys/kern/kern_rwlock.c:460
#4  0xc0924867 in ifunit_ref (name=0xc685e200 "tun0") at /usr/src/sys/net/if.c:2017
#5  0xc0928d10 in ifioctl (so=0xc6820338, cmd=3223349536, data=0xc685e200 "tun0", td=0xc6832480)
    at /usr/src/sys/net/if.c:2420
#6  0xc08c4c32 in soo_ioctl (fp=0xc6808c78, cmd=3223349536, data=0xc685e200, active_cred=0xc6802500, 
    td=0xc6832480) at /usr/src/sys/kern/sys_socket.c:212
#7  0xc08bdb00 in kern_ioctl (td=0xc6832480, fd=3, com=3223349536, data=0xc685e200 "tun0") at file.h:262
#8  0xc08bdc74 in ioctl (td=0xc6832480, uap=0xe846ccf8) at /usr/src/sys/kern/sys_generic.c:678
#9  0xc0bb3fb5 in syscall (frame=0xe846cd38) at /usr/src/sys/i386/i386/trap.c:1078
#10 0xc0b96e70 in Xint0x80_syscall () at /usr/src/sys/i386/i386/exception.s:261
#11 0x00000033 in ?? ()
Previous frame inner to this frame (corrupt stack?)

-- 
Mikolaj Golub


More information about the freebsd-net mailing list