Reproducible filesystem deadlock on RELENG_5
Matt Reimer
mreimer at vpop.net
Mon Jan 10 15:47:34 PST 2005
On a UP machine (P4, 128M RAM) running RELENG_5 (as of Friday), I am seeing
what looks like a hang or deadlock on a filesystem with 10 snapshots. Our
problems began when we ran out of disk space, resulting in a series of these
log messages:
kernel: pid 39 (bufdaemon), uid 0 inumber 7277783 on /backup: filesystem full
kernel: initiate_write_filepage: already started
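For reference, UFS snapshots are ordinary files, so reclaiming their space
just means unlinking them; assuming ours live in the conventional .snap
directory on that filesystem, deleting one is simply (with <snapshot-name>
a placeholder):

    # ls /backup/.snap
    # rm /backup/.snap/<snapshot-name>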
So I tried to delete a snapshot to free up some space, but then the kernel
began panicking. In an effort to work around the panic, I disabled
softupdates. I then came across the identical panic in a post by Kris
Kennaway
(http://lists.freebsd.org/pipermail/freebsd-current/2004-September/036946.html),
which he fixed by increasing KSTACK_PAGES.
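For anyone trying to reproduce the workaround: KSTACK_PAGES is set in the
kernel configuration file and the kernel rebuilt; MYKERNEL below is just a
placeholder for the config name (the i386 default is 2, if I remember
right):

    # in /usr/src/sys/i386/conf/MYKERNEL, then rebuild and install
    options         KSTACK_PAGES=31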
After increasing KSTACK_PAGES to 31, the kernel no longer panics; instead,
access to the filesystem seems to deadlock: if I so much as touch a file
into existence on that partition, the touch command hangs in state 'wdrain',
and other attempts to access that filesystem hang as well. The problem is
100% reproducible. Note in the sysctl output below that vfs.runningbufspace
(5294080) is stuck far above vfs.hirunningspace (1048576), which looks
consistent with a 'wdrain' sleep that never gets its wakeup.
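For context on the 'wdrain' state, as I understand it: the buffer layer
throttles new writers whenever the total size of writes in flight exceeds a
high-water mark, and only write completions post the wakeup, so if the
in-flight writes never complete, the sleep never ends. A simplified
paraphrase of the logic in sys/kern/vfs_bio.c (not the verbatim RELENG_5
source; locking omitted):

    /* Writers block here before starting a new write. */
    static void
    waitrunningbufspace(void)
    {
            /* Sleep in "wdrain" while too much write I/O is in flight. */
            while (runningbufspace > hirunningspace) {
                    runningbufreq = 1;
                    tsleep(&runningbufreq, PVM, "wdrain", 0);
            }
    }

    /* Called when a write completes; the only source of the wakeup. */
    static void
    runningbufwakeup(struct buf *bp)
    {
            runningbufspace -= bp->b_runningbufspace;
            if (runningbufreq && runningbufspace <= lorunningspace) {
                    runningbufreq = 0;
                    wakeup(&runningbufreq);
            }
    }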
How to proceed? Serial console access is available if someone wants to tackle
it.
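If it would help in the meantime, I can run more ddb commands over the
serial console, e.g. (assuming this kernel's ddb has them; pid 140 is the
hung touch in the ps output below):

    db> trace 140
    db> show lockedvnods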
Matt
# sysctl vfs
vfs.nfs4.access_cache_timeout: 60
vfs.nfs4.nfsv3_commit_on_close: 0
vfs.ufs.dirhash_minsize: 2560
vfs.ufs.dirhash_maxmem: 2097152
vfs.ufs.dirhash_mem: 20750
vfs.ufs.dirhash_docheck: 0
vfs.devfs.noverflow: 32768
vfs.devfs.generation: 72
vfs.devfs.inodes: 72
vfs.devfs.topinode: 75
vfs.nfs.downdelayinitial: 12
vfs.nfs.downdelayinterval: 30
vfs.nfs.realign_test: 0
vfs.nfs.realign_count: 0
vfs.nfs.bufpackets: 4
vfs.nfs.reconnects: 0
vfs.nfs.iodmaxidle: 120
vfs.nfs.iodmin: 4
vfs.nfs.iodmax: 20
vfs.nfs.defect: 0
vfs.nfs.nfs_ip_paranoia: 1
vfs.nfs.diskless_valid: 0
vfs.nfs.diskless_rootpath:
vfs.nfs.access_cache_timeout: 60
vfs.nfs.nfsv3_commit_on_close: 0
vfs.pfs.vncache.entries: 0
vfs.pfs.vncache.maxentries: 0
vfs.pfs.vncache.hits: 0
vfs.pfs.vncache.misses: 0
vfs.vmiodirenable: 1
vfs.runningbufspace: 5294080
vfs.bufspace: 18628608
vfs.maxbufspace: 23756800
vfs.bufmallocspace: 57344
vfs.maxmallocbufspace: 1155072
vfs.lobufspace: 23035904
vfs.hibufspace: 23101440
vfs.bufreusecnt: 1137
vfs.buffreekvacnt: 0
vfs.bufdefragcnt: 0
vfs.lorunningspace: 524288
vfs.hirunningspace: 1048576
vfs.dirtybufferflushes: 0
vfs.altbufferflushes: 0
vfs.recursiveflushes: 1310
vfs.numdirtybuffers: 67
vfs.lodirtybuffers: 191
vfs.hidirtybuffers: 382
vfs.dirtybufthresh: 343
vfs.numfreebuffers: 764
vfs.lofreebuffers: 85
vfs.hifreebuffers: 170
vfs.getnewbufcalls: 1544
vfs.getnewbufrestarts: 0
vfs.flushwithdeps: 0
vfs.cache.numneg: 10
vfs.cache.numcache: 167
vfs.cache.numcalls: 5481
vfs.cache.dothits: 9
vfs.cache.dotdothits: 4
vfs.cache.numchecks: 4748
vfs.cache.nummiss: 786
vfs.cache.nummisszap: 0
vfs.cache.numposzaps: 0
vfs.cache.numposhits: 4330
vfs.cache.numnegzaps: 1
vfs.cache.numneghits: 351
vfs.cache.nchstats: 4330 351 1 0 786 0 8 90
vfs.cache.numcwdcalls: 3
vfs.cache.numcwdfail1: 0
vfs.cache.numcwdfail2: 0
vfs.cache.numcwdfail3: 0
vfs.cache.numcwdfail4: 0
vfs.cache.numcwdfound: 3
vfs.cache.numfullpathcalls: 0
vfs.cache.numfullpathfail1: 0
vfs.cache.numfullpathfail2: 0
vfs.cache.numfullpathfail3: 0
vfs.cache.numfullpathfail4: 0
vfs.cache.numfullpathfound: 0
vfs.write_behind: 1
vfs.read_max: 8
vfs.opv_numops: 64
vfs.usermount: 0
vfs.numvnodes: 352
vfs.wantfreevnodes: 25
vfs.freevnodes: 189
vfs.reassignbufcalls: 343
vfs.nameileafonly: 0
vfs.timestamp_precision: 0
vfs.worklist_len: 10
vfs.nfsrv.nfs_privport: 0
vfs.nfsrv.async: 0
vfs.nfsrv.commit_blks: 0
vfs.nfsrv.commit_miss: 0
vfs.nfsrv.realign_test: 0
vfs.nfsrv.realign_count: 0
vfs.nfsrv.gatherdelay: 10000
vfs.nfsrv.gatherdelay_v3: 0
vfs.ffs.doasyncfree: 1
vfs.ffs.doreallocblks: 1
db> ps
  pid proc     uarea     uid ppid pgrp    flag  stat wmesg wchan               cmd
  140 c0f57388 c84c4000    0  138  140 0004002 [SLPQ wdrain 0xc0661324][SLP]  touch
  138 c0f5754c c84c5000    0  137  138 0004002 [SLPQ wait 0xc0f5754c][SLP]    bash
  137 c0f57e20 c874a000 1001  135  137 0004102 [SLPQ wait 0xc0f57e20][SLP]    su
  135 c0f57710 c84c6000 1001  134  135 0004002 [SLPQ wait 0xc0f57710][SLP]    bash
  134 c1010000 c8b98000 1001  131  131 0000100 [SLPQ select 0xc0660d24][SLP]  sshd
  131 c0f571c4 c84c3000    0  128  131 0000100 [SLPQ sbwait 0xc10acd40][SLP]  sshd
  128 c0f57a98 c8748000    0    1  128 0000100 [SLPQ select 0xc0660d24][SLP]  sshd
  106 c10101c4 c8b9a000    0    1  106 0004002 [SLPQ ttyin 0xc0fe9610][SLP]   sh
   46 c0ecda98 c8326000    0    0    0 0000204 [SLPQ - 0xc7b66d18][SLP]       schedcpu
   45 c0ecdc5c c8327000    0    0    0 0000204 [SLPQ - 0xc0663dcc][SLP]       nfsiod 3
   44 c0ecde20 c8328000    0    0    0 0000204 [SLPQ - 0xc0663dc8][SLP]       nfsiod 2
   43 c0f54000 c8379000    0    0    0 0000204 [SLPQ - 0xc0663dc4][SLP]       nfsiod 1
   42 c0f541c4 c84ba000    0    0    0 0000204 [SLPQ - 0xc0663dc0][SLP]       nfsiod 0
   41 c0f54388 c84bb000    0    0    0 0000204 [SLPQ syncer 0xc065d4cc][SLP]  syncer
   40 c0f5454c c84bc000    0    0    0 0000204 [SLPQ vlruwt 0xc0f5454c][SLP]  vnlru
   39 c0f54710 c84bd000    0    0    0 0000204 [SLPQ psleep 0xc06612ec][SLP]  bufdaemon
   38 c0f548d4 c84be000    0    0    0 000020c [SLPQ pgzero 0xc066a634][SLP]  pagezero
   37 c0f54a98 c84bf000    0    0    0 0000204 [SLPQ psleep 0xc066a688][SLP]  vmdaemon
   36 c0f54c5c c84c0000    0    0    0 0000204 [SLPQ psleep 0xc066a644][SLP]  pagedaemon
   35 c0f54e20 c84c1000    0    0    0 0000204 [IWAIT]                        swi0: sio
   34 c0f57000 c84c2000    0    0    0 0000204 [SLPQ - 0xc0f4603c][SLP]       fdc0
    9 c0ebd54c c81da000    0    0    0 0000204 [SLPQ actask 0xc06550ac][SLP]  acpi_task2
    8 c0ebd710 c81db000    0    0    0 0000204 [SLPQ actask 0xc06550ac][SLP]  acpi_task1
    7 c0ebd8d4 c81dc000    0    0    0 0000204 [SLPQ actask 0xc06550ac][SLP]  acpi_task0
   33 c0ebda98 c831d000    0    0    0 0000204 [IWAIT]                        swi6: acpitaskq
    6 c0ebdc5c c831e000    0    0    0 0000204 [SLPQ - 0xc0ee3ac0][SLP]       kqueue taskq
   32 c0ebde20 c831f000    0    0    0 0000204 [IWAIT]                        swi6:+
    5 c0ecd000 c8320000    0    0    0 0000204 [SLPQ - 0xc0ee3b80][SLP]       thread taskq
   31 c0ecd1c4 c8321000    0    0    0 0000204 [IWAIT]                        swi6:+
   30 c0ecd388 c8322000    0    0    0 0000204 [IWAIT]                        swi6: task queue
   29 c0ecd54c c8323000    0    0    0 0000204 [SLPQ - 0xc0655320][SLP]       yarrow
    4 c0ecd710 c8324000    0    0    0 0000204 [SLPQ - 0xc0657da8][SLP]       g_down
    3 c0ecd8d4 c8325000    0    0    0 0000204 [SLPQ - 0xc0657da4][SLP]       g_up
    2 c0ea21c4 c7aa5000    0    0    0 0000204 [SLPQ - 0xc0657d9c][SLP]       g_event
   28 c0ea2388 c7aa6000    0    0    0 0000204 [IWAIT]                        swi1: net
   27 c0ea254c c7be7000    0    0    0 0000204 [IWAIT]                        swi4: vm
   26 c0ea2710 c7be8000    0    0    0 000020c [IWAIT]                        swi5: clock sio
   25 c0ea28d4 c7be9000    0    0    0 0000204 [IWAIT]                        irq15: ata1
   24 c0ea2a98 c7bea000    0    0    0 0000204 [IWAIT]                        irq14: ata0
   23 c0ea2c5c c7beb000    0    0    0 0000204 [IWAIT]                        irq13:
   22 c0ea2e20 c7bec000    0    0    0 0000204 [IWAIT]                        irq12:
   21 c0ebd000 c81d7000    0    0    0 0000204 [IWAIT]                        irq11:
   20 c0ebd1c4 c81d8000    0    0    0 0000204 [IWAIT]                        irq10:
   19 c0ebd388 c81d9000    0    0    0 0000204 [IWAIT]                        irq9: em0 atapci1+
   18 c0e9a000 c781b000    0    0    0 0000204 [IWAIT]                        irq8: rtc
   17 c0e9a1c4 c7a9c000    0    0    0 0000204 [IWAIT]                        irq7:
   16 c0e9a388 c7a9d000    0    0    0 0000204 [IWAIT]                        irq6: fdc0
   15 c0e9a54c c7a9e000    0    0    0 0000204 [IWAIT]                        irq5:
   14 c0e9a710 c7a9f000    0    0    0 0000204 [IWAIT]                        irq4: sio0
   13 c0e9a8d4 c7aa0000    0    0    0 0000204 [IWAIT]                        irq3: sio1
   12 c0e9aa98 c7aa1000    0    0    0 0000204 [IWAIT]                        irq1: atkbd0
   11 c0e9ac5c c7aa2000    0    0    0 0000204 [IWAIT]                        irq0: clk
   10 c0e9ae20 c7aa3000    0    0    0 000020c [CPU 0]                        idle
    1 c0ea2000 c7aa4000    0    0    1 0004200 [SLPQ wait 0xc0ea2000][SLP]    init
    0 c0657e40 c081f000    0    0    0 0000200 [SLPQ sched 0xc0657e40][SLP]   swapper