i386/92795: lockmgr panic during sys_exit file cleanup
Steve Watt
steve at Watt.COM
Fri Feb 3 23:20:28 PST 2006
>Number: 92795
>Category: i386
>Synopsis: lockmgr panic during sys_exit file cleanup
>Confidential: no
>Severity: critical
>Priority: medium
>Responsible: freebsd-i386
>State: open
>Quarter:
>Keywords:
>Date-Required:
>Class: sw-bug
>Submitter-Id: current-users
>Arrival-Date: Sat Feb 04 07:20:06 GMT 2006
>Closed-Date:
>Last-Modified:
>Originator: Steve Watt
>Release: FreeBSD 5.4-STABLE i386
>Organization:
Watt Consultants
>Environment:
System: FreeBSD wattres.Watt.COM 5.4-STABLE FreeBSD 5.4-STABLE #9: Wed Jan 11 23:14:23 PST 2006 root at wattres.Watt.COM:/usr/obj/usr/src/sys/WATTRES i386
Last updated to 5-STABLE around 1717Z 11 Jan 06
Machine is running inn and dump -L when the panic occurs.
>Description:
Something ugly is going on again/still between dump and (possibly) inn; there is/was
a deadlock between ffs_mksnap and msync. See my mail to -hackers around 22 May 05.
I suspect this is related somehow because it's my daily dump that's causing
the problem. (Two days in a row, this time, just to be annoying.)
Here's the kgdb session from this morning's failure:
(root at wattres) 12# kgdb kernel.debug /var/crash/vmcore.21
[GDB will not be able to debug user-mode threads: /usr/lib/libthread_db.so: Undefined symbol "ps_pglobal_lookup"]
GNU gdb 6.1.1 [FreeBSD]
Copyright 2004 Free Software Foundation, Inc.
GDB is free software, covered by the GNU General Public License, and you are
welcome to change it and/or distribute copies of it under certain conditions.
Type "show copying" to see the conditions.
There is absolutely no warranty for GDB. Type "show warranty" for details.
This GDB was configured as "i386-marcel-freebsd".
Unread portion of the kernel message buffer:
panic: lockmgr: thread 0xc1a14780, not exclusive lock holder 0xc1561600 unlocking
KDB: enter: panic
Uptime: 21h40m31s
Dumping 223 MB
16 32 48 64 80 96 112 128 144 160 176 192 208
#0 doadump () at pcpu.h:160
160 __asm __volatile("movl %%fs:0,%0" : "=r" (td));
(kgdb) info stack
#0 doadump () at pcpu.h:160
#1 0xc06627af in boot (howto=260) at /usr/src/sys/kern/kern_shutdown.c:412
#2 0xc0662a71 in panic (fmt=0xc08a5cd6 "lockmgr: thread %p, not %s %p unlocking")
at /usr/src/sys/kern/kern_shutdown.c:568
#3 0xc06570af in lockmgr (lkp=0xc205d0ac, flags=6, interlkp=0x0, td=0xc1a14780)
at /usr/src/sys/kern/kern_lock.c:420
#4 0xc06b2cee in vop_stdunlock (ap=0x0) at /usr/src/sys/kern/vfs_default.c:295
#5 0xc06b2b6e in vop_defaultop (ap=0x0) at /usr/src/sys/kern/vfs_default.c:157
#6 0xc07c6234 in ufs_vnoperate (ap=0x0) at /usr/src/sys/ufs/ufs/ufs_vnops.c:2827
#7 0xc07be44b in ufs_inactive (ap=0x0) at vnode_if.h:1044
#8 0xc07c6234 in ufs_vnoperate (ap=0x0) at /usr/src/sys/ufs/ufs/ufs_vnops.c:2827
#9 0xc06bbaa4 in vrele (vp=0xc205d000) at vnode_if.h:953
#10 0xc07c2407 in ufs_close (ap=0x0) at /usr/src/sys/ufs/ufs/ufs_vnops.c:317
#11 0xc07c6234 in ufs_vnoperate (ap=0x0) at /usr/src/sys/ufs/ufs/ufs_vnops.c:2827
#12 0xc06c6ca5 in vn_close (vp=0xc205d000, flags=0, file_cred=0x0, td=0x0)
at vnode_if.h:262
#13 0xc06c7c76 in vn_closefile (fp=0xc2111154, td=0x0)
at /usr/src/sys/kern/vfs_vnops.c:953
#14 0xc06440b8 in fdrop_locked (fp=0xc2111154, td=0x0) at file.h:288
#15 0xc064401c in fdrop (fp=0xc2111154, td=0x0) at /usr/src/sys/kern/kern_descrip.c:2105
#16 0xc0642a0b in closef (fp=0xc2111154, td=0xc1a14780)
at /usr/src/sys/kern/kern_descrip.c:1922
#17 0xc0641aea in fdfree (td=0xc1a14780) at /usr/src/sys/kern/kern_descrip.c:1622
#18 0xc064a87e in exit1 (td=0xc1a14780, rv=0) at /usr/src/sys/kern/kern_exit.c:237
#19 0xc064a401 in sys_exit (td=0x0, uap=0x0) at /usr/src/sys/kern/kern_exit.c:94
#20 0xc08306c4 in syscall (frame=
{tf_fs = 47, tf_es = 135921711, tf_ds = -1078001617, tf_edi = 33134, tf_esi = 134528110, tf_ebp = -1077943064, tf_isp = -806167196, tf_ebx = 672471660, tf_edx = 672471344, tf_ecx = 672471344, tf_eax = 1, tf_trapno = 12, tf_err = 2, tf_eip = 671936515, tf_cs = 31, tf_eflags = 646, tf_esp = -1077943076, tf_ss = 47}) at /usr/src/sys/i386/i386/trap.c:1014
#21 0xc081ed4f in Xint0x80_syscall () at /usr/src/sys/i386/i386/exception.s:201
---Type <return> to continue, or q <return> to quit---q
Quit
(kgdb) up
#1 0xc06627af in boot (howto=260) at /usr/src/sys/kern/kern_shutdown.c:412
412 doadump();
(kgdb) up
#2 0xc0662a71 in panic (fmt=0xc08a5cd6 "lockmgr: thread %p, not %s %p unlocking")
at /usr/src/sys/kern/kern_shutdown.c:568
568 boot(bootopt);
(kgdb) up
#3 0xc06570af in lockmgr (lkp=0xc205d0ac, flags=6, interlkp=0x0, td=0xc1a14780)
at /usr/src/sys/kern/kern_lock.c:420
420 panic("lockmgr: thread %p, not %s %p unlocking",
(kgdb) list
415
416 case LK_RELEASE:
417 if (lkp->lk_exclusivecount != 0) {
418 if (lkp->lk_lockholder != thr &&
419 lkp->lk_lockholder != LK_KERNPROC) {
420 panic("lockmgr: thread %p, not %s %p unlocking",
421 thr, "exclusive lock holder",
422 lkp->lk_lockholder);
423 }
424 if (lkp->lk_exclusivecount == 1) {
(kgdb) print *thr
$1 = {td_proc = 0xc168f54c, td_ksegrp = 0xc1b21a10, td_plist = {tqe_next = 0x0,
tqe_prev = 0xc168f55c}, td_kglist = {tqe_next = 0x0, tqe_prev = 0xc1b21a1c},
td_slpq = {tqe_next = 0x0, tqe_prev = 0xc176c780}, td_lockq = {tqe_next = 0x0,
tqe_prev = 0x0}, td_runq = {tqe_next = 0x0, tqe_prev = 0xc090342c}, td_selq = {
tqh_first = 0x0, tqh_last = 0xc1a147b0}, td_sleepqueue = 0xc176c780,
td_turnstile = 0xc18f1b40, td_tid = 100135, td_flags = 16842754, td_inhibitors = 0,
td_pflags = 0, td_dupfd = 0, td_wchan = 0x0, td_wmesg = 0x0, td_lastcpu = 0 '\0',
td_oncpu = 0 '\0', td_locks = -10048, td_blocked = 0x0, td_ithd = 0x0,
td_lockname = 0x0, td_contested = {lh_first = 0x0}, td_sleeplocks = 0x0,
td_intr_nesting_level = 0, td_pinned = 0, td_mailbox = 0x0, td_ucred = 0xc1ed2100,
td_standin = 0x0, td_prticks = 0, td_upcall = 0x0, td_sticks = 1692, td_uuticks = 0,
td_usticks = 0, td_intrval = 0, td_oldsigmask = {__bits = {0, 0, 0, 0}}, td_sigmask = {
__bits = {0, 0, 0, 0}}, td_siglist = {__bits = {0, 0, 0, 0}}, td_umtx = {
tqe_next = 0x0, tqe_prev = 0x0}, td_generation = 24058, td_sigstk = {ss_sp = 0x0,
ss_size = 0, ss_flags = 4}, td_kflags = 0, td_xsig = 0, td_profil_addr = 0,
td_profil_ticks = 0, td_base_pri = 77 'M', td_priority = 77 'M', td_pcb = 0xcff2dd90,
td_state = TDS_RUNNING, td_retval = {0, 672471344}, td_slpcallout = {c_links = {sle = {
sle_next = 0x0}, tqe = {tqe_next = 0x0, tqe_prev = 0xc5acfb18}},
c_time = 7706191, c_arg = 0xc1a14780, c_func = 0xc068169a <sleepq_timeout>,
c_flags = 8}, td_frame = 0xcff2dd38, td_kstack_obj = 0xc1a0bc60,
td_kstack = 3488792576, td_kstack_pages = 2, td_altkstack_obj = 0x0, td_altkstack = 0,
td_altkstack_pages = 0, td_critnest = 0, td_md = {md_savecrit = 582},
td_sched = 0xc1a148d0}
(kgdb) print *thr->td_proc
$2 = {p_list = {le_next = 0xc1e69000, le_prev = 0xc1e6ac5c}, p_ksegrps = {
tqh_first = 0xc1b21a10, tqh_last = 0xc1b21a14}, p_threads = {tqh_first = 0xc1a14780,
tqh_last = 0xc1a14788}, p_suspended = {tqh_first = 0x0, tqh_last = 0xc168f564},
p_ucred = 0xc1ed2100, p_fd = 0xc23e2500, p_fdtol = 0x0, p_stats = 0xc1f40200,
p_limit = 0xc1c81300, p_unused1 = 0x0, p_sigacts = 0xc23d0000, p_flag = 24576,
p_sflag = 1, p_state = PRS_NORMAL, p_pid = 33134, p_hash = {le_next = 0x0,
le_prev = 0xc14691b8}, p_pglist = {le_next = 0x0, le_prev = 0xc25513dc},
p_pptr = 0xc2551388, p_sibling = {le_next = 0x0, le_prev = 0xc25513f0}, p_children = {
lh_first = 0x0}, p_mtx = {mtx_object = {lo_class = 0xc09012dc,
lo_name = 0xc08a6b18 "process lock", lo_type = 0xc08a6b18 "process lock",
lo_flags = 4390912, lo_list = {tqe_next = 0xc168f77c, tqe_prev = 0xc168f404},
lo_witness = 0x0}, mtx_lock = 4, mtx_recurse = 0}, p_oppid = 0,
p_vmspace = 0xc254ea8c, p_swtime = 704, p_realtimer = {it_interval = {tv_sec = 0,
tv_usec = 0}, it_value = {tv_sec = 0, tv_usec = 0}}, p_runtime = {sec = 2,
frac = 1951776024854486118}, p_uu = 0, p_su = 0, p_iu = 0, p_uticks = 67,
p_sticks = 1742, p_iticks = 0, p_profthreads = 0, p_maxthrwaits = 0, p_traceflag = 0,
p_tracevp = 0x0, p_tracecred = 0x0, p_textvp = 0xc1aa5630, p_siglist = {__bits = {0,
0, 0, 0}}, p_lock = 0 '\0', p_sigiolst = {slh_first = 0x0}, p_sigparent = 20,
p_sig = 0, p_code = 0, p_stops = 0, p_stype = 0, p_step = 0 '\0', p_pfsflags = 0 '\0',
p_nlminfo = 0x0, p_aioinfo = 0x0, p_singlethread = 0x0, p_suspcount = 0,
p_xthread = 0x0, p_boundary_count = 0, p_magic = 3203398350,
p_comm = "dump\000er", '\0' <repeats 12 times>, p_pgrp = 0xc1834d00,
p_sysent = 0xc09422e0, p_args = 0xc1ad7c00, p_cpulimit = 9223372036854775807,
p_nice = 0 '\0', p_xstat = 0, p_klist = {kl_lock = 0xc168f5b8, kl_list = {
slh_first = 0x0}}, p_numthreads = 1, p_numksegrps = 1, p_md = {
md_ldt = 0xc246ba20}, p_itcallout = {c_links = {sle = {sle_next = 0x0}, tqe = {
tqe_next = 0x0, tqe_prev = 0x0}}, c_time = 0, c_arg = 0x0, c_func = 0,
c_flags = 8}, p_unused2 = 0x0, p_acflag = 0, p_ru = 0xc1d47980, p_peers = 0x0,
p_leader = 0xc168f54c, p_emuldata = 0x0, p_label = 0x0, p_sched = 0xc168f710}
... so it seems to be dump that died on exit.
(kgdb) up
#4 0xc06b2cee in vop_stdunlock (ap=0x0) at /usr/src/sys/kern/vfs_default.c:295
295 return (lockmgr(vp->v_vnlock, ap->a_flags | LK_RELEASE, VI_MTX(vp),
(kgdb) list
290 struct thread *a_td;
291 } */ *ap;
292 {
293 struct vnode *vp = ap->a_vp;
294
295 return (lockmgr(vp->v_vnlock, ap->a_flags | LK_RELEASE, VI_MTX(vp),
296 ap->a_td));
297 }
298
299 /* See above. */
(kgdb) list -
280 ap->a_td, "vop_stdlock", vp->filename, vp->line));
281 #endif
282 }
283
284 /* See above. */
285 int
286 vop_stdunlock(ap)
287 struct vop_unlock_args /* {
288 struct vnode *a_vp;
289 int a_flags;
(kgdb) up
#5 0xc06b2b6e in vop_defaultop (ap=0x0) at /usr/src/sys/kern/vfs_default.c:157
157 return (VOCALL(default_vnodeop_p, ap->a_desc->vdesc_offset, ap));
(kgdb) list
152 */
153 int
154 vop_defaultop(struct vop_generic_args *ap)
155 {
156
157 return (VOCALL(default_vnodeop_p, ap->a_desc->vdesc_offset, ap));
158 }
159
160 /*
161 * Helper function to panic on some bad VOPs in some filesystems.
(kgdb) up
#6 0xc07c6234 in ufs_vnoperate (ap=0x0) at /usr/src/sys/ufs/ufs/ufs_vnops.c:2827
2827 return (VOCALL(ufs_vnodeop_p, ap->a_desc->vdesc_offset, ap));
(kgdb) list
2822 ufs_vnoperate(ap)
2823 struct vop_generic_args /* {
2824 struct vnodeop_desc *a_desc;
2825 } */ *ap;
2826 {
2827 return (VOCALL(ufs_vnodeop_p, ap->a_desc->vdesc_offset, ap));
2828 }
2829
2830 int
2831 ufs_vnoperatefifo(ap)
(kgdb) up
#7 0xc07be44b in ufs_inactive (ap=0x0) at vnode_if.h:1044
1044 rc = VCALL(vp, VOFFSET(vop_unlock), &a);
(kgdb) list
1039 a.a_flags = flags;
1040 a.a_td = td;
1041 #ifdef DEBUG_VFS_LOCKS
1042 vop_unlock_pre(&a);
1043 #endif
1044 rc = VCALL(vp, VOFFSET(vop_unlock), &a);
1045 CTR3(KTR_VOP, "VOP_UNLOCK(vp 0x%lX, flags %ld, td 0x%lX)", vp, flags, td);
1046 if (rc == 0) {
1047 } else {
1048 }
(kgdb) list -
1029 extern struct vnodeop_desc vop_unlock_desc;
1030 static __inline int VOP_UNLOCK(
1031 struct vnode *vp,
1032 int flags,
1033 struct thread *td)
1034 {
1035 struct vop_unlock_args a;
1036 int rc;
1037 a.a_desc = VDESC(vop_unlock);
1038 a.a_vp = vp;
(kgdb) up
#8 0xc07c6234 in ufs_vnoperate (ap=0x0) at /usr/src/sys/ufs/ufs/ufs_vnops.c:2827
2827 return (VOCALL(ufs_vnodeop_p, ap->a_desc->vdesc_offset, ap));
(kgdb) list
2822 ufs_vnoperate(ap)
2823 struct vop_generic_args /* {
2824 struct vnodeop_desc *a_desc;
2825 } */ *ap;
2826 {
2827 return (VOCALL(ufs_vnodeop_p, ap->a_desc->vdesc_offset, ap));
2828 }
2829
2830 int
2831 ufs_vnoperatefifo(ap)
(kgdb) up
#9 0xc06bbaa4 in vrele (vp=0xc205d000) at vnode_if.h:953
953 rc = VCALL(vp, VOFFSET(vop_inactive), &a);
(kgdb) list
948 a.a_desc = VDESC(vop_inactive);
949 a.a_vp = vp;
950 a.a_td = td;
951 ASSERT_VI_UNLOCKED(vp, "VOP_INACTIVE");
952 ASSERT_VOP_LOCKED(vp, "VOP_INACTIVE");
953 rc = VCALL(vp, VOFFSET(vop_inactive), &a);
954 CTR2(KTR_VOP, "VOP_INACTIVE(vp 0x%lX, td 0x%lX)", vp, td);
955 if (rc == 0) {
956 ASSERT_VI_UNLOCKED(vp, "VOP_INACTIVE");
957 ASSERT_VOP_UNLOCKED(vp, "VOP_INACTIVE");
... OK, that's weird, there's an ASSERT_VOP_LOCKED() here. Does that mean
that the assert_VOP_LOCKED doesn't check for exclusive?
(kgdb) up
#10 0xc07c2407 in ufs_close (ap=0x0) at /usr/src/sys/ufs/ufs/ufs_vnops.c:317
317 vrele(vp);
(kgdb) list
312 * XXX - EAGAIN is returned to prevent vn_close from
313 * repeating the vrele operation.
314 */
315 if (vp->v_type == VREG && VTOI(vp)->i_effnlink == 0) {
316 (void) vn_start_write(vp, &mp, V_WAIT);
317 vrele(vp);
318 vn_finished_write(mp);
319 return (EAGAIN);
320 }
321 }
(kgdb) up
#11 0xc07c6234 in ufs_vnoperate (ap=0x0) at /usr/src/sys/ufs/ufs/ufs_vnops.c:2827
2827 return (VOCALL(ufs_vnodeop_p, ap->a_desc->vdesc_offset, ap));
(kgdb) up
#12 0xc06c6ca5 in vn_close (vp=0xc205d000, flags=0, file_cred=0x0, td=0x0)
at vnode_if.h:262
262 rc = VCALL(vp, VOFFSET(vop_close), &a);
(kgdb) up
#13 0xc06c7c76 in vn_closefile (fp=0xc2111154, td=0x0)
at /usr/src/sys/kern/vfs_vnops.c:953
953 error = vn_close(vp, fp->f_flag, fp->f_cred, td);
(kgdb) up
#14 0xc06440b8 in fdrop_locked (fp=0xc2111154, td=0x0) at file.h:288
288 return ((*fp->f_ops->fo_close)(fp, td));
(kgdb) list
283 fo_close(fp, td)
284 struct file *fp;
285 struct thread *td;
286 {
287
288 return ((*fp->f_ops->fo_close)(fp, td));
289 }
290
291 static __inline int
292 fo_kqfilter(fp, kn)
(kgdb) up
#15 0xc064401c in fdrop (fp=0xc2111154, td=0x0) at /usr/src/sys/kern/kern_descrip.c:2105
2105 return (fdrop_locked(fp, td));
(kgdb) up
#16 0xc0642a0b in closef (fp=0xc2111154, td=0xc1a14780)
at /usr/src/sys/kern/kern_descrip.c:1922
1922 return (fdrop(fp, td));
(kgdb) up
#17 0xc0641aea in fdfree (td=0xc1a14780) at /usr/src/sys/kern/kern_descrip.c:1622
1622 (void) closef(*fpp, td);
(kgdb) up
#18 0xc064a87e in exit1 (td=0xc1a14780, rv=0) at /usr/src/sys/kern/kern_exit.c:237
237 fdfree(td);
(kgdb) up
#19 0xc064a401 in sys_exit (td=0x0, uap=0x0) at /usr/src/sys/kern/kern_exit.c:94
94 exit1(td, W_EXITCODE(uap->rval, 0));
>How-To-Repeat:
There doesn't appear to be an iron-clad method; I believe it's a race
condition of some sort.
On my system, my daily dumps cause some kind of panic about once every 2-3 weeks.
>Fix:
Maybe I should pull up to 6-STABLE, but that's kinda painful as it's a semi-production box.
>Release-Note:
>Audit-Trail:
>Unformatted:
More information about the freebsd-i386
mailing list