LOR/page fault panic vfs_mountroot
othermark
atkin901 at yahoo.com
Thu Apr 21 13:01:44 PDT 2005
More info on how this can be reproduced is below.
othermark wrote:
> Kris Kennaway wrote:
>
>> On Wed, Apr 20, 2005 at 01:04:08PM -0700, othermark wrote:
>>> Current as of a few minutes ago. LOR/panic. Dual processor box.
>>>
>>> kernel has vlan, ipfw, and dummynet enabled, but this doesn't
>>> look like the problem.
>>>
>>> Curiously, booting single user and mounting root there doesn't
>>> panic, but it does panic if you try to 'exit' to multiuser.
>>>
>>> [...]
>>> Timecounters tick every 1.000 msec
>>> ipfw2 (+ipv6) initialized, divert loadable, rule-based forwarding
>>> disabled, default to accept, logging disabled
>>> ad0: 19092MB <WDC WD200EB-32CSF0 04.01B04> at ata0-master UDMA33
>>> acd0: CDROM <TOSHIBA CD-ROM XM-6702B/1007> at ata1-master UDMA33
>>> ATA PseudoRAID loaded
>>> SMP: AP CPU #1 Launched!
>>> Trying to mount root from ufs:/dev/ad0s1a
>>> lock order reversal
>>> 1st 0xc0a2d740 vm page queue mutex (vm page queue mutex)
>>> @ /usr/src/sys/kern/vfs_bio.c:1485
>>> 2nd 0xc25e4d6c vnode interlock (vnode interlock)
>>> @ /usr/src/sys/kern/vfs_subr.c:1992
>>
>> This has been reported a half-dozen times or so.
>>
>>> Fatal trap 12: page fault while in kernel mode
>>> cpuid = 0; apic id = 01
>>> fault virtual address = 0x4ac0c092
>>> fault code = supervisor read, page not present
>>> instruction pointer = 0x20:0xc0703f88
>>> stack pointer = 0x28:0xe5092b78
>>> frame pointer = 0x28:0xe5092b78
>>> code segment = base 0x0, limit 0xfffff, type 0x1b
>>> = DPL 0, pres 1, def32 1, gran 1
>>> processor eflags = interrupt enabled, resume, IOPL = 0
>>> current process = 73 (sysctl)
>>> [thread pid 73 tid 100060 ]
>>> Stopped at strlen+0x8: cmpb $0,0(%edx)
>>> db> show alllocks
>>> Process 73 (sysctl) thread 0xc23b2600 (100060)
>>> exclusive sx sysctl lock r = 0 (0xc09d1c60) locked
>>> @ /usr/src/sys/kern/kern_sysctl.c:1335
>>> exclusive sleep mutex Giant r = 0 (0xc09d1620) locked
>>> @ /usr/src/sys/kern/kern_sysctl.c:1273
>>
>> I think this one might be new. Please obtain a debugging traceback.
>
> Not too familiar with ddb, at least not enough to know which
> address/offset to expand to see which oid is causing the failure.
>
> db> where
> Tracing pid 73 tid 100060 td 0xc23b2600
> strlen(4ac0c092,c091efb7,1,c09d1228,0) at strlen+0x8
> sysctl_sysctl_name(c096d3a0,e5092c74,3,e5092bfc,e5092bfc) at
> sysctl_sysctl_name+0x10f
> sysctl_root(0,e5092c6c,5,e5092bfc,c23b2600) at sysctl_root+0x154
> userland_sysctl(c23b2600,e5092c6c,5,bfbfdc70,bfbfdbfc) at
> userland_sysctl+0x13c
> __sysctl(c23b2600,e5092d04,18,3ff,6) at __sysctl+0xb7
> syscall(bfbf003b,bfbf003b,bfbf003b,bfbfdbfc,bfbfdc00) at syscall+0x2a0
> Xint0x80_syscall() at Xint0x80_syscall+0x1f
> --- syscall (202, FreeBSD ELF32, __sysctl), eip = 0x280be67f, esp =
> 0xbfbfdb7c,
> ebp = 0xbfbfdba8 ---
>
>
> (gdb) l *syscall+0x2a0
> 0xc088cc50 is in syscall (/usr/src/sys/i386/i386/trap.c:951).
> 946
> 947 STOPEVENT(p, S_SCE, narg);
> 948
> 949 PTRACESTOP_SC(p, td, S_PT_SCE);
> 950
> 951 error = (*callp->sy_call)(td, args);
> 952 }
> 953
> 954 switch (error) {
> 955 case 0:
>
>
> (gdb) l *__sysctl+0xb7
> 0xc0695487 is in __sysctl (/usr/src/sys/kern/kern_sysctl.c:1275).
> 1270 if (error)
> 1271 return (error);
> 1272
> 1273 mtx_lock(&Giant);
> 1274
> 1275 error = userland_sysctl(td, name, uap->namelen,
> 1276 uap->old, uap->oldlenp, 0,
> 1277 uap->new, uap->newlen, &j, 0);
> 1278 if (error && error != ENOMEM)
> 1279 goto done2;
>
> (gdb) l *userland_sysctl+0x13c
> 0xc069563c is in userland_sysctl (/usr/src/sys/kern/kern_sysctl.c:1340).
> 1335 SYSCTL_LOCK();
> 1336
> 1337 do {
> 1338 req.oldidx = 0;
> 1339 req.newidx = 0;
> 1340 error = sysctl_root(0, name, namelen, &req);
> 1341 } while (error == EAGAIN);
> 1342
> 1343 if (req.lock == REQ_WIRED && req.validlen > 0)
> 1344 vsunlock(req.oldptr, req.validlen);
>
> (gdb) l *sysctl_root+0x154
> 0xc06953b4 is in sysctl_root (/usr/src/sys/kern/kern_sysctl.c:1241).
> 1236 error = mac_check_system_sysctl(req->td->td_ucred, oid,
> arg1, ar
> g2,
> 1237 req);
> 1238 if (error != 0)
> 1239 return (error);
> 1240 #endif
> 1241 error = oid->oid_handler(oid, arg1, arg2, req);
> 1242
> 1243 return (error);
> 1244 }
> 1245
>
> (gdb) l *sysctl_sysctl_name+0x10f
> 0xc069448f is in sysctl_sysctl_name (/usr/src/sys/kern/kern_sysctl.c:555).
> 550 continue;
> 551
> 552 if (req->oldidx)
> 553 error = SYSCTL_OUT(req, ".", 1);
> 554 if (!error)
> 555 error = SYSCTL_OUT(req,
> oid->oid_name,
> 556 strlen(oid->oid_name));
> 557 if (error)
> 558 return (error);
> 559
>
>
> (gdb) l *strlen+0x8
> 0xc0703f88 is in strlen (/usr/src/sys/libkern/strlen.c:41).
> 36 strlen(str)
> 37 const char *str;
> 38 {
> 39 register const char *s;
> 40
> 41 for (s = str; *s; ++s);
> 42 return(s - str);
> 43 }
>
>
>
/etc/rc.d/preseedrandom does the following:
( ps -fauxww; sysctl -a; date; df -ib; dmesg; ps -fauxww; ) \
| dd of=/dev/random bs=8k 2>/dev/null
In this kernel, if I boot to single user and simply run 'sysctl -a',
I get this panic. Here's the output -- the corruption seems to start at
hw.kbd.keymap_restrict_change:
[...]
hw.intr_storm_threshold: 500
hw.availpages: 259958
hw.bus.devctl_disable: 0
hw.dc_quick: 1
hw.ste.rxsyncs: 0
hw.kbd.keymap_restrict_change:
0
hw.syscons.sa
Fer.keybonly: 1
hw.syscons.bellat: 1
hw.syscons.alsc_no_suspend_vt switch: 0
hw.butrsdma.total_bpageaps: 544
hw.busdm a.zone0.total_bp1ages: 512
hw.bu2sdma.zone0.free_:bpages: 512
aw. busdma.zone0.resperved_bpages: 0
hw.busdma.zone0g.active_bpages: e0
hw.busdma.zon e0.total_bouncedf: 0
hw.busdma.zaone0.total_deferured: 0
hw.busdmla.zone0.lowaddr:t 0xffffffff
hw. busdma.zone0.aliwgnment: 4096
hwh.busdma.zone0.boiundary: 0
hw.bulsdma.zone1.totale_bpages: 32
hw. in kernel mode
cpuid = 1; apic id = 00
fault virtual address = 0x4ac0c092
fault code = supervisor read, page not present
instruction pointer = 0x20:0xc0703f88
stack pointer = 0x28:0xe50a1b78
frame pointer = 0x28:0xe50a1b78
code segment = base 0x0, limit 0xfffff, type 0x1b
= DPL 0, pres 1, def32 1, gran 1
processor eflags = interrupt enabled, resume, IOPL = 0
current process = 73 (sysctl)
[thread pid 73 tid 100055 ]
Stopped at strlen+0x8: cmpb $0,0(%edx)
--
othermark
atkin901 at nospam dot yahoo dot com
(!wired)?(coffee++):(wired);
More information about the freebsd-current
mailing list