4.10-RELEASE and -STABLE crashing regularly under load

Tamas TEVESZ ice at wormhole.hu
Tue Jun 15 20:46:38 GMT 2004


hi folks,

[i've posted the following message to -bugs@ a while ago, but
then was directed here by a friend freebsder. while reposting, i
also corrected some minor facts i missed in the previous post]

i have a dell poweredge 2600 (4G ram, 2x2.8ghz xeon cpus, some disk,
full dmesg below), running a heavily loaded website (apache13, php, cgi,
pure-ftpd). this is a brand new 4.10-release install (since brought to sync
with -stable; both exhibit the exact same problem), which every once
in a while crashes badly. 4.10-R did that every ~2.5 days, 4.10-S did
it for the first time after one day. (before that, system was running
4.9-stable on a poweredge 4600 with one xeon cpu, no ht, no smp,
no nothing like that, and was very stable).

i cannot entirely rule out bad hardware as this is a brand new system,
but we haven't had many problems with dell stuff before.

everything i think is related is included below; if anything else
is needed, please just tell me.

thanks in advance.


misc related information:
==================================

# sysctl machdep.hlt_logical_cpus
machdep.hlt_logical_cpus: 1
# kldstat
Id Refs Address    Size     Name
 1    2 0xc0100000 1e5214   kernel
 2    1 0xc02e6000 21c4     accf_http.ko
#


information from gdb:
==================================

# gdb -k kernel.debug.1 vmcore.1
GNU gdb 4.18 (FreeBSD)
Copyright 1998 Free Software Foundation, Inc.
GDB is free software, covered by the GNU General Public License, and you are
welcome to change it and/or distribute copies of it under certain conditions.
Type "show copying" to see the conditions.
There is absolutely no warranty for GDB.  Type "show warranty" for details.
This GDB was configured as "i386-unknown-freebsd"...Deprecated bfd_read called at /usr/src/gnu/usr.bin/binutils/gdb/../../../../contrib/gdb/gdb/dbxread.c line 2627 in elfstab_build_psymtabs
Deprecated bfd_read called at /usr/src/gnu/usr.bin/binutils/gdb/../../../../contrib/gdb/gdb/dbxread.c line 933 in fill_symbuf

SMP 4 cpus
IdlePTD at physical address 0x00309000
initial pcb at physical address 0x0027b3c0
panicstr: page fault
panic messages:
---
Fatal trap 12: page fault while in kernel mode
mp_lock = 02000002; cpuid = 2; lapic.id = 06000000
fault virtual address   = 0xbfc00000
fault code    = supervisor write, page not present
instruction pointer     = 0x8:0xc0213fd9
stack pointer         = 0x10:0xfc749e04
frame pointer         = 0x10:0xfc749e10
code segment   = base 0x0, limit 0xfffff, type 0x1b
      = DPL 0, pres 1, def32 1, gran 1
processor eflags        = interrupt enabled, resume, IOPL = 0
current process         = 73408 (grep)
interrupt mask  = none <- SMP: XXX
trap number    = 12
panic: page fault
mp_lock = 02000002; cpuid = 2; lapic.id = 06000000
boot() called on cpu#2

syncing disks... 45 7
done
Uptime: 1d0h14m54s
amr0: flushing cache...done
amr1: flushing cache...done

dumping to dev #amrd/0x20001, offset 1048960
dump 3583 3582 3581 3580 3579 3578 3577 3576 3575 3574 3573 3572 3571 [...]
#0  dumpsys () at /usr/src/sys/kern/kern_shutdown.c:487
487   if (dumping++) {
(kgdb) bt
#0  dumpsys () at /usr/src/sys/kern/kern_shutdown.c:487
#1  0xc01664df in boot (howto=256) at /usr/src/sys/kern/kern_shutdown.c:316
#2  0xc0166938 in poweroff_wait (junk=0xc024ff79, howto=-1071318481) at /usr/src/sys/kern/kern_shutdown.c:595
#3  0xc0217e98 in trap_fatal (frame=0xfc749dc4, eva=3217031168) at /usr/src/sys/i386/i386/trap.c:974
#4  0xc0217b29 in trap_pfault (frame=0xfc749dc4, usermode=0, eva=3217031168) at /usr/src/sys/i386/i386/trap.c:867
#5  0xc02176c7 in trap (frame={tf_fs = 24, tf_es = -68485104, tf_ds = 134610960, tf_edi = -99396280, tf_esi = 0, tf_ebp = -59466224,
      tf_isp = -59466256, tf_ebx = 3, tf_edx = -1043777528, tf_ecx = 0, tf_eax = 1245573123, tf_trapno = 12, tf_err = 2, tf_eip = -1071562791,
      tf_cs = 8, tf_eflags = 66054, tf_esp = 134660096, tf_ss = 134660096}) at /usr/src/sys/i386/i386/trap.c:466
#6  0xc0213fd9 in pmap_qenter (va=0, m=0xfa135548, count=4) at /usr/src/sys/i386/i386/pmap.c:848
#7  0xc017711a in pipe_build_write_buffer (wpipe=0xfa135520, uio=0xfc749ed0) at /usr/src/sys/kern/sys_pipe.c:594
#8  0xc01772e0 in pipe_direct_write (wpipe=0xfa135520, uio=0xfc749ed0) at /usr/src/sys/kern/sys_pipe.c:709
#9  0xc0177682 in pipe_write (fp=0xce43cec0, uio=0xfc749ed0, cred=0xcd0c6080, flags=0, p=0xfbeb3ee0) at /usr/src/sys/kern/sys_pipe.c:827
#10 0xc0175a05 in dofilewrite (p=0xfbeb3ee0, fp=0xce43cec0, fd=1, buf=0x8068000, nbyte=16384, offset=-1, flags=0)
    at /usr/src/sys/sys/file.h:163
#11 0xc01758be in write (p=0xfbeb3ee0, uap=0xfc749f80) at /usr/src/sys/kern/sys_generic.c:329
#12 0xc02181c9 in syscall2 (frame={tf_fs = 47, tf_es = 47, tf_ds = 47, tf_edi = 134643712, tf_esi = 672187864, tf_ebp = -1077937456,
      tf_isp = -59465772, tf_ebx = 672188332, tf_edx = 672187864, tf_ecx = 0, tf_eax = 4, tf_trapno = 12, tf_err = 2, tf_eip = 672141560,
      tf_cs = 31, tf_eflags = 663, tf_esp = -1077937500, tf_ss = 47}) at /usr/src/sys/i386/i386/trap.c:1175
#13 0xc02056fb in Xint0x80_syscall ()
#14 0x280fedd2 in ?? ()
#15 0x280fed41 in ?? ()
#16 0x280fbc26 in ?? ()
#17 0x280a50d5 in ?? ()
#18 0x804ec04 in ?? ()
#19 0x804edc6 in ?? ()
#20 0x804eec5 in ?? ()
#21 0x804f0c7 in ?? ()
#22 0x804f3a4 in ?? ()
#23 0x80500f3 in ?? ()
#24 0x8049046 in ?? ()
(kgdb) list *0xc0213fd9
0xc0213fd9 is in pmap_qenter (/usr/src/sys/i386/i386/pmap.c:848).
843  void
844  pmap_qenter(vm_offset_t va, vm_page_t *m, int count)
845  {
846   while (count-- > 0) {
847     pt_entry_t *pte = vtopte(va);
848     *pte = VM_PAGE_TO_PHYS(*m) | PG_RW | PG_V | pgeflag;
849  #ifdef SMP
850     cpu_invlpg((void *)va);
851  #else
852     invltlb_1pg(va);
(kgdb)

dmesg:
==================================

Copyright (c) 1992-2004 The FreeBSD Project.
Copyright (c) 1979, 1980, 1983, 1986, 1988, 1989, 1991, 1992, 1993, 1994
	The Regents of the University of California. All rights reserved.
FreeBSD 4.10-STABLE #3: Mon Jun 14 12:39:29 CEST 2004
    root at mammut.swi.hu:/usr/obj/usr/src/sys/MAMMUT
Timecounter "i8254"  frequency 1193182 Hz
CPU: Intel(R) Xeon(TM) CPU 2.80GHz (2791.01-MHz 686-class CPU)
  Origin = "GenuineIntel"  Id = 0xf29  Stepping = 9
  Features=0xbfebfbff<FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CLFLUSH,DTS,ACPI,MMX,FXSR,SSE,SSE2,SS,HTT,TM,PBE>
  Hyperthreading: 2 logical CPUs
real memory  = 3757899776 (3669824K bytes)
avail memory = 3660173312 (3574388K bytes)
Changing APIC ID for IO APIC #0 from 0 to 8 on chip
Changing APIC ID for IO APIC #1 from 0 to 9 on chip
Changing APIC ID for IO APIC #2 from 0 to 10 on chip
Changing APIC ID for IO APIC #3 from 0 to 11 on chip
Changing APIC ID for IO APIC #4 from 0 to 12 on chip
Programming 24 pins in IOAPIC #0
IOAPIC #0 intpin 2 -> irq 0
Programming 24 pins in IOAPIC #1
Programming 24 pins in IOAPIC #2
Programming 24 pins in IOAPIC #3
Programming 24 pins in IOAPIC #4
FreeBSD/SMP: Multiprocessor motherboard: 4 CPUs
 cpu0 (BSP): apic id:  0, version: 0x00050014, at 0xfee00000
 cpu1 (AP):  apic id:  1, version: 0x00050014, at 0xfee00000
 cpu2 (AP):  apic id:  6, version: 0x00050014, at 0xfee00000
 cpu3 (AP):  apic id:  7, version: 0x00050014, at 0xfee00000
 io0 (APIC): apic id:  8, version: 0x00178020, at 0xfec00000
 io1 (APIC): apic id:  9, version: 0x00178020, at 0xfec80000
 io2 (APIC): apic id: 10, version: 0x00178020, at 0xfec81000
 io3 (APIC): apic id: 11, version: 0x00178020, at 0xfec82000
 io4 (APIC): apic id: 12, version: 0x00178020, at 0xfec82800
Preloaded elf kernel "kernel" at 0xc02ea000.
Preloaded elf module "accf_http.ko" at 0xc02ea09c.
Warning: Pentium 4 CPU: PSE disabled
module_register_init: MOD_LOAD (accf_http, c0180dc0, 0xc02e7a60) error 17
Pentium Pro MTRR support enabled
md0: Malloc disk
Using $PIR table, 12 entries at 0xc00fc160
npx0: <math processor> on motherboard
npx0: INT 16 interface
pcib0: <Host to PCI bridge> on motherboard
IOAPIC #0 intpin 16 -> irq 2
pci0: <PCI bus> on pcib0
pcib1: <PCI to PCI bridge (vendor=8086 device=2543)> at device 2.0 on pci0
pci1: <PCI bus> on pcib1
pci1: <unknown card> (vendor=0x8086, dev=0x1461) at 28.0
pcib2: <PCI to PCI bridge (vendor=8086 device=1460)> at device 29.0 on pci1
pci2: <PCI bus> on pcib2
pci1: <unknown card> (vendor=0x8086, dev=0x1461) at 30.0
pcib3: <PCI to PCI bridge (vendor=8086 device=1460)> at device 31.0 on pci1
IOAPIC #1 intpin 4 -> irq 5
pci3: <PCI bus> on pcib3
em0: <Intel(R) PRO/1000 Network Connection, Version - 1.7.25> port 0xece0-0xecff mem 0xfdcc0000-0xfdcdffff,0xfdce0000-0xfdcfffff irq 5 at device 1.0 on pci3
em0:  Speed:N/A  Duplex:N/A
pcib4: <PCI to PCI bridge (vendor=8086 device=2545)> at device 3.0 on pci0
pci4: <PCI bus> on pcib4
pci4: <unknown card> (vendor=0x8086, dev=0x1461) at 28.0
pcib5: <PCI to PCI bridge (vendor=8086 device=1460)> at device 29.0 on pci4
pci5: <PCI bus> on pcib5
pci4: <unknown card> (vendor=0x8086, dev=0x1461) at 30.0
pcib6: <PCI to PCI bridge (vendor=8086 device=1460)> at device 31.0 on pci4
pci6: <PCI bus> on pcib6
pcib7: <PCI to PCI bridge (vendor=8086 device=2547)> at device 4.0 on pci0
pci7: <PCI bus> on pcib7
pci7: <unknown card> (vendor=0x8086, dev=0x1461) at 28.0
pcib8: <PCI to PCI bridge (vendor=8086 device=1460)> at device 29.0 on pci7
IOAPIC #3 intpin 0 -> irq 7
pci8: <PCI bus> on pcib8
amr0: <LSILogic MegaRAID> mem 0xf7ff0000-0xf7ffffff irq 7 at device 8.0 on pci8
amr0: <LSILogic PERC 4/Di> Firmware 2.48, BIOS 1.06, 128MB RAM
pci7: <unknown card> (vendor=0x8086, dev=0x1461) at 30.0
pcib9: <PCI to PCI bridge (vendor=8086 device=1460)> at device 31.0 on pci7
pci10: <PCI bus> on pcib9
pcib10: <PCI to PCI bridge (vendor=8086 device=b154)> at device 6.0 on pci10
IOAPIC #4 intpin 1 -> irq 11
pci11: <PCI bus> on pcib10
pcib11: <PCI to PCI bridge (vendor=8086 device=b154)> at device 0.0 on pci11
IOAPIC #4 intpin 0 -> irq 13
pci12: <PCI bus> on pcib11
amr1: <LSILogic MegaRAID> mem 0xe8000000-0xefffffff irq 13 at device 0.0 on pci12
amr1: <LSILogic PERC 3/DC> Firmware 1.80, BIOS 3.29, 128MB RAM
pci11: <unknown card> (vendor=0x1077, dev=0x1216) at 1.0 irq 11
pci0: <UHCI USB controller> at 29.0 irq 2
pcib12: <Intel 82801BA/BAM (ICH2) Hub to PCI bridge> at device 30.0 on pci0
pci13: <PCI bus> on pcib12
pci13: <ATI Mach64-GR graphics accelerator> at 4.0
isab0: <PCI to ISA bridge (vendor=8086 device=2480)> at device 31.0 on pci0
isa0: <ISA bus> on isab0
atapci0: <Intel ICH3 ATA100 controller> port 0xfc00-0xfc0f,0-0x3,0-0x7,0-0x3,0-0x7 irq 2 at device 31.1 on pci0
ata0: at 0x1f0 irq 14 on atapci0
ata1: at 0x170 irq 15 on atapci0
orm0: <Option ROMs> at iomem 0xc0000-0xc7fff,0xec000-0xeffff on isa0
pmtimer0 on isa0
fdc0: <NEC 72065B or clone> at port 0x3f0-0x3f5,0x3f7 irq 6 drq 2 on isa0
fdc0: FIFO enabled, 8 bytes threshold
fd0: <1440-KB 3.5" drive> on fdc0 drive 0
atkbdc0: <Keyboard controller (i8042)> at port 0x60,0x64 on isa0
atkbd0: <AT Keyboard> flags 0x1 irq 1 on atkbdc0
kbd0 at atkbd0
vga0: <Generic ISA VGA> at port 0x3c0-0x3df iomem 0xa0000-0xbffff on isa0
sc0: <System console> at flags 0x100 on isa0
sc0: VGA <16 virtual consoles, flags=0x300>
sio0: configured irq 4 not in bitmap of probed irqs 0
sio0 at port 0x3f8-0x3ff irq 4 flags 0x10 on isa0
sio0: type 8250
sio1: configured irq 3 not in bitmap of probed irqs 0
APIC_IO: Testing 8254 interrupt delivery
APIC_IO: routing 8254 via IOAPIC #0 intpin 2
IP packet filtering initialized, divert disabled, rule-based forwarding enabled, default to accept, logging disabled
IP Filter: v3.4.31 initialized.  Default = pass all, Logging = enabled
ata0-slave: ATAPI identify retries exceeded
SMP: AP CPU #1 Launched!
SMP: AP CPU #2 Launched!
acd0: CDROM <TEAC CD-ROM CD-224E> at ata0-master PIO4
amrd0: <LSILogic MegaRAID logical drive> on amr0
amrd0: 34680MB (71024640 sectors) RAID 1 (optimal)
amrd1: <LSILogic MegaRAID logical drive> on amr0
amrd1: 139900MB (286515200 sectors) RAID 0 (optimal)
amrd2: <LSILogic MegaRAID logical drive> on amr1
amrd2: 450814MB (923267072 sectors) RAID 5 (optimal)
amrd3: <LSILogic MegaRAID logical drive> on amr1
amrd3: 450814MB (923267072 sectors) RAID 5 (optimal)
SMP: AP CPU #3 Launched!
pass0 at amr0 bus 0 target 6 lun 0
pass0: <PE/PV 1x6 SCSI BP 1.1> Fixed Processor SCSI-2 device
pass1 at amr1 bus 0 target 6 lun 0
pass1: <DELL PV22XS E.14> Fixed Processor SCSI-3 device
pass2 at amr1 bus 1 target 6 lun 0
pass2: <DELL PV22XS E.14> Fixed Processor SCSI-3 device
Mounting root from ufs:/dev/amrd0s1a
WARNING: / was not properly dismounted

config:
==================================

machine		i386
cpu		I686_CPU
ident		mammut
maxusers	0

options         NMBCLUSTERS=32768
options		PMAP_SHPGPERPROC=256
options		ACCEPT_FILTER_HTTP

makeoptions	DEBUG=-g

options 	INET			#InterNETworking
options 	FFS			#Berkeley Fast Filesystem
options 	FFS_ROOT		#FFS usable as root device [keep this!]
options 	SOFTUPDATES		#Enable FFS soft updates support
options 	UFS_DIRHASH		#Improve performance on big directories
options 	MFS			#Memory Filesystem
options 	MD_ROOT			#MD is a potential root device
options 	CD9660			#ISO 9660 Filesystem
options 	CD9660_ROOT		#CD-ROM usable as root, CD9660 required
options 	PROCFS			#Process filesystem
options 	COMPAT_43		#Compatible with BSD 4.3 [KEEP THIS!]
options 	SCSI_DELAY=15000	#Delay (in ms) before probing SCSI
options 	UCONSOLE		#Allow users to grab the console
options 	USERCONFIG		#boot -c editor
options 	VISUAL_USERCONFIG	#visual boot -c editor
options 	KTRACE			#ktrace(1) support
options 	SYSVMSG			#SYSV-style message queues
options 	SYSVSEM			#SYSV-style semaphores
options 	P1003_1B		#Posix P1003_1B real-time extensions
options 	_KPOSIX_PRIORITY_SCHEDULING
options 	ICMP_BANDLIM		#Rate limit bad replies
options 	KBD_INSTALL_CDEV	# install a CDEV entry in /dev

options 	SMP			# Symmetric MultiProcessor Kernel
options 	APIC_IO			# Symmetric (APIC) I/O
options		IPFILTER
options		IPFILTER_LOG
options		IPFIREWALL
options		IPFIREWALL_DEFAULT_TO_ACCEPT

device		isa
device		eisa
device		pci

device		fdc0	at isa? port IO_FD1 irq 6 drq 2
device		fd0	at fdc0 drive 0

device		ata0	at isa? port IO_WD1 irq 14
device		ata
device		atapicd			# ATAPI CDROM drives
options 	ATA_STATIC_ID		#Static device numbering

device		scbus		# SCSI bus (required)
device		da		# Direct Access (disks)
device		pass		# Passthrough device (direct SCSI access)

device		amr		# AMI MegaRAID

device		atkbdc0	at isa? port IO_KBD
device		atkbd0	at atkbdc? irq 1 flags 0x1

device		vga0	at isa?

device		sc0	at isa? flags 0x100

device		npx0	at nexus? port IO_NPX irq 13

device		sio0	at isa? port IO_COM1 flags 0x10 irq 4
device		sio1	at isa? port IO_COM2 irq 3

device		miibus		# MII bus support
device		em

pseudo-device	loop		# Network loopback
pseudo-device	ether		# Ethernet support
pseudo-device	pty		# Pseudo-ttys (telnet etc)
pseudo-device	md		# Memory "disks"
pseudo-device	bpf		#Berkeley packet filter

-- 
[-]

``Early to rise, early to bed, makes a man healthy, wealthy and dead.''


More information about the freebsd-hackers mailing list