head -r365932 on PowerMac G5 (2 dual-core sockets): Crashes before login prompt if powerd is enabled in /etc/rc.conf

Mark Millard marklmi at yahoo.com
Tue Sep 22 19:35:33 UTC 2020


On 2020-Sep-22, at 12:15, Mark Millard <marklmi at yahoo.com> wrote:

> On 2020-Sep-22, at 12:00, Brandon Bergren <bdragon at FreeBSD.org> wrote:
> 
>> Weird, that read_scom has the "inlines not being inlined" problem.
>> 
>> Are you sure your local tree has my https://svnweb.freebsd.org/base/head/?view=revision&revision=365441 fixes?
> 
> # svnlite diff /usr/src/sys/conf/kern.pre.mk
> # svnlite info /usr/src
> Path: .
> Working Copy Root Path: /usr/src
> URL: https://svn.freebsd.org/base/head
> Relative URL: ^/head
> Repository Root: https://svn.freebsd.org/base
> Repository UUID: ccf9f872-aa2e-dd11-9fc8-001c23d0bc1f
> Revision: 365932
> Node Kind: directory
> Schedule: normal
> Last Changed Author: jrtc27
> Last Changed Rev: 365932
> Last Changed Date: 2020-09-20 16:20:18 -0700 (Sun, 20 Sep 2020)
> 
> But it used -O instead of -O2 . . .
> 
> # Meta data file /usr/obj/powerpc64vtsc_clang/powerpc.powerpc64/usr/src/powerpc.powerpc64/sys/GENERIC64vtsc-NODBG/pcr.o.meta
> CMD cc -target powerpc64-unknown-freebsd13.0 --sysroot=/usr/obj/powerpc64vtsc_clang/powerpc.powerpc64/usr/src/powerpc.powerpc64/tmp -B/usr/obj/powerpc64vtsc_clang/powerpc.powerpc64/usr/src/powerpc.powerpc64/tmp/usr/bin -c -O -pipe  -g -nostdinc  -I. -I/usr/src/sys -I/usr/src/sys/contrib/ck/include -I/usr/src/sys/contrib/libfdt -D_KERNEL -DHAVE_KERNEL_OPTION_HEADERS -include opt_global.h -fno-common    -msoft-float -fPIC -fno-omit-frame-pointer -fdebug-prefix-map=./machine=/usr/src/sys/powerpc/include -mno-altivec -msoft-float -mabi=elfv2 -ffreestanding -fwrapv -fstack-protector -gdwarf-2 -Wall -Wredundant-decls -Wnested-externs -Wstrict-prototypes -Wmissing-prototypes -Wpointer-arith -Wcast-qual -Wundef -Wno-pointer-sign -D__printf__=__freebsd_kprintf__ -Wmissing-include-dirs -fdiagnostics-show-option -Wno-unknown-pragmas -Wno-error-tautological-compare -Wno-error-empty-body -Wno-error-parentheses-equality -Wno-error-unused-function -Wno-error-pointer-sign -Wno-error-shift-negative-value -Wno-address-of-packed-member -Wno-format-zero-length     -std=iso9899:1999   /usr/src/sys/powerpc/cpufreq/pcr.c
> CMD ctfconvert -L VERSION -g pcr.o
> CWD /usr/obj/powerpc64vtsc_clang/powerpc.powerpc64/usr/src/powerpc.powerpc64/sys/GENERIC64vtsc-NODBG
> TARGET pcr.o
> OODATE offset.inc assym.inc

The logic:

.if defined(DEBUG)
.if ${MACHINE_ARCH} == "powerpc" || ${MACHINE_ARCH} == "powerpcspe"
# Work around clang 11 miscompile on 32 bit powerpc.
_MINUS_O=       -O2
.else
_MINUS_O=       -O
.endif
CTFFLAGS+=      -g
.else
_MINUS_O=       -O2
.endif

Seems to target 32 bit powerpc but the context I'm reporting is
for powerpc64 overall. Maybe the mis-compiles are not limited
to 32-bit powerpc variants?

In fact I have DEBUG defined despite not having witness and
the like. I include GENERIC64 and then am explicit about
overriding things that I care about, even some things that are
the same as in GENERIC64.

# more /usr/src/sys/powerpc/conf/GENERIC64vtsc-NODBG
#
# GENERIC -- Custom configuration for the powerpc/powerpc64
#

include "GENERIC64"

ident   GENERIC64vtsc-NODBG

makeoptions     DEBUG=-g                # Build kernel with gdb(1) debug symbols

nooptions       PS3                     # Sony Playstation 3               HACK!!! to allow sc

options         KDB                     # Enable kernel debugger support

# For minimum debugger support (stable branch) use:
options         KDB_TRACE               # Print a stack trace for a panic
options         DDB                     # Enable the kernel debugger
options         GDB                     # HACK!!! ...

options         ALT_BREAK_TO_DEBUGGER
options         BREAK_TO_DEBUGGER

# Extra stuff:
#options        VERBOSE_SYSINIT=0       # Enable verbose sysinit messages
#options        BOOTVERBOSE=1
#options        BOOTHOWTO=RB_VERBOSE
#options        KTR
#options        KTR_MASK=KTR_TRAP
##options       KTR_CPUMASK=0xF
#options        KTR_VERBOSE

# HACK!!! to allow sc for 2560x1440 display on Radeon X1950 that vt historically mishandled during booting
device          sc
#device                 kbdmux          # HACK: already listed by vt
options         SC_OFWFB        # OFW frame buffer
options         SC_DFLT_FONT    # compile font in
makeoptions     SC_DFLT_FONT=cp437


# Disable any extra checking for. . .
nooptions       DEADLKRES               # Enable the deadlock resolver
nooptions       INVARIANTS              # Enable calls of extra sanity checking
nooptions       INVARIANT_SUPPORT       # Extra sanity checks of internal structures, required by INVARIANTS
nooptions       WITNESS                 # Enable checks to detect deadlocks and cycles
nooptions       WITNESS_SKIPSPIN        # Don't run witness on spinlocks for speed
nooptions       DIAGNOSTIC
nooptions       MALLOC_DEBUG_MAXZONES   # Separate malloc(9) zones

# Avoid dynamic loads?
device          filemon
device          geom_label
device          mac_ntpd

# Avoid .ko dynamic loads that may mix dpcpu_off[cpuid] use with
# internal pcpu_entry_NAME things: dpcpu_off[cpuid] use attempts
# to convert 0xc???_????_????_???? to some 0xe???_????_????_????
# by addition but the dynamically loaded pcpu_entry_NAME is
# already at some 0xe???_????_????_???? and the addition
# overflows/truncates, producing rejected 0x0???_????_????_????  
# addresses.
#device                 epair
#options        IPFIREWALL   # ipfw.ko I've no interest in this
#options        compat_linux # no longer possible to statically include? (32 and 64 bit mixed support?)
#device                 linuxkpi     # linuxkpi.ko I've no interest in
#device                 siftr        # siftr.ko: TCP statistics that I've no interest in

>> On Tue, Sep 22, 2020, at 1:55 PM, Mark Millard wrote:
>>> 
>>> 
>>> On 2020-Sep-22, at 08:58, Brandon Bergren <bdragon at FreeBSD.org> wrote:
>>> 
>>>> In theory, this would also crash if you did "sysctl dev.cpu.0.freq".
>>> 
>>> Yep, that dies with a backtrace but without accepting input at
>>> the db> prompt.
>>> 
>>> The call stack looks to have the same sequence.
>>> 
>>> Different srr0 and lr values were listed when I tried
>>> that:
>>> 
>>> srr0=0x8004'4000'0000'0000 (0xc004'4000'0000'0000)
>>> . . .
>>> lr  =0x8004'4000'0000'0000 (0xc004'4000'0000'0000)
>>> 
>>> 
>>>> You sure the lack of a backtrace isn't just that you are using a nodebug config?
>>> 
>>> Back when I was using the FireWire based debugging I
>>> discovered that it continued to report material after
>>> the monitor updates stopped. I learned to not use
>>> the last message on screen to guess where it was
>>> having a problem, other than "sometime after the
>>> last message shown".
>>> 
>>>> Could you please disassemble read_scom?
>>> 
>>> Sure:
>>> 
>>> 0000000000ad7f24 <read_scom> addis   r2,r12,132
>>> 0000000000ad7f28 <read_scom+0x4> addi    r2,r2,220
>>> 0000000000ad7f2c <read_scom+0x8> mflr    r0
>>> 0000000000ad7f30 <read_scom+0xc> std     r31,-8(r1)
>>> 0000000000ad7f34 <read_scom+0x10> std     r0,16(r1)
>>> 0000000000ad7f38 <read_scom+0x14> stdu    r1,-64(r1)
>>> 0000000000ad7f3c <read_scom+0x18> mr      r31,r1
>>> 0000000000ad7f40 <read_scom+0x1c> std     r29,40(r31)
>>> 0000000000ad7f44 <read_scom+0x20> std     r30,48(r31)
>>> 0000000000ad7f48 <read_scom+0x24> bl      0000000000ad7e58 <mfmsr>
>>> 0000000000ad7f4c <read_scom+0x28> mr      r30,r3
>>> 0000000000ad7f50 <read_scom+0x2c> rldicl  r3,r3,48,1
>>> 0000000000ad7f54 <read_scom+0x30> rotldi  r3,r3,16
>>> 0000000000ad7f58 <read_scom+0x34> bl      0000000000ad7e6c <mtmsr>
>>> 0000000000ad7f5c <read_scom+0x38> bl      0000000000ad7e84 <isync>
>>> 0000000000ad7f60 <read_scom+0x3c> lis     r3,16512
>>> 0000000000ad7f64 <read_scom+0x40> ori     r3,r3,33024
>>> 0000000000ad7f68 <read_scom+0x44> mtspr   276,r3
>>> 0000000000ad7f6c <read_scom+0x48> bl      0000000000ad7e84 <isync>
>>> 0000000000ad7f70 <read_scom+0x4c> mfspr   r29,277
>>> 0000000000ad7f74 <read_scom+0x50> mr      r30,r29
>>> 0000000000ad7f78 <read_scom+0x54> rldicl  r29,r29,32,32
>>> 0000000000ad7f7c <read_scom+0x58> mfspr   r3,276
>>> 0000000000ad7f80 <read_scom+0x5c> mr      r3,r30
>>> 0000000000ad7f84 <read_scom+0x60> bl      0000000000ad7e6c <mtmsr>
>>> 0000000000ad7f88 <read_scom+0x64> bl      0000000000ad7e84 <isync>
>>> 0000000000ad7f8c <read_scom+0x68> mr      r3,r29
>>> 0000000000ad7f90 <read_scom+0x6c> ld      r30,48(r31)
>>> 0000000000ad7f94 <read_scom+0x70> ld      r29,40(r31)
>>> 0000000000ad7f98 <read_scom+0x74> addi    r1,r1,64
>>> 0000000000ad7f9c <read_scom+0x78> ld      r0,16(r1)
>>> 0000000000ad7fa0 <read_scom+0x7c> mtlr    r0
>>> 0000000000ad7fa4 <read_scom+0x80> ld      r31,-8(r1)
>>> 0000000000ad7fa8 <read_scom+0x84> blr
>>> 
>>> 
>>>> On Tue, Sep 22, 2020, at 12:46 AM, Mark Millard via freebsd-ppc wrote:
>>>>> 
>>>>> 
>>>>> On 2020-Sep-21, at 21:34, Mark Millard <marklmi at yahoo.com> wrote:
>>>>> 
>>>>>> This was discovered while doing a head -r363590 -> -r365932
>>>>>> upgrade to FreeBSD. (A non-debug system context.)
>>>>>> 
>>>>>> It first showed up only having updated the kernel. It still
>>>>>> shows up after updating world as well. It is now running:
>>>>>> 
>>>>>> # uname -apKU
>>>>>> FreeBSD FBSDG5L2 13.0-CURRENT FreeBSD 13.0-CURRENT #16 r365932M: Sun Sep 20 19:57:07 PDT 2020     root at FBSDFHUGE:/usr/obj/powerpc64vtsc_clang/powerpc.powerpc64/usr/src/powerpc.powerpc64/sys/GENERIC64vtsc-NODBG  powerpc powerpc64 1300115 1300115
>>>>>> 
>>>>>> but with /etc/rc.conf having powerd disabled:
>>>>>> 
>>>>>> #powerd_enable="YES"
>>>>>> 
>>>>>> The crash is now silent, not getting to the db> prompt
>>>>>> and not showing any messages or backtrace.
>>>>>> 
>>>>>> Prior to world being updated it crashed with a traceback.
>>>>>> A quick summary from a camera picture:
>>>>>> 
>>>>>> fatal kernel trap:
>>>>>> . . .
>>>>>> pid = 1126, comm = powerd
>>>>>> . . .
>>>>>> kernel PGM trap by 0: . . .
>>>>>> at pcr_get+0x4c
>>>>>> at CPUFREQ_DRV_GET+0x78
>>>>>> at cpufreq_get_frequency+0x20
>>>>>> at cpufreq_get_level+0x2c
>>>>>> at cf_get_method+0x20c
>>>>>> at CPUFREQ_GET+0x78
>>>>>> at cpufreq_curr_sysctl+0x70
>>>>>> at sysctl_root_handler_locked+0x10c
>>>>>> at sysctl_root+0x26c
>>>>>> at userland_sysctl+0x14c
>>>>>> at sys___sysctl+0x8c
>>>>>> at syscallenter+0x188
>>>>>> at syscall+0x60
>>>>>> at trap+0x498
>>>>>> at powerpc_interrupt+0x110
>>>>>> user SC trap . . .
>>>>>> 
>>>>>> After this I tried to make a dump and then proceeded
>>>>>> with disabling powerd in /etc/rc.conf and doing the
>>>>>> world update.
>>>>>> 
>>>>>> Unfortunately, while a dump was written, the core.txt
>>>>>> file from the -r365932 world boot that processed the
>>>>>> dump reported "invalid corefile" all over the place.
>>>>>> 
>>>>>> With powerd disabled the G5 seems to be operational.
>>>>>> But turning powerd back on in /etc/rc.conf and rebooting
>>>>>> prevents the boot from completing, no messages, no
>>>>>> db> prompt. So I now leave powerd disabled.
>>>>> 
>>>>> Some additional low-level information:
>>>>> 
>>>>> For exception 0x700 (program) the screen picture
>>>>> shows (but I've added ' digit separators for readability):
>>>>> 
>>>>> srr0=0x0 (0x4000'0000'0000'0000)
>>>>> . . .
>>>>> lr  =0x0 (0x4000'0000'0000'0000)
>>>>> 
>>>>> The kernel PGM trap notice does report:
>>>>> 
>>>>> ctr=0xc000'0000'00ad'7ad4
>>>>> (the start of pcr_get but with the 0xc
>>>>> prefix)
>>>>> 
>>>>> As a reminder, the summary reported pcr_get+0x4c.
>>>>> 
>>>>> objdump for /boot/kernel/kernel reports:
>>>>> 
>>>>> 0000000000ad7ad4 <pcr_get> addis   r2,r12,132
>>>>> 0000000000ad7ad8 <pcr_get+0x4> addi    r2,r2,1324
>>>>> 0000000000ad7adc <pcr_get+0x8> cmpldi  r4,0
>>>>> 0000000000ad7ae0 <pcr_get+0xc> beq     0000000000ad7b48 <pcr_get+0x74>
>>>>> 0000000000ad7ae4 <pcr_get+0x10> mflr    r0
>>>>> 0000000000ad7ae8 <pcr_get+0x14> std     r31,-8(r1)
>>>>> 0000000000ad7aec <pcr_get+0x18> std     r0,16(r1)
>>>>> 0000000000ad7af0 <pcr_get+0x1c> stdu    r1,-64(r1)
>>>>> 0000000000ad7af4 <pcr_get+0x20> mr      r31,r1
>>>>> 0000000000ad7af8 <pcr_get+0x24> std     r29,40(r31)
>>>>> 0000000000ad7afc <pcr_get+0x28> mr      r29,r3
>>>>> 0000000000ad7b00 <pcr_get+0x2c> li      r3,-1
>>>>> 0000000000ad7b04 <pcr_get+0x30> std     r30,48(r31)
>>>>> 0000000000ad7b08 <pcr_get+0x34> mr      r30,r4
>>>>> 0000000000ad7b0c <pcr_get+0x38> std     r3,32(r4)
>>>>> 0000000000ad7b10 <pcr_get+0x3c> std     r3,24(r4)
>>>>> 0000000000ad7b14 <pcr_get+0x40> std     r3,16(r4)
>>>>> 0000000000ad7b18 <pcr_get+0x44> std     r3,8(r4)
>>>>> 0000000000ad7b1c <pcr_get+0x48> std     r3,0(r4)
>>>>> 0000000000ad7b20 <pcr_get+0x4c> bl      0000000000ad7f2c <read_scom+0x8>
>>>>> 0000000000ad7b24 <pcr_get+0x50> rldicl  r3,r3,8,62
>>>>> 0000000000ad7b28 <pcr_get+0x54> li      r4,10000
>>>>> 0000000000ad7b2c <pcr_get+0x58> stw     r4,0(r30)
>>>>> 0000000000ad7b30 <pcr_get+0x5c> cmpldi  r3,1
>>>>> 0000000000ad7b34 <pcr_get+0x60> beq     0000000000ad7b50 <pcr_get+0x7c>
>>>>> 0000000000ad7b38 <pcr_get+0x64> cmpldi  r3,2
>>>>> 0000000000ad7b3c <pcr_get+0x68> bne     0000000000ad7b58 <pcr_get+0x84>
>>>>> 0000000000ad7b40 <pcr_get+0x6c> li      r3,2500
>>>>> 0000000000ad7b44 <pcr_get+0x70> b       0000000000ad7b54 <pcr_get+0x80>
>>>>> 0000000000ad7b48 <pcr_get+0x74> li      r3,22
>>>>> 0000000000ad7b4c <pcr_get+0x78> blr
>>>>> 0000000000ad7b50 <pcr_get+0x7c> li      r3,5000
>>>>> 0000000000ad7b54 <pcr_get+0x80> stw     r3,0(r30)
>>>>> 0000000000ad7b58 <pcr_get+0x84> std     r29,16(r30)
>>>>> 0000000000ad7b5c <pcr_get+0x88> ld      r30,48(r31)
>>>>> 0000000000ad7b60 <pcr_get+0x8c> ld      r29,40(r31)
>>>>> 0000000000ad7b64 <pcr_get+0x90> addi    r1,r1,64
>>>>> 0000000000ad7b68 <pcr_get+0x94> ld      r0,16(r1)
>>>>> 0000000000ad7b6c <pcr_get+0x98> li      r3,0
>>>>> 0000000000ad7b70 <pcr_get+0x9c> mtlr    r0
>>>>> 0000000000ad7b74 <pcr_get+0xa0> ld      r31,-8(r1)
>>>>> 0000000000ad7b78 <pcr_get+0xa4> blr
>> 

-- 
Brandon Bergren
bdragon at FreeBSD.org

===
Mark Millard
marklmi at yahoo.com
( dsl-only.net went
away in early 2018-Mar)


===
Mark Millard
marklmi at yahoo.com
( dsl-only.net went
away in early 2018-Mar)



More information about the freebsd-ppc mailing list