head -r312982 on pine64 (an A64): 4 "openssl speed"s (e.g.) in parallel lead to thermal poweroff despite powerd use

Mark Millard markmi at dsl-only.net
Thu Feb 9 12:41:20 UTC 2017


Attempting (back to back):

# openssl speed > /dev/null 2>&1 &
# openssl speed > /dev/null 2>&1 &
# openssl speed > /dev/null 2>&1 &
# openssl speed > /dev/null 2>&1 &

leads to an eventual sudden thermal poweroff on the pine64
(that has a heat sink and fan) for head -r312982 . (This
set of commands is by no means likely to be the only one
that causes such a poweroff.)

Context details:

# ps -aux | grep powerd
root   608   0.0  0.0  6192   440  -  Ss   03:40    0:00.10 /usr/sbin/powerd
root   720   0.0  0.1  6544  2136  1  S+   03:51    0:00.01 grep powerd

# more /usr/src/sys/arm64/conf/GENERIC-NODBG
#
# GENERIC -- Custom configuration for the arm64/aarch64
#

include "GENERIC"

ident   GENERIC-NODBG

makeoptions     DEBUG=-g                # Build kernel with gdb(1) debug symbols

options         ALT_BREAK_TO_DEBUGGER

options         KDB                     # Enable kernel debugger support

# For minimum debugger support (stable branch) use:
#options        KDB_TRACE               # Print a stack trace for a panic
options         DDB                     # Enable the kernel debugger

# Extra stuff:
#options        VERBOSE_SYSINIT         # Enable verbose sysinit messages
#options        BOOTVERBOSE=1
#options        BOOTHOWTO=RB_VERBOSE
#options        KTR
#options        KTR_MASK=KTR_TRAP
##options       KTR_CPUMASK=0xF
#options        KTR_VERBOSE

# Disable any extra checking for. . .
nooptions       DEADLKRES               # Enable the deadlock resolver
nooptions       INVARIANTS              # Enable calls of extra sanity checking
nooptions       INVARIANT_SUPPORT       # Extra sanity checks of internal structures, required by INVARIANTS
nooptions       WITNESS                 # Enable checks to detect deadlocks and cycles
nooptions       WITNESS_SKIPSPIN        # Don't run witness on spinlocks for speed
nooptions       DIAGNOSTIC
nooptions       MALLOC_DEBUG_MAXZONES   # Separate malloc(9) zones

# uname -apKU
FreeBSD pine64 12.0-CURRENT FreeBSD 12.0-CURRENT  r312982M  arm64 aarch64 1200020 1200020

(I've been holding at 312982 while investigating sh getting core files
on occasion. See:
https://lists.freebsd.org/pipermail/freebsd-arm/2017-February/015620.html .)

# svnlite status /usr/src/ | sort
?       /usr/src/sys/amd64/conf/GENERIC-DBG
?       /usr/src/sys/amd64/conf/GENERIC-NODBG
?       /usr/src/sys/arm/conf/BPIM3-DBG
?       /usr/src/sys/arm/conf/BPIM3-NODBG
?       /usr/src/sys/arm/conf/RPI2-DBG
?       /usr/src/sys/arm/conf/RPI2-NODBG
?       /usr/src/sys/arm64/conf/GENERIC-DBG
?       /usr/src/sys/arm64/conf/GENERIC-NODBG
?       /usr/src/sys/powerpc/conf/GENERIC64vtsc-DBG
?       /usr/src/sys/powerpc/conf/GENERIC64vtsc-NODBG
?       /usr/src/sys/powerpc/conf/GENERICvtsc-DBG
?       /usr/src/sys/powerpc/conf/GENERICvtsc-NODBG
M       /usr/src/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td
M       /usr/src/contrib/llvm/tools/lld/ELF/Target.cpp
M       /usr/src/lib/csu/powerpc64/Makefile
M       /usr/src/libexec/rtld-elf/Makefile
M       /usr/src/sys/arm/arm/gic.c
M       /usr/src/sys/boot/ofw/Makefile.inc
M       /usr/src/sys/boot/powerpc/Makefile.inc
M       /usr/src/sys/boot/powerpc/kboot/Makefile
M       /usr/src/sys/boot/uboot/Makefile.inc
M       /usr/src/sys/conf/Makefile.powerpc
M       /usr/src/sys/conf/kern.mk
M       /usr/src/sys/conf/kmod.mk
M       /usr/src/sys/ddb/db_main.c
M       /usr/src/sys/ddb/db_script.c
M       /usr/src/sys/dev/mlx5/diagnostics.h
M       /usr/src/sys/modules/zfs/Makefile
M       /usr/src/sys/powerpc/ofw/ofw_machdep.c

# svnlite diff /usr/src/sys/arm/arm/gic.c
Index: /usr/src/sys/arm/arm/gic.c
===================================================================
--- /usr/src/sys/arm/arm/gic.c  (revision 312982)
+++ /usr/src/sys/arm/arm/gic.c  (working copy)
@@ -672,9 +672,13 @@
 
        if (irq >= sc->nirqs) {
 #ifdef GIC_DEBUG_SPURIOUS
+#define EXPECTED_SPURIOUS_IRQ 1023
+           if (irq != EXPECTED_SPURIOUS_IRQ) {
                device_printf(sc->gic_dev,
-                   "Spurious interrupt detected: last irq: %d on CPU%d\n",
+                   "Spurious interrupt %d detected of %d: last irq: %d on CPU%d\n",
+                   irq, sc->nirqs,
                    sc->last_irq[PCPU_GET(cpuid)], PCPU_GET(cpuid));
+            }
 #endif
                return (FILTER_HANDLED);
        }
@@ -720,6 +724,16 @@
        if (irq < sc->nirqs)
                goto dispatch_irq;
 
+       if (irq != EXPECTED_SPURIOUS_IRQ) {
+#undef EXPECTED_SPURIOUS_IRQ
+#ifdef GIC_DEBUG_SPURIOUS
+               device_printf(sc->gic_dev,
+                   "Spurious end interrupt %d detected of %d: last irq: %d on CPU%d\n",
+                   irq, sc->nirqs,
+                   sc->last_irq[PCPU_GET(cpuid)], PCPU_GET(cpuid));
+#endif
+       }
+
        return (FILTER_HANDLED);
 }
 
Effectively that just disables the spurious interrupt notices that
were being generated: all the examples happen to have irq==1023.

/usr/src/sys/dev/mlx5/diagnostics.h has the removal of an
inappropriate const (as was later done by someone's check-in).
(It turns out clang allows updates to const members in
structs when the whole struct is assigned. gcc correctly
rejects such code.)

Other than the KERNCONF files, most of the rest of the changes
are tied to my powerpc64 and powerpc investigations of
clang use as the system compiler (there are problems -- and
I occasionally find additional ones).


===
Mark Millard
markmi at dsl-only.net



More information about the freebsd-arm mailing list