nvidia driver crash

From: Andriy Gapon <avg_at_FreeBSD.org>
Date: Thu, 20 Jan 2022 14:07:33 UTC
I've just had a kernel crash in the nvidia driver code on stable/13.
The crash seemingly has to do with freed memory (the 0xdeadc0de pattern is seen in
the register dump), but it is hard to tell anything beyond that as the crash is in
proprietary code.

Is there a way to report this to NVIDIA?
If (and that's a big if) they have FreeBSD support, then maybe they'll be able to
find out more details.

The driver is nvidia-driver-470.86.

Thank you.

Fatal trap 9: general protection fault while in kernel mode
cpuid = 5; apic id = 05
instruction pointer     = 0x20:0xffffffff829ccc90
stack pointer           = 0x28:0xfffffe02c1d7d840
frame pointer           = 0x28:0xfffffe021a9a5d20
code segment            = base 0x0, limit 0xfffff, type 0x1b
                         = DPL 0, pres 1, long 1, def32 0, gran 1
processor eflags        = interrupt enabled, resume, IOPL = 0
current process         = 80434 (plasmashell)
trap number             = 9
panic: general protection fault
cpuid = 5
time = 1642654429
KDB: stack backtrace:
db_trace_self_wrapper() at 0xffffffff805ca63b = db_trace_self_wrapper+0x2b/frame 
0xfffffe02c1d7d470
kdb_backtrace() at 0xffffffff808ae0c7 = kdb_backtrace+0x37/frame 0xfffffe02c1d7d520
vpanic() at 0xffffffff8086a2ec = vpanic+0x18c/frame 0xfffffe02c1d7d580
panic() at 0xffffffff80869f03 = panic+0x43/frame 0xfffffe02c1d7d5e0
trap_fatal() at 0xffffffff80b5ac35 = trap_fatal+0x375/frame 0xfffffe02c1d7d640
trap() at 0xffffffff80b5a0e7 = trap+0x67/frame 0xfffffe02c1d7d750
trap_check() at 0xffffffff80b5b069 = trap_check+0x29/frame 0xfffffe02c1d7d770
calltrap() at 0xffffffff80b36778 = calltrap+0x8/frame 0xfffffe02c1d7d770
--- trap 0x9, rip = 0xffffffff829ccc90, rsp = 0xfffffe02c1d7d840, rbp = 
0xfffffe021a9a5d20 ---
_nv035888rm() at 0xffffffff829ccc90 = _nv035888rm+0xb0/frame 0xfffffe021a9a5d20
??() at 0xfffff803cf6cb570/frame 0xdeadc0df00000000
Uptime: 12d16h10m41s
Dumping 6720 out of 32646 MB:..1%..11%..21%..31%..41%..51%..61%..71%..81%..91%

doadump (textdump=textdump@entry=1) at 
/usr/devel/git/trant/sys/kern/kern_shutdown.c:399
399             dumptid = curthread->td_tid;
(kgdb) bt
#0  doadump (textdump=textdump@entry=1) at 
/usr/devel/git/trant/sys/kern/kern_shutdown.c:399
#1  0xffffffff80869cef in kern_reboot (howto=260) at 
/usr/devel/git/trant/sys/kern/kern_shutdown.c:487
#2  0xffffffff8086a35f in vpanic (fmt=0xffffffff80c1a2f6 "%s", ap=<optimized 
out>) at /usr/devel/git/trant/sys/kern/kern_shutdown.c:920
#3  0xffffffff80869f03 in panic (fmt=<unavailable>) at 
/usr/devel/git/trant/sys/kern/kern_shutdown.c:844
#4  0xffffffff80b5ac35 in trap_fatal (frame=0xfffffe02c1d7d780, eva=0) at 
/usr/devel/git/trant/sys/amd64/amd64/trap.c:944
#5  0xffffffff80b5a0e7 in trap (frame=frame@entry=0xfffffe02c1d7d780) at 
/usr/devel/git/trant/sys/amd64/amd64/trap.c:249
#6  0xffffffff80b5b069 in trap_check (frame=0xfffffe02c1d7d780) at 
/usr/devel/git/trant/sys/amd64/amd64/trap.c:667
#7  <signal handler called>
#8  0xffffffff829ccc90 in _nv035888rm () from /boot/modules/nvidia.ko
#9  0xfffff803cf6cb808 in ?? ()
#10 0xfffffe021a9a5f18 in ?? ()
#11 0xfffff80253624190 in ?? ()
#12 0xffffffff829ca95e in _nv014658rm () from /boot/modules/nvidia.ko
#13 0x0000000000000000 in ?? ()

(kgdb) fr 8
#8  0xffffffff829ccc90 in _nv035888rm () from /boot/modules/nvidia.ko
(kgdb) disassemble
Dump of assembler code for function _nv035888rm:
    0xffffffff829ccbe0 <+0>:     push   %r13
    0xffffffff829ccbe2 <+2>:     push   %r12
    0xffffffff829ccbe4 <+4>:     mov    %rsi,%r12
    0xffffffff829ccbe7 <+7>:     push   %rbx
    0xffffffff829ccbe8 <+8>:     sub    $0x20,%rbp
    0xffffffff829ccbec <+12>:    mov    0x20(%rsi),%rdx
    0xffffffff829ccbf0 <+16>:    mov    %rdi,%rbx
    0xffffffff829ccbf3 <+19>:    test   %rdx,%rdx
    0xffffffff829ccbf6 <+22>:    je     0xffffffff829ccc16 <_nv035888rm+54>
    0xffffffff829ccbf8 <+24>:    mov    0x18(%rdi),%rax
    0xffffffff829ccbfc <+28>:    test   %rax,%rax
    0xffffffff829ccbff <+31>:    jne    0xffffffff829ccc11 <_nv035888rm+49>
    0xffffffff829ccc01 <+33>:    jmp    0xffffffff829ccc40 <_nv035888rm+96>
    0xffffffff829ccc03 <+35>:    nopl   0x0(%rax,%rax,1)
    0xffffffff829ccc08 <+40>:    mov    0x18(%rax),%rax
    0xffffffff829ccc0c <+44>:    test   %rax,%rax
    0xffffffff829ccc0f <+47>:    je     0xffffffff829ccc40 <_nv035888rm+96>
    0xffffffff829ccc11 <+49>:    cmp    %rax,%rdx
    0xffffffff829ccc14 <+52>:    jne    0xffffffff829ccc08 <_nv035888rm+40>
    0xffffffff829ccc16 <+54>:    mov    %r12,%rdi
    0xffffffff829ccc19 <+57>:    call   0xffffffff822f53a0 <_nv035883rm>
    0xffffffff829ccc1e <+62>:    lea    0x120(%rbx),%rdi
    0xffffffff829ccc25 <+69>:    mov    %r12,%rsi
    0xffffffff829ccc28 <+72>:    call   0xffffffff829c2520 <_nv029011rm>
    0xffffffff829ccc2d <+77>:    pop    %rbx
    0xffffffff829ccc2e <+78>:    pop    %r12
    0xffffffff829ccc30 <+80>:    pop    %r13
    0xffffffff829ccc32 <+82>:    add    $0x20,%rbp
    0xffffffff829ccc36 <+86>:    ret
    0xffffffff829ccc37 <+87>:    nopw   0x0(%rax,%rax,1)
    0xffffffff829ccc40 <+96>:    lea    0x148(%rdx),%rdi
    0xffffffff829ccc47 <+103>:   call   0xffffffff829c26f0 <_nv029013rm>
    0xffffffff829ccc4c <+108>:   mov    %rax,%r13
    0xffffffff829ccc4f <+111>:   mov    0x20(%r12),%rax
    0xffffffff829ccc54 <+116>:   lea    0x148(%rax),%rdi
    0xffffffff829ccc5b <+123>:   call   0xffffffff829c26c0 <_nv028995rm>
    0xffffffff829ccc60 <+128>:   mov    0x20(%r12),%rcx
    0xffffffff829ccc65 <+133>:   mov    %rax,%rdx
    0xffffffff829ccc68 <+136>:   mov    %rbp,%rdi
    0xffffffff829ccc6b <+139>:   lea    0x148(%rcx),%rsi
    0xffffffff829ccc72 <+146>:   mov    %r13,%rcx
    0xffffffff829ccc75 <+149>:   call   0xffffffff829c2800 <_nv029003rm>
    0xffffffff829ccc7a <+154>:   nopw   0x0(%rax,%rax,1)
    0xffffffff829ccc80 <+160>:   mov    %rbp,%rdi
    0xffffffff829ccc83 <+163>:   call   0xffffffff829c2870 <_nv029002rm>
    0xffffffff829ccc88 <+168>:   test   %al,%al
    0xffffffff829ccc8a <+170>:   je     0xffffffff829ccc16 <_nv035888rm+54>
    0xffffffff829ccc8c <+172>:   mov    0x0(%rbp),%rsi
=> 0xffffffff829ccc90 <+176>:   cmp    %rbx,0x8(%rsi)
    0xffffffff829ccc94 <+180>:   jne    0xffffffff829ccc80 <_nv035888rm+160>
    0xffffffff829ccc96 <+182>:   cmp    %r12,(%rsi)
    0xffffffff829ccc99 <+185>:   jne    0xffffffff829ccc80 <_nv035888rm+160>
    0xffffffff829ccc9b <+187>:   mov    0x20(%r12),%rax
    0xffffffff829ccca0 <+192>:   lea    0x148(%rax),%rdi
    0xffffffff829ccca7 <+199>:   call   0xffffffff829c2520 <_nv029011rm>
    0xffffffff829cccac <+204>:   jmp    0xffffffff829ccc16 <_nv035888rm+54>
End of assembler dump.

(kgdb) i reg
rax            0x1                 1
rbx            0xfffff807c381b828  -8762748192728
rcx            0xfffff803cf6cb570  -8779728112272
rdx            0xdeadc0dedeadc0de  -2401050962867404578
rsi            0xdeadc0df00000000  -2401050962308366336
rdi            0xfffffe021a9a5d20  -2189986996960
rbp            0xfffffe021a9a5d20  0xfffffe021a9a5d20
rsp            0xfffffe02c1d7d840  0xfffffe02c1d7d840
r8             0xffffffff80c2bc91  -2134721391
r9             0xffffffff8414fcab  -2078999381
r10            0x0                 0
r11            0x372               882
r12            0xfffff80253624190  -8786104139376
r13            0xdeadc0df00000000  -2401050962308366336
r14            0xfffffe021a9a5d98  -2189986996840
r15            0xfffff807c381b828  -8762748192728
rip            0xffffffff829ccc90  0xffffffff829ccc90 <_nv035888rm+176>
eflags         0x10202             [ IF RF ]
cs             0x20                32
ss             0x28                40
ds             <unavailable>
es             <unavailable>
fs             <unavailable>
gs             <unavailable>
fs_base        <unavailable>
gs_base        <unavailable>

(kgdb) x/10a $rdi
0xfffffe021a9a5d20:     0xdeadc0df00000000      0xfffff803cf6cb570
0xfffffe021a9a5d30:     0x0     0xdeadc0dedeadc0de
0xfffffe021a9a5d40:     0x0     0xfffffe021a9a5e18
0xfffffe021a9a5d50:     0xfffff807c381b948      0xfffff807c381b828
0xfffffe021a9a5d60:     0xfffffe021a9a5ed0      0x1441b5f4d70

-- 
Andriy Gapon