nvidia driver crash
- Reply: Alexey Dokuchaev : "Re: nvidia driver crash"
- Go to: [ bottom of page ] [ top of archives ] [ this month ]
Date: Thu, 20 Jan 2022 14:07:33 UTC
I've just got a kernel crash in the nvidia driver code on stable/13.
The crash seems to involve freed memory (the 0xdeadc0de fill pattern is visible
in the register dump), but it is hard to tell anything beyond that because the
crash is in proprietary code.
Is there a way to report this to nvidia?
If (and that's a big if) they have FreeBSD support, then maybe they will be able
to find out more details.
The driver is nvidia-driver-470.86.
Thank you.
Fatal trap 9: general protection fault while in kernel mode
cpuid = 5; apic id = 05
instruction pointer = 0x20:0xffffffff829ccc90
stack pointer = 0x28:0xfffffe02c1d7d840
frame pointer = 0x28:0xfffffe021a9a5d20
code segment = base 0x0, limit 0xfffff, type 0x1b
= DPL 0, pres 1, long 1, def32 0, gran 1
processor eflags = interrupt enabled, resume, IOPL = 0
current process = 80434 (plasmashell)
trap number = 9
panic: general protection fault
cpuid = 5
time = 1642654429
KDB: stack backtrace:
db_trace_self_wrapper() at 0xffffffff805ca63b = db_trace_self_wrapper+0x2b/frame
0xfffffe02c1d7d470
kdb_backtrace() at 0xffffffff808ae0c7 = kdb_backtrace+0x37/frame 0xfffffe02c1d7d520
vpanic() at 0xffffffff8086a2ec = vpanic+0x18c/frame 0xfffffe02c1d7d580
panic() at 0xffffffff80869f03 = panic+0x43/frame 0xfffffe02c1d7d5e0
trap_fatal() at 0xffffffff80b5ac35 = trap_fatal+0x375/frame 0xfffffe02c1d7d640
trap() at 0xffffffff80b5a0e7 = trap+0x67/frame 0xfffffe02c1d7d750
trap_check() at 0xffffffff80b5b069 = trap_check+0x29/frame 0xfffffe02c1d7d770
calltrap() at 0xffffffff80b36778 = calltrap+0x8/frame 0xfffffe02c1d7d770
--- trap 0x9, rip = 0xffffffff829ccc90, rsp = 0xfffffe02c1d7d840, rbp =
0xfffffe021a9a5d20 ---
_nv035888rm() at 0xffffffff829ccc90 = _nv035888rm+0xb0/frame 0xfffffe021a9a5d20
??() at 0xfffff803cf6cb570/frame 0xdeadc0df00000000
Uptime: 12d16h10m41s
Dumping 6720 out of 32646 MB:..1%..11%..21%..31%..41%..51%..61%..71%..81%..91%
doadump (textdump=textdump@entry=1) at
/usr/devel/git/trant/sys/kern/kern_shutdown.c:399
399 dumptid = curthread->td_tid;
(kgdb) bt
#0 doadump (textdump=textdump@entry=1) at
/usr/devel/git/trant/sys/kern/kern_shutdown.c:399
#1 0xffffffff80869cef in kern_reboot (howto=260) at
/usr/devel/git/trant/sys/kern/kern_shutdown.c:487
#2 0xffffffff8086a35f in vpanic (fmt=0xffffffff80c1a2f6 "%s", ap=<optimized
out>) at /usr/devel/git/trant/sys/kern/kern_shutdown.c:920
#3 0xffffffff80869f03 in panic (fmt=<unavailable>) at
/usr/devel/git/trant/sys/kern/kern_shutdown.c:844
#4 0xffffffff80b5ac35 in trap_fatal (frame=0xfffffe02c1d7d780, eva=0) at
/usr/devel/git/trant/sys/amd64/amd64/trap.c:944
#5 0xffffffff80b5a0e7 in trap (frame=frame@entry=0xfffffe02c1d7d780) at
/usr/devel/git/trant/sys/amd64/amd64/trap.c:249
#6 0xffffffff80b5b069 in trap_check (frame=0xfffffe02c1d7d780) at
/usr/devel/git/trant/sys/amd64/amd64/trap.c:667
#7 <signal handler called>
#8 0xffffffff829ccc90 in _nv035888rm () from /boot/modules/nvidia.ko
#9 0xfffff803cf6cb808 in ?? ()
#10 0xfffffe021a9a5f18 in ?? ()
#11 0xfffff80253624190 in ?? ()
#12 0xffffffff829ca95e in _nv014658rm () from /boot/modules/nvidia.ko
#13 0x0000000000000000 in ?? ()
(kgdb) fr 8
#8 0xffffffff829ccc90 in _nv035888rm () from /boot/modules/nvidia.ko
(kgdb) disassemble
Dump of assembler code for function _nv035888rm:
0xffffffff829ccbe0 <+0>: push %r13
0xffffffff829ccbe2 <+2>: push %r12
0xffffffff829ccbe4 <+4>: mov %rsi,%r12
0xffffffff829ccbe7 <+7>: push %rbx
0xffffffff829ccbe8 <+8>: sub $0x20,%rbp
0xffffffff829ccbec <+12>: mov 0x20(%rsi),%rdx
0xffffffff829ccbf0 <+16>: mov %rdi,%rbx
0xffffffff829ccbf3 <+19>: test %rdx,%rdx
0xffffffff829ccbf6 <+22>: je 0xffffffff829ccc16 <_nv035888rm+54>
0xffffffff829ccbf8 <+24>: mov 0x18(%rdi),%rax
0xffffffff829ccbfc <+28>: test %rax,%rax
0xffffffff829ccbff <+31>: jne 0xffffffff829ccc11 <_nv035888rm+49>
0xffffffff829ccc01 <+33>: jmp 0xffffffff829ccc40 <_nv035888rm+96>
0xffffffff829ccc03 <+35>: nopl 0x0(%rax,%rax,1)
0xffffffff829ccc08 <+40>: mov 0x18(%rax),%rax
0xffffffff829ccc0c <+44>: test %rax,%rax
0xffffffff829ccc0f <+47>: je 0xffffffff829ccc40 <_nv035888rm+96>
0xffffffff829ccc11 <+49>: cmp %rax,%rdx
0xffffffff829ccc14 <+52>: jne 0xffffffff829ccc08 <_nv035888rm+40>
0xffffffff829ccc16 <+54>: mov %r12,%rdi
0xffffffff829ccc19 <+57>: call 0xffffffff822f53a0 <_nv035883rm>
0xffffffff829ccc1e <+62>: lea 0x120(%rbx),%rdi
0xffffffff829ccc25 <+69>: mov %r12,%rsi
0xffffffff829ccc28 <+72>: call 0xffffffff829c2520 <_nv029011rm>
0xffffffff829ccc2d <+77>: pop %rbx
0xffffffff829ccc2e <+78>: pop %r12
0xffffffff829ccc30 <+80>: pop %r13
0xffffffff829ccc32 <+82>: add $0x20,%rbp
0xffffffff829ccc36 <+86>: ret
0xffffffff829ccc37 <+87>: nopw 0x0(%rax,%rax,1)
0xffffffff829ccc40 <+96>: lea 0x148(%rdx),%rdi
0xffffffff829ccc47 <+103>: call 0xffffffff829c26f0 <_nv029013rm>
0xffffffff829ccc4c <+108>: mov %rax,%r13
0xffffffff829ccc4f <+111>: mov 0x20(%r12),%rax
0xffffffff829ccc54 <+116>: lea 0x148(%rax),%rdi
0xffffffff829ccc5b <+123>: call 0xffffffff829c26c0 <_nv028995rm>
0xffffffff829ccc60 <+128>: mov 0x20(%r12),%rcx
0xffffffff829ccc65 <+133>: mov %rax,%rdx
0xffffffff829ccc68 <+136>: mov %rbp,%rdi
0xffffffff829ccc6b <+139>: lea 0x148(%rcx),%rsi
0xffffffff829ccc72 <+146>: mov %r13,%rcx
0xffffffff829ccc75 <+149>: call 0xffffffff829c2800 <_nv029003rm>
0xffffffff829ccc7a <+154>: nopw 0x0(%rax,%rax,1)
0xffffffff829ccc80 <+160>: mov %rbp,%rdi
0xffffffff829ccc83 <+163>: call 0xffffffff829c2870 <_nv029002rm>
0xffffffff829ccc88 <+168>: test %al,%al
0xffffffff829ccc8a <+170>: je 0xffffffff829ccc16 <_nv035888rm+54>
0xffffffff829ccc8c <+172>: mov 0x0(%rbp),%rsi
=> 0xffffffff829ccc90 <+176>: cmp %rbx,0x8(%rsi)
0xffffffff829ccc94 <+180>: jne 0xffffffff829ccc80 <_nv035888rm+160>
0xffffffff829ccc96 <+182>: cmp %r12,(%rsi)
0xffffffff829ccc99 <+185>: jne 0xffffffff829ccc80 <_nv035888rm+160>
0xffffffff829ccc9b <+187>: mov 0x20(%r12),%rax
0xffffffff829ccca0 <+192>: lea 0x148(%rax),%rdi
0xffffffff829ccca7 <+199>: call 0xffffffff829c2520 <_nv029011rm>
0xffffffff829cccac <+204>: jmp 0xffffffff829ccc16 <_nv035888rm+54>
End of assembler dump.
(kgdb) i reg
rax 0x1 1
rbx 0xfffff807c381b828 -8762748192728
rcx 0xfffff803cf6cb570 -8779728112272
rdx 0xdeadc0dedeadc0de -2401050962867404578
rsi 0xdeadc0df00000000 -2401050962308366336
rdi 0xfffffe021a9a5d20 -2189986996960
rbp 0xfffffe021a9a5d20 0xfffffe021a9a5d20
rsp 0xfffffe02c1d7d840 0xfffffe02c1d7d840
r8 0xffffffff80c2bc91 -2134721391
r9 0xffffffff8414fcab -2078999381
r10 0x0 0
r11 0x372 882
r12 0xfffff80253624190 -8786104139376
r13 0xdeadc0df00000000 -2401050962308366336
r14 0xfffffe021a9a5d98 -2189986996840
r15 0xfffff807c381b828 -8762748192728
rip 0xffffffff829ccc90 0xffffffff829ccc90 <_nv035888rm+176>
eflags 0x10202 [ IF RF ]
cs 0x20 32
ss 0x28 40
ds <unavailable>
es <unavailable>
fs <unavailable>
gs <unavailable>
fs_base <unavailable>
gs_base <unavailable>
(kgdb) x/10a $rdi
0xfffffe021a9a5d20: 0xdeadc0df00000000 0xfffff803cf6cb570
0xfffffe021a9a5d30: 0x0 0xdeadc0dedeadc0de
0xfffffe021a9a5d40: 0x0 0xfffffe021a9a5e18
0xfffffe021a9a5d50: 0xfffff807c381b948 0xfffff807c381b828
0xfffffe021a9a5d60: 0xfffffe021a9a5ed0 0x1441b5f4d70
--
Andriy Gapon