[Bug 267769] Bhyve core dump on suspend/resume of virtio-scsi device

From: <bugzilla-noreply_at_freebsd.org>
Date: Mon, 14 Nov 2022 20:50:57 UTC
https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=267769

            Bug ID: 267769
           Summary: Bhyve core dump on suspend/resume of virtio-scsi
                    device
           Product: Base System
           Version: CURRENT
          Hardware: amd64
                OS: Any
            Status: New
          Severity: Affects Many People
          Priority: ---
         Component: bhyve
          Assignee: virtualization@FreeBSD.org
          Reporter: dan@sunsaturn.com

Bhyve core dumps after using virtio-scsi device with suspend/resume with exit
code 139. After a resume of guest, a simple "df" or any other command will
freeze and within 3-5 min the bhyve process will dump core. This seems to only
affect virtio-scsi , when I tested virtio-blk all seems well and guest resumes
fine.

router:/root/guests # uname -a
FreeBSD router.sunsaturn.com 14.0-CURRENT FreeBSD 14.0-CURRENT #1
main-n259058-105019e0d6c: Sat Nov  5 05:37:28 CDT 2022    
dan@router.sunsaturn.com:/usr/obj/usr/src/amd64.amd64/sys/MYKERNEL amd64
router:/root/guests # 

Steps to reproduce:
######################################/etc/ctl.conf 
portal-group pg0 {
        discovery-auth-group no-authentication
        listen 127.0.0.1:3260
}
#asterisk
target iqn.com.sunsaturn.asterisk:target1 {
        auth-group no-authentication
        portal-group pg0
        #bhyve virti-iscsi disk - /dev/cam/ctl1.0
        port ioctl/1
        lun 0 {
                path /dev/zvol/zroot/asterisk 
                #blocksize 128
                serial 000c2937247001
                device-id "iSCSI Disk 000c2937247001"
                option vendor "FreeBSD"
                option product "iSCSI Disk"
                option revision "0123"
                option insecure_tpc on
        }
        lun 1 {
                path
/vm/.iso/FreeBSD-14.0-CURRENT-amd64-20221103-5cc5c9254da-259005-disc1.iso
                #byhve seems to just hang when I set it to an actual CDROM so
let it default to type 0
                #device-type 5
                serial 000c2937247003
                device-id "iSCSI CDROM ISO 000c2937247002"
                option vendor "FreeBSD CDROM"
                option product "iSCSI CDROM"
                option revision "0123"
                option insecure_tpc on
        }
}
###########################/etc/iscsi.conf
#asterisk
t1 {
        TargetAddress   = 127.0.0.1:3260
        TargetName      = iqn.com.sunsaturn.asterisk:target1
}

############################/etc/rc.conf##########
ctld_enable="YES"          #load /etc/ctl.conf
iscsid_enable="YES"        #start iscsid process to connect to ctld
#client - service iscsictl start
iscsictl_enable="YES"      #connect to all targets in /etc/iscsi.conf
iscsictl_flags="-Aa"


##################asterisk.sh test script###################
#!/bin/bash
#
# General script to test bhyve suspend/resume features
#
# Requirements: FreeBSD current
# screen
# git clone https://git.FreeBSD.org/src.git /usr/src
# cd /usr/src/sys/amd64/conf (edit MYKERNEL) cp GENERIC MYKERNEL-NODEBUG; (add:
options         BHYVE_SNAPSHOT)
# cd /usr/src
# (find amount of CPUs and adjust -j below - "dmesg|grep SMP")
# make -j12 buildworld -DWITH_BHYVE_SNAPSHOT -DWITH_MALLOC_PRODUCTION
# make -j12 buildkernel KERNCONF=MYKERNEL
# make installkernel KERNCONF=MYKERNEL
# shutdown -r now
# cd /usr/src; make installworld
# shutdown -r now
# etcupdate -B
# pkg bootstrap -f #if new freebsd version
# pkg upgrade -f   #if new freebsd version 
#
# Report anomolies to dan@sunsaturn.com

##############EDIT ME#####################


HOST="127.0.0.1"                        # vncviewer 127.0.0.1:5900 - pkg
install tightvnc
PORT="5900"
WIDTH="800"
HEIGHT="600"
VMNAME="asterisk"
ISO="/vm/.iso/FreeBSD-14.0-CURRENT-amd64-20221103-5cc5c9254da-259005-disc1.iso"
DIR="/vm/asterisk"                      # Used to hold files when guest
suspended
SERIAL="/dev/nmdm_asteriskA"           # For "screen /dev/nmdm_asteriskB" - pkg
install screen
TAP="tap0"
CPU="8"
RAM="8G"

#For testing virtio-scsi
STORAGE="/dev/cam/ctl1.0"               # port from /etc/ctl.conf(port ioctl/1)
- core dumping on resume
DEVICE="virtio-scsi"

#for testing virtio-blk                 # Comment out above 2 lines if using
these
#DEVICE="virtio-blk"
#STORAGE="/dev/zvol/zroot/asterisk"     # Standard zvol
#STORAGE="/dev/da1"                     # Block device created from iscsictl

#########################################

usage() {
   echo "Usage: $1 start    (Start the guest: $VMNAME)"; 
   echo "Usage: $1 stop     (Stop the guest: $VMNAME)"; 
   echo "Usage: $1 resume   (Resume the guest from last suspend: $VMNAME)"; 
   echo "Usage: $1 suspend  (Suspend the guest: $VMNAME)"; 
   echo "Usage: $1 install  (Install new guest: $VMNAME)"; 
   exit
}

if [ ! -d "$DIR" ]; then 
   mkdir -p $DIR
fi

#if [ -z "$2" ]; then
#   usage
#else
#   VMNAME=$2
#fi


if [ "$1" == "install" ]; then
   #Kill it before starting it
   echo "Execute: screen $SERIAL"
   bhyvectl --destroy --vm=$VMNAME
   bhyve -c $CPU -m $RAM -w -H -A \
      -s 0:0,hostbridge \
      -s 3:0,ahci-cd,$ISO \
      -s 4:0,$DEVICE,$STORAGE  \
      -s 5:0,virtio-net,$TAP \
      -s 29,fbuf,tcp=$HOST:$PORT,w=$WIDTH,h=$HEIGHT \
      -s 30,xhci,tablet \
      -s 31,lpc -l com1,stdio \
      -l bootrom,/usr/local/share/uefi-firmware/BHYVE_UEFI.fd \
      $VMNAME
   #kill it after 
   bhyvectl --destroy --vm=$VMNAME
elif [ "$1" == "start" ]; then 
   while true
   do
      echo "Starting $VMNAME -s 29,fbuf,tcp=$HOST:$PORT,w=$WIDTH,h=$HEIGHT"
      #Kill it before starting it
      bhyvectl --destroy --vm=$VMNAME > /dev/null 2>&1
      bhyve -c $CPU -m $RAM -w -H -A \
         -s 0:0,hostbridge \
         -s 4:0,$DEVICE,$STORAGE  \
         -s 5:0,virtio-net,$TAP \
         -s 29,fbuf,tcp=$HOST:$PORT,w=$WIDTH,h=$HEIGHT \
         -s 30,xhci,tablet \
         -s 31,lpc -l com1,$SERIAL \
         -l bootrom,/usr/local/share/uefi-firmware/BHYVE_UEFI.fd \
         $VMNAME
      #DISABLING REBOOT LOOP AS SUSPEND RETURNS ERROR CODE 0 AS WELL
      #if [ "$?" != 0 ];
      #then
      #   echo "The exit code was not reboot code 0!: $?"
      #   exit
      #fi
      echo "The exit code was : $?"
      exit
   done
elif [ "$1" == "resume" ]; then 
   while true
   do
      echo "Starting $VMNAME -s 29,fbuf,tcp=$HOST:$PORT,w=$WIDTH,h=$HEIGHT"
      #Kill it before starting it
      bhyvectl --destroy --vm=$VMNAME > /dev/null 2>&1
      if [ -f "$DIR/default.ckp" ]; then
         bhyve -c $CPU -m $RAM -w -H -A \
            -s 0:0,hostbridge \
            -s 4:0,$DEVICE,$STORAGE  \
            -s 5:0,virtio-net,$TAP \
            -s 29,fbuf,tcp=$HOST:$PORT,w=$WIDTH,h=$HEIGHT \
            -s 30,xhci,tablet \
            -s 31,lpc -l com1,$SERIAL \
            -l bootrom,/usr/local/share/uefi-firmware/BHYVE_UEFI.fd \
            -r $DIR/default.ckp \
            $VMNAME
      else
         echo "Guest was never suspended"
         exit
      fi
      #DISABLING REBOOT LOOP AS SUSPEND RETURNS ERROR CODE 0 AS WELL
      #if [ "$?" != 0 ];
      #then
      #   echo "The exit code was not reboot code 0!: $?"
      #   exit
      #fi
      echo "The exit code was : $?"
      exit
   done
elif [ "$1" == "suspend" ];
then 
   bhyvectl --suspend $DIR/default.ckp --vm=$VMNAME

elif [ "$1" == "stop" ]; then 
   bhyvectl --destroy --vm=$VMNAME 
else 
   usage
fi

##################Making it core dump######################
#terminal 1
router:/root/guests # ./asterisk.sh start
Starting asterisk -s 29,fbuf,tcp=127.0.0.1:5900,w=800,h=600
fbuf frame buffer base: 0x2275eb000000 [sz 16777216]
Unhandled ps2 mouse command 0xe1

#terminal 2
router:/root/guests # ./asterisk.sh suspend
router:/root/guests # ./asterisk.sh resume
Starting asterisk -s 29,fbuf,tcp=127.0.0.1:5900,w=800,h=600
fbuf frame buffer base: 0x1b6423400000 [sz 16777216]
Pausing pci devs...
pci_pause: no such name: virtio-blk
pci_pause: no such name: ahci
pci_pause: no such name: ahci-hd
pci_pause: no such name: ahci-cd
Restoring vm mem...
[8192.000MiB / 8192.000MiB]
|#############################################################################################################################################|
Restoring pci devs...
vm_restore_user_dev: Device size is 0. Assuming virtio-blk is not used
vm_restore_user_dev: Device size is 0. Assuming virtio-rnd is not used
vm_restore_user_dev: Device size is 0. Assuming e1000 is not used
vm_restore_user_dev: Device size is 0. Assuming ahci is not used
vm_restore_user_dev: Device size is 0. Assuming ahci-hd is not used
vm_restore_user_dev: Device size is 0. Assuming ahci-cd is not used
Restoring kernel structs...
Resuming pci devs...
pci_resume: no such name: virtio-blk
pci_resume: no such name: ahci
pci_resume: no such name: ahci-hd
pci_resume: no such name: ahci-cd

##########at this point just go to guest and type "df" or any command and watch
it hang, wait a few minutes, then go back to Terminal 2:

./asterisk.sh: line 147: 34302 Segmentation fault      (core dumped) bhyve -c
$CPU -m $RAM -w -H -A -s 0:0,hostbridge -s 4:0,$DEVICE,$STORAGE -s
5:0,virtio-net,$TAP -s 29,fbuf,tcp=$HOST:$PORT,w=$WIDTH,h=$HEIGHT -s
30,xhci,tablet -s 31,lpc -l com1,$SERIAL -l
bootrom,/usr/local/share/uefi-firmware/BHYVE_UEFI.fd -r $DIR/default.ckp
$VMNAME
The exit code was : 139
router:/root/guests #

-- 
You are receiving this mail because:
You are the assignee for the bug.