svn commit: r286979 - in user/marcel/libvdsk: bhyve bhyveload

Marcel Moolenaar marcel at FreeBSD.org
Fri Aug 21 05:02:29 UTC 2015


Author: marcel
Date: Fri Aug 21 05:02:26 2015
New Revision: 286979
URL: https://svnweb.freebsd.org/changeset/base/286979

Log:
  Sync with ^/head at 286949

Added:
  user/marcel/libvdsk/bhyve/Makefile.depend
     - copied unchanged from r286949, head/usr.sbin/bhyve/Makefile.depend
  user/marcel/libvdsk/bhyve/bootrom.c
     - copied unchanged from r286949, head/usr.sbin/bhyve/bootrom.c
  user/marcel/libvdsk/bhyve/bootrom.h
     - copied unchanged from r286949, head/usr.sbin/bhyve/bootrom.h
Modified:
  user/marcel/libvdsk/bhyve/Makefile
  user/marcel/libvdsk/bhyve/acpi.c
  user/marcel/libvdsk/bhyve/ahci.h
  user/marcel/libvdsk/bhyve/bhyve.8
  user/marcel/libvdsk/bhyve/bhyverun.c
  user/marcel/libvdsk/bhyve/block_if.c
  user/marcel/libvdsk/bhyve/block_if.h
  user/marcel/libvdsk/bhyve/dbgport.c
  user/marcel/libvdsk/bhyve/inout.c
  user/marcel/libvdsk/bhyve/ioapic.c
  user/marcel/libvdsk/bhyve/ioapic.h
  user/marcel/libvdsk/bhyve/pci_ahci.c
  user/marcel/libvdsk/bhyve/pci_emul.c
  user/marcel/libvdsk/bhyve/pci_hostbridge.c
  user/marcel/libvdsk/bhyve/pci_irq.c
  user/marcel/libvdsk/bhyve/pci_irq.h
  user/marcel/libvdsk/bhyve/pci_lpc.c
  user/marcel/libvdsk/bhyve/pci_lpc.h
  user/marcel/libvdsk/bhyve/pci_passthru.c
  user/marcel/libvdsk/bhyve/pci_virtio_block.c
  user/marcel/libvdsk/bhyve/pci_virtio_net.c
  user/marcel/libvdsk/bhyve/pci_virtio_rnd.c
  user/marcel/libvdsk/bhyve/pm.c
  user/marcel/libvdsk/bhyve/task_switch.c
  user/marcel/libvdsk/bhyve/uart_emul.c
  user/marcel/libvdsk/bhyve/virtio.c
  user/marcel/libvdsk/bhyve/virtio.h
  user/marcel/libvdsk/bhyveload/Makefile
  user/marcel/libvdsk/bhyveload/bhyveload.8
  user/marcel/libvdsk/bhyveload/bhyveload.c
Directory Properties:
  user/marcel/libvdsk/bhyve/   (props changed)
  user/marcel/libvdsk/bhyveload/   (props changed)

Modified: user/marcel/libvdsk/bhyve/Makefile
==============================================================================
--- user/marcel/libvdsk/bhyve/Makefile	Fri Aug 21 02:42:14 2015	(r286978)
+++ user/marcel/libvdsk/bhyve/Makefile	Fri Aug 21 05:02:26 2015	(r286979)
@@ -13,6 +13,7 @@ SRCS=	\
 	acpi.c			\
 	bhyverun.c		\
 	block_if.c		\
+	bootrom.c		\
 	consport.c		\
 	dbgport.c		\
 	inout.c			\
@@ -43,8 +44,8 @@ SRCS=	\
 .PATH:	/sys/amd64/vmm
 SRCS+=	vmm_instruction_emul.c
 
-DPADD=	${LIBVDSK} ${LIBVMMAPI} ${LIBMD} ${LIBUTIL} ${LIBPTHREAD}
-LDADD=	-lvdsk -lvmmapi -lmd -lutil -lpthread
+DPADD=	${LIBVDSK} ${LIBVMMAPI} ${LIBMD} ${LIBPTHREAD}
+LDADD=	-lvdsk -lvmmapi -lmd -lpthread
 
 WARNS?=	2
 

Copied: user/marcel/libvdsk/bhyve/Makefile.depend (from r286949, head/usr.sbin/bhyve/Makefile.depend)
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ user/marcel/libvdsk/bhyve/Makefile.depend	Fri Aug 21 05:02:26 2015	(r286979, copy of r286949, head/usr.sbin/bhyve/Makefile.depend)
@@ -0,0 +1,22 @@
+# $FreeBSD$
+# Autogenerated - do NOT edit!
+
+DIRDEPS = \
+	gnu/lib/csu \
+	gnu/lib/libgcc \
+	include \
+	include/xlocale \
+	lib/${CSU_DIR} \
+	lib/libc \
+	lib/libcompiler_rt \
+	lib/libmd \
+	lib/libthr \
+	lib/libutil \
+	lib/libvmmapi \
+
+
+.include <dirdeps.mk>
+
+.if ${DEP_RELDIR} == ${_DEP_RELDIR}
+# local dependencies - needed for -jN in clean tree
+.endif

Modified: user/marcel/libvdsk/bhyve/acpi.c
==============================================================================
--- user/marcel/libvdsk/bhyve/acpi.c	Fri Aug 21 02:42:14 2015	(r286978)
+++ user/marcel/libvdsk/bhyve/acpi.c	Fri Aug 21 05:02:26 2015	(r286979)
@@ -386,7 +386,7 @@ basl_fwrite_fadt(FILE *fp)
 	EFPRINTF(fp, "[0001]\t\tDuty Cycle Width : 00\n");
 	EFPRINTF(fp, "[0001]\t\tRTC Day Alarm Index : 00\n");
 	EFPRINTF(fp, "[0001]\t\tRTC Month Alarm Index : 00\n");
-	EFPRINTF(fp, "[0001]\t\tRTC Century Index : 00\n");
+	EFPRINTF(fp, "[0001]\t\tRTC Century Index : 32\n");
 	EFPRINTF(fp, "[0002]\t\tBoot Flags (decoded below) : 0000\n");
 	EFPRINTF(fp, "\t\t\tLegacy Devices Supported (V2) : 0\n");
 	EFPRINTF(fp, "\t\t\t8042 Present on ports 60/64 (V2) : 0\n");

Modified: user/marcel/libvdsk/bhyve/ahci.h
==============================================================================
--- user/marcel/libvdsk/bhyve/ahci.h	Fri Aug 21 02:42:14 2015	(r286978)
+++ user/marcel/libvdsk/bhyve/ahci.h	Fri Aug 21 05:02:26 2015	(r286979)
@@ -96,13 +96,14 @@
 #define         ATA_SS_SPD_NO_SPEED     0x00000000
 #define         ATA_SS_SPD_GEN1         0x00000010
 #define         ATA_SS_SPD_GEN2         0x00000020
-#define         ATA_SS_SPD_GEN3         0x00000040
+#define         ATA_SS_SPD_GEN3         0x00000030
 
 #define         ATA_SS_IPM_MASK         0x00000f00
 #define         ATA_SS_IPM_NO_DEVICE    0x00000000
 #define         ATA_SS_IPM_ACTIVE       0x00000100
 #define         ATA_SS_IPM_PARTIAL      0x00000200
 #define         ATA_SS_IPM_SLUMBER      0x00000600
+#define         ATA_SS_IPM_DEVSLEEP     0x00000800
 
 #define ATA_SERROR                      14
 #define         ATA_SE_DATA_CORRECTED   0x00000001
@@ -133,17 +134,19 @@
 #define         ATA_SC_SPD_NO_SPEED     0x00000000
 #define         ATA_SC_SPD_SPEED_GEN1   0x00000010
 #define         ATA_SC_SPD_SPEED_GEN2   0x00000020
-#define         ATA_SC_SPD_SPEED_GEN3   0x00000040
+#define         ATA_SC_SPD_SPEED_GEN3   0x00000030
 
 #define         ATA_SC_IPM_MASK         0x00000f00
 #define         ATA_SC_IPM_NONE         0x00000000
 #define         ATA_SC_IPM_DIS_PARTIAL  0x00000100
 #define         ATA_SC_IPM_DIS_SLUMBER  0x00000200
+#define         ATA_SC_IPM_DIS_DEVSLEEP 0x00000400
 
 #define ATA_SACTIVE                     16
 
 #define AHCI_MAX_PORTS			32
 #define AHCI_MAX_SLOTS			32
+#define AHCI_MAX_IRQS			16
 
 /* SATA AHCI v1.0 register defines */
 #define AHCI_CAP                    0x00
@@ -208,6 +211,9 @@
 #define		AHCI_CAP2_BOH	0x00000001
 #define		AHCI_CAP2_NVMP	0x00000002
 #define		AHCI_CAP2_APST	0x00000004
+#define		AHCI_CAP2_SDS	0x00000008
+#define		AHCI_CAP2_SADM	0x00000010
+#define		AHCI_CAP2_DESO	0x00000020
 
 #define AHCI_OFFSET                 0x100
 #define AHCI_STEP                   0x80
@@ -265,6 +271,7 @@
 #define         AHCI_P_CMD_ACTIVE   0x10000000
 #define         AHCI_P_CMD_PARTIAL  0x20000000
 #define         AHCI_P_CMD_SLUMBER  0x60000000
+#define         AHCI_P_CMD_DEVSLEEP 0x80000000
 
 #define AHCI_P_TFD                  0x20
 #define AHCI_P_SIG                  0x24
@@ -284,6 +291,17 @@
 #define 	AHCI_P_FBS_ADO_SHIFT 12
 #define 	AHCI_P_FBS_DWE      0x000f0000
 #define 	AHCI_P_FBS_DWE_SHIFT 16
+#define AHCI_P_DEVSLP               0x44
+#define 	AHCI_P_DEVSLP_ADSE  0x00000001
+#define 	AHCI_P_DEVSLP_DSP   0x00000002
+#define 	AHCI_P_DEVSLP_DETO  0x000003fc
+#define 	AHCI_P_DEVSLP_DETO_SHIFT 2
+#define 	AHCI_P_DEVSLP_MDAT  0x00007c00
+#define 	AHCI_P_DEVSLP_MDAT_SHIFT 10
+#define 	AHCI_P_DEVSLP_DITO  0x01ff8000
+#define 	AHCI_P_DEVSLP_DITO_SHIFT 15
+#define 	AHCI_P_DEVSLP_DM    0x0e000000
+#define 	AHCI_P_DEVSLP_DM_SHIFT 25
 
 /* Just to be sure, if building as module. */
 #if MAXPHYS < 512 * 1024

Modified: user/marcel/libvdsk/bhyve/bhyve.8
==============================================================================
--- user/marcel/libvdsk/bhyve/bhyve.8	Fri Aug 21 02:42:14 2015	(r286978)
+++ user/marcel/libvdsk/bhyve/bhyve.8	Fri Aug 21 05:02:26 2015	(r286979)
@@ -24,7 +24,7 @@
 .\"
 .\" $FreeBSD$
 .\"
-.Dd September 17, 2014
+.Dd August 7, 2015
 .Dt BHYVE 8
 .Os
 .Sh NAME
@@ -32,7 +32,7 @@
 .Nd "run a guest operating system inside a virtual machine"
 .Sh SYNOPSIS
 .Nm
-.Op Fl abehuwxACHPWY
+.Op Fl abehuwxACHPSWY
 .Op Fl c Ar numcpus
 .Op Fl g Ar gdbport
 .Op Fl l Ar lpcdev Ns Op , Ns Ar conf
@@ -50,7 +50,7 @@ Parameters such as the number of virtual
 I/O connectivity can be specified with command-line parameters.
 .Pp
 The guest operating system must be loaded with
-.Xr bhyveload 4
+.Xr bhyveload 8
 or a similar boot loader before running
 .Nm .
 .Pp
@@ -61,8 +61,8 @@ exit is detected.
 .Bl -tag -width 10n
 .It Fl a
 The guest's local APIC is configured in xAPIC mode.
-The xAPIC mode is the default setting so this option is redundant. It will be
-deprecated in a future version.
+The xAPIC mode is the default setting so this option is redundant.
+It will be deprecated in a future version.
 .It Fl A
 Generate ACPI tables.
 Required for
@@ -99,10 +99,12 @@ Yield the virtual CPU thread when a HLT 
 If this option is not specified, virtual CPUs will use 100% of a host CPU.
 .It Fl l Ar lpcdev Ns Op , Ns Ar conf
 Allow devices behind the LPC PCI-ISA bridge to be configured.
-The only supported devices are the TTY-class devices,
-.Li com1
+The only supported devices are the TTY-class devices
+.Ar com1
 and
-.Li com2 .
+.Ar com2
+and the boot ROM device
+.Ar bootrom .
 .It Fl m Ar size Ns Op Ar K|k|M|m|G|g|T|t
 Guest physical memory size in bytes.
 This must be the same size that was given to
@@ -122,7 +124,7 @@ Force the guest virtual CPU to exit when
 .It Fl s Ar slot,emulation Ns Op , Ns Ar conf
 Configure a virtual PCI slot and function.
 .Pp
-.Nm bhyve
+.Nm
 provides PCI bus emulation and virtual devices that can be attached to
 slots on the bus.
 There are 32 available slots, with the option of providing up to 8 functions
@@ -134,11 +136,19 @@ per slot.
 .Pp
 The
 .Ar pcislot
-value is 0 to 31. The optional function value is 0 to 7. The optional
+value is 0 to 31.
+The optional
+.Ar function
+value is 0 to 7.
+The optional
 .Ar bus
 value is 0 to 255.
-If not specified, the function value defaults to 0.
-If not specified, the bus value defaults to 0.
+If not specified, the
+.Ar function
+value defaults to 0.
+If not specified, the
+.Ar bus
+value defaults to 0.
 .It Ar emulation
 .Bl -tag -width 10n
 .It Li hostbridge | Li amd_hostbridge
@@ -165,8 +175,8 @@ AHCI controller attached to a SATA hard-
 .It Li uart
 PCI 16550 serial device.
 .It Li lpc
-LPC PCI-ISA bridge with COM1 and COM2 16550 serial ports. The LPC bridge
-emulation can only be configured on bus 0.
+LPC PCI-ISA bridge with COM1 and COM2 16550 serial ports and a boot ROM.
+The LPC bridge emulation can only be configured on bus 0.
 .El
 .It Op Ar conf
 This optional parameter describes the backend for device emulations.
@@ -193,8 +203,13 @@ format.
 .Pp
 Block storage devices:
 .Bl -tag -width 10n
-.It Pa /filename Ns Oo , Ns Li nocache Oc Ns Oo , Ns Li direct Oc Ns Oo , Ns Li ro Oc
-.It Pa /dev/xxx Ns Oo , Ns Ar nocache Oc Ns Oo , Ns Ar direct Oc Ns Oo , Ns Ar ro Oc
+.It Pa /filename Ns Oo , Ns Ar block-device-options Oc
+.It Pa /dev/xxx Ns Oo , Ns Ar block-device-options Oc
+.El
+.Pp
+The
+.Ar block-device-options
+are:
 .Bl -tag -width 8n
 .It Li nocache
 Open the file with
@@ -204,25 +219,31 @@ Open the file using
 .Dv O_SYNC .
 .It Li ro
 Force the file to be opened read-only.
-.El
-.Pp
-The
-.Li nocache ,
-.Li direct ,
-and
-.Li ro
-options are not available for virtio block devices.
+.It Li sectorsize= Ns Ar logical Ns Oo / Ns Ar physical Oc
+Specify the logical and physical sector sizes of the emulated disk.
+The physical sector size is optional and is equal to the logical sector size
+if not explicitly specified.
 .El
 .Pp
 TTY devices:
 .Bl -tag -width 10n
 .It Li stdio
 Connect the serial port to the standard input and output of
-the bhyve process.
+the
+.Nm
+process.
 .It Pa /dev/xxx
 Use the host TTY device for serial port I/O.
 .El
 .Pp
+Boot ROM device:
+.Bl -tag -width 10n
+.It Pa romfile
+Map
+.Ar romfile
+in the guest address space reserved for boot firmware.
+.El
+.Pp
 Pass-through devices:
 .Bl -tag -width 10n
 .It Ns Ar slot Ns / Ns Ar bus Ns / Ns Ar function
@@ -234,11 +255,17 @@ and
 numbers.
 .El
 .Pp
+Guest memory must be wired using the
+.Fl S
+option when a pass-through device is configured.
+.Pp
 The host device must have been reserved at boot-time using the
 .Va pptdev
 loader variable as described in
 .Xr vmm 4 .
 .El
+.It Fl S
+Wire guest memory.
 .It Fl u
 RTC keeps UTC time.
 .It Fl U Ar uuid
@@ -248,7 +275,8 @@ in the guest's System Management BIOS Sy
 By default a UUID is generated from the host's hostname and
 .Ar vmname .
 .It Fl w
-Ignore accesses to unimplemented Model Specific Registers (MSRs). This is intended for debug purposes.
+Ignore accesses to unimplemented Model Specific Registers (MSRs).
+This is intended for debug purposes.
 .It Fl W
 Force virtio PCI device emulations to use MSI interrupts instead of MSI-X
 interrupts.
@@ -263,7 +291,7 @@ This should be the same as that created 
 .El
 .Sh EXAMPLES
 The guest operating system must have been loaded with
-.Xr bhyveload 4
+.Xr bhyveload 8
 or a similar boot loader before
 .Xr bhyve 4
 can be run.
@@ -291,9 +319,9 @@ Run an 8GB quad-CPU virtual machine with
 CD-ROM, a single virtio network port, an AMD hostbridge, and the console
 port connected to an
 .Xr nmdm 4
-null-model device.
+null-modem device.
 .Bd -literal -offset indent
-bhyve -c 4 \e\
+bhyve -c 4 \\
   -s 0,amd_hostbridge -s 1,lpc \\
   -s 1:0,ahci-hd,/images/disk.1 \\
   -s 1:1,ahci-hd,/images/disk.2 \\
@@ -303,7 +331,7 @@ bhyve -c 4 \e\
   -s 1:5,ahci-hd,/images/disk.6 \\
   -s 1:6,ahci-hd,/images/disk.7 \\
   -s 1:7,ahci-hd,/images/disk.8 \\
-  -s 2,ahci-cd,/images.install.iso \\
+  -s 2,ahci-cd,/images/install.iso \\
   -s 3,virtio-net,tap0 \\
   -l com1,/dev/nmdm0A \\
   -A -H -P -m 8G

Modified: user/marcel/libvdsk/bhyve/bhyverun.c
==============================================================================
--- user/marcel/libvdsk/bhyve/bhyverun.c	Fri Aug 21 02:42:14 2015	(r286978)
+++ user/marcel/libvdsk/bhyve/bhyverun.c	Fri Aug 21 05:02:26 2015	(r286979)
@@ -47,6 +47,7 @@ __FBSDID("$FreeBSD$");
 #include <pthread.h>
 #include <pthread_np.h>
 #include <sysexits.h>
+#include <stdbool.h>
 
 #include <machine/vmm.h>
 #include <vmmapi.h>
@@ -100,7 +101,7 @@ static struct vm_exit vmexit[VM_MAXCPU];
 
 struct bhyvestats {
         uint64_t        vmexit_bogus;
-        uint64_t        vmexit_bogus_switch;
+	uint64_t	vmexit_reqidle;
         uint64_t        vmexit_hlt;
         uint64_t        vmexit_pause;
         uint64_t        vmexit_mtrap;
@@ -122,7 +123,7 @@ usage(int code)
 {
 
         fprintf(stderr,
-                "Usage: %s [-abehuwxACHPWY] [-c vcpus] [-g <gdb port>] [-l <lpc>]\n"
+                "Usage: %s [-abehuwxACHPSWY] [-c vcpus] [-g <gdb port>] [-l <lpc>]\n"
 		"       %*s [-m mem] [-p vcpu:hostcpu] [-s <pci>] [-U uuid] <vm>\n"
 		"       -a: local apic is in xAPIC mode (deprecated)\n"
 		"       -A: create ACPI tables\n"
@@ -137,6 +138,7 @@ usage(int code)
 		"       -p: pin 'vcpu' to 'hostcpu'\n"
 		"       -P: vmexit from the guest on pause\n"
 		"       -s: <slot,driver,configinfo> PCI slot config\n"
+		"       -S: guest memory cannot be swapped\n"
 		"       -u: RTC keeps UTC time\n"
 		"       -U: uuid\n"
 		"       -w: ignore unimplemented MSRs\n"
@@ -325,8 +327,10 @@ vmexit_inout(struct vmctx *ctx, struct v
 
 	error = emulate_inout(ctx, vcpu, vme, strictio);
 	if (error) {
-		fprintf(stderr, "Unhandled %s%c 0x%04x\n", in ? "in" : "out",
-		    bytes == 1 ? 'b' : (bytes == 2 ? 'w' : 'l'), port);
+		fprintf(stderr, "Unhandled %s%c 0x%04x at 0x%lx\n",
+		    in ? "in" : "out",
+		    bytes == 1 ? 'b' : (bytes == 2 ? 'w' : 'l'),
+		    port, vmexit->rip);
 		return (VMEXIT_ABORT);
 	} else {
 		return (VMEXIT_CONTINUE);
@@ -459,6 +463,17 @@ vmexit_bogus(struct vmctx *ctx, struct v
 }
 
 static int
+vmexit_reqidle(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
+{
+
+	assert(vmexit->inst_length == 0);
+
+	stats.vmexit_reqidle++;
+
+	return (VMEXIT_CONTINUE);
+}
+
+static int
 vmexit_hlt(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
 {
 
@@ -495,22 +510,27 @@ vmexit_mtrap(struct vmctx *ctx, struct v
 static int
 vmexit_inst_emul(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
 {
-	int err;
+	int err, i;
+	struct vie *vie;
+
 	stats.vmexit_inst_emul++;
 
+	vie = &vmexit->u.inst_emul.vie;
 	err = emulate_mem(ctx, *pvcpu, vmexit->u.inst_emul.gpa,
-	    &vmexit->u.inst_emul.vie, &vmexit->u.inst_emul.paging);
+	    vie, &vmexit->u.inst_emul.paging);
 
 	if (err) {
-		if (err == EINVAL) {
-			fprintf(stderr,
-			    "Failed to emulate instruction at 0x%lx\n", 
-			    vmexit->rip);
-		} else if (err == ESRCH) {
+		if (err == ESRCH) {
 			fprintf(stderr, "Unhandled memory access to 0x%lx\n",
 			    vmexit->u.inst_emul.gpa);
 		}
 
+		fprintf(stderr, "Failed to emulate instruction [");
+		for (i = 0; i < vie->num_valid; i++) {
+			fprintf(stderr, "0x%02x%s", vie->inst[i],
+			    i != (vie->num_valid - 1) ? " " : "");
+		}
+		fprintf(stderr, "] at 0x%lx\n", vmexit->rip);
 		return (VMEXIT_ABORT);
 	}
 
@@ -564,6 +584,7 @@ static vmexit_handler_t handler[VM_EXITC
 	[VM_EXITCODE_VMX]    = vmexit_vmx,
 	[VM_EXITCODE_SVM]    = vmexit_svm,
 	[VM_EXITCODE_BOGUS]  = vmexit_bogus,
+	[VM_EXITCODE_REQIDLE] = vmexit_reqidle,
 	[VM_EXITCODE_RDMSR]  = vmexit_rdmsr,
 	[VM_EXITCODE_WRMSR]  = vmexit_wrmsr,
 	[VM_EXITCODE_MTRAP]  = vmexit_mtrap,
@@ -681,26 +702,82 @@ fbsdrun_set_capabilities(struct vmctx *c
 	vm_set_capability(ctx, cpu, VM_CAP_ENABLE_INVPCID, 1);
 }
 
+static struct vmctx *
+do_open(const char *vmname)
+{
+	struct vmctx *ctx;
+	int error;
+	bool reinit, romboot;
+
+	reinit = romboot = false;
+
+	if (lpc_bootrom())
+		romboot = true;
+
+	error = vm_create(vmname);
+	if (error) {
+		if (errno == EEXIST) {
+			if (romboot) {
+				reinit = true;
+			} else {
+				/*
+				 * The virtual machine has been setup by the
+				 * userspace bootloader.
+				 */
+			}
+		} else {
+			perror("vm_create");
+			exit(1);
+		}
+	} else {
+		if (!romboot) {
+			/*
+			 * If the virtual machine was just created then a
+			 * bootrom must be configured to boot it.
+			 */
+			fprintf(stderr, "virtual machine cannot be booted\n");
+			exit(1);
+		}
+	}
+
+	ctx = vm_open(vmname);
+	if (ctx == NULL) {
+		perror("vm_open");
+		exit(1);
+	}
+
+	if (reinit) {
+		error = vm_reinit(ctx);
+		if (error) {
+			perror("vm_reinit");
+			exit(1);
+		}
+	}
+	return (ctx);
+}
+
 int
 main(int argc, char *argv[])
 {
 	int c, error, gdb_port, err, bvmcons;
-	int dump_guest_memory, max_vcpus, mptgen;
+	int max_vcpus, mptgen, memflags;
 	int rtc_localtime;
 	struct vmctx *ctx;
 	uint64_t rip;
 	size_t memsize;
+	char *optstr;
 
 	bvmcons = 0;
-	dump_guest_memory = 0;
 	progname = basename(argv[0]);
 	gdb_port = 0;
 	guest_ncpus = 1;
 	memsize = 256 * MB;
 	mptgen = 1;
 	rtc_localtime = 1;
+	memflags = 0;
 
-	while ((c = getopt(argc, argv, "abehuwxACHIPWYp:g:c:s:m:l:U:")) != -1) {
+	optstr = "abehuwxACHIPSWYp:g:c:s:m:l:U:";
+	while ((c = getopt(argc, argv, optstr)) != -1) {
 		switch (c) {
 		case 'a':
 			x2apic_mode = 0;
@@ -721,7 +798,7 @@ main(int argc, char *argv[])
 			guest_ncpus = atoi(optarg);
 			break;
 		case 'C':
-			dump_guest_memory = 1;
+			memflags |= VM_MEM_F_INCORE;
 			break;
 		case 'g':
 			gdb_port = atoi(optarg);
@@ -737,6 +814,9 @@ main(int argc, char *argv[])
 				exit(1);
 			else
 				break;
+		case 'S':
+			memflags |= VM_MEM_F_WIRED;
+			break;
                 case 'm':
 			error = vm_parse_memsize(optarg, &memsize);
 			if (error)
@@ -791,10 +871,10 @@ main(int argc, char *argv[])
 		usage(1);
 
 	vmname = argv[0];
+	ctx = do_open(vmname);
 
-	ctx = vm_open(vmname);
-	if (ctx == NULL) {
-		perror("vm_open");
+	if (guest_ncpus < 1) {
+		fprintf(stderr, "Invalid guest vCPUs (%d)\n", guest_ncpus);
 		exit(1);
 	}
 
@@ -807,11 +887,10 @@ main(int argc, char *argv[])
 
 	fbsdrun_set_capabilities(ctx, BSP);
 
-	if (dump_guest_memory)
-		vm_set_memflags(ctx, VM_MEM_F_INCORE);
+	vm_set_memflags(ctx, memflags);
 	err = vm_setup_memory(ctx, memsize, VM_MMAP_ALL);
 	if (err) {
-		fprintf(stderr, "Unable to setup memory (%d)\n", err);
+		fprintf(stderr, "Unable to setup memory (%d)\n", errno);
 		exit(1);
 	}
 
@@ -841,6 +920,16 @@ main(int argc, char *argv[])
 	if (bvmcons)
 		init_bvmcons();
 
+	if (lpc_bootrom()) {
+		if (vm_set_capability(ctx, BSP, VM_CAP_UNRESTRICTED_GUEST, 1)) {
+			fprintf(stderr, "ROM boot failed: unrestricted guest "
+			    "capability not available\n");
+			exit(1);
+		}
+		error = vcpu_reset(ctx, BSP);
+		assert(error == 0);
+	}
+
 	error = vm_get_register(ctx, BSP, VM_REG_GUEST_RIP, &rip);
 	assert(error == 0);
 

Modified: user/marcel/libvdsk/bhyve/block_if.c
==============================================================================
--- user/marcel/libvdsk/bhyve/block_if.c	Fri Aug 21 02:42:14 2015	(r286978)
+++ user/marcel/libvdsk/bhyve/block_if.c	Fri Aug 21 05:02:26 2015	(r286979)
@@ -53,16 +53,19 @@ __FBSDID("$FreeBSD$");
 
 #define BLOCKIF_SIG	0xb109b109
 
-#define BLOCKIF_MAXREQ	33
+#define BLOCKIF_NUMTHR	8
+#define BLOCKIF_MAXREQ	(64 + BLOCKIF_NUMTHR)
 
 enum blockop {
 	BOP_READ,
 	BOP_WRITE,
-	BOP_FLUSH
+	BOP_FLUSH,
+	BOP_DELETE
 };
 
 enum blockstat {
 	BST_FREE,
+	BST_BLOCK,
 	BST_PEND,
 	BST_BUSY,
 	BST_DONE
@@ -74,21 +77,22 @@ struct blockif_elem {
 	enum blockop	     be_op;
 	enum blockstat	     be_status;
 	pthread_t            be_tid;
+	off_t		     be_block;
 };
 
 struct blockif_ctxt {
 	int			bc_magic;
+	int			bc_candelete;
 	int			bc_rdonly;
-	pthread_t		bc_btid;
+	int			bc_closing;
+	pthread_t		bc_btid[BLOCKIF_NUMTHR];
         pthread_mutex_t		bc_mtx;
         pthread_cond_t		bc_cond;
-	int			bc_closing;
 
 	/* Request elements and free/pending/busy queues */
 	TAILQ_HEAD(, blockif_elem) bc_freeq;       
 	TAILQ_HEAD(, blockif_elem) bc_pendq;
 	TAILQ_HEAD(, blockif_elem) bc_busyq;
-	u_int			bc_req_count;
 	struct blockif_elem	bc_reqs[BLOCKIF_MAXREQ];
 };
 
@@ -107,69 +111,95 @@ static int
 blockif_enqueue(struct blockif_ctxt *bc, struct blockif_req *breq,
 		enum blockop op)
 {
-	struct blockif_elem *be;
-
-	assert(bc->bc_req_count < BLOCKIF_MAXREQ);
+	struct blockif_elem *be, *tbe;
+	off_t off;
+	int i;
 
 	be = TAILQ_FIRST(&bc->bc_freeq);
 	assert(be != NULL);
 	assert(be->be_status == BST_FREE);
-
 	TAILQ_REMOVE(&bc->bc_freeq, be, be_link);
-	be->be_status = BST_PEND;
 	be->be_req = breq;
 	be->be_op = op;
+	switch (op) {
+	case BOP_READ:
+	case BOP_WRITE:
+	case BOP_DELETE:
+		off = breq->br_offset;
+		for (i = 0; i < breq->br_iovcnt; i++)
+			off += breq->br_iov[i].iov_len;
+		break;
+	default:
+		off = OFF_MAX;
+	}
+	be->be_block = off;
+	TAILQ_FOREACH(tbe, &bc->bc_pendq, be_link) {
+		if (tbe->be_block == breq->br_offset)
+			break;
+	}
+	if (tbe == NULL) {
+		TAILQ_FOREACH(tbe, &bc->bc_busyq, be_link) {
+			if (tbe->be_block == breq->br_offset)
+				break;
+		}
+	}
+	if (tbe == NULL)
+		be->be_status = BST_PEND;
+	else
+		be->be_status = BST_BLOCK;
 	TAILQ_INSERT_TAIL(&bc->bc_pendq, be, be_link);
-
-	bc->bc_req_count++;
-
-	return (0);
+	return (be->be_status == BST_PEND);
 }
 
 static int
-blockif_dequeue(struct blockif_ctxt *bc, struct blockif_elem **bep)
+blockif_dequeue(struct blockif_ctxt *bc, pthread_t t, struct blockif_elem **bep)
 {
 	struct blockif_elem *be;
 
-	if (bc->bc_req_count == 0)
-		return (ENOENT);
-
-	be = TAILQ_FIRST(&bc->bc_pendq);
-	assert(be != NULL);
-	assert(be->be_status == BST_PEND);
+	TAILQ_FOREACH(be, &bc->bc_pendq, be_link) {
+		if (be->be_status == BST_PEND)
+			break;
+		assert(be->be_status == BST_BLOCK);
+	}
+	if (be == NULL)
+		return (0);
 	TAILQ_REMOVE(&bc->bc_pendq, be, be_link);
 	be->be_status = BST_BUSY;
-	be->be_tid = bc->bc_btid;
+	be->be_tid = t;
 	TAILQ_INSERT_TAIL(&bc->bc_busyq, be, be_link);
-
 	*bep = be;
-
-	return (0);
+	return (1);
 }
 
 static void
 blockif_complete(struct blockif_ctxt *bc, struct blockif_elem *be)
 {
-	assert(be->be_status == BST_DONE);
+	struct blockif_elem *tbe;
 
-	TAILQ_REMOVE(&bc->bc_busyq, be, be_link);
+	if (be->be_status == BST_DONE || be->be_status == BST_BUSY)
+		TAILQ_REMOVE(&bc->bc_busyq, be, be_link);
+	else
+		TAILQ_REMOVE(&bc->bc_pendq, be, be_link);
+	TAILQ_FOREACH(tbe, &bc->bc_pendq, be_link) {
+		if (tbe->be_req->br_offset == be->be_block)
+			tbe->be_status = BST_PEND;
+	}
 	be->be_tid = 0;
 	be->be_status = BST_FREE;
 	be->be_req = NULL;
 	TAILQ_INSERT_TAIL(&bc->bc_freeq, be, be_link);
-
-	bc->bc_req_count--;
 }
 
 static void
-blockif_proc(struct blockif_ctxt *bc, struct blockif_elem *be)
+blockif_proc(struct blockif_ctxt *bc, struct blockif_elem *be, uint8_t *buf)
 {
 	struct blockif_req *br;
 	int err;
 
 	br = be->be_req;
+	if (br->br_iovcnt <= 1)
+		buf = NULL;
 	err = 0;
-
 	switch (be->be_op) {
 	case BOP_READ:
 		err = vdsk_read(bc, br->br_iov, br->br_iovcnt, br->br_offset);
@@ -180,6 +210,14 @@ blockif_proc(struct blockif_ctxt *bc, st
 	case BOP_FLUSH:
 		err = vdsk_flush(bc);
 		break;
+	case BOP_DELETE:
+		if (!bc->bc_candelete)
+			err = EOPNOTSUPP;
+		else if (bc->bc_rdonly)
+			err = EROFS;
+		else
+			err = EOPNOTSUPP;
+		break;
 	default:
 		err = EINVAL;
 		break;
@@ -195,28 +233,27 @@ blockif_thr(void *arg)
 {
 	struct blockif_ctxt *bc;
 	struct blockif_elem *be;
+	pthread_t t;
 
 	bc = arg;
+	t = pthread_self();
 
+	pthread_mutex_lock(&bc->bc_mtx);
 	for (;;) {
-		pthread_mutex_lock(&bc->bc_mtx);
-		while (!blockif_dequeue(bc, &be)) {
+		while (blockif_dequeue(bc, t, &be)) {
 			pthread_mutex_unlock(&bc->bc_mtx);
-			blockif_proc(bc, be);
+			blockif_proc(bc, be, NULL);
 			pthread_mutex_lock(&bc->bc_mtx);
 			blockif_complete(bc, be);
 		}
-		pthread_cond_wait(&bc->bc_cond, &bc->bc_mtx);
-		pthread_mutex_unlock(&bc->bc_mtx);
-
-		/*
-		 * Check ctxt status here to see if exit requested
-		 */
+		/* Check ctxt status here to see if exit requested */
 		if (bc->bc_closing)
-			pthread_exit(NULL);
+			break;
+		pthread_cond_wait(&bc->bc_cond, &bc->bc_mtx);
 	}
+	pthread_mutex_unlock(&bc->bc_mtx);
 
-	/* Not reached */
+	pthread_exit(NULL);
 	return (NULL);
 }
 
@@ -256,13 +293,14 @@ struct blockif_ctxt *
 blockif_open(const char *optstr, const char *ident)
 {
 	char tname[MAXCOMLEN + 1];
-	char *nopt, *xopts;
+	char *nopt, *xopts, *cp;
 	struct blockif_ctxt *bc;
 	int extra, i;
-	int nocache, sync, ro;
+	int nocache, sync, ro, candelete, ssopt, pssopt;
 
 	pthread_once(&blockif_once, blockif_init);
 
+	ssopt = 0;
 	nocache = 0;
 	sync = 0;
 	ro = 0;
@@ -271,16 +309,25 @@ blockif_open(const char *optstr, const c
 	 * The first element in the optstring is always a pathname.
 	 * Optional elements follow
 	 */
-	nopt = strdup(optstr);
-	for (xopts = strtok(nopt, ",");
-	     xopts != NULL;
-	     xopts = strtok(NULL, ",")) {
-		if (!strcmp(xopts, "nocache"))
+	nopt = xopts = strdup(optstr);
+	while (xopts != NULL) {
+		cp = strsep(&xopts, ",");
+		if (cp == nopt)		/* file or device pathname */
+			continue;
+		else if (!strcmp(cp, "nocache"))
 			nocache = 1;
-		else if (!strcmp(xopts, "sync"))
+		else if (!strcmp(cp, "sync") || !strcmp(cp, "direct"))
 			sync = 1;
-		else if (!strcmp(xopts, "ro"))
+		else if (!strcmp(cp, "ro"))
 			ro = 1;
+		else if (sscanf(cp, "sectorsize=%d/%d", &ssopt, &pssopt) == 2)
+			;
+		else if (sscanf(cp, "sectorsize=%d", &ssopt) == 1)
+			pssopt = ssopt;
+		else {
+			fprintf(stderr, "Invalid device option \"%s\"\n", cp);
+			return (NULL);
+		}
 	}
 
 	extra = 0;
@@ -302,22 +349,23 @@ blockif_open(const char *optstr, const c
 	}
 
 	bc->bc_magic = BLOCKIF_SIG;
+	bc->bc_candelete = candelete;
 	bc->bc_rdonly = ro;
 	pthread_mutex_init(&bc->bc_mtx, NULL);
 	pthread_cond_init(&bc->bc_cond, NULL);
 	TAILQ_INIT(&bc->bc_freeq);
 	TAILQ_INIT(&bc->bc_pendq);
 	TAILQ_INIT(&bc->bc_busyq);
-	bc->bc_req_count = 0;
 	for (i = 0; i < BLOCKIF_MAXREQ; i++) {
 		bc->bc_reqs[i].be_status = BST_FREE;
 		TAILQ_INSERT_HEAD(&bc->bc_freeq, &bc->bc_reqs[i], be_link);
 	}
 
-	pthread_create(&bc->bc_btid, NULL, blockif_thr, bc);
-
-	snprintf(tname, sizeof(tname), "blk-%s", ident);
-	pthread_set_name_np(bc->bc_btid, tname);
+	for (i = 0; i < BLOCKIF_NUMTHR; i++) {
+		pthread_create(&bc->bc_btid[i], NULL, blockif_thr, bc);
+		snprintf(tname, sizeof(tname), "blk-%s-%d", ident, i);
+		pthread_set_name_np(bc->bc_btid[i], tname);
+	}
 
 	return (bc);
 }
@@ -331,13 +379,13 @@ blockif_request(struct blockif_ctxt *bc,
 	err = 0;
 
 	pthread_mutex_lock(&bc->bc_mtx);
-	if (bc->bc_req_count < BLOCKIF_MAXREQ) {
+	if (!TAILQ_EMPTY(&bc->bc_freeq)) {
 		/*
 		 * Enqueue and inform the block i/o thread
 		 * that there is work available
 		 */
-		blockif_enqueue(bc, breq, op);
-		pthread_cond_signal(&bc->bc_cond);
+		if (blockif_enqueue(bc, breq, op))
+			pthread_cond_signal(&bc->bc_cond);
 	} else {
 		/*
 		 * Callers are not allowed to enqueue more than
@@ -377,6 +425,14 @@ blockif_flush(struct blockif_ctxt *bc, s
 }
 
 int
+blockif_delete(struct blockif_ctxt *bc, struct blockif_req *breq)
+{
+
+	assert(bc->bc_magic == BLOCKIF_SIG);
+	return (blockif_request(bc, breq, BOP_DELETE));
+}
+
+int
 blockif_cancel(struct blockif_ctxt *bc, struct blockif_req *breq)
 {
 	struct blockif_elem *be;
@@ -395,11 +451,7 @@ blockif_cancel(struct blockif_ctxt *bc, 
 		/*
 		 * Found it.
 		 */
-		TAILQ_REMOVE(&bc->bc_pendq, be, be_link);
-		be->be_status = BST_FREE;
-		be->be_req = NULL;
-		TAILQ_INSERT_TAIL(&bc->bc_freeq, be, be_link);
-		bc->bc_req_count--;
+		blockif_complete(bc, be);
 		pthread_mutex_unlock(&bc->bc_mtx);
 
 		return (0);
@@ -460,7 +512,7 @@ int
 blockif_close(struct blockif_ctxt *bc)
 {
 	void *jval;
-	int err;
+	int err, i;
 
 	err = 0;
 
@@ -469,9 +521,12 @@ blockif_close(struct blockif_ctxt *bc)
 	/*
 	 * Stop the block i/o thread
 	 */
+	pthread_mutex_lock(&bc->bc_mtx);
 	bc->bc_closing = 1;
-	pthread_cond_signal(&bc->bc_cond);
-	pthread_join(bc->bc_btid, &jval);
+	pthread_mutex_unlock(&bc->bc_mtx);
+	pthread_cond_broadcast(&bc->bc_cond);
+	for (i = 0; i < BLOCKIF_NUMTHR; i++)
+		pthread_join(bc->bc_btid[i], &jval);
 
 	/* XXX Cancel queued i/o's ??? */
 
@@ -552,6 +607,15 @@ blockif_sectsz(struct blockif_ctxt *bc)
 	return (vdsk_sectorsize(bc));
 }
 
+void
+blockif_psectsz(struct blockif_ctxt *bc, int *size, int *off)
+{
+
+	assert(bc->bc_magic == BLOCKIF_SIG);
+	*size = vdsk_sectorsize(bc);
+	*off = 0;
+}
+
 int
 blockif_queuesz(struct blockif_ctxt *bc)
 {
@@ -567,3 +631,11 @@ blockif_is_ro(struct blockif_ctxt *bc)
 	assert(bc->bc_magic == BLOCKIF_SIG);
 	return (bc->bc_rdonly);
 }

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***


More information about the svn-src-user mailing list