svn commit: r253440 - head/usr.sbin/bhyve

Peter Grehan grehan at FreeBSD.org
Wed Jul 17 23:37:34 UTC 2013


Author: grehan
Date: Wed Jul 17 23:37:33 2013
New Revision: 253440
URL: http://svnweb.freebsd.org/changeset/base/253440

Log:
  Major rework of the virtio code. Split out common parts, and modify
  the net/block devices accordingly.
  
  Submitted by:	Chris Torek   torek at torek dot net
  Reviewed by:	grehan

Added:
  head/usr.sbin/bhyve/virtio.c   (contents, props changed)
Modified:
  head/usr.sbin/bhyve/Makefile
  head/usr.sbin/bhyve/pci_virtio_block.c
  head/usr.sbin/bhyve/pci_virtio_net.c
  head/usr.sbin/bhyve/virtio.h

Modified: head/usr.sbin/bhyve/Makefile
==============================================================================
--- head/usr.sbin/bhyve/Makefile	Wed Jul 17 23:29:56 2013	(r253439)
+++ head/usr.sbin/bhyve/Makefile	Wed Jul 17 23:37:33 2013	(r253440)
@@ -10,7 +10,7 @@ SRCS=	acpi.c atpic.c bhyverun.c consport
 SRCS+=  ioapic.c mem.c mevent.c mptbl.c
 SRCS+=	pci_emul.c pci_hostbridge.c pci_passthru.c pci_virtio_block.c
 SRCS+=	pci_virtio_net.c pci_uart.c pit_8254.c pmtmr.c post.c rtc.c
-SRCS+=	xmsr.c spinup_ap.c
+SRCS+=	virtio.c xmsr.c spinup_ap.c
 
 .PATH:	${.CURDIR}/../../sys/amd64/vmm
 SRCS+=	vmm_instruction_emul.c

Modified: head/usr.sbin/bhyve/pci_virtio_block.c
==============================================================================
--- head/usr.sbin/bhyve/pci_virtio_block.c	Wed Jul 17 23:29:56 2013	(r253439)
+++ head/usr.sbin/bhyve/pci_virtio_block.c	Wed Jul 17 23:37:33 2013	(r253440)
@@ -53,14 +53,6 @@ __FBSDID("$FreeBSD$");
 
 #define VTBLK_RINGSZ	64
 
-#define VTBLK_CFGSZ	28
-
-#define VTBLK_R_CFG		VTCFG_R_CFG1 
-#define VTBLK_R_CFG_END		VTBLK_R_CFG + VTBLK_CFGSZ -1
-#define VTBLK_R_MAX		VTBLK_R_CFG_END
-
-#define VTBLK_REGSZ		VTBLK_R_MAX+1
-
 #define VTBLK_MAXSEGS	32
 
 #define VTBLK_S_OK	0
@@ -71,28 +63,10 @@ __FBSDID("$FreeBSD$");
  */
 #define VTBLK_S_HOSTCAPS      \
   ( 0x00000004 |	/* host maximum request segments */ \
-    0x10000000 )	/* supports indirect descriptors */
-
-static int use_msix = 1;
-
-struct vring_hqueue {
-	/* Internal state */
-	uint16_t	hq_size;
-	uint16_t	hq_cur_aidx;		/* trails behind 'avail_idx' */
-
-	 /* Host-context pointers to the queue */
-	struct virtio_desc *hq_dtable;
-	uint16_t	*hq_avail_flags;
-	uint16_t	*hq_avail_idx;		/* monotonically increasing */
-	uint16_t	*hq_avail_ring;
-
-	uint16_t	*hq_used_flags;
-	uint16_t	*hq_used_idx;		/* monotonically increasing */
-	struct virtio_used *hq_used_ring;
-};
+    VIRTIO_RING_F_INDIRECT_DESC )	/* indirect descriptors */
 
 /*
- * Config space
+ * Config space "registers"
  */
 struct vtblk_config {
 	uint64_t	vbc_capacity;
@@ -104,7 +78,6 @@ struct vtblk_config {
 	uint32_t	vbc_blk_size;
 	uint32_t	vbc_sectors_max;
 } __packed;
-CTASSERT(sizeof(struct vtblk_config) == VTBLK_CFGSZ);
 
 /*
  * Fixed-size block header
@@ -129,113 +102,69 @@ static int pci_vtblk_debug;
  * Per-device softc
  */
 struct pci_vtblk_softc {
-	struct pci_devinst *vbsc_pi;
+	struct virtio_softc vbsc_vs;
+	struct vqueue_info vbsc_vq;
 	int		vbsc_fd;
-	int		vbsc_status;
-	int		vbsc_isr;
-	int		vbsc_lastq;
-	uint32_t	vbsc_features;
-	uint64_t	vbsc_pfn;
-	struct vring_hqueue vbsc_q;
 	struct vtblk_config vbsc_cfg;	
-	uint16_t	msix_table_idx_req;
-	uint16_t	msix_table_idx_cfg;
 };
-#define	vtblk_ctx(sc)	((sc)->vbsc_pi->pi_vmctx)
-
-/* 
- * Return the size of IO BAR that maps virtio header and device specific
- * region. The size would vary depending on whether MSI-X is enabled or 
- * not
- */ 
-static uint64_t
-pci_vtblk_iosize(struct pci_devinst *pi)
-{
-
-	if (pci_msix_enabled(pi)) 
-		return (VTBLK_REGSZ);
-	else
-		return (VTBLK_REGSZ - (VTCFG_R_CFG1 - VTCFG_R_MSIX));
-}
 
-/*
- * Return the number of available descriptors in the vring taking care
- * of the 16-bit index wraparound.
- */
-static int
-hq_num_avail(struct vring_hqueue *hq)
-{
-	uint16_t ndesc;
-
-	/*
-	 * We're just computing (a-b) in GF(216).
-	 *
-	 * The only glitch here is that in standard C,
-	 * uint16_t promotes to (signed) int when int has
-	 * more than 16 bits (pretty much always now), so
-	 * we have to force it back to unsigned.
-	 */
-	ndesc = (unsigned)*hq->hq_avail_idx - (unsigned)hq->hq_cur_aidx;
-
-	assert(ndesc <= hq->hq_size);
-
-	return (ndesc);
-}
+static void pci_vtblk_reset(void *);
+static void pci_vtblk_notify(void *, struct vqueue_info *);
+static int pci_vtblk_cfgread(void *, int, int, uint32_t *);
+static int pci_vtblk_cfgwrite(void *, int, int, uint32_t);
+
+static struct virtio_consts vtblk_vi_consts = {
+	"vtblk",		/* our name */
+	1,			/* we support 1 virtqueue */
+	sizeof(struct vtblk_config), /* config reg size */
+	pci_vtblk_reset,	/* reset */
+	pci_vtblk_notify,	/* device-wide qnotify */
+	pci_vtblk_cfgread,	/* read PCI config */
+	pci_vtblk_cfgwrite,	/* write PCI config */
+	VTBLK_S_HOSTCAPS,	/* our capabilities */
+};
 
 static void
-pci_vtblk_update_status(struct pci_vtblk_softc *sc, uint32_t value)
+pci_vtblk_reset(void *vsc)
 {
-	if (value == 0) {
-		DPRINTF(("vtblk: device reset requested !\n"));
-		sc->vbsc_isr = 0;
-		sc->msix_table_idx_req = VIRTIO_MSI_NO_VECTOR;
-		sc->msix_table_idx_cfg = VIRTIO_MSI_NO_VECTOR;
-		sc->vbsc_features = 0;
-		sc->vbsc_pfn = 0;
-		sc->vbsc_lastq = 0;
-		memset(&sc->vbsc_q, 0, sizeof(struct vring_hqueue));
-	}
+	struct pci_vtblk_softc *sc = vsc;
 
-	sc->vbsc_status = value;
+	DPRINTF(("vtblk: device reset requested !\n"));
+	vi_reset_dev(&sc->vbsc_vs);
 }
 
 static void
-pci_vtblk_proc(struct pci_vtblk_softc *sc, struct vring_hqueue *hq)
+pci_vtblk_proc(struct pci_vtblk_softc *sc, struct vqueue_info *vq)
 {
-	struct iovec iov[VTBLK_MAXSEGS];
 	struct virtio_blk_hdr *vbh;
-	struct virtio_desc *vd, *vid;
-	struct virtio_used *vu;
 	uint8_t *status;
-	int i;
+	int i, n;
 	int err;
 	int iolen;
-	int uidx, aidx, didx;
-	int indirect, writeop, type;
+	int writeop, type;
 	off_t offset;
+	struct iovec iov[VTBLK_MAXSEGS + 2];
+	uint16_t flags[VTBLK_MAXSEGS + 2];
 
-	uidx = *hq->hq_used_idx;
-	aidx = hq->hq_cur_aidx;
-	didx = hq->hq_avail_ring[aidx % hq->hq_size];
-	assert(didx >= 0 && didx < hq->hq_size);
-
-	vd = &hq->hq_dtable[didx];
-
-	indirect = ((vd->vd_flags & VRING_DESC_F_INDIRECT) != 0);
-
-	if (indirect) {
-		vid = paddr_guest2host(vtblk_ctx(sc), vd->vd_addr, vd->vd_len);
-		vd = &vid[0];
-	}
+	n = vq_getchain(vq, iov, VTBLK_MAXSEGS + 2, flags);
 
 	/*
-	 * The first descriptor will be the read-only fixed header
+	 * The first descriptor will be the read-only fixed header,
+	 * and the last is for status (hence +2 above and below).
+	 * The remaining iov's are the actual data I/O vectors.
+	 *
+	 * XXX - note - this fails on crash dump, which does a
+	 * VIRTIO_BLK_T_FLUSH with a zero transfer length
 	 */
-	vbh = paddr_guest2host(vtblk_ctx(sc), vd->vd_addr,
-			    sizeof(struct virtio_blk_hdr));
-	assert(vd->vd_len == sizeof(struct virtio_blk_hdr));
-	assert(vd->vd_flags & VRING_DESC_F_NEXT);
-	assert((vd->vd_flags & VRING_DESC_F_WRITE) == 0);
+	assert (n >= 3 && n < VTBLK_MAXSEGS + 2);
+
+	assert((flags[0] & VRING_DESC_F_WRITE) == 0);
+	assert(iov[0].iov_len == sizeof(struct virtio_blk_hdr));
+	vbh = iov[0].iov_base;
+
+	status = iov[--n].iov_base;
+	assert(iov[n].iov_len == 1);
+	assert(flags[n] & VRING_DESC_F_WRITE);
 
 	/*
 	 * XXX
@@ -247,120 +176,44 @@ pci_vtblk_proc(struct pci_vtblk_softc *s
 
 	offset = vbh->vbh_sector * DEV_BSIZE;
 
-	/*
-	 * Build up the iovec based on the guest's data descriptors
-	 */
-	i = iolen = 0;
-	while (1) {
-		if (indirect)
-			vd = &vid[i + 1];	/* skip first indirect desc */
-		else
-			vd = &hq->hq_dtable[vd->vd_next];
-
-		if ((vd->vd_flags & VRING_DESC_F_NEXT) == 0)
-			break;
-
-		if (i == VTBLK_MAXSEGS)
-			break;
-
+	iolen = 0;
+	for (i = 1; i < n; i++) {
 		/*
 		 * - write op implies read-only descriptor,
 		 * - read op implies write-only descriptor,
 		 * therefore test the inverse of the descriptor bit
 		 * to the op.
 		 */
-		assert(((vd->vd_flags & VRING_DESC_F_WRITE) == 0) ==
-		       writeop);
-
-		iov[i].iov_base = paddr_guest2host(vtblk_ctx(sc),
-						   vd->vd_addr,
-						   vd->vd_len);
-		iov[i].iov_len = vd->vd_len;
-		iolen += vd->vd_len;
-		i++;
+		assert(((flags[i] & VRING_DESC_F_WRITE) == 0) == writeop);
+		iolen += iov[i].iov_len;
 	}
 
-	/* Lastly, get the address of the status byte */
-	status = paddr_guest2host(vtblk_ctx(sc), vd->vd_addr, 1);
-	assert(vd->vd_len == 1);
-	assert((vd->vd_flags & VRING_DESC_F_NEXT) == 0);
-	assert(vd->vd_flags & VRING_DESC_F_WRITE);
-
 	DPRINTF(("virtio-block: %s op, %d bytes, %d segs, offset %ld\n\r", 
-		 writeop ? "write" : "read", iolen, i, offset));
+		 writeop ? "write" : "read", iolen, i - 1, offset));
 
 	if (writeop)
-		err = pwritev(sc->vbsc_fd, iov, i, offset);
+		err = pwritev(sc->vbsc_fd, iov + 1, i - 1, offset);
 	else
-		err = preadv(sc->vbsc_fd, iov, i, offset);
+		err = preadv(sc->vbsc_fd, iov + 1, i - 1, offset);
 
 	*status = err < 0 ? VTBLK_S_IOERR : VTBLK_S_OK;
 
 	/*
-	 * Return the single descriptor back to the host
+	 * Return the descriptor back to the host.
+	 * We wrote 1 byte (our status) to host.
 	 */
-	vu = &hq->hq_used_ring[uidx % hq->hq_size];
-	vu->vu_idx = didx;
-	vu->vu_tlen = 1;
-	hq->hq_cur_aidx++;
-	*hq->hq_used_idx += 1;
-
-	/*
-	 * Generate an interrupt if able
-	 */
-	if ((*hq->hq_avail_flags & VRING_AVAIL_F_NO_INTERRUPT) == 0) { 
-		if (use_msix) {
-			pci_generate_msix(sc->vbsc_pi, sc->msix_table_idx_req);	
-		} else if (sc->vbsc_isr == 0) {
-			sc->vbsc_isr = 1;
-			pci_generate_msi(sc->vbsc_pi, 0);
-		}
-	}
+	vq_relchain(vq, 1);
 }
 
 static void
-pci_vtblk_qnotify(struct pci_vtblk_softc *sc)
+pci_vtblk_notify(void *vsc, struct vqueue_info *vq)
 {
-	struct vring_hqueue *hq = &sc->vbsc_q;
-	int ndescs;
+	struct pci_vtblk_softc *sc = vsc;
 
-	while ((ndescs = hq_num_avail(hq)) != 0) {
-		/*
-		 * Run through all the entries, placing them into iovecs and
-		 * sending when an end-of-packet is found
-		 */
- 		pci_vtblk_proc(sc, hq);
- 	}
-}
-
-static void
-pci_vtblk_ring_init(struct pci_vtblk_softc *sc, uint64_t pfn)
-{
-	struct vring_hqueue *hq;
-
-	sc->vbsc_pfn = pfn << VRING_PFN;
-	
-	/*
-	 * Set up host pointers to the various parts of the
-	 * queue
-	 */
-	hq = &sc->vbsc_q;
-	hq->hq_size = VTBLK_RINGSZ;
-
-	hq->hq_dtable = paddr_guest2host(vtblk_ctx(sc), pfn << VRING_PFN,
-					 vring_size(VTBLK_RINGSZ));
-	hq->hq_avail_flags =  (uint16_t *)(hq->hq_dtable + hq->hq_size);
-	hq->hq_avail_idx = hq->hq_avail_flags + 1;
-	hq->hq_avail_ring = hq->hq_avail_flags + 2;
-	hq->hq_used_flags = (uint16_t *)roundup2((uintptr_t)hq->hq_avail_ring,
-						 VRING_ALIGN);
-	hq->hq_used_idx = hq->hq_used_flags + 1;
-	hq->hq_used_ring = (struct virtio_used *)(hq->hq_used_flags + 2);
-
-	/*
-	 * Initialize queue indexes
-	 */
-	hq->hq_cur_aidx = 0;
+	vq_startchains(vq);
+	while (vq_has_descs(vq))
+		pci_vtblk_proc(sc, vq);
+	vq_endchains(vq, 1);	/* Generate interrupt if appropriate. */
 }
 
 static int
@@ -371,6 +224,7 @@ pci_vtblk_init(struct vmctx *ctx, struct
 	off_t size;	
 	int fd;
 	int sectsz;
+	int use_msix;
 	const char *env_msi;
 
 	if (opts == NULL) {
@@ -412,10 +266,14 @@ pci_vtblk_init(struct vmctx *ctx, struct
 	sc = malloc(sizeof(struct pci_vtblk_softc));
 	memset(sc, 0, sizeof(struct pci_vtblk_softc));
 
-	pi->pi_arg = sc;
-	sc->vbsc_pi = pi;
+	/* record fd of storage device/file */
 	sc->vbsc_fd = fd;
 
+	/* init virtio softc and virtqueues */
+	vi_softc_linkup(&sc->vbsc_vs, &vtblk_vi_consts, sc, pi, &sc->vbsc_vq);
+	sc->vbsc_vq.vq_qsize = VTBLK_RINGSZ;
+	/* sc->vbsc_vq.vq_notify = we have no per-queue notify */
+
 	/* setup virtio block config space */
 	sc->vbsc_cfg.vbc_capacity = size / sectsz;
 	sc->vbsc_cfg.vbc_seg_max = VTBLK_MAXSEGS;
@@ -426,206 +284,51 @@ pci_vtblk_init(struct vmctx *ctx, struct
 	sc->vbsc_cfg.vbc_geom_s = 0;
 	sc->vbsc_cfg.vbc_sectors_max = 0;
 
-	/* initialize config space */
+	/*
+	 * Should we move some of this into virtio.c?  Could
+	 * have the device, class, and subdev_0 as fields in
+	 * the virtio constants structure.
+	 */
 	pci_set_cfgdata16(pi, PCIR_DEVICE, VIRTIO_DEV_BLOCK);
 	pci_set_cfgdata16(pi, PCIR_VENDOR, VIRTIO_VENDOR);
 	pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_STORAGE);
 	pci_set_cfgdata16(pi, PCIR_SUBDEV_0, VIRTIO_TYPE_BLOCK);
 
+	use_msix = 1;
 	if ((env_msi = getenv("BHYVE_USE_MSI"))) {
 		if (strcasecmp(env_msi, "yes") == 0)
 			use_msix = 0;
 	} 
-
-	if (use_msix) {
-		/* MSI-X Support */
-		sc->msix_table_idx_req = VIRTIO_MSI_NO_VECTOR;	
-		sc->msix_table_idx_cfg = VIRTIO_MSI_NO_VECTOR;	
-		
-		if (pci_emul_add_msixcap(pi, 2, 1))
-			return (1);
-	} else {
-		/* MSI Support */	
-		pci_emul_add_msicap(pi, 1);
-	}	
-	
-	pci_emul_alloc_bar(pi, 0, PCIBAR_IO, VTBLK_REGSZ);
-
+	if (vi_intr_init(&sc->vbsc_vs, 1, use_msix))
+		return (1);
+	vi_set_io_bar(&sc->vbsc_vs, 0);
 	return (0);
 }
 
-static uint64_t
-vtblk_adjust_offset(struct pci_devinst *pi, uint64_t offset)
-{
-	/*
-	 * Device specific offsets used by guest would change 
-	 * based on whether MSI-X capability is enabled or not
-	 */ 
-	if (!pci_msix_enabled(pi)) {
-		if (offset >= VTCFG_R_MSIX) 
-			return (offset + (VTCFG_R_CFG1 - VTCFG_R_MSIX));
-	}
-
-	return (offset);
-}
-
-static void
-pci_vtblk_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
-		int baridx, uint64_t offset, int size, uint64_t value)
+static int
+pci_vtblk_cfgwrite(void *vsc, int offset, int size, uint32_t value)
 {
-	struct pci_vtblk_softc *sc = pi->pi_arg;
-
-	if (use_msix) {
-		if (baridx == pci_msix_table_bar(pi) ||
-		    baridx == pci_msix_pba_bar(pi)) {
-			pci_emul_msix_twrite(pi, offset, size, value);
-			return;
-		}
-	}
-	
-	assert(baridx == 0);
 
-	if (offset + size > pci_vtblk_iosize(pi)) {
-		DPRINTF(("vtblk_write: 2big, offset %ld size %d\n",
-			 offset, size));
-		return;
-	}
-
-	offset = vtblk_adjust_offset(pi, offset);
-	
-	switch (offset) {
-	case VTCFG_R_GUESTCAP:
-		assert(size == 4);
-		sc->vbsc_features = value & VTBLK_S_HOSTCAPS;
-		break;
-	case VTCFG_R_PFN:
-		assert(size == 4);
-		pci_vtblk_ring_init(sc, value);
-		break;
-	case VTCFG_R_QSEL:
-		assert(size == 2);
-		sc->vbsc_lastq = value;
-		break;
-	case VTCFG_R_QNOTIFY:
-		assert(size == 2);
-		assert(value == 0);
-		pci_vtblk_qnotify(sc);
-		break;
-	case VTCFG_R_STATUS:
-		assert(size == 1);
-		pci_vtblk_update_status(sc, value);
-		break;
-	case VTCFG_R_CFGVEC:
-		assert(size == 2);
-		sc->msix_table_idx_cfg = value;	
-		break;	
-	case VTCFG_R_QVEC:
-		assert(size == 2);
-		sc->msix_table_idx_req = value;
-		break;	
-	case VTCFG_R_HOSTCAP:
-	case VTCFG_R_QNUM:
-	case VTCFG_R_ISR:
-	case VTBLK_R_CFG ... VTBLK_R_CFG_END:
-		DPRINTF(("vtblk: write to readonly reg %ld\n\r", offset));
-		break;
-	default:
-		DPRINTF(("vtblk: unknown i/o write offset %ld\n\r", offset));
-		value = 0;
-		break;
-	}
+	DPRINTF(("vtblk: write to readonly reg %d\n\r", offset));
+	return (1);
 }
 
-uint64_t
-pci_vtblk_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
-	       int baridx, uint64_t offset, int size)
+static int
+pci_vtblk_cfgread(void *vsc, int offset, int size, uint32_t *retval)
 {
-	struct pci_vtblk_softc *sc = pi->pi_arg;
+	struct pci_vtblk_softc *sc = vsc;
 	void *ptr;
-	uint32_t value;
 
-	if (use_msix) {
-		if (baridx == pci_msix_table_bar(pi) ||
-		    baridx == pci_msix_pba_bar(pi)) {
-			return (pci_emul_msix_tread(pi, offset, size));
-		}
-	}
-
-	assert(baridx == 0);
-
-	if (offset + size > pci_vtblk_iosize(pi)) {
-		DPRINTF(("vtblk_read: 2big, offset %ld size %d\n",
-			 offset, size));
-		return (0);
-	}
-
-	offset = vtblk_adjust_offset(pi, offset);
-
-	switch (offset) {
-	case VTCFG_R_HOSTCAP:
-		assert(size == 4);
-		value = VTBLK_S_HOSTCAPS;
-		break;
-	case VTCFG_R_GUESTCAP:
-		assert(size == 4);
-		value = sc->vbsc_features; /* XXX never read ? */
-		break;
-	case VTCFG_R_PFN:
-		assert(size == 4);
-		value = sc->vbsc_pfn >> VRING_PFN;
-		break;
-	case VTCFG_R_QNUM:
-		value = (sc->vbsc_lastq == 0) ? VTBLK_RINGSZ: 0;
-		break;
-	case VTCFG_R_QSEL:
-		assert(size == 2);
-		value = sc->vbsc_lastq; /* XXX never read ? */
-		break;
-	case VTCFG_R_QNOTIFY:
-		assert(size == 2);
-		value = 0; /* XXX never read ? */
-		break;
-	case VTCFG_R_STATUS:
-		assert(size == 1);
-		value = sc->vbsc_status;
-		break;
-	case VTCFG_R_ISR:
-		assert(size == 1);
-		value = sc->vbsc_isr;
-		sc->vbsc_isr = 0;     /* a read clears this flag */
-		break;
-	case VTCFG_R_CFGVEC:
-		assert(size == 2);
-		value = sc->msix_table_idx_cfg;
-		break;
-	case VTCFG_R_QVEC:
-		assert(size == 2);
-		value = sc->msix_table_idx_req;
-		break;	
-	case VTBLK_R_CFG ... VTBLK_R_CFG_END:
-		assert(size + offset <= (VTBLK_R_CFG_END + 1));
-		ptr = (uint8_t *)&sc->vbsc_cfg + offset - VTBLK_R_CFG;
-		if (size == 1) {
-			value = *(uint8_t *) ptr;
-		} else if (size == 2) {
-			value = *(uint16_t *) ptr;
-		} else {
-			value = *(uint32_t *) ptr;
-		}
-		break;
-	default:
-		DPRINTF(("vtblk: unknown i/o read offset %ld\n\r", offset));
-		value = 0;
-		break;
-	}
-
-	return (value);
+	/* our caller has already verified offset and size */
+	ptr = (uint8_t *)&sc->vbsc_cfg + offset;
+	memcpy(retval, ptr, size);
+	return (0);
 }
 
 struct pci_devemu pci_de_vblk = {
 	.pe_emu =	"virtio-blk",
 	.pe_init =	pci_vtblk_init,
-	.pe_barwrite =	pci_vtblk_write,
-	.pe_barread =	pci_vtblk_read
+	.pe_barwrite =	vi_pci_write,
+	.pe_barread =	vi_pci_read
 };
 PCI_EMUL_SET(pci_de_vblk);

Modified: head/usr.sbin/bhyve/pci_virtio_net.c
==============================================================================
--- head/usr.sbin/bhyve/pci_virtio_net.c	Wed Jul 17 23:29:56 2013	(r253439)
+++ head/usr.sbin/bhyve/pci_virtio_net.c	Wed Jul 17 23:37:33 2013	(r253440)
@@ -59,56 +59,49 @@ __FBSDID("$FreeBSD$");
 #define VTNET_MAXSEGS	32
 
 /*
- * PCI config-space register offsets
+ * Host capabilities.  Note that we only offer a few of these.
  */
-#define VTNET_R_CFG0	24
-#define VTNET_R_CFG1	25
-#define VTNET_R_CFG2	26
-#define VTNET_R_CFG3	27
-#define VTNET_R_CFG4	28
-#define VTNET_R_CFG5	29
-#define VTNET_R_CFG6	30
-#define VTNET_R_CFG7	31
-#define VTNET_R_MAX	31
+#define	VIRTIO_NET_F_CSUM	(1 <<  0) /* host handles partial cksum */
+#define	VIRTIO_NET_F_GUEST_CSUM	(1 <<  1) /* guest handles partial cksum */
+#define	VIRTIO_NET_F_MAC	(1 <<  5) /* host supplies MAC */
+#define	VIRTIO_NET_F_GSO_DEPREC	(1 <<  6) /* deprecated: host handles GSO */
+#define	VIRTIO_NET_F_GUEST_TSO4	(1 <<  7) /* guest can rcv TSOv4 */
+#define	VIRTIO_NET_F_GUEST_TSO6	(1 <<  8) /* guest can rcv TSOv6 */
+#define	VIRTIO_NET_F_GUEST_ECN	(1 <<  9) /* guest can rcv TSO with ECN */
+#define	VIRTIO_NET_F_GUEST_UFO	(1 << 10) /* guest can rcv UFO */
+#define	VIRTIO_NET_F_HOST_TSO4	(1 << 11) /* host can rcv TSOv4 */
+#define	VIRTIO_NET_F_HOST_TSO6	(1 << 12) /* host can rcv TSOv6 */
+#define	VIRTIO_NET_F_HOST_ECN	(1 << 13) /* host can rcv TSO with ECN */
+#define	VIRTIO_NET_F_HOST_UFO	(1 << 14) /* host can rcv UFO */
+#define	VIRTIO_NET_F_MRG_RXBUF	(1 << 15) /* host can merge RX buffers */
+#define	VIRTIO_NET_F_STATUS	(1 << 16) /* config status field available */
+#define	VIRTIO_NET_F_CTRL_VQ	(1 << 17) /* control channel available */
+#define	VIRTIO_NET_F_CTRL_RX	(1 << 18) /* control channel RX mode support */
+#define	VIRTIO_NET_F_CTRL_VLAN	(1 << 19) /* control channel VLAN filtering */
+#define	VIRTIO_NET_F_GUEST_ANNOUNCE \
+				(1 << 21) /* guest can send gratuitous pkts */
 
-#define VTNET_REGSZ	VTNET_R_MAX+1
+#define VTNET_S_HOSTCAPS      \
+  ( VIRTIO_NET_F_MAC | VIRTIO_NET_F_MRG_RXBUF | VIRTIO_NET_F_STATUS | \
+    VIRTIO_F_NOTIFY_ON_EMPTY)
 
 /*
- * Host capabilities
+ * PCI config-space "registers"
  */
-#define VTNET_S_HOSTCAPS      \
-  ( 0x00000020 |	/* host supplies MAC */ \
-    0x00008000 |	/* host can merge Rx buffers */ \
-    0x00010000 |	/* config status available */ \
-    VIRTIO_F_NOTIFY_ON_EMPTY)
+struct virtio_net_config {
+	uint8_t  mac[6];
+	uint16_t status;
+} __packed;
 
 /*
  * Queue definitions.
  */
 #define VTNET_RXQ	0
 #define VTNET_TXQ	1
-#define VTNET_CTLQ	2
+#define VTNET_CTLQ	2	/* NB: not yet supported */
 
 #define VTNET_MAXQ	3
 
-static int use_msix = 1;
-
-struct vring_hqueue {
-	/* Internal state */
-	uint16_t	hq_size;
-	uint16_t	hq_cur_aidx;		/* trails behind 'avail_idx' */
-
-	 /* Host-context pointers to the queue */
-	struct virtio_desc *hq_dtable;
-	uint16_t	*hq_avail_flags;
-	uint16_t	*hq_avail_idx;		/* monotonically increasing */
-	uint16_t	*hq_avail_ring;
-
-	uint16_t	*hq_used_flags;
-	uint16_t	*hq_used_idx;		/* monotonically increasing */
-	struct virtio_used *hq_used_ring;
-};
-
 /*
  * Fixed network header size
  */
@@ -133,23 +126,17 @@ static int pci_vtnet_debug;
  * Per-device softc
  */
 struct pci_vtnet_softc {
-	struct pci_devinst *vsc_pi;
+	struct virtio_softc vsc_vs;
+	struct vqueue_info vsc_queues[VTNET_MAXQ - 1];
 	pthread_mutex_t vsc_mtx;
 	struct mevent	*vsc_mevp;
 
-	int		vsc_curq;
-	int		vsc_status;
-	int		vsc_isr;
 	int		vsc_tapfd;
 	int		vsc_rx_ready;
-	int		resetting;
+	volatile int	resetting;	/* set and checked outside lock */
 
 	uint32_t	vsc_features;
-	uint8_t		vsc_macaddr[6];
-
-	uint64_t	vsc_pfn[VTNET_MAXQ];
-	struct	vring_hqueue vsc_hq[VTNET_MAXQ];
-	uint16_t	vsc_msix_table_idx[VTNET_MAXQ];
+	struct virtio_net_config vsc_config;
 
 	pthread_mutex_t	rx_mtx;
 	int		rx_in_progress;
@@ -159,73 +146,22 @@ struct pci_vtnet_softc {
 	pthread_cond_t	tx_cond;
 	int		tx_in_progress;
 };
-#define	vtnet_ctx(sc)		((sc)->vsc_pi->pi_vmctx)
-#define	notify_on_empty(sc)	((sc)->vsc_features & VIRTIO_F_NOTIFY_ON_EMPTY)
-
-/*
- * Return the size of IO BAR that maps virtio header and device specific
- * region. The size would vary depending on whether MSI-X is enabled or
- * not.
- */
-static uint64_t
-pci_vtnet_iosize(struct pci_devinst *pi)
-{
-	if (pci_msix_enabled(pi))
-		return (VTNET_REGSZ);
-	else
-		return (VTNET_REGSZ - (VTCFG_R_CFG1 - VTCFG_R_MSIX));
-}
-
-/*
- * Return the number of available descriptors in the vring taking care
- * of the 16-bit index wraparound.
- */
-static int
-hq_num_avail(struct vring_hqueue *hq)
-{
-	uint16_t ndesc;
 
-	/*
-	 * We're just computing (a-b) mod 2^16
-	 *
-	 * The only glitch here is that in standard C,
-	 * uint16_t promotes to (signed) int when int has
-	 * more than 16 bits (pretty much always now), so
-	 * we have to force it back to unsigned.
-	 */
-	ndesc = (unsigned)*hq->hq_avail_idx - (unsigned)hq->hq_cur_aidx;
-
-	assert(ndesc <= hq->hq_size);
-
-	return (ndesc);
-}
-
-static uint16_t
-pci_vtnet_qsize(int qnum)
-{
-	/* XXX no ctl queue currently */
-	if (qnum == VTNET_CTLQ) {
-		return (0);
-	}
-
-	/* XXX fixed currently. Maybe different for tx/rx/ctl */
-	return (VTNET_RINGSZ);
-}
-
-static void
-pci_vtnet_ring_reset(struct pci_vtnet_softc *sc, int ring)
-{
-	struct vring_hqueue *hq;
-
-	assert(ring < VTNET_MAXQ);
-
-	hq = &sc->vsc_hq[ring];
-
-	/*
-	 * Reset all soft state
-	 */
-	hq->hq_cur_aidx = 0;
-}
+static void pci_vtnet_reset(void *);
+/* static void pci_vtnet_notify(void *, struct vqueue_info *); */
+static int pci_vtnet_cfgread(void *, int, int, uint32_t *);
+static int pci_vtnet_cfgwrite(void *, int, int, uint32_t);
+
+static struct virtio_consts vtnet_vi_consts = {
+	"vtnet",		/* our name */
+	VTNET_MAXQ - 1,		/* we currently support 2 virtqueues */
+	sizeof(struct virtio_net_config), /* config reg size */
+	pci_vtnet_reset,	/* reset */
+	NULL,			/* device-wide qnotify -- not used */
+	pci_vtnet_cfgread,	/* read PCI config */
+	pci_vtnet_cfgwrite,	/* write PCI config */
+	VTNET_S_HOSTCAPS,	/* our capabilities */
+};
 
 /*
  * If the transmit thread is active then stall until it is done.
@@ -260,48 +196,27 @@ pci_vtnet_rxwait(struct pci_vtnet_softc 
 }
 
 static void
-pci_vtnet_update_status(struct pci_vtnet_softc *sc, uint32_t value)
+pci_vtnet_reset(void *vsc)
 {
-	int i;
-
-	if (value == 0) {
-		DPRINTF(("vtnet: device reset requested !\n"));
-		
-		sc->resetting = 1;
-
-		/*
-		 * Wait for the transmit and receive threads to finish their
-		 * processing.
-		 */
-		pci_vtnet_txwait(sc);
-		pci_vtnet_rxwait(sc);
+	struct pci_vtnet_softc *sc = vsc;
 
-		sc->vsc_rx_ready = 0;
-		pci_vtnet_ring_reset(sc, VTNET_RXQ);
-		pci_vtnet_ring_reset(sc, VTNET_TXQ);
+	DPRINTF(("vtnet: device reset requested !\n"));
 
-		for (i = 0; i < VTNET_MAXQ; i++)
-			sc->vsc_msix_table_idx[i] = VIRTIO_MSI_NO_VECTOR;
+	sc->resetting = 1;
 
-		sc->vsc_isr = 0;
-		sc->vsc_features = 0;
+	/*
+	 * Wait for the transmit and receive threads to finish their
+	 * processing.
+	 */
+	pci_vtnet_txwait(sc);
+	pci_vtnet_rxwait(sc);
 
-		sc->resetting = 0;
-	}
+	sc->vsc_rx_ready = 0;
 
-	sc->vsc_status = value;
-}
+	/* now reset rings, MSI-X vectors, and negotiated capabilities */
+	vi_reset_dev(&sc->vsc_vs);
 
-static void
-vtnet_generate_interrupt(struct pci_vtnet_softc *sc, int qidx)
-{
-
-	if (use_msix) {
-		pci_generate_msix(sc->vsc_pi, sc->vsc_msix_table_idx[qidx]);
-	} else {
-		sc->vsc_isr |= 1;
-		pci_generate_msi(sc->vsc_pi, 0);
-	}
+	sc->resetting = 0;
 }
 
 /*
@@ -311,7 +226,7 @@ static void
 pci_vtnet_tap_tx(struct pci_vtnet_softc *sc, struct iovec *iov, int iovcnt,
 		 int len)
 {
-	char pad[60];
+	static char pad[60]; /* all zero bytes */
 
 	if (sc->vsc_tapfd == -1)
 		return;
@@ -322,7 +237,6 @@ pci_vtnet_tap_tx(struct pci_vtnet_softc 
 	 * there is always an extra iov available by the caller.
 	 */
 	if (len < 60) {
-		memset(pad, 0, 60 - len);
 		iov[iovcnt].iov_base = pad;
 		iov[iovcnt].iov_len = 60 - len;
 		iovcnt++;
@@ -342,15 +256,11 @@ static uint8_t dummybuf[2048];
 static void
 pci_vtnet_tap_rx(struct pci_vtnet_softc *sc)
 {
-	struct virtio_desc *vd;
-	struct virtio_used *vu;
-	struct vring_hqueue *hq;
+	struct vqueue_info *vq;
 	struct virtio_net_rxhdr *vrx;
 	uint8_t *buf;
-	int i;
 	int len;
-	int ndescs;
-	int didx, uidx, aidx;	/* descriptor, avail and used index */
+	struct iovec iov;
 
 	/*
 	 * Should never be called without a valid tap fd
@@ -370,47 +280,45 @@ pci_vtnet_tap_rx(struct pci_vtnet_softc 
 	}
 
 	/*
-	 * Calculate the number of available rx buffers
+	 * Check for available rx buffers
 	 */
-	hq = &sc->vsc_hq[VTNET_RXQ];
-
-	ndescs = hq_num_avail(hq);
-
-	if (ndescs == 0) {
+	vq = &sc->vsc_queues[VTNET_RXQ];
+	vq_startchains(vq);
+	if (!vq_has_descs(vq)) {
 		/*
-		 * Drop the packet and try later
+		 * Drop the packet and try later.  Interrupt on
+		 * empty, if that's negotiated.
 		 */
 		(void) read(sc->vsc_tapfd, dummybuf, sizeof(dummybuf));
-
-		if (notify_on_empty(sc))
-			vtnet_generate_interrupt(sc, VTNET_RXQ);
-
+		vq_endchains(vq, 1);
 		return;
 	}
 
-	aidx = hq->hq_cur_aidx;
-	uidx = *hq->hq_used_idx;
-	for (i = 0; i < ndescs; i++) {
+	do {
 		/*
-		 * 'aidx' indexes into the an array of descriptor indexes
+		 * Get descriptor chain, which should have just
+		 * one descriptor in it.
+		 * ??? allow guests to use multiple descs?
 		 */
-		didx = hq->hq_avail_ring[aidx % hq->hq_size];
-		assert(didx >= 0 && didx < hq->hq_size);
-
-		vd = &hq->hq_dtable[didx];
+		assert(vq_getchain(vq, &iov, 1, NULL) == 1);
 
 		/*
 		 * Get a pointer to the rx header, and use the
 		 * data immediately following it for the packet buffer.
 		 */
-		vrx = paddr_guest2host(vtnet_ctx(sc), vd->vd_addr, vd->vd_len);
+		vrx = iov.iov_base;
 		buf = (uint8_t *)(vrx + 1);
 
 		len = read(sc->vsc_tapfd, buf,
-			   vd->vd_len - sizeof(struct virtio_net_rxhdr));
+			   iov.iov_len - sizeof(struct virtio_net_rxhdr));
 
 		if (len < 0 && errno == EWOULDBLOCK) {
-			break;
+			/*
+			 * No more packets, but still some avail ring
+			 * entries.  Interrupt if needed/appropriate.
+			 */
+			vq_endchains(vq, 0);
+			return;
 		}
 
 		/*
@@ -422,23 +330,13 @@ pci_vtnet_tap_rx(struct pci_vtnet_softc 
 		vrx->vrh_bufs = 1;
 
 		/*
-		 * Write this descriptor into the used ring
+		 * Release this chain and handle more chains.
 		 */
-		vu = &hq->hq_used_ring[uidx % hq->hq_size];
-		vu->vu_idx = didx;
-		vu->vu_tlen = len + sizeof(struct virtio_net_rxhdr);
-		uidx++;
-		aidx++;
-	}
+		vq_relchain(vq, len + sizeof(struct virtio_net_rxhdr));
+	} while (vq_has_descs(vq));

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***


More information about the svn-src-head mailing list