git: 2a514d377b37 - main - bhyve/virtio-scsi: Preallocate all I/O requests

Reply: Dag-Erling_Smørgrav : "Re: git: 2a514d377b37 - main - bhyve/virtio-scsi: Preallocate all I/O requests"
Go to: [ bottom of page ] [ top of archives ] [ this month ]
From: Ed Maste <emaste_at_FreeBSD.org>
Date: Wed, 04 Mar 2026 17:30:20 UTC
The branch main has been updated by emaste:

URL: https://cgit.FreeBSD.org/src/commit/?id=2a514d377b37a30356c5c27a07b4fb0fd6df4881

commit 2a514d377b37a30356c5c27a07b4fb0fd6df4881
Author:     Hans Rosenfeld <rosenfeld@grumpf.hope-2000.org>
AuthorDate: 2025-10-15 21:07:37 +0000
Commit:     Ed Maste <emaste@FreeBSD.org>
CommitDate: 2026-03-04 17:30:02 +0000

    bhyve/virtio-scsi: Preallocate all I/O requests
    
    By preallocating all I/O requests on all queues, we can take most
    allocations out of the hot I/O code paths and simplify the code
    significantly. While here, make sure we check all allocations for
    success and make sure to handle failures gracefully.
    
    Additionally, check for I/O request validity as early as possible,
    and return illegal requests immediately.
    
    Reviewed by:    markj
    Differential Revision: https://reviews.freebsd.org/D53469
---
 usr.sbin/bhyve/pci_virtio_scsi.c | 476 +++++++++++++++++++++++++++++----------
 1 file changed, 359 insertions(+), 117 deletions(-)

diff --git a/usr.sbin/bhyve/pci_virtio_scsi.c b/usr.sbin/bhyve/pci_virtio_scsi.c
index 5eaf55496467..758e2643f6a0 100644
--- a/usr.sbin/bhyve/pci_virtio_scsi.c
+++ b/usr.sbin/bhyve/pci_virtio_scsi.c
@@ -102,13 +102,46 @@ struct pci_vtscsi_config {
 	uint32_t max_lun;
 } __attribute__((packed));
 
+/*
+ * I/O request state and I/O request queues
+ *
+ * In addition to the control queue and notification queues, each virtio-scsi
+ * device instance has at least one I/O request queue, the state of which is
+ * is kept in an array of struct pci_vtscsi_queue in the device softc.
+ *
+ * Currently there is only one I/O request queue, but it's trivial to support
+ * more than one.
+ *
+ * Each pci_vtscsi_queue has VTSCSI_RINGSZ pci_vtscsi_request structures pre-
+ * allocated on vsq_free_requests. For each I/O request coming in on the I/O
+ * virtqueue, the request queue handler will take a pci_vtscsi_request off
+ * vsq_free_requests, fills in the data from the I/O virtqueue, puts it on
+ * vsq_requests, and signals vsq_cv.
+ *
+ * There are VTSCSI_THR_PER_Q worker threads for each pci_vtscsi_queue which
+ * wait on vsq_cv. When signalled, they repeatedly take one pci_vtscsi_request
+ * off vsq_requests, construct a ctl_io for it, and hand it off to the CTL ioctl
+ * Interface, which processes it synchronously. After completion of the request,
+ * the pci_vtscsi_request is re-initialized and put back onto vsq_free_requests.
+ *
+ * The worker threads exit when vsq_cv is signalled after vsw_exiting was set.
+ *
+ * There are three mutexes to coordinate the accesses to an I/O request queue:
+ * - vsq_rmtx protects vsq_requests and must be held when waiting on vsq_cv
+ * - vsq_fmtx protects vsq_free_requests
+ * - vsq_qmtx must be held when operating on the underlying virtqueue, vsq_vq
+ */
+STAILQ_HEAD(pci_vtscsi_req_queue, pci_vtscsi_request);
+
 struct pci_vtscsi_queue {
 	struct pci_vtscsi_softc *         vsq_sc;
 	struct vqueue_info *              vsq_vq;
-	pthread_mutex_t                   vsq_mtx;
+	pthread_mutex_t                   vsq_rmtx;
+	pthread_mutex_t                   vsq_fmtx;
 	pthread_mutex_t                   vsq_qmtx;
 	pthread_cond_t                    vsq_cv;
-	STAILQ_HEAD(, pci_vtscsi_request) vsq_requests;
+	struct pci_vtscsi_req_queue       vsq_requests;
+	struct pci_vtscsi_req_queue       vsq_free_requests;
 	LIST_HEAD(, pci_vtscsi_worker)    vsq_workers;
 };
 
@@ -124,8 +157,15 @@ struct pci_vtscsi_request {
 	struct iovec              vsr_iov[VTSCSI_MAXSEG + SPLIT_IOV_ADDL_IOV];
 	struct iovec *            vsr_iov_in;
 	struct iovec *            vsr_iov_out;
+	struct iovec *            vsr_data_iov_in;
+	struct iovec *            vsr_data_iov_out;
+	struct pci_vtscsi_req_cmd_rd * vsr_cmd_rd;
+	struct pci_vtscsi_req_cmd_wr * vsr_cmd_wr;
+	union ctl_io *            vsr_ctl_io;
 	size_t                    vsr_niov_in;
 	size_t                    vsr_niov_out;
+	size_t                    vsr_data_niov_in;
+	size_t                    vsr_data_niov_out;
 	uint32_t                  vsr_idx;
 	STAILQ_ENTRY(pci_vtscsi_request) vsr_link;
 };
@@ -237,13 +277,27 @@ static void pci_vtscsi_tmf_handle(struct pci_vtscsi_softc *,
     struct pci_vtscsi_ctrl_tmf *);
 static void pci_vtscsi_an_handle(struct pci_vtscsi_softc *,
     struct pci_vtscsi_ctrl_an *);
-static int pci_vtscsi_request_handle(struct pci_vtscsi_queue *, struct iovec *,
-    size_t, struct iovec *, size_t);
+
+static struct pci_vtscsi_request *pci_vtscsi_alloc_request(
+    struct pci_vtscsi_softc *);
+static void pci_vtscsi_free_request(struct pci_vtscsi_request *);
+static struct pci_vtscsi_request *pci_vtscsi_get_request(
+    struct pci_vtscsi_req_queue *);
+static void pci_vtscsi_put_request(struct pci_vtscsi_req_queue *,
+    struct pci_vtscsi_request *);
+static void pci_vtscsi_queue_request(struct pci_vtscsi_softc *,
+    struct vqueue_info *);
+static void pci_vtscsi_return_request(struct pci_vtscsi_queue *,
+    struct pci_vtscsi_request *, int);
+static int pci_vtscsi_request_handle(struct pci_vtscsi_softc *,
+    struct pci_vtscsi_request *);
+
 static void pci_vtscsi_controlq_notify(void *, struct vqueue_info *);
 static void pci_vtscsi_eventq_notify(void *, struct vqueue_info *);
 static void pci_vtscsi_requestq_notify(void *, struct vqueue_info *);
 static int  pci_vtscsi_init_queue(struct pci_vtscsi_softc *,
     struct pci_vtscsi_queue *, int);
+static void pci_vtscsi_destroy_queue(struct pci_vtscsi_queue *);
 static int pci_vtscsi_init(struct pci_devinst *, nvlist_t *);
 
 static struct virtio_consts vtscsi_vi_consts = {
@@ -262,37 +316,33 @@ pci_vtscsi_proc(void *arg)
 {
 	struct pci_vtscsi_worker *worker = (struct pci_vtscsi_worker *)arg;
 	struct pci_vtscsi_queue *q = worker->vsw_queue;
-	struct pci_vtscsi_request *req;
+	struct pci_vtscsi_softc *sc = q->vsq_sc;
 	int iolen;
 
 	for (;;) {
-		pthread_mutex_lock(&q->vsq_mtx);
+		struct pci_vtscsi_request *req;
 
-		while (STAILQ_EMPTY(&q->vsq_requests)
-		    && !worker->vsw_exiting)
-			pthread_cond_wait(&q->vsq_cv, &q->vsq_mtx);
+		pthread_mutex_lock(&q->vsq_rmtx);
 
-		if (worker->vsw_exiting)
-			break;
+		while (STAILQ_EMPTY(&q->vsq_requests) && !worker->vsw_exiting)
+			pthread_cond_wait(&q->vsq_cv, &q->vsq_rmtx);
 
-		req = STAILQ_FIRST(&q->vsq_requests);
-		STAILQ_REMOVE_HEAD(&q->vsq_requests, vsr_link);
+		if (worker->vsw_exiting) {
+			pthread_mutex_unlock(&q->vsq_rmtx);
+			return (NULL);
+		}
 
-		pthread_mutex_unlock(&q->vsq_mtx);
-		iolen = pci_vtscsi_request_handle(q, req->vsr_iov_in,
-		    req->vsr_niov_in, req->vsr_iov_out, req->vsr_niov_out);
+		req = pci_vtscsi_get_request(&q->vsq_requests);
+		pthread_mutex_unlock(&q->vsq_rmtx);
 
-		pthread_mutex_lock(&q->vsq_qmtx);
-		vq_relchain(q->vsq_vq, req->vsr_idx, iolen);
-		vq_endchains(q->vsq_vq, 0);
-		pthread_mutex_unlock(&q->vsq_qmtx);
+		DPRINTF("I/O request lun %d, data_niov_in %zu, data_niov_out "
+		    "%zu", pci_vtscsi_get_lun(req->vsr_cmd_rd->lun),
+		    req->vsr_data_niov_in, req->vsr_data_niov_out);
 
-		DPRINTF("request <idx=%d> completed", req->vsr_idx);
-		free(req);
-	}
+		iolen = pci_vtscsi_request_handle(sc, req);
 
-	pthread_mutex_unlock(&q->vsq_mtx);
-	return (NULL);
+		pci_vtscsi_return_request(q, req, iolen);
+	}
 }
 
 static void
@@ -395,6 +445,14 @@ pci_vtscsi_tmf_handle(struct pci_vtscsi_softc *sc,
 	int err;
 
 	io = ctl_scsi_alloc_io(sc->vss_iid);
+	if (io == NULL) {
+		WPRINTF("failed to allocate ctl_io: err=%d (%s)",
+		    errno, strerror(errno));
+
+		tmf->response = VIRTIO_SCSI_S_FAILURE;
+		return;
+	}
+
 	ctl_scsi_zero_io(io);
 
 	io->io_hdr.io_type = CTL_IO_TASK;
@@ -460,34 +518,112 @@ pci_vtscsi_an_handle(struct pci_vtscsi_softc *sc __unused,
 {
 }
 
-static int
-pci_vtscsi_request_handle(struct pci_vtscsi_queue *q, struct iovec *iov_in,
-    size_t niov_in, struct iovec *iov_out, size_t niov_out)
+static struct pci_vtscsi_request *
+pci_vtscsi_alloc_request(struct pci_vtscsi_softc *sc)
 {
-	struct pci_vtscsi_softc *sc = q->vsq_sc;
-	struct pci_vtscsi_req_cmd_rd *cmd_rd = NULL;
-	struct pci_vtscsi_req_cmd_wr *cmd_wr;
-	struct iovec *data_iov_in, *data_iov_out;
-	union ctl_io *io;
-	size_t data_niov_in, data_niov_out;
-	void *ext_data_ptr = NULL;
-	uint32_t ext_data_len = 0, ext_sg_entries = 0;
-	int err, nxferred;
+	struct pci_vtscsi_request *req;
+
+	req = calloc(1, sizeof(struct pci_vtscsi_request));
+	if (req == NULL)
+		goto fail;
+
+	req->vsr_cmd_rd = calloc(1, VTSCSI_IN_HEADER_LEN(sc));
+	if (req->vsr_cmd_rd == NULL)
+		goto fail;
+	req->vsr_cmd_wr = calloc(1, VTSCSI_OUT_HEADER_LEN(sc));
+	if (req->vsr_cmd_wr == NULL)
+		goto fail;
+
+	req->vsr_ctl_io = ctl_scsi_alloc_io(sc->vss_iid);
+	if (req->vsr_ctl_io == NULL)
+		goto fail;
+	ctl_scsi_zero_io(req->vsr_ctl_io);
+
+	return (req);
+
+fail:
+	EPRINTLN("failed to allocate request: %s", strerror(errno));
+
+	if (req != NULL)
+		pci_vtscsi_free_request(req);
+
+	return (NULL);
+}
+
+static void
+pci_vtscsi_free_request(struct pci_vtscsi_request *req)
+{
+	if (req->vsr_ctl_io != NULL)
+		ctl_scsi_free_io(req->vsr_ctl_io);
+	if (req->vsr_cmd_rd != NULL)
+		free(req->vsr_cmd_rd);
+	if (req->vsr_cmd_wr != NULL)
+		free(req->vsr_cmd_wr);
+
+	free(req);
+}
+
+static struct pci_vtscsi_request *
+pci_vtscsi_get_request(struct pci_vtscsi_req_queue *req_queue)
+{
+	struct pci_vtscsi_request *req;
+
+	assert(!STAILQ_EMPTY(req_queue));
+
+	req = STAILQ_FIRST(req_queue);
+	STAILQ_REMOVE_HEAD(req_queue, vsr_link);
+
+	return (req);
+}
+
+static void
+pci_vtscsi_put_request(struct pci_vtscsi_req_queue *req_queue,
+    struct pci_vtscsi_request *req)
+{
+	STAILQ_INSERT_TAIL(req_queue, req, vsr_link);
+}
+
+static void
+pci_vtscsi_queue_request(struct pci_vtscsi_softc *sc, struct vqueue_info *vq)
+{
+	struct pci_vtscsi_queue *q = &sc->vss_queues[vq->vq_num - 2];
+	struct pci_vtscsi_request *req;
+	struct vi_req vireq;
+	int n;
+
+	pthread_mutex_lock(&q->vsq_fmtx);
+	req = pci_vtscsi_get_request(&q->vsq_free_requests);
+	assert(req != NULL);
+	pthread_mutex_unlock(&q->vsq_fmtx);
+
+	n = vq_getchain(vq, req->vsr_iov, VTSCSI_MAXSEG, &vireq);
+	assert(n >= 1 && n <= VTSCSI_MAXSEG);
+
+	req->vsr_idx = vireq.idx;
+	req->vsr_queue = q;
+	req->vsr_iov_in = &req->vsr_iov[0];
+	req->vsr_niov_in = vireq.readable;
+	req->vsr_iov_out = &req->vsr_iov[vireq.readable];
+	req->vsr_niov_out = vireq.writable;
 
 	/*
 	 * Make sure we got at least enough space for the VirtIO-SCSI
 	 * command headers. If not, return this request immediately.
 	 */
-	if (check_iov_len(iov_out, niov_out,
+	if (check_iov_len(req->vsr_iov_out, req->vsr_niov_out,
 	    VTSCSI_OUT_HEADER_LEN(q->vsq_sc)) == false) {
 		WPRINTF("ignoring request with insufficient output");
-		return (0);
+		req->vsr_cmd_wr->response = VIRTIO_SCSI_S_FAILURE;
+		pci_vtscsi_return_request(q, req, 1);
+		return;
 	}
 
-	if (check_iov_len(iov_in, niov_in,
+	if (check_iov_len(req->vsr_iov_in, req->vsr_niov_in,
 	    VTSCSI_IN_HEADER_LEN(q->vsq_sc)) == false) {
 		WPRINTF("ignoring request with incomplete header");
-		return (0);
+		req->vsr_cmd_wr->response = VIRTIO_SCSI_S_FAILURE;
+		pci_vtscsi_return_request(q, req, 1);
+		return;
 	}
 
 	/*
@@ -500,8 +636,8 @@ pci_vtscsi_request_handle(struct pci_vtscsi_queue *q, struct iovec *iov_in,
 	 * by one to make room for a new iovec covering the first part of the
 	 * output data portion.
 	 */
-	data_iov_out = split_iov(iov_out, &niov_out,
-	    VTSCSI_OUT_HEADER_LEN(q->vsq_sc), &data_niov_out);
+	req->vsr_data_iov_out = split_iov(req->vsr_iov_out, &req->vsr_niov_out,
+	    VTSCSI_OUT_HEADER_LEN(q->vsq_sc), &req->vsr_data_niov_out);
 
 	/*
 	 * Similarly, to not overwrite the first iovec of the output section,
@@ -509,43 +645,106 @@ pci_vtscsi_request_handle(struct pci_vtscsi_queue *q, struct iovec *iov_in,
 	 * cover the entire iovec array (both input and the already split output
 	 * sections).
 	 */
-	niov_in += niov_out + data_niov_out;
+	req->vsr_niov_in += req->vsr_niov_out + req->vsr_data_niov_out;
 
-	data_iov_in = split_iov(iov_in, &niov_in,
-	    VTSCSI_IN_HEADER_LEN(q->vsq_sc), &data_niov_in);
+	req->vsr_data_iov_in = split_iov(req->vsr_iov_in, &req->vsr_niov_in,
+	    VTSCSI_IN_HEADER_LEN(q->vsq_sc), &req->vsr_data_niov_in);
 
 	/*
 	 * And of course we now have to adjust data_niov_in accordingly.
 	 */
-	data_niov_in -= niov_out + data_niov_out;
+	req->vsr_data_niov_in -= req->vsr_niov_out + req->vsr_data_niov_out;
+
+	/*
+	 * iov_to_buf() realloc()s the buffer given as 3rd argument to the
+	 * total size of all iovecs it will be copying. Since we've just
+	 * truncated it in split_iov(), we know that the size will be
+	 * VTSCSI_IN_HEADER_LEN(q->vsq_sc).
+	 *
+	 * Since we pre-allocated req->vsr_cmd_rd to this size, the realloc()
+	 * should never fail.
+	 *
+	 * This will have to change if we begin allowing config space writes
+	 * to change sense size.
+	 */
+	assert(iov_to_buf(req->vsr_iov_in, req->vsr_niov_in,
+	    (void **)&req->vsr_cmd_rd) == VTSCSI_IN_HEADER_LEN(q->vsq_sc));
 
-	iov_to_buf(iov_in, niov_in, (void **)&cmd_rd);
+	pthread_mutex_lock(&q->vsq_rmtx);
+	pci_vtscsi_put_request(&q->vsq_requests, req);
+	pthread_cond_signal(&q->vsq_cv);
+	pthread_mutex_unlock(&q->vsq_rmtx);
 
-	cmd_wr = calloc(1, VTSCSI_OUT_HEADER_LEN(sc));
-	io = ctl_scsi_alloc_io(sc->vss_iid);
-	ctl_scsi_zero_io(io);
+	DPRINTF("request <idx=%d> enqueued", vireq.idx);
+}
+
+static void
+pci_vtscsi_return_request(struct pci_vtscsi_queue *q,
+    struct pci_vtscsi_request *req, int iolen)
+{
+	void *cmd_rd = req->vsr_cmd_rd;
+	void *cmd_wr = req->vsr_cmd_wr;
+	void *ctl_io = req->vsr_ctl_io;
+	int idx = req->vsr_idx;
+
+	DPRINTF("request <idx=%d> completed, response %d", idx,
+	    req->vsr_cmd_wr->response);
+
+	iolen += buf_to_iov(cmd_wr, VTSCSI_OUT_HEADER_LEN(q->vsq_sc),
+	    req->vsr_iov_out, req->vsr_niov_out);
+
+	ctl_scsi_zero_io(req->vsr_ctl_io);
+
+	memset(cmd_rd, 0, VTSCSI_IN_HEADER_LEN(q->vsq_sc));
+	memset(cmd_wr, 0, VTSCSI_OUT_HEADER_LEN(q->vsq_sc));
+	memset(req, 0, sizeof(struct pci_vtscsi_request));
+
+	req->vsr_cmd_rd = cmd_rd;
+	req->vsr_cmd_wr = cmd_wr;
+	req->vsr_ctl_io = ctl_io;
+
+	pthread_mutex_lock(&q->vsq_fmtx);
+	pci_vtscsi_put_request(&q->vsq_free_requests, req);
+	pthread_mutex_unlock(&q->vsq_fmtx);
+
+	pthread_mutex_lock(&q->vsq_qmtx);
+	vq_relchain(q->vsq_vq, idx, iolen);
+	vq_endchains(q->vsq_vq, 0);
+	pthread_mutex_unlock(&q->vsq_qmtx);
+}
+
+static int
+pci_vtscsi_request_handle(struct pci_vtscsi_softc *sc,
+    struct pci_vtscsi_request *req)
+{
+	union ctl_io *io = req->vsr_ctl_io;
+	void *ext_data_ptr = NULL;
+	uint32_t ext_data_len = 0, ext_sg_entries = 0;
+	int err, nxferred;
 
 	io->io_hdr.nexus.initid = sc->vss_iid;
-	io->io_hdr.nexus.targ_lun = pci_vtscsi_get_lun(cmd_rd->lun);
+	io->io_hdr.nexus.targ_lun = pci_vtscsi_get_lun(req->vsr_cmd_rd->lun);
 
 	io->io_hdr.io_type = CTL_IO_SCSI;
 
-	if (data_niov_in > 0) {
-		ext_data_ptr = (void *)data_iov_in;
-		ext_sg_entries = data_niov_in;
-		ext_data_len = count_iov(data_iov_in, data_niov_in);
+	if (req->vsr_data_niov_in > 0) {
+		ext_data_ptr = (void *)req->vsr_data_iov_in;
+		ext_sg_entries = req->vsr_data_niov_in;
+		ext_data_len = count_iov(req->vsr_data_iov_in,
+		    req->vsr_data_niov_in);
 		io->io_hdr.flags |= CTL_FLAG_DATA_OUT;
-	} else if (data_niov_out > 0) {
-		ext_data_ptr = (void *)data_iov_out;
-		ext_sg_entries = data_niov_out;
-		ext_data_len = count_iov(data_iov_out, data_niov_out);
+	} else if (req->vsr_data_niov_out > 0) {
+		ext_data_ptr = (void *)req->vsr_data_iov_out;
+		ext_sg_entries = req->vsr_data_niov_out;
+		ext_data_len = count_iov(req->vsr_data_iov_out,
+		    req->vsr_data_niov_out);
 		io->io_hdr.flags |= CTL_FLAG_DATA_IN;
 	}
 
 	io->scsiio.sense_len = sc->vss_config.sense_size;
-	io->scsiio.tag_num = cmd_rd->id;
+	io->scsiio.tag_num = req->vsr_cmd_rd->id;
 	io->io_hdr.flags |= CTL_FLAG_USER_TAG;
-	switch (cmd_rd->task_attr) {
+	switch (req->vsr_cmd_rd->task_attr) {
 	case VIRTIO_SCSI_S_ORDERED:
 		io->scsiio.tag_type = CTL_TAG_ORDERED;
 		break;
@@ -565,7 +764,7 @@ pci_vtscsi_request_handle(struct pci_vtscsi_queue *q, struct iovec *iov_in,
 	io->scsiio.ext_data_len = ext_data_len;
 	io->scsiio.ext_data_filled = 0;
 	io->scsiio.cdb_len = sc->vss_config.cdb_size;
-	memcpy(io->scsiio.cdb, cmd_rd->cdb, sc->vss_config.cdb_size);
+	memcpy(io->scsiio.cdb, req->vsr_cmd_rd->cdb, sc->vss_config.cdb_size);
 
 	if (pci_vtscsi_debug) {
 		struct sbuf *sb = sbuf_new_auto();
@@ -578,22 +777,19 @@ pci_vtscsi_request_handle(struct pci_vtscsi_queue *q, struct iovec *iov_in,
 	err = ioctl(sc->vss_ctl_fd, CTL_IO, io);
 	if (err != 0) {
 		WPRINTF("CTL_IO: err=%d (%s)", errno, strerror(errno));
-		cmd_wr->response = VIRTIO_SCSI_S_FAILURE;
+		req->vsr_cmd_wr->response = VIRTIO_SCSI_S_FAILURE;
 	} else {
-		cmd_wr->sense_len = MIN(io->scsiio.sense_len,
-		    sc->vss_config.sense_size);
-		cmd_wr->residual = ext_data_len - io->scsiio.ext_data_filled;
-		cmd_wr->status = io->scsiio.scsi_status;
-		cmd_wr->response = VIRTIO_SCSI_S_OK;
-		memcpy(&cmd_wr->sense, &io->scsiio.sense_data,
-		    cmd_wr->sense_len);
+		req->vsr_cmd_wr->sense_len =
+		    MIN(io->scsiio.sense_len, sc->vss_config.sense_size);
+		req->vsr_cmd_wr->residual = ext_data_len -
+		    io->scsiio.ext_data_filled;
+		req->vsr_cmd_wr->status = io->scsiio.scsi_status;
+		req->vsr_cmd_wr->response = VIRTIO_SCSI_S_OK;
+		memcpy(&req->vsr_cmd_wr->sense, &io->scsiio.sense_data,
+		    req->vsr_cmd_wr->sense_len);
 	}
 
-	buf_to_iov(cmd_wr, VTSCSI_OUT_HEADER_LEN(sc), iov_out, niov_out);
-	nxferred = VTSCSI_OUT_HEADER_LEN(sc) + io->scsiio.ext_data_filled;
-	free(cmd_rd);
-	free(cmd_wr);
-	ctl_scsi_free_io(io);
+	nxferred = io->scsiio.ext_data_filled;
 	return (nxferred);
 }
 
@@ -635,34 +831,8 @@ pci_vtscsi_eventq_notify(void *vsc __unused, struct vqueue_info *vq)
 static void
 pci_vtscsi_requestq_notify(void *vsc, struct vqueue_info *vq)
 {
-	struct pci_vtscsi_softc *sc;
-	struct pci_vtscsi_queue *q;
-	struct pci_vtscsi_request *req;
-	struct vi_req vireq;
-	int n;
-
-	sc = vsc;
-	q = &sc->vss_queues[vq->vq_num - 2];
-
 	while (vq_has_descs(vq)) {
-		req = calloc(1, sizeof(struct pci_vtscsi_request));
-
-		n = vq_getchain(vq, req->vsr_iov, VTSCSI_MAXSEG, &vireq);
-		assert(n >= 1 && n <= VTSCSI_MAXSEG);
-
-		req->vsr_idx = vireq.idx;
-		req->vsr_queue = q;
-		req->vsr_iov_in = &req->vsr_iov[0];
-		req->vsr_niov_in = vireq.readable;
-		req->vsr_iov_out = &req->vsr_iov[vireq.readable];
-		req->vsr_niov_out = vireq.writable;
-
-		pthread_mutex_lock(&q->vsq_mtx);
-		STAILQ_INSERT_TAIL(&q->vsq_requests, req, vsr_link);
-		pthread_cond_signal(&q->vsq_cv);
-		pthread_mutex_unlock(&q->vsq_mtx);
-
-		DPRINTF("request <idx=%d> enqueued", vireq.idx);
+		pci_vtscsi_queue_request(vsc, vq);
 	}
 }
 
@@ -670,32 +840,75 @@ static int
 pci_vtscsi_init_queue(struct pci_vtscsi_softc *sc,
     struct pci_vtscsi_queue *queue, int num)
 {
-	struct pci_vtscsi_worker *worker;
+	struct pci_vtscsi_worker *workers;
 	char tname[MAXCOMLEN + 1];
 	int i;
 
 	queue->vsq_sc = sc;
 	queue->vsq_vq = &sc->vss_vq[num + 2];
 
-	pthread_mutex_init(&queue->vsq_mtx, NULL);
+	pthread_mutex_init(&queue->vsq_rmtx, NULL);
+	pthread_mutex_init(&queue->vsq_fmtx, NULL);
 	pthread_mutex_init(&queue->vsq_qmtx, NULL);
 	pthread_cond_init(&queue->vsq_cv, NULL);
 	STAILQ_INIT(&queue->vsq_requests);
+	STAILQ_INIT(&queue->vsq_free_requests);
 	LIST_INIT(&queue->vsq_workers);
 
+	for (i = 0; i < VTSCSI_RINGSZ; i++) {
+		struct pci_vtscsi_request *req;
+
+		req = pci_vtscsi_alloc_request(sc);
+		if (req == NULL)
+			goto fail;
+
+		pci_vtscsi_put_request(&queue->vsq_free_requests, req);
+	}
+
+	workers = calloc(VTSCSI_THR_PER_Q, sizeof(struct pci_vtscsi_worker));
+	if (workers == NULL)
+		goto fail;
+
 	for (i = 0; i < VTSCSI_THR_PER_Q; i++) {
-		worker = calloc(1, sizeof(struct pci_vtscsi_worker));
-		worker->vsw_queue = queue;
+		workers[i].vsw_queue = queue;
 
-		pthread_create(&worker->vsw_thread, NULL, &pci_vtscsi_proc,
-		    (void *)worker);
+		pthread_create(&workers[i].vsw_thread, NULL, &pci_vtscsi_proc,
+		    (void *)&workers[i]);
 
 		snprintf(tname, sizeof(tname), "vtscsi:%d-%d", num, i);
-		pthread_set_name_np(worker->vsw_thread, tname);
-		LIST_INSERT_HEAD(&queue->vsq_workers, worker, vsw_link);
+		pthread_set_name_np(workers[i].vsw_thread, tname);
+		LIST_INSERT_HEAD(&queue->vsq_workers, &workers[i], vsw_link);
 	}
 
 	return (0);
+
+fail:
+	pci_vtscsi_destroy_queue(queue);
+
+	return (-1);
+
+}
+
+static void
+pci_vtscsi_destroy_queue(struct pci_vtscsi_queue *queue)
+{
+	if (queue->vsq_sc == NULL)
+		return;
+
+	for (int i = VTSCSI_RINGSZ; i > 0; i--) {
+		struct pci_vtscsi_request *req;
+
+		if (STAILQ_EMPTY(&queue->vsq_free_requests))
+			break;
+
+		req = pci_vtscsi_get_request(&queue->vsq_free_requests);
+		pci_vtscsi_free_request(req);
+	}
+
+	pthread_cond_destroy(&queue->vsq_cv);
+	pthread_mutex_destroy(&queue->vsq_qmtx);
+	pthread_mutex_destroy(&queue->vsq_fmtx);
+	pthread_mutex_destroy(&queue->vsq_rmtx);
 }
 
 static int
@@ -722,9 +935,13 @@ pci_vtscsi_init(struct pci_devinst *pi, nvlist_t *nvl)
 {
 	struct pci_vtscsi_softc *sc;
 	const char *devname, *value;
+	int err;
 	int i;
 
 	sc = calloc(1, sizeof(struct pci_vtscsi_softc));
+	if (sc == NULL)
+		return (-1);
+
 	value = get_config_value_node(nvl, "iid");
 	if (value != NULL)
 		sc->vss_iid = strtoul(value, NULL, 10);
@@ -733,8 +950,7 @@ pci_vtscsi_init(struct pci_devinst *pi, nvlist_t *nvl)
 	if (value != NULL) {
 		if (pci_emul_add_boot_device(pi, atoi(value))) {
 			EPRINTLN("Invalid bootindex %d", atoi(value));
-			free(sc);
-			return (-1);
+			goto fail;
 		}
 	}
 
@@ -744,8 +960,7 @@ pci_vtscsi_init(struct pci_devinst *pi, nvlist_t *nvl)
 	sc->vss_ctl_fd = open(devname, O_RDWR);
 	if (sc->vss_ctl_fd < 0) {
 		WPRINTF("cannot open %s: %s", devname, strerror(errno));
-		free(sc);
-		return (1);
+		goto fail;
 	}
 
 	pthread_mutex_init(&sc->vss_mtx, NULL);
@@ -753,6 +968,19 @@ pci_vtscsi_init(struct pci_devinst *pi, nvlist_t *nvl)
 	vi_softc_linkup(&sc->vss_vs, &vtscsi_vi_consts, sc, pi, sc->vss_vq);
 	sc->vss_vs.vs_mtx = &sc->vss_mtx;
 
+	/*
+	 * Perform a "reset" before we set up our queues.
+	 *
+	 * This will write the default config into vss_config, which is used
+	 * by the rest of the driver to get the request header size. Note that
+	 * if we ever allow the guest to override sense size through config
+	 * space writes, pre-allocation of I/O requests will have to change
+	 * accordingly.
+	 */
+	pthread_mutex_lock(&sc->vss_mtx);
+	pci_vtscsi_reset(sc);
+	pthread_mutex_unlock(&sc->vss_mtx);
+
 	/* controlq */
 	sc->vss_vq[0].vq_qsize = VTSCSI_RINGSZ;
 	sc->vss_vq[0].vq_notify = pci_vtscsi_controlq_notify;
@@ -765,7 +993,10 @@ pci_vtscsi_init(struct pci_devinst *pi, nvlist_t *nvl)
 	for (i = 2; i < VTSCSI_MAXQ; i++) {
 		sc->vss_vq[i].vq_qsize = VTSCSI_RINGSZ;
 		sc->vss_vq[i].vq_notify = pci_vtscsi_requestq_notify;
-		pci_vtscsi_init_queue(sc, &sc->vss_queues[i - 2], i - 2);
+
+		err = pci_vtscsi_init_queue(sc, &sc->vss_queues[i - 2], i - 2);
+		if (err != 0)
+			goto fail;
 	}
 
 	/* initialize config space */
@@ -776,10 +1007,21 @@ pci_vtscsi_init(struct pci_devinst *pi, nvlist_t *nvl)
 	pci_set_cfgdata16(pi, PCIR_SUBVEND_0, VIRTIO_VENDOR);
 
 	if (vi_intr_init(&sc->vss_vs, 1, fbsdrun_virtio_msix()))
-		return (1);
+		goto fail;
+
 	vi_set_io_bar(&sc->vss_vs, 0);
 
 	return (0);
+
+fail:
+	for (i = 2; i < VTSCSI_MAXQ; i++)
+		pci_vtscsi_destroy_queue(&sc->vss_queues[i - 2]);
+
+	if (sc->vss_ctl_fd > 0)
+		close(sc->vss_ctl_fd);
+
+	free(sc);
+	return (-1);
 }