svn commit: r280370 - stable/10/usr.sbin/bhyve

Alexander Motin mav at FreeBSD.org
Mon Mar 23 14:36:55 UTC 2015


Author: mav
Date: Mon Mar 23 14:36:53 2015
New Revision: 280370
URL: https://svnweb.freebsd.org/changeset/base/280370

Log:
  MFC r279957, r280017: Add DSM TRIM command support for virtual AHCI disks.
  
  It works only for virtual disks backed by ZVOLs and raw devices supporting
  BIO_DELETE.  Virtual disks backed by files won't report this capability.
  
  Relnotes:	yes

Modified:
  stable/10/usr.sbin/bhyve/block_if.c
  stable/10/usr.sbin/bhyve/block_if.h
  stable/10/usr.sbin/bhyve/pci_ahci.c
Directory Properties:
  stable/10/   (props changed)

Modified: stable/10/usr.sbin/bhyve/block_if.c
==============================================================================
--- stable/10/usr.sbin/bhyve/block_if.c	Mon Mar 23 13:38:33 2015	(r280369)
+++ stable/10/usr.sbin/bhyve/block_if.c	Mon Mar 23 14:36:53 2015	(r280370)
@@ -59,7 +59,8 @@ __FBSDID("$FreeBSD$");
 enum blockop {
 	BOP_READ,
 	BOP_WRITE,
-	BOP_FLUSH
+	BOP_FLUSH,
+	BOP_DELETE	/* discard a byte range; blockif_proc uses DIOCGDELETE */
 };
 
 enum blockstat {
@@ -81,6 +82,7 @@ struct blockif_ctxt {
 	int			bc_magic;
 	int			bc_fd;
 	int			bc_ischr;
+	int			bc_candelete;
 	int			bc_rdonly;
 	off_t			bc_size;
 	int			bc_sectsz;
@@ -172,6 +174,7 @@ static void
 blockif_proc(struct blockif_ctxt *bc, struct blockif_elem *be)
 {
 	struct blockif_req *br;
+	off_t arg[2];
 	int err;
 
 	br = be->be_req;
@@ -197,6 +200,19 @@ blockif_proc(struct blockif_ctxt *bc, st
 		} else if (fsync(bc->bc_fd))
 			err = errno;
 		break;
+	case BOP_DELETE:
+		if (!bc->bc_candelete)
+			err = EOPNOTSUPP;
+		else if (bc->bc_rdonly)
+			err = EROFS;
+		else if (bc->bc_ischr) {
+			arg[0] = br->br_offset;
+			arg[1] = br->br_iov[0].iov_len;
+			if (ioctl(bc->bc_fd, DIOCGDELETE, arg))
+				err = errno;
+		} else
+			err = EOPNOTSUPP;
+		break;
 	default:
 		err = EINVAL;
 		break;
@@ -276,9 +292,10 @@ blockif_open(const char *optstr, const c
 	char *nopt, *xopts;
 	struct blockif_ctxt *bc;
 	struct stat sbuf;
+	struct diocgattr_arg arg;
 	off_t size, psectsz, psectoff;
 	int extra, fd, i, sectsz;
-	int nocache, sync, ro;
+	int nocache, sync, ro, candelete;
 
 	pthread_once(&blockif_once, blockif_init);
 
@@ -332,6 +349,7 @@ blockif_open(const char *optstr, const c
         size = sbuf.st_size;
 	sectsz = DEV_BSIZE;
 	psectsz = psectoff = 0;
+	candelete = 0;
 	if (S_ISCHR(sbuf.st_mode)) {
 		if (ioctl(fd, DIOCGMEDIASIZE, &size) < 0 ||
 		    ioctl(fd, DIOCGSECTORSIZE, &sectsz)) {
@@ -343,6 +361,10 @@ blockif_open(const char *optstr, const c
 		assert(sectsz != 0);
 		if (ioctl(fd, DIOCGSTRIPESIZE, &psectsz) == 0 && psectsz > 0)
 			ioctl(fd, DIOCGSTRIPEOFFSET, &psectoff);
+		strlcpy(arg.name, "GEOM::candelete", sizeof(arg.name));
+		arg.len = sizeof(arg.value.i);
+		if (ioctl(fd, DIOCGATTR, &arg) == 0)
+			candelete = arg.value.i;
 	} else
 		psectsz = sbuf.st_blksize;
 
@@ -355,6 +377,7 @@ blockif_open(const char *optstr, const c
 	bc->bc_magic = BLOCKIF_SIG;
 	bc->bc_fd = fd;
 	bc->bc_ischr = S_ISCHR(sbuf.st_mode);
+	bc->bc_candelete = candelete;
 	bc->bc_rdonly = ro;
 	bc->bc_size = size;
 	bc->bc_sectsz = sectsz;
@@ -434,6 +457,14 @@ blockif_flush(struct blockif_ctxt *bc, s
 }
 
 int
+blockif_delete(struct blockif_ctxt *bc, struct blockif_req *breq)
+{
+	/* Hand breq to blockif_request() as a BOP_DELETE; return its result. */
+	assert(bc->bc_magic == BLOCKIF_SIG);
+	return (blockif_request(bc, breq, BOP_DELETE));
+}
+
+int
 blockif_cancel(struct blockif_ctxt *bc, struct blockif_req *breq)
 {
 	struct blockif_elem *be;
@@ -634,3 +665,11 @@ blockif_is_ro(struct blockif_ctxt *bc)
 	assert(bc->bc_magic == BLOCKIF_SIG);
 	return (bc->bc_rdonly);
 }
+
+int
+blockif_candelete(struct blockif_ctxt *bc)
+{
+	/* Value of GEOM::candelete probed by blockif_open(); 0 if not a cdev. */
+	assert(bc->bc_magic == BLOCKIF_SIG);
+	return (bc->bc_candelete);
+}

Modified: stable/10/usr.sbin/bhyve/block_if.h
==============================================================================
--- stable/10/usr.sbin/bhyve/block_if.h	Mon Mar 23 13:38:33 2015	(r280369)
+++ stable/10/usr.sbin/bhyve/block_if.h	Mon Mar 23 14:36:53 2015	(r280370)
@@ -58,9 +58,11 @@ int	blockif_sectsz(struct blockif_ctxt *
 void	blockif_psectsz(struct blockif_ctxt *bc, int *size, int *off);
 int	blockif_queuesz(struct blockif_ctxt *bc);
 int	blockif_is_ro(struct blockif_ctxt *bc);
+int	blockif_candelete(struct blockif_ctxt *bc);
 int	blockif_read(struct blockif_ctxt *bc, struct blockif_req *breq);
 int	blockif_write(struct blockif_ctxt *bc, struct blockif_req *breq);
 int	blockif_flush(struct blockif_ctxt *bc, struct blockif_req *breq);
+int	blockif_delete(struct blockif_ctxt *bc, struct blockif_req *breq);
 int	blockif_cancel(struct blockif_ctxt *bc, struct blockif_req *breq);
 int	blockif_close(struct blockif_ctxt *bc);
 

Modified: stable/10/usr.sbin/bhyve/pci_ahci.c
==============================================================================
--- stable/10/usr.sbin/bhyve/pci_ahci.c	Mon Mar 23 13:38:33 2015	(r280369)
+++ stable/10/usr.sbin/bhyve/pci_ahci.c	Mon Mar 23 14:36:53 2015	(r280370)
@@ -644,6 +644,100 @@ ahci_handle_flush(struct ahci_port *p, i
 }
 
 static inline void
+read_prdt(struct ahci_port *p, int slot, uint8_t *cfis,
+		void *buf, int size)
+{
+	struct ahci_cmd_hdr *hdr;
+	struct ahci_prdt_entry *prdt;
+	void *to;
+	int i, len;
+	/* Copy up to size bytes from the slot's PRDT segments into buf. */
+	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
+	len = size;
+	to = buf;
+	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
+	for (i = 0; i < hdr->prdtl && len; i++) {
+		uint8_t *ptr;
+		uint32_t dbcsz;
+		int sublen;
+		/* The masked dbc field is treated as (byte count - 1). */
+		dbcsz = (prdt->dbc & DBCMASK) + 1;
+		ptr = paddr_guest2host(ahci_ctx(p->pr_sc), prdt->dba, dbcsz);
+		sublen = len < dbcsz ? len : dbcsz;	/* clamp to space left */
+		memcpy(to, ptr, sublen);	/* NOTE(review): ptr is not checked */
+		len -= sublen;
+		to += sublen;
+		prdt++;
+	}	/* a PRDT shorter than size leaves the tail of buf unwritten */
+}
+
+static void
+ahci_handle_dsm_trim(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t done)
+{
+	struct ahci_ioreq *aior;
+	struct blockif_req *breq;
+	uint8_t *entry;
+	uint64_t elba;
+	uint32_t len, elen;
+	int err;
+	uint8_t buf[512] = { 0 };
+	/* Issue one TRIM range per call, resuming at byte offset 'done'. */
+	len = (uint16_t)cfis[13] << 8 | cfis[12];
+	len *= 512;	/* list size in bytes; caller admits only one block */
+	read_prdt(p, slot, cfis, buf, sizeof(buf));
+	/* Bytes a short PRDT did not supply stay zero and parse as empty. */
+next:
+	entry = &buf[done];	/* 8-byte entry: 48-bit LBA, 16-bit count */
+	elba = ((uint64_t)entry[5] << 40) |
+		((uint64_t)entry[4] << 32) |
+		((uint64_t)entry[3] << 24) |
+		((uint64_t)entry[2] << 16) |
+		((uint64_t)entry[1] << 8) |
+		entry[0];
+	elen = (uint16_t)entry[7] << 8 | entry[6];
+	done += 8;
+	if (elen == 0) {	/* empty range: nothing to delete */
+		if (done >= len) {	/* list exhausted: report success */
+			ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
+			p->pending &= ~(1 << slot);
+			ahci_check_stopped(p);
+			return;
+		}
+		goto next;
+	}
+
+	/*
+	 * Pull request off free list; done/len let ata_ioreq_cb() resume.
+	 */
+	aior = STAILQ_FIRST(&p->iofhd);
+	assert(aior != NULL);
+	STAILQ_REMOVE_HEAD(&p->iofhd, io_flist);
+	aior->cfis = cfis;
+	aior->slot = slot;
+	aior->len = len;
+	aior->done = done;
+	aior->prdtl = 0;
+	/* Convert the sector-based range into a byte offset and length. */
+	breq = &aior->io_req;
+	breq->br_offset = elba * blockif_sectsz(p->bctx);
+	breq->br_iovcnt = 1;
+	breq->br_iov[0].iov_len = elen * blockif_sectsz(p->bctx);
+
+	/*
+	 * Mark this command in-flight.
+	 */
+	p->pending |= 1 << slot;
+
+	/*
+	 * Stuff request onto busy list
+	 */
+	TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist);
+
+	err = blockif_delete(p->bctx, breq);
+	assert(err == 0);
+}
+
+static inline void
 write_prdt(struct ahci_port *p, int slot, uint8_t *cfis,
 		void *buf, int size)
 {
@@ -684,10 +778,12 @@ handle_identify(struct ahci_port *p, int
 	} else {
 		uint16_t buf[256];
 		uint64_t sectors;
-		int sectsz, psectsz, psectoff;
+		int sectsz, psectsz, psectoff, candelete, ro;
 		uint16_t cyl;
 		uint8_t sech, heads;
 
+		ro = blockif_is_ro(p->bctx);
+		candelete = blockif_candelete(p->bctx);
 		sectsz = blockif_sectsz(p->bctx);
 		sectors = blockif_size(p->bctx) / sectsz;
 		blockif_chs(p->bctx, &cyl, &heads, &sech);
@@ -718,6 +814,7 @@ handle_identify(struct ahci_port *p, int
 		buf[66] = 100;
 		buf[67] = 100;
 		buf[68] = 100;
+		buf[69] = 0;
 		buf[75] = 31;
 		buf[76] = (1 << 8 | 1 << 2);
 		buf[80] = 0x1f0;
@@ -736,6 +833,11 @@ handle_identify(struct ahci_port *p, int
 		buf[101] = (sectors >> 16);
 		buf[102] = (sectors >> 32);
 		buf[103] = (sectors >> 48);
+		if (candelete && !ro) {
+			buf[69] |= ATA_SUPPORT_RZAT | ATA_SUPPORT_DRAT;
+			buf[105] = 1;
+			buf[169] = ATA_SUPPORT_DSM_TRIM;
+		}
 		buf[106] = 0x4000;
 		buf[209] = 0x4000;
 		if (psectsz > sectsz) {
@@ -1394,6 +1496,15 @@ ahci_handle_cmd(struct ahci_port *p, int
 	case ATA_FLUSHCACHE48:
 		ahci_handle_flush(p, slot, cfis);
 		break;
+	case ATA_DATA_SET_MANAGEMENT:
+		if (cfis[11] == 0 && cfis[3] == ATA_DSM_TRIM &&
+		    cfis[13] == 0 && cfis[12] == 1) {
+			ahci_handle_dsm_trim(p, slot, cfis, 0);
+			break;
+		}
+		ahci_write_fis_d2h(p, slot, cfis,
+		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
+		break;
 	case ATA_STANDBY_CMD:
 		break;
 	case ATA_NOP:
@@ -1505,7 +1616,7 @@ ata_ioreq_cb(struct blockif_req *br, int
 	struct pci_ahci_softc *sc;
 	uint32_t tfd;
 	uint8_t *cfis;
-	int pending, slot, ncq;
+	int pending, slot, ncq, dsm;
 
 	DPRINTF("%s %d\n", __func__, err);
 
@@ -1521,6 +1632,8 @@ ata_ioreq_cb(struct blockif_req *br, int
 	if (cfis[2] == ATA_WRITE_FPDMA_QUEUED ||
 			cfis[2] == ATA_READ_FPDMA_QUEUED)
 		ncq = 1;
+	if (cfis[2] == ATA_DATA_SET_MANAGEMENT)
+		dsm = 1;
 
 	pthread_mutex_lock(&sc->mtx);
 
@@ -1534,10 +1647,17 @@ ata_ioreq_cb(struct blockif_req *br, int
 	 */
 	STAILQ_INSERT_TAIL(&p->iofhd, aior, io_flist);
 
-	if (pending && !err) {
-		ahci_handle_dma(p, slot, cfis, aior->done,
-		    hdr->prdtl - pending);
-		goto out;
+	if (dsm) {
+		if (aior->done != aior->len && !err) {
+			ahci_handle_dsm_trim(p, slot, cfis, aior->done);
+			goto out;
+		}
+	} else {
+		if (pending && !err) {
+			ahci_handle_dma(p, slot, cfis, aior->done,
+			    hdr->prdtl - pending);
+			goto out;
+		}
 	}
 
 	if (!err && aior->done == aior->len) {


More information about the svn-src-all mailing list