svn commit: r363255 - head/sys/dev/virtio/block

Allan Jude allanjude at FreeBSD.org
Thu Jul 16 16:32:17 UTC 2020


Author: allanjude
Date: Thu Jul 16 16:32:16 2020
New Revision: 363255
URL: https://svnweb.freebsd.org/changeset/base/363255

Log:
  Add VIRTIO_BLK_T_DISCARD support to the virtio-blk driver
  
  If the hypervisor advertises support for the DISCARD command then the
  guest can perform TRIM commands, freeing space on the backing store.
  
  If VIRTIO_BLK_F_DISCARD is enabled, advertise DISKFLAG_CANDELETE.
  
  Tested with FreeBSD guests on bhyve and KVM.
  
  Reviewed by:	jhb
  Tested by:	freqlabs
  MFC after:	1 month
  Relnotes:	yes
  Sponsored by:	Klara Inc.
  Differential Revision:	https://reviews.freebsd.org/D21708

Modified:
  head/sys/dev/virtio/block/virtio_blk.c
  head/sys/dev/virtio/block/virtio_blk.h

Modified: head/sys/dev/virtio/block/virtio_blk.c
==============================================================================
--- head/sys/dev/virtio/block/virtio_blk.c	Thu Jul 16 15:12:52 2020	(r363254)
+++ head/sys/dev/virtio/block/virtio_blk.c	Thu Jul 16 16:32:16 2020	(r363255)
@@ -81,6 +81,7 @@ struct vtblk_softc {
 #define VTBLK_FLAG_SUSPEND	0x0008
 #define VTBLK_FLAG_BARRIER	0x0010
 #define VTBLK_FLAG_WC_CONFIG	0x0020
+#define VTBLK_FLAG_DISCARD	0x0040
 
 	struct virtqueue	*vtblk_vq;
 	struct sglist		*vtblk_sglist;
@@ -112,6 +113,7 @@ static struct virtio_feature_desc vtblk_feature_desc[]
 	{ VIRTIO_BLK_F_WCE,		"WriteCache"	},
 	{ VIRTIO_BLK_F_TOPOLOGY,	"Topology"	},
 	{ VIRTIO_BLK_F_CONFIG_WCE,	"ConfigWCE"	},
+	{ VIRTIO_BLK_F_DISCARD,		"Discard"	},
 
 	{ 0, NULL }
 };
@@ -210,6 +212,7 @@ TUNABLE_INT("hw.vtblk.writecache_mode", &vtblk_writeca
      VIRTIO_BLK_F_WCE			| \
      VIRTIO_BLK_F_TOPOLOGY		| \
      VIRTIO_BLK_F_CONFIG_WCE		| \
+     VIRTIO_BLK_F_DISCARD		| \
      VIRTIO_RING_F_INDIRECT_DESC)
 
 #define VTBLK_MTX(_sc)		&(_sc)->vtblk_mtx
@@ -459,7 +462,7 @@ vtblk_config_change(device_t dev)
 	vtblk_read_config(sc, &blkcfg);
 
 	/* Capacity is always in 512-byte units. */
-	capacity = blkcfg.capacity * 512;
+	capacity = blkcfg.capacity * VTBLK_BSIZE;
 
 	if (sc->vtblk_disk->d_mediasize != capacity)
 		vtblk_resize_disk(sc, capacity);
@@ -544,13 +547,14 @@ vtblk_strategy(struct bio *bp)
 	 * be a better way to report our readonly'ness to GEOM above.
 	 */
 	if (sc->vtblk_flags & VTBLK_FLAG_READONLY &&
-	    (bp->bio_cmd == BIO_WRITE || bp->bio_cmd == BIO_FLUSH)) {
+	    (bp->bio_cmd == BIO_WRITE || bp->bio_cmd == BIO_FLUSH ||
+	    bp->bio_cmd == BIO_DELETE)) {
 		vtblk_bio_done(sc, bp, EROFS);
 		return;
 	}
 
 	if ((bp->bio_cmd != BIO_READ) && (bp->bio_cmd != BIO_WRITE) &&
-	    (bp->bio_cmd != BIO_FLUSH)) {
+	    (bp->bio_cmd != BIO_FLUSH) && (bp->bio_cmd != BIO_DELETE)) {
 		vtblk_bio_done(sc, bp, EOPNOTSUPP);
 		return;
 	}
@@ -563,6 +567,13 @@ vtblk_strategy(struct bio *bp)
 		return;
 	}
 
+	if ((bp->bio_cmd == BIO_DELETE) &&
+	    !(sc->vtblk_flags & VTBLK_FLAG_DISCARD)) {
+		VTBLK_UNLOCK(sc);
+		vtblk_bio_done(sc, bp, EOPNOTSUPP);
+		return;
+	}
+
 	bioq_insert_tail(&sc->vtblk_bioq, bp);
 	vtblk_startio(sc);
 
@@ -598,6 +609,8 @@ vtblk_setup_features(struct vtblk_softc *sc)
 		sc->vtblk_flags |= VTBLK_FLAG_BARRIER;
 	if (virtio_with_feature(dev, VIRTIO_BLK_F_CONFIG_WCE))
 		sc->vtblk_flags |= VTBLK_FLAG_WC_CONFIG;
+	if (virtio_with_feature(dev, VIRTIO_BLK_F_DISCARD))
+		sc->vtblk_flags |= VTBLK_FLAG_DISCARD;
 }
 
 static int
@@ -687,12 +700,12 @@ vtblk_alloc_disk(struct vtblk_softc *sc, struct virtio
 		dp->d_dump = vtblk_dump;
 
 	/* Capacity is always in 512-byte units. */
-	dp->d_mediasize = blkcfg->capacity * 512;
+	dp->d_mediasize = blkcfg->capacity * VTBLK_BSIZE;
 
 	if (virtio_with_feature(dev, VIRTIO_BLK_F_BLK_SIZE))
 		dp->d_sectorsize = blkcfg->blk_size;
 	else
-		dp->d_sectorsize = 512;
+		dp->d_sectorsize = VTBLK_BSIZE;
 
 	/*
 	 * The VirtIO maximum I/O size is given in terms of segments.
@@ -726,6 +739,11 @@ vtblk_alloc_disk(struct vtblk_softc *sc, struct virtio
 		    dp->d_stripesize;
 	}
 
+	if (virtio_with_feature(dev, VIRTIO_BLK_F_DISCARD)) {
+		dp->d_flags |= DISKFLAG_CANDELETE;
+		dp->d_delmaxsize = blkcfg->max_discard_sectors * VTBLK_BSIZE;
+	}
+
 	if (vtblk_write_cache_enabled(sc, blkcfg) != 0)
 		sc->vtblk_write_cache = VTBLK_CACHE_WRITEBACK;
 	else
@@ -876,12 +894,16 @@ vtblk_request_bio(struct vtblk_softc *sc)
 		break;
 	case BIO_READ:
 		req->vbr_hdr.type = VIRTIO_BLK_T_IN;
-		req->vbr_hdr.sector = bp->bio_offset / 512;
+		req->vbr_hdr.sector = bp->bio_offset / VTBLK_BSIZE;
 		break;
 	case BIO_WRITE:
 		req->vbr_hdr.type = VIRTIO_BLK_T_OUT;
-		req->vbr_hdr.sector = bp->bio_offset / 512;
+		req->vbr_hdr.sector = bp->bio_offset / VTBLK_BSIZE;
 		break;
+	case BIO_DELETE:
+		req->vbr_hdr.type = VIRTIO_BLK_T_DISCARD;
+		req->vbr_hdr.sector = bp->bio_offset / VTBLK_BSIZE;
+		break;
 	default:
 		panic("%s: bio with unhandled cmd: %d", __func__, bp->bio_cmd);
 	}
@@ -935,6 +957,20 @@ vtblk_request_execute(struct vtblk_softc *sc, struct v
 		/* BIO_READ means the host writes into our buffer. */
 		if (bp->bio_cmd == BIO_READ)
 			writable = sg->sg_nseg - 1;
+	} else if (bp->bio_cmd == BIO_DELETE) {
+		struct virtio_blk_discard_write_zeroes *discard;
+
+		discard = malloc(sizeof(*discard), M_DEVBUF, M_NOWAIT | M_ZERO);
+		if (discard == NULL)
+			return (ENOMEM);
+		discard->sector = bp->bio_offset / VTBLK_BSIZE;
+		discard->num_sectors = bp->bio_bcount / VTBLK_BSIZE;
+		bp->bio_driver1 = discard;
+		error = sglist_append(sg, discard, sizeof(*discard));
+		if (error || sg->sg_nseg == sg->sg_maxseg) {
+			panic("%s: bio %p data buffer too big %d",
+			    __func__, bp, error);
+		}
 	}
 
 	writable++;
@@ -1095,6 +1131,11 @@ vtblk_bio_done(struct vtblk_softc *sc, struct bio *bp,
 		bp->bio_flags |= BIO_ERROR;
 	}
 
+	if (bp->bio_driver1 != NULL) {
+		free(bp->bio_driver1, M_DEVBUF);
+		bp->bio_driver1 = NULL;
+	}
+
 	biodone(bp);
 }
 
@@ -1124,7 +1165,12 @@ vtblk_read_config(struct vtblk_softc *sc, struct virti
 	VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_GEOMETRY, geometry, blkcfg);
 	VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_BLK_SIZE, blk_size, blkcfg);
 	VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_TOPOLOGY, topology, blkcfg);
-	VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_CONFIG_WCE, writeback, blkcfg);
+	VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_CONFIG_WCE, wce, blkcfg);
+	VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_DISCARD, max_discard_sectors,
+	    blkcfg);
+	VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_DISCARD, max_discard_seg, blkcfg);
+	VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_DISCARD, discard_sector_alignment,
+	    blkcfg);
 }
 
 #undef VTBLK_GET_CONFIG
@@ -1282,7 +1328,7 @@ vtblk_dump_write(struct vtblk_softc *sc, void *virtual
 	req->vbr_ack = -1;
 	req->vbr_hdr.type = VIRTIO_BLK_T_OUT;
 	req->vbr_hdr.ioprio = 1;
-	req->vbr_hdr.sector = offset / 512;
+	req->vbr_hdr.sector = offset / VTBLK_BSIZE;
 
 	req->vbr_bp = &buf;
 	g_reset_bio(&buf);
@@ -1331,7 +1377,7 @@ vtblk_set_write_cache(struct vtblk_softc *sc, int wc)
 
 	/* Set either writeback (1) or writethrough (0) mode. */
 	virtio_write_dev_config_1(sc->vtblk_dev,
-	    offsetof(struct virtio_blk_config, writeback), wc);
+	    offsetof(struct virtio_blk_config, wce), wc);
 }
 
 static int
@@ -1346,7 +1392,7 @@ vtblk_write_cache_enabled(struct vtblk_softc *sc,
 		if (wc >= 0 && wc < VTBLK_CACHE_MAX)
 			vtblk_set_write_cache(sc, wc);
 		else
-			wc = blkcfg->writeback;
+			wc = blkcfg->wce;
 	} else
 		wc = virtio_with_feature(sc->vtblk_dev, VIRTIO_BLK_F_WCE);
 

Modified: head/sys/dev/virtio/block/virtio_blk.h
==============================================================================
--- head/sys/dev/virtio/block/virtio_blk.h	Thu Jul 16 15:12:52 2020	(r363254)
+++ head/sys/dev/virtio/block/virtio_blk.h	Thu Jul 16 16:32:16 2020	(r363255)
@@ -33,20 +33,27 @@
 #ifndef _VIRTIO_BLK_H
 #define _VIRTIO_BLK_H
 
+#define	VTBLK_BSIZE	512
+
 /* Feature bits */
-#define VIRTIO_BLK_F_BARRIER	0x0001	/* Does host support barriers? */
-#define VIRTIO_BLK_F_SIZE_MAX	0x0002	/* Indicates maximum segment size */
-#define VIRTIO_BLK_F_SEG_MAX	0x0004	/* Indicates maximum # of segments */
-#define VIRTIO_BLK_F_GEOMETRY	0x0010	/* Legacy geometry available  */
-#define VIRTIO_BLK_F_RO		0x0020	/* Disk is read-only */
-#define VIRTIO_BLK_F_BLK_SIZE	0x0040	/* Block size of disk is available*/
-#define VIRTIO_BLK_F_SCSI	0x0080	/* Supports scsi command passthru */
-#define VIRTIO_BLK_F_WCE	0x0200	/* Writeback mode enabled after reset */
-#define VIRTIO_BLK_F_TOPOLOGY	0x0400	/* Topology information is available */
-#define VIRTIO_BLK_F_CONFIG_WCE 0x0800	/* Writeback mode available in config */
 
-#define VIRTIO_BLK_ID_BYTES	20	/* ID string length */
+#define VIRTIO_BLK_F_BARRIER		0x0001	/* Does host support barriers? */
+#define VIRTIO_BLK_F_SIZE_MAX		0x0002	/* Indicates maximum segment size */
+#define VIRTIO_BLK_F_SEG_MAX		0x0004	/* Indicates maximum # of segments */
+#define VIRTIO_BLK_F_GEOMETRY		0x0010	/* Legacy geometry available  */
+#define VIRTIO_BLK_F_RO			0x0020	/* Disk is read-only */
+#define VIRTIO_BLK_F_BLK_SIZE		0x0040	/* Block size of disk is available*/
+#define VIRTIO_BLK_F_SCSI		0x0080	/* Supports scsi command passthru */
+#define VIRTIO_BLK_F_FLUSH		0x0200	/* Flush command supported */
+#define VIRTIO_BLK_F_WCE		0x0200	/* Legacy alias for FLUSH */
+#define VIRTIO_BLK_F_TOPOLOGY		0x0400	/* Topology information is available */
+#define VIRTIO_BLK_F_CONFIG_WCE		0x0800	/* Writeback mode available in config */
+#define VIRTIO_BLK_F_MQ			0x1000	/* Support more than one vq */
+#define VIRTIO_BLK_F_DISCARD		0x2000	/* Trim blocks */
+#define VIRTIO_BLK_F_WRITE_ZEROES	0x4000	/* Write zeros */
 
+#define VIRTIO_BLK_ID_BYTES		20	/* ID string length */
+
 struct virtio_blk_config {
 	/* The capacity (in 512-byte sectors). */
 	uint64_t capacity;
@@ -66,15 +73,29 @@ struct virtio_blk_config {
 
 	/* Topology of the device (if VIRTIO_BLK_F_TOPOLOGY) */
 	struct virtio_blk_topology {
+		/* Exponent for physical block per logical block. */
 		uint8_t physical_block_exp;
+		/* Alignment offset in logical blocks. */
 		uint8_t alignment_offset;
+		/* Minimum I/O size without performance penalty in logical
+		 * blocks. */
 		uint16_t min_io_size;
+		/* Optimal sustained I/O size in logical blocks. */
 		uint32_t opt_io_size;
 	} topology;
 
 	/* Writeback mode (if VIRTIO_BLK_F_CONFIG_WCE) */
-	uint8_t writeback;
-
+	uint8_t wce;
+	uint8_t unused;
+	/* Number of vqs, only available when VIRTIO_BLK_F_MQ is set */
+	uint16_t num_queues;
+	uint32_t max_discard_sectors;
+	uint32_t max_discard_seg;
+	uint32_t discard_sector_alignment;
+	uint32_t max_write_zeroes_sectors;
+	uint32_t max_write_zeroes_seg;
+	uint8_t write_zeroes_may_unmap;
+	uint8_t unused1[3];
 } __packed;
 
 /*
@@ -89,24 +110,35 @@ struct virtio_blk_config {
  */
 
 /* These two define direction. */
-#define VIRTIO_BLK_T_IN		0
-#define VIRTIO_BLK_T_OUT	1
+#define VIRTIO_BLK_T_IN			0
+#define VIRTIO_BLK_T_OUT		1
 
 /* This bit says it's a scsi command, not an actual read or write. */
-#define VIRTIO_BLK_T_SCSI_CMD	2
+#define VIRTIO_BLK_T_SCSI_CMD		2
+#define VIRTIO_BLK_T_SCSI_CMD_OUT	3
 
 /* Cache flush command */
-#define VIRTIO_BLK_T_FLUSH	4
+#define VIRTIO_BLK_T_FLUSH		4
+#define VIRTIO_BLK_T_FLUSH_OUT		5
 
 /* Get device ID command */
-#define VIRTIO_BLK_T_GET_ID	8
+#define VIRTIO_BLK_T_GET_ID		8
 
+/* Discard command */
+#define VIRTIO_BLK_T_DISCARD		11
+
+/* Write zeros command */
+#define VIRTIO_BLK_T_WRITE_ZEROES	13
+
 /* Barrier before this op. */
-#define VIRTIO_BLK_T_BARRIER	0x80000000
+#define VIRTIO_BLK_T_BARRIER		0x80000000
 
 /* ID string length */
-#define VIRTIO_BLK_ID_BYTES	20
+#define VIRTIO_BLK_ID_BYTES		20
 
+/* Unmap this range (only valid for write zeroes command) */
+#define VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP	0x00000001
+
 /* This is the first element of the read scatter-gather list. */
 struct virtio_blk_outhdr {
 	/* VIRTIO_BLK_T* */
@@ -115,6 +147,15 @@ struct virtio_blk_outhdr {
 	uint32_t ioprio;
 	/* Sector (ie. 512 byte offset) */
 	uint64_t sector;
+};
+
+struct virtio_blk_discard_write_zeroes {
+	uint64_t sector;
+	uint32_t num_sectors;
+	struct {
+		uint32_t unmap:1;
+		uint32_t reserved:31;
+	} flags;
 };
 
 struct virtio_scsi_inhdr {


More information about the svn-src-head mailing list