svn commit: r343562 - head/sys/dev/nvd

Alexander Motin mav at FreeBSD.org
Tue Jan 29 20:35:11 UTC 2019


Author: mav
Date: Tue Jan 29 20:35:09 2019
New Revision: 343562
URL: https://svnweb.freebsd.org/changeset/base/343562

Log:
  Reimplement BIO_ORDERED handling in nvd(4).
  
  This fixes BIO_ORDERED semantics while also improving performance by:
   - sleeping before a BIO_ORDERED bio too, as defined, not only after it;
   - not queueing a BIO_ORDERED bio to the taskqueue if no other bios are
     running;
   - waking up the sleeping taskqueue explicitly rather than relying on
     polling.
  
  On a Samsung SSD 970 PRO this reduces sync write latency, measured with
  `diskinfo -wS`, from ~2ms to ~1.1ms by not sleeping without reason until
  the next HZ tick.
  
  On the same device, a ZFS pool with 8 ZVOLs synchronously writing 4KB
  blocks shows ~950 IOPS, up from ~750 IOPS before.  I suspect ZFS does not
  need BIO_ORDERED on BIO_FLUSH at all, but that is the next question to
  look into.
  
  MFC after:	2 weeks
  Sponsored by:	iXsystems, Inc.

Modified:
  head/sys/dev/nvd/nvd.c

Modified: head/sys/dev/nvd/nvd.c
==============================================================================
--- head/sys/dev/nvd/nvd.c	Tue Jan 29 20:10:27 2019	(r343561)
+++ head/sys/dev/nvd/nvd.c	Tue Jan 29 20:35:09 2019	(r343562)
@@ -82,6 +82,7 @@ struct nvd_disk {
 	struct nvme_namespace	*ns;
 
 	uint32_t		cur_depth;
+#define	NVD_ODEPTH	(1 << 31)
 	uint32_t		ordered_in_flight;
 	u_int			unit;
 
@@ -181,39 +182,50 @@ nvd_unload()
 	mtx_destroy(&nvd_lock);
 }
 
-static int
+static void
 nvd_bio_submit(struct nvd_disk *ndisk, struct bio *bp)
 {
 	int err;
 
 	bp->bio_driver1 = NULL;
-	atomic_add_int(&ndisk->cur_depth, 1);
+	if (__predict_false(bp->bio_flags & BIO_ORDERED))
+		atomic_add_int(&ndisk->cur_depth, NVD_ODEPTH);
+	else
+		atomic_add_int(&ndisk->cur_depth, 1);
 	err = nvme_ns_bio_process(ndisk->ns, bp, nvd_done);
 	if (err) {
-		atomic_add_int(&ndisk->cur_depth, -1);
-		if (__predict_false(bp->bio_flags & BIO_ORDERED))
+		if (__predict_false(bp->bio_flags & BIO_ORDERED)) {
+			atomic_add_int(&ndisk->cur_depth, -NVD_ODEPTH);
 			atomic_add_int(&ndisk->ordered_in_flight, -1);
+			wakeup(&ndisk->cur_depth);
+		} else {
+			if (atomic_fetchadd_int(&ndisk->cur_depth, -1) == 1 &&
+			    __predict_false(ndisk->ordered_in_flight != 0))
+				wakeup(&ndisk->cur_depth);
+		}
 		bp->bio_error = err;
 		bp->bio_flags |= BIO_ERROR;
 		bp->bio_resid = bp->bio_bcount;
 		biodone(bp);
-		return (-1);
 	}
-
-	return (0);
 }
 
 static void
 nvd_strategy(struct bio *bp)
 {
-	struct nvd_disk *ndisk;
+	struct nvd_disk *ndisk = (struct nvd_disk *)bp->bio_disk->d_drv1;
 
-	ndisk = (struct nvd_disk *)bp->bio_disk->d_drv1;
-
-	if (__predict_false(bp->bio_flags & BIO_ORDERED))
-		atomic_add_int(&ndisk->ordered_in_flight, 1);
-
-	if (__predict_true(ndisk->ordered_in_flight == 0)) {
+	/*
+	 * bio with BIO_ORDERED flag must be executed after all previous
+	 * bios in the queue, and before any successive bios.
+	 */
+	if (__predict_false(bp->bio_flags & BIO_ORDERED)) {
+		if (atomic_fetchadd_int(&ndisk->ordered_in_flight, 1) == 0 &&
+		    ndisk->cur_depth == 0 && bioq_first(&ndisk->bioq) == NULL) {
+			nvd_bio_submit(ndisk, bp);
+			return;
+		}
+	} else if (__predict_true(ndisk->ordered_in_flight == 0)) {
 		nvd_bio_submit(ndisk, bp);
 		return;
 	}
@@ -281,28 +293,27 @@ nvd_ioctl(struct disk *ndisk, u_long cmd, void *data, 
 static int
 nvd_dump(void *arg, void *virt, vm_offset_t phys, off_t offset, size_t len)
 {
-	struct nvd_disk *ndisk;
-	struct disk *dp;
+	struct disk *dp = arg;
+	struct nvd_disk *ndisk = dp->d_drv1;
 
-	dp = arg;
-	ndisk = dp->d_drv1;
-
 	return (nvme_ns_dump(ndisk->ns, virt, offset, len));
 }
 
 static void
 nvd_done(void *arg, const struct nvme_completion *cpl)
 {
-	struct bio *bp;
-	struct nvd_disk *ndisk;
+	struct bio *bp = (struct bio *)arg;
+	struct nvd_disk *ndisk = bp->bio_disk->d_drv1;
 
-	bp = (struct bio *)arg;
-
-	ndisk = bp->bio_disk->d_drv1;
-
-	atomic_add_int(&ndisk->cur_depth, -1);
-	if (__predict_false(bp->bio_flags & BIO_ORDERED))
+	if (__predict_false(bp->bio_flags & BIO_ORDERED)) {
+		atomic_add_int(&ndisk->cur_depth, -NVD_ODEPTH);
 		atomic_add_int(&ndisk->ordered_in_flight, -1);
+		wakeup(&ndisk->cur_depth);
+	} else {
+		if (atomic_fetchadd_int(&ndisk->cur_depth, -1) == 1 &&
+		    __predict_false(ndisk->ordered_in_flight != 0))
+			wakeup(&ndisk->cur_depth);
+	}
 
 	biodone(bp);
 }
@@ -320,22 +331,23 @@ nvd_bioq_process(void *arg, int pending)
 		if (bp == NULL)
 			break;
 
-		if (nvd_bio_submit(ndisk, bp) != 0) {
-			continue;
+		if (__predict_false(bp->bio_flags & BIO_ORDERED)) {
+			/*
+			 * bio with BIO_ORDERED flag set must be executed
+			 * after all previous bios.
+			 */
+			while (ndisk->cur_depth > 0)
+				tsleep(&ndisk->cur_depth, 0, "nvdorb", 1);
+		} else {
+			/*
+			 * bio with BIO_ORDERED flag set must be completed
+			 * before proceeding with additional bios.
+			 */
+			while (ndisk->cur_depth >= NVD_ODEPTH)
+				tsleep(&ndisk->cur_depth, 0, "nvdora", 1);
 		}
 
-#ifdef BIO_ORDERED
-		/*
-		 * BIO_ORDERED flag dictates that the bio with BIO_ORDERED
-		 *  flag set must be completed before proceeding with
-		 *  additional bios.
-		 */
-		if (bp->bio_flags & BIO_ORDERED) {
-			while (ndisk->cur_depth > 0) {
-				pause("nvd flush", 1);
-			}
-		}
-#endif
+		nvd_bio_submit(ndisk, bp);
 	}
 }
 


More information about the svn-src-head mailing list