svn commit: r256151 - in head/sys/dev: nvd nvme

Jim Harris jimharris at FreeBSD.org
Tue Oct 8 15:44:05 UTC 2013


Author: jimharris
Date: Tue Oct  8 15:44:04 2013
New Revision: 256151
URL: http://svnweb.freebsd.org/changeset/base/256151

Log:
  Add driver-assisted striping for upcoming Intel NVMe controllers that can
  benefit from it.
  
  Sponsored by:	Intel
  Reviewed by:	kib (earlier version), carl
  Approved by:	re (hrs)
  MFC after:	1 week

Modified:
  head/sys/dev/nvd/nvd.c
  head/sys/dev/nvme/nvme.h
  head/sys/dev/nvme/nvme_ns.c
  head/sys/dev/nvme/nvme_private.h

Modified: head/sys/dev/nvd/nvd.c
==============================================================================
--- head/sys/dev/nvd/nvd.c	Tue Oct  8 14:10:32 2013	(r256150)
+++ head/sys/dev/nvd/nvd.c	Tue Oct  8 15:44:04 2013	(r256151)
@@ -187,17 +187,6 @@ nvd_done(void *arg, const struct nvme_co
 
 	atomic_add_int(&ndisk->cur_depth, -1);
 
-	/*
-	 * TODO: add more extensive translation of NVMe status codes
-	 *  to different bio error codes (i.e. EIO, EINVAL, etc.)
-	 */
-	if (nvme_completion_is_error(cpl)) {
-		bp->bio_error = EIO;
-		bp->bio_flags |= BIO_ERROR;
-		bp->bio_resid = bp->bio_bcount;
-	} else
-		bp->bio_resid = 0;
-
 	biodone(bp);
 }
 

Modified: head/sys/dev/nvme/nvme.h
==============================================================================
--- head/sys/dev/nvme/nvme.h	Tue Oct  8 14:10:32 2013	(r256150)
+++ head/sys/dev/nvme/nvme.h	Tue Oct  8 15:44:04 2013	(r256151)
@@ -535,7 +535,7 @@ struct nvme_controller_data {
 	uint8_t			reserved6[1024];
 
 	/* bytes 3072-4095: vendor specific */
-	uint8_t			reserved7[1024];
+	uint8_t			vs[1024];
 } __packed __aligned(4);
 
 struct nvme_namespace_data {

Modified: head/sys/dev/nvme/nvme_ns.c
==============================================================================
--- head/sys/dev/nvme/nvme_ns.c	Tue Oct  8 14:10:32 2013	(r256150)
+++ head/sys/dev/nvme/nvme_ns.c	Tue Oct  8 15:44:04 2013	(r256151)
@@ -34,13 +34,31 @@ __FBSDID("$FreeBSD$");
 #include <sys/disk.h>
 #include <sys/fcntl.h>
 #include <sys/ioccom.h>
+#include <sys/malloc.h>
 #include <sys/module.h>
 #include <sys/proc.h>
 
 #include <dev/pci/pcivar.h>
 
+#include <geom/geom.h>
+
 #include "nvme_private.h"
 
+static void		nvme_bio_child_inbed(struct bio *parent, int bio_error);
+static void		nvme_bio_child_done(void *arg,
+					    const struct nvme_completion *cpl);
+static uint32_t		nvme_get_num_segments(uint64_t addr, uint64_t size,
+					      uint32_t alignment);
+static void		nvme_free_child_bios(int num_bios,
+					     struct bio **child_bios);
+static struct bio **	nvme_allocate_child_bios(int num_bios);
+static struct bio **	nvme_construct_child_bios(struct bio *bp,
+						  uint32_t alignment,
+						  int *num_bios);
+static int		nvme_ns_split_bio(struct nvme_namespace *ns,
+					  struct bio *bp,
+					  uint32_t alignment);
+
 static int
 nvme_ns_ioctl(struct cdev *cdev, u_long cmd, caddr_t arg, int flag,
     struct thread *td)
@@ -202,18 +220,218 @@ nvme_ns_bio_done(void *arg, const struct
 	if (bp->bio_driver2)
 		free(bp->bio_driver2, M_NVME);
 
+	if (nvme_completion_is_error(status)) {
+		bp->bio_flags |= BIO_ERROR;
+		if (bp->bio_error == 0)
+			bp->bio_error = EIO;
+	}
+
+	if ((bp->bio_flags & BIO_ERROR) == 0)
+		bp->bio_resid = 0;
+	else
+		bp->bio_resid = bp->bio_bcount;
+
 	bp_cb_fn(bp, status);
 }
 
+/*
+ * Account for one child of a split bio that has finished (or could not be
+ *  submitted), and complete the parent once every child is accounted for.
+ *
+ * parent:    the bio that was split into children.
+ * bio_error: 0 on success, otherwise the errno to record on the parent.
+ */
+static void
+nvme_bio_child_inbed(struct bio *parent, int bio_error)
+{
+	struct nvme_completion	parent_cpl;
+	int			children, inbed;
+
+	if (bio_error != 0) {
+		parent->bio_flags |= BIO_ERROR;
+		parent->bio_error = bio_error;
+	}
+
+	/*
+	 * atomic_fetchadd will return value before adding 1, so we still
+	 *  must add 1 to get the updated inbed number.  Save bio_children
+	 *  before incrementing: once our increment is visible, a sibling
+	 *  child completing on another queue may observe the final count
+	 *  and complete the parent, after which the parent must not be
+	 *  dereferenced by anyone but the thread that saw the final count.
+	 */
+	children = atomic_load_acq_int(&parent->bio_children);
+	inbed = atomic_fetchadd_int(&parent->bio_inbed, 1) + 1;
+	if (inbed == children) {
+		/*
+		 * Synthesize a completion for the parent; it is all zeroes
+		 *  unless some child flagged an error.
+		 */
+		bzero(&parent_cpl, sizeof(parent_cpl));
+		if (parent->bio_flags & BIO_ERROR)
+			parent_cpl.status.sc = NVME_SC_DATA_TRANSFER_ERROR;
+		nvme_ns_bio_done(parent, &parent_cpl);
+	}
+}
+
+/*
+ * Completion callback for one child of a split bio.  Destroys the child
+ *  and reports its outcome to the parent: EIO if the completion indicates
+ *  an error, 0 otherwise.
+ *
+ * arg: the child bio.
+ * cpl: the NVMe completion for the child's command.
+ */
+static void
+nvme_bio_child_done(void *arg, const struct nvme_completion *cpl)
+{
+	struct bio	*child_bp, *parent_bp;
+	int		status;
+
+	child_bp = arg;
+
+	/* Fetch the parent first; the child is gone after g_destroy_bio(). */
+	parent_bp = child_bp->bio_parent;
+	g_destroy_bio(child_bp);
+
+	if (nvme_completion_is_error(cpl))
+		status = EIO;
+	else
+		status = 0;
+
+	nvme_bio_child_inbed(parent_bp, status);
+}
+
+/*
+ * Count the align-sized, align-aligned segments that the byte range
+ *  starting at addr and spanning size bytes touches.
+ *
+ * align must be zero or a power of 2 (asserted).  An align of zero
+ *  always yields 1.
+ */
+static uint32_t
+nvme_get_num_segments(uint64_t addr, uint64_t size, uint32_t align)
+{
+	uint32_t	mask, whole, head, tail;
+
+	if (align == 0)
+		return (1);
+
+	KASSERT((align & (align - 1)) == 0, ("alignment not power of 2\n"));
+
+	mask = align - 1;
+	whole = size / align;	/* full align-sized units in size */
+	tail = size & mask;	/* bytes of size beyond the full units */
+	head = addr & mask;	/* distance of addr past a boundary */
+
+	/* Aligned start and whole-unit length: nothing extra to count. */
+	if (tail == 0 && head == 0)
+		return (whole);
+
+	/*
+	 * Otherwise the leftover bytes plus the starting skew occupy one
+	 *  or two additional segments.
+	 */
+	return (whole + 1 + (tail + head - 1) / align);
+}
+
+/*
+ * Destroy every non-NULL bio in the first num_bios slots of child_bios,
+ *  then free the pointer array itself.
+ */
+static void
+nvme_free_child_bios(int num_bios, struct bio **child_bios)
+{
+	struct bio	*cbp;
+	int		idx;
+
+	idx = 0;
+	while (idx < num_bios) {
+		cbp = child_bios[idx++];
+		/* Slots may be NULL when an allocation failed part way. */
+		if (cbp == NULL)
+			continue;
+		g_destroy_bio(cbp);
+	}
+
+	free(child_bios, M_NVME);
+}
+
+/*
+ * Allocate an array of num_bios bio pointers and a new bio for each slot.
+ *
+ * Returns the array, or NULL if the array or any bio could not be
+ *  allocated; on failure everything allocated here is released again.
+ */
+static struct bio **
+nvme_allocate_child_bios(int num_bios)
+{
+	struct bio	**bios;
+	int		failed, idx;
+
+	bios = malloc(num_bios * sizeof(struct bio *), M_NVME, M_NOWAIT);
+	if (bios == NULL)
+		return (NULL);
+
+	/*
+	 * The array is not zeroed by malloc (no M_ZERO), so keep going
+	 *  after a failed g_new_bio(): every slot must end up holding
+	 *  either a bio or NULL before the cleanup routine walks it.
+	 */
+	failed = 0;
+	for (idx = 0; idx < num_bios; idx++) {
+		bios[idx] = g_new_bio();
+		if (bios[idx] == NULL)
+			failed = 1;
+	}
+
+	if (!failed)
+		return (bios);
+
+	nvme_free_child_bios(num_bios, bios);
+	return (NULL);
+}
+
+/*
+ * Split bp into child bios, none of which crosses an alignment boundary.
+ *
+ * bp:        the parent bio; its bio_children and bio_inbed are
+ *            (re)initialized here.
+ * alignment: boundary size in bytes.
+ * num_bios:  out parameter, set to the number of children computed for bp
+ *            (written even when allocation then fails).
+ *
+ * Returns the array of initialized children, or NULL if they could not be
+ *  allocated.
+ */
+static struct bio **
+nvme_construct_child_bios(struct bio *bp, uint32_t alignment, int *num_bios)
+{
+	struct bio	**child_bios;
+	struct bio	*cbp;
+	uint64_t	offset;
+	caddr_t		buf;
+	uint32_t	resid;
+	int		idx;
+#ifdef NVME_UNMAPPED_BIO_SUPPORT
+	struct vm_page	**pages;
+	uint32_t	page_off;
+#endif
+
+	*num_bios = nvme_get_num_segments(bp->bio_offset, bp->bio_bcount,
+	    alignment);
+	child_bios = nvme_allocate_child_bios(*num_bios);
+	if (child_bios == NULL)
+		return (NULL);
+
+	bp->bio_children = *num_bios;
+	bp->bio_inbed = 0;
+
+	/* Running cursors over the parent's range and its data. */
+	offset = bp->bio_offset;
+	resid = bp->bio_bcount;
+	buf = bp->bio_data;
+#ifdef NVME_UNMAPPED_BIO_SUPPORT
+	page_off = bp->bio_ma_offset;
+	pages = bp->bio_ma;
+#endif
+
+	for (idx = 0; idx < *num_bios; idx++) {
+		cbp = child_bios[idx];
+		cbp->bio_parent = bp;
+		cbp->bio_cmd = bp->bio_cmd;
+		cbp->bio_flags = bp->bio_flags;
+		cbp->bio_offset = offset;
+		/* Run to the next boundary or the end of the request. */
+		cbp->bio_bcount = min(resid,
+		    alignment - (offset & (alignment - 1)));
+#ifdef NVME_UNMAPPED_BIO_SUPPORT
+		if (bp->bio_flags & BIO_UNMAPPED) {
+			cbp->bio_ma_offset = page_off;
+			cbp->bio_ma = pages;
+			cbp->bio_ma_n =
+			    nvme_get_num_segments(cbp->bio_ma_offset,
+				cbp->bio_bcount, PAGE_SIZE);
+			page_off = (page_off + cbp->bio_bcount) &
+			    PAGE_MASK;
+			pages += cbp->bio_ma_n;
+			/*
+			 * A non-zero offset means this child ended inside
+			 *  a page, so the next child starts in that same
+			 *  page: step the page pointer back by one.
+			 */
+			if (page_off != 0)
+				pages--;
+		} else
+#endif
+		{
+			cbp->bio_data = buf;
+			buf += cbp->bio_bcount;
+		}
+		offset += cbp->bio_bcount;
+		resid -= cbp->bio_bcount;
+	}
+
+	return (child_bios);
+}
+
+/*
+ * Split bp on alignment boundaries and submit each piece to the namespace.
+ *
+ * Returns ENOMEM if the children could not be constructed, otherwise 0.
+ *  Failures submitting an individual child are not returned; they are
+ *  recorded on the parent through nvme_bio_child_inbed().
+ */
+static int
+nvme_ns_split_bio(struct nvme_namespace *ns, struct bio *bp,
+    uint32_t alignment)
+{
+	struct bio	**children;
+	struct bio	*cbp;
+	int		count, error, idx;
+
+	children = nvme_construct_child_bios(bp, alignment, &count);
+	if (children == NULL)
+		return (ENOMEM);
+
+	idx = 0;
+	while (idx < count) {
+		cbp = children[idx++];
+		error = nvme_ns_bio_process(ns, cbp, nvme_bio_child_done);
+		if (error == 0)
+			continue;
+
+		/*
+		 * The child was never submitted, so its completion callback
+		 *  will not run: account for it on the parent and destroy
+		 *  it here.
+		 */
+		nvme_bio_child_inbed(bp, error);
+		g_destroy_bio(cbp);
+	}
+
+	/* Only the pointer array is freed; the children are destroyed elsewhere. */
+	free(children, M_NVME);
+	return (0);
+}
+
 int
 nvme_ns_bio_process(struct nvme_namespace *ns, struct bio *bp,
 	nvme_cb_fn_t cb_fn)
 {
 	struct nvme_dsm_range	*dsm_range;
+	uint32_t		num_bios;
 	int			err;
 
 	bp->bio_driver1 = cb_fn;
 
+	if (ns->stripesize > 0 &&
+	    (bp->bio_cmd == BIO_READ || bp->bio_cmd == BIO_WRITE)) {
+		num_bios = nvme_get_num_segments(bp->bio_offset,
+		    bp->bio_bcount, ns->stripesize);
+		if (num_bios > 1)
+			return (nvme_ns_split_bio(ns, bp, ns->stripesize));
+	}
+
 	switch (bp->bio_cmd) {
 	case BIO_READ:
 		err = nvme_ns_cmd_read_bio(ns, bp, nvme_ns_bio_done, bp);
@@ -276,6 +494,11 @@ nvme_ns_construct(struct nvme_namespace 
 
 	ns->ctrlr = ctrlr;
 	ns->id = id;
+	ns->stripesize = 0;
+
+	if (pci_get_devid(ctrlr->dev) == 0x09538086 && ctrlr->cdata.vs[3] != 0)
+		ns->stripesize =
+		    (1 << ctrlr->cdata.vs[3]) * ctrlr->min_page_size;
 
 	/*
 	 * Namespaces are reconstructed after a controller reset, so check

Modified: head/sys/dev/nvme/nvme_private.h
==============================================================================
--- head/sys/dev/nvme/nvme_private.h	Tue Oct  8 14:10:32 2013	(r256150)
+++ head/sys/dev/nvme/nvme_private.h	Tue Oct  8 15:44:04 2013	(r256151)
@@ -238,6 +238,7 @@ struct nvme_namespace {
 	uint16_t			flags;
 	struct cdev			*cdev;
 	void				*cons_cookie[NVME_MAX_CONSUMERS];
+	uint32_t			stripesize;
 	struct mtx			lock;
 };
 


More information about the svn-src-head mailing list