svn commit: r341549 - head/sys/dev/mlx4/mlx4_en

Slava Shwartsman slavash at FreeBSD.org
Wed Dec 5 13:32:48 UTC 2018


Author: slavash
Date: Wed Dec  5 13:32:46 2018
New Revision: 341549
URL: https://svnweb.freebsd.org/changeset/base/341549

Log:
  mlx4en: Add support for receiving all data using one or more MCLBYTES-sized
  mbufs, also when the MTU is greater than MCLBYTES.
  
  Submitted by:   hselasky@
  Approved by:    hselasky (mentor)
  MFC after:      1 week
  Sponsored by:   Mellanox Technologies
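
  The core of the change: when the MTU exceeds MCLBYTES, the receive path now
  builds a chain of up to MLX4_EN_MAX_RX_SEGS clusters of MLX4_EN_MAX_RX_BYTES
  (default MCLBYTES) bytes each, instead of a single large cluster. A minimal
  standalone sketch of that allocation pattern, assuming the default cluster
  size (alloc_rx_chain and len are illustrative names, and the real loop is
  additionally bounded by MLX4_EN_MAX_RX_SEGS):

	static struct mbuf *
	alloc_rx_chain(int len)
	{
		struct mbuf *mb_head, *mb;

		mb_head = mb = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, MCLBYTES);
		if (mb == NULL)
			return (NULL);
		mb->m_len = MCLBYTES;
		mb_head->m_pkthdr.len = MCLBYTES;

		while (mb_head->m_pkthdr.len < len) {
			/* non-head mbufs carry no packet header */
			mb->m_next = m_getjcl(M_NOWAIT, MT_DATA, 0, MCLBYTES);
			if ((mb = mb->m_next) == NULL) {
				/* m_freem() releases the whole chain */
				m_freem(mb_head);
				return (NULL);
			}
			mb->m_len = MCLBYTES;
			mb_head->m_pkthdr.len += MCLBYTES;
		}
		return (mb_head);
	}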

Modified:
  head/sys/dev/mlx4/mlx4_en/en.h
  head/sys/dev/mlx4/mlx4_en/mlx4_en_netdev.c
  head/sys/dev/mlx4/mlx4_en/mlx4_en_rx.c

Modified: head/sys/dev/mlx4/mlx4_en/en.h
==============================================================================
--- head/sys/dev/mlx4/mlx4_en/en.h	Wed Dec  5 13:32:15 2018	(r341548)
+++ head/sys/dev/mlx4/mlx4_en/en.h	Wed Dec  5 13:32:46 2018	(r341549)
@@ -75,6 +75,15 @@
 #define MAX_RX_RINGS		128
 #define MIN_RX_RINGS		4
 #define TXBB_SIZE		64
+
+#ifndef MLX4_EN_MAX_RX_SEGS
+#define	MLX4_EN_MAX_RX_SEGS 1	/* or 8 */
+#endif
+
+#ifndef MLX4_EN_MAX_RX_BYTES
+#define	MLX4_EN_MAX_RX_BYTES MCLBYTES
+#endif
+
 #define HEADROOM		(2048 / TXBB_SIZE + 1)
 #define INIT_OWNER_BIT		0xffffffff
 #define STAMP_STRIDE		64
@@ -297,10 +306,12 @@ struct mlx4_en_tx_ring {
 };
 
 struct mlx4_en_rx_desc {
-	/* actual number of entries depends on rx ring stride */
-	struct mlx4_wqe_data_seg data[0];
+	struct mlx4_wqe_data_seg data[MLX4_EN_MAX_RX_SEGS];
 };
 
+/* the size of the structure above must be power of two */
+CTASSERT(powerof2(sizeof(struct mlx4_en_rx_desc)));
+
 struct mlx4_en_rx_mbuf {
 	bus_dmamap_t dma_map;
 	struct mbuf *mbuf;
@@ -309,7 +320,7 @@ struct mlx4_en_rx_mbuf {
 struct mlx4_en_rx_spare {
 	bus_dmamap_t dma_map;
 	struct mbuf *mbuf;
-	u64 paddr_be;
+	bus_dma_segment_t segs[MLX4_EN_MAX_RX_SEGS];
 };
 
 struct mlx4_en_rx_ring {
@@ -319,7 +330,6 @@ struct mlx4_en_rx_ring {
 	u32 size ;	/* number of Rx descs*/
 	u32 actual_size;
 	u32 size_mask;
-	u16 stride;
 	u16 log_stride;
 	u16 cqn;	/* index of port CQ associated with this ring */
 	u32 prod;
@@ -327,6 +337,7 @@ struct mlx4_en_rx_ring {
 	u32 buf_size;
 	u8  fcs_del;
 	u32 rx_mb_size;
+	u32 rx_mr_key_be;
 	int qpn;
 	u8 *buf;
 	struct mlx4_en_rx_mbuf *mbuf;
@@ -559,7 +570,6 @@ struct mlx4_en_priv {
 	int registered;
 	int gone;
 	int allocated;
-	int stride;
 	unsigned char current_mac[ETH_ALEN + 2];
         u64 mac;
 	int mac_index;
@@ -805,7 +815,7 @@ int mlx4_en_create_rx_ring(struct mlx4_en_priv *priv,
 			   u32 size, int node);
 void mlx4_en_destroy_rx_ring(struct mlx4_en_priv *priv,
 			     struct mlx4_en_rx_ring **pring,
-			     u32 size, u16 stride);
+			     u32 size);
 void mlx4_en_rx_que(void *context, int pending);
 int mlx4_en_activate_rx_rings(struct mlx4_en_priv *priv);
 void mlx4_en_deactivate_rx_ring(struct mlx4_en_priv *priv,
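
A note on the CTASSERT above: the firmware is told the RX descriptor stride
as a log2 value (ring->log_stride, passed via mlx4_en_fill_qp_context() in
the mlx4_en_rx.c hunks below), so sizeof(struct mlx4_en_rx_desc) must be a
power of two. Assuming the 16-byte mlx4_wqe_data_seg from the mlx4 headers,
both values suggested in the header satisfy this:

	/*
	 * MLX4_EN_MAX_RX_SEGS == 1 -> sizeof(struct mlx4_en_rx_desc) == 16  (1 << 4)
	 * MLX4_EN_MAX_RX_SEGS == 8 -> sizeof(struct mlx4_en_rx_desc) == 128 (1 << 7)
	 *
	 * which allows descriptor "index" to be found by plain array
	 * arithmetic instead of the old runtime stride computation:
	 */
	rx_desc = ((struct mlx4_en_rx_desc *)ring->buf) + index;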

Modified: head/sys/dev/mlx4/mlx4_en/mlx4_en_netdev.c
==============================================================================
--- head/sys/dev/mlx4/mlx4_en/mlx4_en_netdev.c	Wed Dec  5 13:32:15 2018	(r341548)
+++ head/sys/dev/mlx4/mlx4_en/mlx4_en_netdev.c	Wed Dec  5 13:32:46 2018	(r341549)
@@ -1683,7 +1683,7 @@ void mlx4_en_free_resources(struct mlx4_en_priv *priv)
 	for (i = 0; i < priv->rx_ring_num; i++) {
 		if (priv->rx_ring[i])
 			mlx4_en_destroy_rx_ring(priv, &priv->rx_ring[i],
-				priv->prof->rx_ring_size, priv->stride);
+				priv->prof->rx_ring_size);
 		if (priv->rx_cq[i])
 			mlx4_en_destroy_cq(priv, &priv->rx_cq[i]);
 	}
@@ -1734,8 +1734,7 @@ err:
 	for (i = 0; i < priv->rx_ring_num; i++) {
 		if (priv->rx_ring[i])
 			mlx4_en_destroy_rx_ring(priv, &priv->rx_ring[i],
-						prof->rx_ring_size,
-						priv->stride);
+						prof->rx_ring_size);
 		if (priv->rx_cq[i])
 			mlx4_en_destroy_cq(priv, &priv->rx_cq[i]);
 	}
@@ -2236,9 +2235,6 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int 
                 err = -EINVAL;
                 goto out;
         }
-
-	priv->stride = roundup_pow_of_two(sizeof(struct mlx4_en_rx_desc) +
-					  DS_SIZE);
 
 	mlx4_en_sysctl_conf(priv);
 

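The removal of priv->stride above follows from the en.h change: the stride
was previously computed at run time, and with the old zero-length data[]
array it worked out to a single 16-byte segment (again assuming the 16-byte
mlx4_wqe_data_seg). Now that the descriptor is a fixed-size, power-of-two
struct, sizeof(struct mlx4_en_rx_desc) is the stride everywhere and the
cached per-port copy is redundant:

	/* before: runtime stride, cached per port */
	priv->stride = roundup_pow_of_two(sizeof(struct mlx4_en_rx_desc) + DS_SIZE);
	/* after: compile-time constant, taken from the descriptor itself */
	ring->log_stride = ilog2(sizeof(struct mlx4_en_rx_desc));
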
Modified: head/sys/dev/mlx4/mlx4_en/mlx4_en_rx.c
==============================================================================
--- head/sys/dev/mlx4/mlx4_en/mlx4_en_rx.c	Wed Dec  5 13:32:15 2018	(r341548)
+++ head/sys/dev/mlx4/mlx4_en/mlx4_en_rx.c	Wed Dec  5 13:32:46 2018	(r341549)
@@ -44,14 +44,13 @@
 
 #include "en.h"
 
-
+#if (MLX4_EN_MAX_RX_SEGS == 1)
 static void mlx4_en_init_rx_desc(struct mlx4_en_priv *priv,
 				 struct mlx4_en_rx_ring *ring,
 				 int index)
 {
-	struct mlx4_en_rx_desc *rx_desc = (struct mlx4_en_rx_desc *)
-	    (ring->buf + (ring->stride * index));
-	int possible_frags;
+	struct mlx4_en_rx_desc *rx_desc =
+	    ((struct mlx4_en_rx_desc *)ring->buf) + index;
 	int i;
 
 	/* Set size and memtype fields */
@@ -63,38 +62,75 @@ static void mlx4_en_init_rx_desc(struct mlx4_en_priv *
 	 * stride, remaining (unused) fragments must be padded with
 	 * null address/size and a special memory key:
 	 */
-	possible_frags = (ring->stride - sizeof(struct mlx4_en_rx_desc)) / DS_SIZE;
-	for (i = 1; i < possible_frags; i++) {
+	for (i = 1; i < MLX4_EN_MAX_RX_SEGS; i++) {
 		rx_desc->data[i].byte_count = 0;
 		rx_desc->data[i].lkey = cpu_to_be32(MLX4_EN_MEMTYPE_PAD);
 		rx_desc->data[i].addr = 0;
 	}
 }
+#endif
 
+static inline struct mbuf *
+mlx4_en_alloc_mbuf(struct mlx4_en_rx_ring *ring)
+{
+	struct mbuf *mb;
+
+#if (MLX4_EN_MAX_RX_SEGS == 1)
+        mb = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, ring->rx_mb_size);
+        if (likely(mb != NULL))
+		mb->m_pkthdr.len = mb->m_len = ring->rx_mb_size;
+#else
+	mb = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, MLX4_EN_MAX_RX_BYTES);
+	if (likely(mb != NULL)) {
+		struct mbuf *mb_head = mb;
+		int i;
+
+		mb->m_len = MLX4_EN_MAX_RX_BYTES;
+		mb->m_pkthdr.len = MLX4_EN_MAX_RX_BYTES;
+
+		for (i = 1; i != MLX4_EN_MAX_RX_SEGS; i++) {
+			if (mb_head->m_pkthdr.len >= ring->rx_mb_size)
+				break;
+			mb = (mb->m_next = m_getjcl(M_NOWAIT, MT_DATA, 0, MLX4_EN_MAX_RX_BYTES));
+			if (unlikely(mb == NULL)) {
+				m_freem(mb_head);
+				return (NULL);
+			}
+			mb->m_len = MLX4_EN_MAX_RX_BYTES;
+			mb_head->m_pkthdr.len += MLX4_EN_MAX_RX_BYTES;
+		}
+		/* rewind to first mbuf in chain */
+		mb = mb_head;
+	}
+#endif
+	return (mb);
+}
+
 static int
-mlx4_en_alloc_buf(struct mlx4_en_rx_ring *ring,
-     __be64 *pdma, struct mlx4_en_rx_mbuf *mb_list)
+mlx4_en_alloc_buf(struct mlx4_en_rx_ring *ring, struct mlx4_en_rx_desc *rx_desc,
+    struct mlx4_en_rx_mbuf *mb_list)
 {
-	bus_dma_segment_t segs[1];
+	bus_dma_segment_t segs[MLX4_EN_MAX_RX_SEGS];
 	bus_dmamap_t map;
 	struct mbuf *mb;
 	int nsegs;
 	int err;
+#if (MLX4_EN_MAX_RX_SEGS != 1)
+	int i;
+#endif
 
 	/* try to allocate a new spare mbuf */
 	if (unlikely(ring->spare.mbuf == NULL)) {
-		mb = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, ring->rx_mb_size);
+		mb = mlx4_en_alloc_mbuf(ring);
 		if (unlikely(mb == NULL))
 			return (-ENOMEM);
-		/* setup correct length */
-		mb->m_pkthdr.len = mb->m_len = ring->rx_mb_size;
 
 		/* make sure IP header gets aligned */
 		m_adj(mb, MLX4_NET_IP_ALIGN);
 
 		/* load spare mbuf into BUSDMA */
 		err = -bus_dmamap_load_mbuf_sg(ring->dma_tag, ring->spare.dma_map,
-		    mb, segs, &nsegs, BUS_DMA_NOWAIT);
+		    mb, ring->spare.segs, &nsegs, BUS_DMA_NOWAIT);
 		if (unlikely(err != 0)) {
 			m_freem(mb);
 			return (err);
@@ -102,8 +138,14 @@ mlx4_en_alloc_buf(struct mlx4_en_rx_ring *ring,
 
 		/* store spare info */
 		ring->spare.mbuf = mb;
-		ring->spare.paddr_be = cpu_to_be64(segs[0].ds_addr);
 
+#if (MLX4_EN_MAX_RX_SEGS != 1)
+		/* zero remaining segs */
+		for (i = nsegs; i != MLX4_EN_MAX_RX_SEGS; i++) {
+			ring->spare.segs[i].ds_addr = 0;
+			ring->spare.segs[i].ds_len = 0;
+		}
+#endif
 		bus_dmamap_sync(ring->dma_tag, ring->spare.dma_map,
 		    BUS_DMASYNC_PREREAD);
 	}
@@ -115,13 +157,10 @@ mlx4_en_alloc_buf(struct mlx4_en_rx_ring *ring,
 		bus_dmamap_unload(ring->dma_tag, mb_list->dma_map);
 	}
 
-	mb = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, ring->rx_mb_size);
+	mb = mlx4_en_alloc_mbuf(ring);
 	if (unlikely(mb == NULL))
 		goto use_spare;
 
-	/* setup correct length */
-	mb->m_pkthdr.len = mb->m_len = ring->rx_mb_size;
-
 	/* make sure IP header gets aligned */
 	m_adj(mb, MLX4_NET_IP_ALIGN);
 
@@ -132,7 +171,20 @@ mlx4_en_alloc_buf(struct mlx4_en_rx_ring *ring,
 		goto use_spare;
 	}
 
-	*pdma = cpu_to_be64(segs[0].ds_addr);
+#if (MLX4_EN_MAX_RX_SEGS == 1)
+	rx_desc->data[0].addr = cpu_to_be64(segs[0].ds_addr);
+#else
+	for (i = 0; i != nsegs; i++) {
+		rx_desc->data[i].byte_count = cpu_to_be32(segs[i].ds_len);
+		rx_desc->data[i].lkey = ring->rx_mr_key_be;
+		rx_desc->data[i].addr = cpu_to_be64(segs[i].ds_addr);
+	}
+	for (; i != MLX4_EN_MAX_RX_SEGS; i++) {
+		rx_desc->data[i].byte_count = 0;
+		rx_desc->data[i].lkey = cpu_to_be32(MLX4_EN_MEMTYPE_PAD);
+		rx_desc->data[i].addr = 0;
+	}
+#endif
 	mb_list->mbuf = mb;
 
 	bus_dmamap_sync(ring->dma_tag, mb_list->dma_map, BUS_DMASYNC_PREREAD);
@@ -149,7 +201,21 @@ use_spare:
 	ring->spare.mbuf = NULL;
 
 	/* store physical address */
-	*pdma = ring->spare.paddr_be;
+#if (MLX4_EN_MAX_RX_SEGS == 1)
+	rx_desc->data[0].addr = cpu_to_be64(ring->spare.segs[0].ds_addr);
+#else
+	for (i = 0; i != MLX4_EN_MAX_RX_SEGS; i++) {
+		if (ring->spare.segs[i].ds_len != 0) {
+			rx_desc->data[i].byte_count = cpu_to_be32(ring->spare.segs[i].ds_len);
+			rx_desc->data[i].lkey = ring->rx_mr_key_be;
+			rx_desc->data[i].addr = cpu_to_be64(ring->spare.segs[i].ds_addr);
+		} else {
+			rx_desc->data[i].byte_count = 0;
+			rx_desc->data[i].lkey = cpu_to_be32(MLX4_EN_MEMTYPE_PAD);
+			rx_desc->data[i].addr = 0;
+		}
+	}
+#endif
 	return (0);
 }
 
@@ -167,13 +233,13 @@ static int
 mlx4_en_prepare_rx_desc(struct mlx4_en_priv *priv,
     struct mlx4_en_rx_ring *ring, int index)
 {
-	struct mlx4_en_rx_desc *rx_desc = (struct mlx4_en_rx_desc *)
-	    (ring->buf + (index * ring->stride));
+	struct mlx4_en_rx_desc *rx_desc =
+	    ((struct mlx4_en_rx_desc *)ring->buf) + index;
 	struct mlx4_en_rx_mbuf *mb_list = ring->mbuf + index;
 
 	mb_list->mbuf = NULL;
 
-	if (mlx4_en_alloc_buf(ring, &rx_desc->data[0].addr, mb_list)) {
+	if (mlx4_en_alloc_buf(ring, rx_desc, mb_list)) {
 		priv->port_stats.rx_alloc_failed++;
 		return (-ENOMEM);
 	}
@@ -321,7 +387,7 @@ int mlx4_en_create_rx_ring(struct mlx4_en_priv *priv,
 	    BUS_SPACE_MAXADDR,		/* highaddr */
 	    NULL, NULL,			/* filter, filterarg */
 	    MJUM16BYTES,		/* maxsize */
-	    1,				/* nsegments */
+	    MLX4_EN_MAX_RX_SEGS,	/* nsegments */
 	    MJUM16BYTES,		/* maxsegsize */
 	    0,				/* flags */
 	    NULL, NULL,			/* lockfunc, lockfuncarg */
@@ -334,11 +400,10 @@ int mlx4_en_create_rx_ring(struct mlx4_en_priv *priv,
 	ring->cons = 0;
 	ring->size = size;
 	ring->size_mask = size - 1;
-	ring->stride = roundup_pow_of_two(
-	    sizeof(struct mlx4_en_rx_desc) + DS_SIZE);
-	ring->log_stride = ffs(ring->stride) - 1;
-	ring->buf_size = ring->size * ring->stride + TXBB_SIZE;
 
+	ring->log_stride = ilog2(sizeof(struct mlx4_en_rx_desc));
+	ring->buf_size = (ring->size * sizeof(struct mlx4_en_rx_desc)) + TXBB_SIZE;
+
 	tmp = size * sizeof(struct mlx4_en_rx_mbuf);
 
         ring->mbuf = kzalloc(tmp, GFP_KERNEL);
@@ -398,11 +463,11 @@ err_ring:
 int mlx4_en_activate_rx_rings(struct mlx4_en_priv *priv)
 {
 	struct mlx4_en_rx_ring *ring;
+#if (MLX4_EN_MAX_RX_SEGS == 1)
 	int i;
+#endif
 	int ring_ind;
 	int err;
-	int stride = roundup_pow_of_two(
-	    sizeof(struct mlx4_en_rx_desc) + DS_SIZE);
 
 	for (ring_ind = 0; ring_ind < priv->rx_ring_num; ring_ind++) {
 		ring = priv->rx_ring[ring_ind];
@@ -413,8 +478,7 @@ int mlx4_en_activate_rx_rings(struct mlx4_en_priv *pri
 		ring->cqn = priv->rx_cq[ring_ind]->mcq.cqn;
                 ring->rx_mb_size = priv->rx_mb_size;
 
-		ring->stride = stride;
-		if (ring->stride <= TXBB_SIZE) {
+		if (sizeof(struct mlx4_en_rx_desc) <= TXBB_SIZE) {
 			/* Stamp first unused send wqe */
 			__be32 *ptr = (__be32 *)ring->buf;
 			__be32 stamp = cpu_to_be32(1 << STAMP_SHIFT);
@@ -423,15 +487,18 @@ int mlx4_en_activate_rx_rings(struct mlx4_en_priv *pri
 			ring->buf += TXBB_SIZE;
 		}
 
-		ring->log_stride = ffs(ring->stride) - 1;
-		ring->buf_size = ring->size * ring->stride;
+		ring->log_stride = ilog2(sizeof(struct mlx4_en_rx_desc));
+		ring->buf_size = ring->size * sizeof(struct mlx4_en_rx_desc);
 
 		memset(ring->buf, 0, ring->buf_size);
 		mlx4_en_update_rx_prod_db(ring);
 
+#if (MLX4_EN_MAX_RX_SEGS == 1)
 		/* Initialize all descriptors */
 		for (i = 0; i < ring->size; i++)
 			mlx4_en_init_rx_desc(priv, ring, i);
+#endif
+		ring->rx_mr_key_be = cpu_to_be32(priv->mdev->mr.key);
 
 #ifdef INET
 		/* Configure lro mngr */
@@ -466,7 +533,7 @@ err_buffers:
 
 	while (ring_ind >= 0) {
 		ring = priv->rx_ring[ring_ind];
-		if (ring->stride <= TXBB_SIZE)
+		if (sizeof(struct mlx4_en_rx_desc) <= TXBB_SIZE)
 			ring->buf -= TXBB_SIZE;
 		ring_ind--;
 	}
@@ -477,14 +544,14 @@ err_buffers:
 
 void mlx4_en_destroy_rx_ring(struct mlx4_en_priv *priv,
 			     struct mlx4_en_rx_ring **pring,
-			     u32 size, u16 stride)
+			     u32 size)
 {
 	struct mlx4_en_dev *mdev = priv->mdev;
 	struct mlx4_en_rx_ring *ring = *pring;
 	uint32_t x;
 
 	mlx4_en_unmap_buffer(&ring->wqres.buf);
-	mlx4_free_hwq_res(mdev->dev, &ring->wqres, size * stride + TXBB_SIZE);
+	mlx4_free_hwq_res(mdev->dev, &ring->wqres, size * sizeof(struct mlx4_en_rx_desc) + TXBB_SIZE);
 	for (x = 0; x != size; x++)
 		bus_dmamap_destroy(ring->dma_tag, ring->mbuf[x].dma_map);
 	/* free spare mbuf, if any */
@@ -511,7 +578,7 @@ void mlx4_en_deactivate_rx_ring(struct mlx4_en_priv *p
 	tcp_lro_free(&ring->lro);
 #endif
 	mlx4_en_free_rx_buf(priv, ring);
-	if (ring->stride <= TXBB_SIZE)
+	if (sizeof(struct mlx4_en_rx_desc) <= TXBB_SIZE)
 		ring->buf -= TXBB_SIZE;
 }
 
@@ -557,21 +624,43 @@ mlx4_en_rx_mb(struct mlx4_en_priv *priv, struct mlx4_e
     struct mlx4_en_rx_desc *rx_desc, struct mlx4_en_rx_mbuf *mb_list,
     int length)
 {
+#if (MLX4_EN_MAX_RX_SEGS != 1)
+	struct mbuf *mb_head;
+#endif
 	struct mbuf *mb;
 
 	/* get mbuf */
 	mb = mb_list->mbuf;
 
 	/* collect used fragment while atomically replacing it */
-	if (mlx4_en_alloc_buf(ring, &rx_desc->data[0].addr, mb_list))
+	if (mlx4_en_alloc_buf(ring, rx_desc, mb_list))
 		return (NULL);
 
 	/* range check hardware computed value */
-	if (unlikely(length > mb->m_len))
-		length = mb->m_len;
+	if (unlikely(length > mb->m_pkthdr.len))
+		length = mb->m_pkthdr.len;
 
+#if (MLX4_EN_MAX_RX_SEGS == 1)
 	/* update total packet length in packet header */
 	mb->m_len = mb->m_pkthdr.len = length;
+#else
+	mb->m_pkthdr.len = length;
+	for (mb_head = mb; mb != NULL; mb = mb->m_next) {
+		if (mb->m_len > length)
+			mb->m_len = length;
+		length -= mb->m_len;
+		if (likely(length == 0)) {
+			if (likely(mb->m_next != NULL)) {
+				/* trim off empty mbufs */
+				m_freem(mb->m_next);
+				mb->m_next = NULL;
+			}
+			break;
+		}
+	}
+	/* rewind to first mbuf in chain */
+	mb = mb_head;
+#endif
 	return (mb);
 }
 
@@ -660,8 +749,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, stru
 	while (XNOR(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK,
 		    cons_index & size)) {
 		mb_list = ring->mbuf + index;
-		rx_desc = (struct mlx4_en_rx_desc *)
-		    (ring->buf + (index << ring->log_stride));
+		rx_desc = ((struct mlx4_en_rx_desc *)ring->buf) + index;
 
 		/*
 		 * make sure we read the CQE after we read the ownership bit
@@ -830,7 +918,7 @@ static int mlx4_en_config_rss_qp(struct mlx4_en_priv *
 	qp->event = mlx4_en_sqp_event;
 
 	memset(context, 0, sizeof *context);
-	mlx4_en_fill_qp_context(priv, ring->actual_size, ring->stride, 0, 0,
+	mlx4_en_fill_qp_context(priv, ring->actual_size, sizeof(struct mlx4_en_rx_desc), 0, 0,
 				qpn, ring->cqn, -1, context);
 	context->db_rec_addr = cpu_to_be64(ring->wqres.db.dma);
 

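Finally, a note on the completion path: mlx4_en_rx_mb() now trims the
pre-allocated chain to the byte count reported by the CQE and frees any
unused tail mbufs, while unused descriptor segments are padded with a zero
byte count and the special MLX4_EN_MEMTYPE_PAD lkey so the hardware skips
them. A standalone sketch of the trim loop (trim_rx_chain is an illustrative
name, not driver code):

	static void
	trim_rx_chain(struct mbuf *mb_head, int length)
	{
		struct mbuf *mb;

		mb_head->m_pkthdr.len = length;
		for (mb = mb_head; mb != NULL; mb = mb->m_next) {
			if (mb->m_len > length)
				mb->m_len = length;
			length -= mb->m_len;
			if (length == 0) {
				if (mb->m_next != NULL) {
					/* free the unused tail of the chain */
					m_freem(mb->m_next);
					mb->m_next = NULL;
				}
				break;
			}
		}
	}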
