PERFORCE change 117123 for review

Kip Macy kmacy at FreeBSD.org
Sun Apr 1 20:20:55 UTC 2007


http://perforce.freebsd.org/chv.cgi?CH=117123

Change 117123 by kmacy at kmacy_vt-x:opentoe_init on 2007/04/01 20:20:02

	add entry busdma tag to each queue to allow mappings to be created in parallel
	track zone and type for rx freelist for simplifying later free
	convert rx freelists over to only using clusters, allocating the mbuf right before the 
	call to get_packet
	
	This change alleviates a good portion of the recent (last 2 weeks) 18% performance drop 
	in peak TCP throughput

Affected files ...

.. //depot/projects/opentoe/sys/dev/cxgb/cxgb_adapter.h#4 edit
.. //depot/projects/opentoe/sys/dev/cxgb/cxgb_sge.c#3 edit

Differences ...

==== //depot/projects/opentoe/sys/dev/cxgb/cxgb_adapter.h#4 (text+ko) ====

@@ -175,7 +175,9 @@
 	uint64_t	empty;
 	bus_dma_tag_t	desc_tag;
 	bus_dmamap_t	desc_map;
-	struct mtx      fl_locks[8];
+	bus_dma_tag_t	entry_tag;
+	uma_zone_t      zone;
+	int             type;
 };
 
 struct tx_desc;
@@ -201,6 +203,7 @@
 	uint64_t	restarts;
 	bus_dma_tag_t	desc_tag;
 	bus_dmamap_t	desc_map;
+	bus_dma_tag_t	entry_tag;
 	struct mtx      lock;
 };
      	

==== //depot/projects/opentoe/sys/dev/cxgb/cxgb_sge.c#3 (text+ko) ====

@@ -126,7 +126,7 @@
 };
 
 struct rx_sw_desc {                /* SW state per Rx descriptor */
-	struct mbuf	*m;
+	void            *cl;
 	bus_dmamap_t	map;
 	int		flags;
 };
@@ -137,6 +137,12 @@
 	unsigned int pidx;
 };
 
+struct refill_fl_cb_arg {
+	int               error;
+	bus_dma_segment_t seg;
+	int               nseg;
+};
+
 /*
  * Maps a number of flits to the number of Tx descriptors that can hold them.
  * The formula is
@@ -440,6 +446,16 @@
 	qs->rspq.polling = 0 /* p->polling */;
 }
 
+static void
+refill_fl_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
+{
+	struct refill_fl_cb_arg *cb_arg = arg;
+	
+	cb_arg->error = error;
+	cb_arg->seg = segs[0];
+	cb_arg->nseg = nseg;
+
+}
 
 /**
  *	refill_fl - refill an SGE free-buffer list
@@ -453,41 +469,50 @@
 static void
 refill_fl(adapter_t *sc, struct sge_fl *q, int n)
 {
-	bus_dma_segment_t seg;
 	struct rx_sw_desc *sd = &q->sdesc[q->pidx];
 	struct rx_desc *d = &q->desc[q->pidx];
-	struct mbuf *m;
-	int err, nsegs;
+	void *cl;
+	int err;
+	struct refill_fl_cb_arg cb_arg;
 
+	cb_arg.error = 0;
+	
 	while (n--) {
-		m = m_getjcl(M_DONTWAIT, MT_DATA, M_PKTHDR, q->buf_size);
+		/*
+		 * We only allocate a cluster, mbuf allocation happens after rx
+		 */
+		cl = m_cljget(NULL, M_DONTWAIT, q->buf_size);
 		
-		if (m == NULL) {
-			log(LOG_WARNING, "Failed to allocate mbuf\n");
+		if (cl == NULL) {
+			log(LOG_WARNING, "Failed to allocate cluster\n");
 			goto done;
 		}
 		
 		if ((sd->flags & RX_SW_DESC_MAP_CREATED) == 0) {
-			if ((err = bus_dmamap_create(sc->rx_jumbo_dmat, 0, &sd->map))) {
+			if ((err = bus_dmamap_create(q->entry_tag, 0, &sd->map))) {
 				log(LOG_WARNING, "bus_dmamap_create failed %d\n", err);
+				/*
+				 * XXX free cluster 
+				 */
 				goto done;
 			}
 			sd->flags |= RX_SW_DESC_MAP_CREATED;
 		}
-		sd->flags |= RX_SW_DESC_INUSE;
 		
-		m->m_pkthdr.len = m->m_len = q->buf_size;
-		err = bus_dmamap_load_mbuf_sg(sc->rx_jumbo_dmat, sd->map, m, &seg,
-		    &nsegs, BUS_DMA_NOWAIT);
-		if (err != 0) {
-			log(LOG_WARNING, "failure in refill_fl %d\n", err);
-			m_freem(m);
+		err = bus_dmamap_load(q->entry_tag, sd->map, cl, q->buf_size, refill_fl_cb, &cb_arg, 0);
+
+		if (err || cb_arg.error) {
+			log(LOG_WARNING, "failure in refill_fl %d\n", cb_arg.error);
+			/*
+			 * XXX free cluster 
+			 */			
 			return;
 		}
 
-		sd->m = m;
-		d->addr_lo = htobe32(seg.ds_addr & 0xffffffff);
-		d->addr_hi = htobe32(((uint64_t)seg.ds_addr >>32) & 0xffffffff);
+		sd->flags |= RX_SW_DESC_INUSE;
+		sd->cl = cl;
+		d->addr_lo = htobe32(cb_arg.seg.ds_addr & 0xffffffff);
+		d->addr_hi = htobe32(((uint64_t)cb_arg.seg.ds_addr >>32) & 0xffffffff);
 		d->len_gen = htobe32(V_FLD_GEN1(q->gen));
 		d->gen2 = htobe32(V_FLD_GEN2(q->gen));
 
@@ -520,16 +545,17 @@
 free_rx_bufs(adapter_t *sc, struct sge_fl *q)
 {
 	u_int cidx = q->cidx;
-
+	
 	while (q->credits--) {
 		struct rx_sw_desc *d = &q->sdesc[cidx];
 
 		if (d->flags & RX_SW_DESC_INUSE) {
-			bus_dmamap_unload(sc->rx_jumbo_dmat, d->map);
-			bus_dmamap_destroy(sc->rx_jumbo_dmat, d->map);
-			m_freem(d->m);
+			bus_dmamap_unload(q->entry_tag, d->map);
+			bus_dmamap_destroy(q->entry_tag, d->map);
+			uma_zfree(q->zone, d->cl);
 		}
-		d->m = NULL;
+		
+		d->cl = NULL;
 		if (++cidx == q->size)
 			cidx = 0;
 	}
@@ -552,8 +578,8 @@
 
 static int
 alloc_ring(adapter_t *sc, size_t nelem, size_t elem_size, size_t sw_size,
-	   bus_addr_t *phys, void *desc, void *sdesc, bus_dma_tag_t *tag,
-	   bus_dmamap_t *map)
+           bus_addr_t *phys, void *desc, void *sdesc, bus_dma_tag_t *tag,
+           bus_dmamap_t *map, bus_dma_tag_t parent_entry_tag, bus_dma_tag_t *entry_tag)
 {
 	size_t len = nelem * elem_size;
 	void *s = NULL;
@@ -584,6 +610,19 @@
 		bzero(s, len);
 		*(void **)sdesc = s;
 	}
+	
+	if (parent_entry_tag == NULL)
+		return (0);
+	    
+	if ((err = bus_dma_tag_create(parent_entry_tag, PAGE_SIZE, 0,
+				      BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR,
+		                      NULL, NULL, PAGE_SIZE, 1,
+				      PAGE_SIZE, BUS_DMA_ALLOCNOW,
+		                      NULL, NULL, entry_tag)) != 0) {
+		device_printf(sc->dev, "Cannot allocate descriptor entry tag\n");
+		return (ENOMEM);
+	}
+
 	return (0);
 }
 
@@ -1361,23 +1400,26 @@
 	
 	if ((ret = alloc_ring(sc, p->fl_size, sizeof(struct rx_desc),
 			      sizeof(struct rx_sw_desc), &q->fl[0].phys_addr,
-			      &q->fl[0].desc, &q->fl[0].sdesc,
-			      &q->fl[0].desc_tag, &q->fl[0].desc_map)) != 0) {
+		              &q->fl[0].desc, &q->fl[0].sdesc, 
+		              &q->fl[0].desc_tag, &q->fl[0].desc_map,
+		              sc->rx_dmat, &q->fl[0].entry_tag)) != 0) {
 		printf("error %d from alloc ring fl0\n", ret);
 		goto err;
 	}
 
 	if ((ret = alloc_ring(sc, p->jumbo_size, sizeof(struct rx_desc),
 			      sizeof(struct rx_sw_desc), &q->fl[1].phys_addr,
-			      &q->fl[1].desc, &q->fl[1].sdesc,
-			      &q->fl[1].desc_tag, &q->fl[1].desc_map)) != 0) {
+		              &q->fl[1].desc, &q->fl[1].sdesc, 
+		              &q->fl[1].desc_tag, &q->fl[1].desc_map,
+		              sc->rx_jumbo_dmat, &q->fl[1].entry_tag)) != 0) {
 		printf("error %d from alloc ring fl1\n", ret);
 		goto err;
 	}
 
 	if ((ret = alloc_ring(sc, p->rspq_size, sizeof(struct rsp_desc), 0,
-			      &q->rspq.phys_addr, &q->rspq.desc, NULL,
-			      &q->rspq.desc_tag, &q->rspq.desc_map)) != 0) {
+			      &q->rspq.phys_addr, &q->rspq.desc, NULL, 
+		              &q->rspq.desc_tag, &q->rspq.desc_map,
+		              NULL, NULL)) != 0) {
 		printf("error %d from alloc ring rspq\n", ret);
 		goto err;
 	}
@@ -1393,8 +1435,8 @@
 		if ((ret = alloc_ring(sc, p->txq_size[i],
 				      sizeof(struct tx_desc), sz,
 				      &q->txq[i].phys_addr, &q->txq[i].desc,
-				      &q->txq[i].sdesc, &q->txq[i].desc_tag,
-				      &q->txq[i].desc_map)) != 0) {
+			              &q->txq[i].sdesc, &q->txq[i].desc_tag, &q->txq[i].desc_map,
+			              sc->tx_dmat, &q->txq[i].entry_tag)) != 0) {
 			printf("error %d from alloc ring tx %i\n", ret, i);
 			goto err;
 		}
@@ -1416,7 +1458,13 @@
 	    flits_to_desc(sgl_len(TX_MAX_SEGS + 1) + 3);
 
 	q->fl[0].buf_size = MCLBYTES;
+	q->fl[0].zone = zone_clust;
+	q->fl[0].type = EXT_CLUSTER;
+	
 	q->fl[1].buf_size = MJUMPAGESIZE;
+	q->fl[1].zone = zone_jumbop;
+	q->fl[1].type = EXT_JUMBOP;
+	
 	q->lro.enabled = lro_default;
 	
 	mtx_lock(&sc->sge.reg_lock);
@@ -1614,8 +1662,8 @@
 		DPRINTF("cidx=%d d=%p\n", cidx, d);
 		if (d->m) {
 			if (d->flags & TX_SW_DESC_MAPPED) {
-				bus_dmamap_unload(sc->tx_dmat, d->map);
-				bus_dmamap_destroy(sc->tx_dmat, d->map);
+				bus_dmamap_unload(q->entry_tag, d->map);
+				bus_dmamap_destroy(q->entry_tag, d->map);
 				d->flags &= ~TX_SW_DESC_MAPPED;
 			}
 			m_vec[nbufs] = d->m;
@@ -1742,12 +1790,14 @@
  *	threshold and the packet is too big to copy, or (b) the packet should
  *	be copied but there is no memory for the copy.
  */
+
+#include <vm/vm.h>
+#include <vm/pmap.h>
 static int
 get_packet(adapter_t *adap, unsigned int drop_thres, struct sge_qset *qs,
-    struct t3_mbuf_hdr *mh, struct rsp_desc *r)
+    struct t3_mbuf_hdr *mh, struct rsp_desc *r, struct mbuf *m)
 {
 	
-	struct mbuf *m = NULL;
 	unsigned int len_cq =  ntohl(r->len_cq);
 	struct sge_fl *fl = (len_cq & F_RSPD_FLQ) ? &qs->fl[1] : &qs->fl[0];
 	struct rx_sw_desc *sd = &fl->sdesc[fl->cidx];
@@ -1756,12 +1806,13 @@
 	uint8_t sopeop = G_RSPD_SOP_EOP(flags);
 	int ret = 0;
 	
-	prefetch(sd->m->m_data);
+	prefetch(sd->cl);
 	
 	fl->credits--;
-	bus_dmamap_sync(adap->rx_jumbo_dmat, sd->map, BUS_DMASYNC_POSTREAD);
-	bus_dmamap_unload(adap->rx_jumbo_dmat, sd->map);
-	m = sd->m;
+	bus_dmamap_sync(fl->entry_tag, sd->map, BUS_DMASYNC_POSTREAD);
+	bus_dmamap_unload(fl->entry_tag, sd->map);
+
+	m_cljset(m, sd->cl, fl->buf_size);
 	m->m_len = len;
 
 	switch(sopeop) {
@@ -1941,9 +1992,14 @@
 			rspq->imm_data++;
 		} else if (r->len_cq) {			
 			int drop_thresh = eth ? SGE_RX_DROP_THRES : 0;
-			
+			struct mbuf *m = m_gethdr(M_NOWAIT, MT_DATA);
+
+			if (m == NULL) {
+				log(LOG_WARNING, "failed to get mbuf for packet\n");
+				break;
+			} 
 			ethpad = 2;
-			eop = get_packet(adap, drop_thresh, qs, &rspq->mh, r);
+			eop = get_packet(adap, drop_thresh, qs, &rspq->mh, r, m);
 		} else {
 			DPRINTF("pure response\n");
 			rspq->pure_rsps++;
@@ -2044,7 +2100,6 @@
 	adapter_t *adap = data;
 	struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
 	struct sge_rspq *q1 = &adap->sge.qs[1].rspq;
-
 	
 	t3_write_reg(adap, A_PL_CLI, 0);
 	map = t3_read_reg(adap, A_SG_DATA_INTR);


More information about the p4-projects mailing list