PERFORCE change 126763 for review

Kip Macy kmacy at FreeBSD.org
Sun Sep 23 22:02:22 PDT 2007


http://perforce.freebsd.org/chv.cgi?CH=126763

Change 126763 by kmacy at kmacy_home:ethng on 2007/09/24 05:01:29

	avoid extra cache/TLB misses and allocations by embedding the
	mbuf in the cluster on receive
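
For context, a rough sketch of the receive-buffer layout this change adopts: the mbuf header, packet header, external-storage descriptor, and reference count are carved out of the front of the cluster, and the DMA-mapped payload begins immediately after them. The diagram and the helper name cluster_payload below are illustrative assumptions, not part of the change; the struct names and the offset expression mirror those used in the diff.

/*
 * Illustrative sketch only -- not part of the change.  It assumes the
 * branch-local struct m_ext_ and the mbuf header layout referenced in
 * the diff; the actual sizes are whatever this branch's headers define.
 *
 * Cluster layout after this change (embedded mbuf state at the front,
 * payload following):
 *
 *   cl
 *   +--------+---------+---------+-----------+------------------------+
 *   | m_hdr  | pkthdr  | m_ext_  | uint32_t  | payload                |
 *   |        |         |         | ref count | (fl->buf_size bytes)   |
 *   +--------+---------+---------+-----------+------------------------+
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/mbuf.h>

static __inline caddr_t
cluster_payload(caddr_t cl)
{
	/* Same offset expression used by refill_fl() and init_cluster_mbuf(). */
	return (cl + sizeof(struct m_hdr) + sizeof(struct pkthdr) +
	    sizeof(struct m_ext_) + sizeof(uint32_t));
}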

Affected files ...

.. //depot/projects/ethng/src/sys/dev/cxgb/cxgb_sge.c#23 edit

Differences ...

==== //depot/projects/ethng/src/sys/dev/cxgb/cxgb_sge.c#23 (text+ko) ====

@@ -140,18 +140,13 @@
 };
 
 struct rx_sw_desc {                /* SW state per Rx descriptor */
-	union {
-		void	 *cl;
-		uint32_t *ref;
-	} u;
-	uint8_t	         *data;
+	caddr_t	         rxsd_cl;
+	uint32_t         *rxsd_ref;
+	caddr_t	         data;
 	bus_dmamap_t	  map;
 	int		  flags;
 };
 
-#define rxsd_cl         u.cl
-#define rxsd_ref        u.ref
-
 struct txq_state {
 	unsigned int compl;
 	unsigned int gen;
@@ -547,8 +542,7 @@
 	struct rx_sw_desc *sd = &q->sdesc[q->pidx];
 	struct rx_desc *d = &q->desc[q->pidx];
 	struct refill_fl_cb_arg cb_arg;
-	void *cl;
-	uint32_t *ref;
+	caddr_t cl;
 	int err;
 
 	cb_arg.error = 0;
@@ -560,7 +554,6 @@
 			log(LOG_WARNING, "Failed to allocate cluster\n");
 			goto done;
 		}
-		ref = cl;
 		
 		if ((sd->flags & RX_SW_DESC_MAP_CREATED) == 0) {
 			if ((err = bus_dmamap_create(q->entry_tag, 0, &sd->map))) {
@@ -571,7 +564,8 @@
 			sd->flags |= RX_SW_DESC_MAP_CREATED;
 		}
 #if !defined(__i386__) && !defined(__amd64__)
-		err = bus_dmamap_load(q->entry_tag, sd->map, (uint32_t *)cl + 1, q->buf_size,
+		err = bus_dmamap_load(q->entry_tag, sd->map,
+		    cl + sizeof(struct m_hdr) + sizeof(struct pkthdr) + sizeof(struct m_ext_) + sizeof(uint32_t), q->buf_size,
 		    refill_fl_cb, &cb_arg, 0);
 		
 		if (err != 0 || cb_arg.error) {
@@ -582,11 +576,13 @@
 			return;
 		}
 #else
-		cb_arg.seg.ds_addr = pmap_kextract((vm_offset_t)((uint32_t *)cl + 1));
+		cb_arg.seg.ds_addr = pmap_kextract((vm_offset_t)(cl + sizeof(struct m_hdr) +
+			sizeof(struct pkthdr) + sizeof(struct m_ext_) + sizeof(uint32_t)));
 #endif		
 		sd->flags |= RX_SW_DESC_INUSE;
 		sd->rxsd_cl = cl;
-		sd->data = (uint8_t *)(sd->rxsd_ref + 1);
+		sd->rxsd_ref = (uint32_t *)(cl + sizeof(struct m_hdr) + sizeof(struct pkthdr) + sizeof(struct m_ext_));
+		sd->data = cl + sizeof(struct m_hdr) + sizeof(struct pkthdr) + sizeof(struct m_ext_) + sizeof(uint32_t);
 		d->addr_lo = htobe32(cb_arg.seg.ds_addr & 0xffffffff);
 		d->addr_hi = htobe32(((uint64_t)cb_arg.seg.ds_addr >>32) & 0xffffffff);
 		d->len_gen = htobe32(V_FLD_GEN1(q->gen));
@@ -1026,7 +1022,7 @@
  * 	packet.  Ethernet packets require addition of WR and CPL headers.
  */
 static __inline unsigned int
-calc_tx_descs(const struct mbuf *m, int nsegs, int tsoinfo)
+calc_tx_descs(const struct mbuf *m, int nsegs)
 {
 	unsigned int flits;
 
@@ -1035,7 +1031,7 @@
 
 	flits = sgl_len(nsegs) + 2;
 #ifdef TSO_SUPPORTED
-	if (tsoinfo)
+	if (m->m_pkthdr.csum_flags & CSUM_TSO)
 		flits++;
 #endif	
 	return flits_to_desc(flits);
@@ -1057,7 +1053,7 @@
 		goto done;
 	} else
 #endif
-		err = bus_dmamap_load_mvec_sg(txq->entry_tag, txsd->map, m0, segs, nsegs, 0);
+		err = bus_dmamap_load_mbuf_sg(txq->entry_tag, txsd->map, m0, segs, nsegs, 0);
 
 	if (err == 0) {
 		goto done;
@@ -1473,7 +1469,7 @@
 		return (err);
 	m0 = *m;
 #endif
-	ndesc = calc_tx_descs(mi, nsegs, tso_info);
+	ndesc = calc_tx_descs(m0, nsegs);
 	
 	sgp = (ndesc == 1) ? (struct sg_ent *)&txd->flit[flits] : sgl;
 	make_sgl(sgp, segs, nsegs);
@@ -1487,7 +1483,7 @@
 	write_wr_hdr_sgl(ndesc, txd, &txqs, txq, sgl, flits, sgl_flits, wr_hi, wr_lo);
 	check_ring_tx_db(pi->adapter, txq);
 
-	if ((m0->m_type == MT_DATA) && (m0->m_flags & M_EXT)) {
+	if ((m0->m_type == MT_DATA) && ((m0->m_flags & (M_EXT|M_NOFREE)) == M_EXT)) {
 		m0->m_flags = 0;
 		m_free(m0);
 	}
@@ -2343,15 +2339,15 @@
 	q->txq[TXQ_ETH].stop_thres = nports *
 	    flits_to_desc(sgl_len(TX_MAX_SEGS + 1) + 3);
 
-	q->fl[0].buf_size = MCLBYTES - sizeof(uint32_t);
+	q->fl[0].buf_size = (MCLBYTES - sizeof(uint32_t) - sizeof(struct m_hdr) - sizeof(struct pkthdr) - sizeof(struct m_ext_));
 	q->fl[0].zone = zone_clust;
 	q->fl[0].type = EXT_CLUSTER;
 	if (jumbo_phys_contig) {
-		q->fl[1].buf_size = MJUM9BYTES - sizeof(uint32_t);
+		q->fl[1].buf_size = MJUM9BYTES - sizeof(uint32_t) - sizeof(struct m_hdr) - sizeof(struct pkthdr) - sizeof(struct m_ext_);
 		q->fl[1].zone = zone_jumbo9;
 		q->fl[1].type = EXT_JUMBO9;
 	} else {
-		q->fl[1].buf_size = MJUMPAGESIZE - sizeof(uint32_t);
+		q->fl[1].buf_size = MJUMPAGESIZE - sizeof(uint32_t) - sizeof(struct m_hdr) - sizeof(struct pkthdr) - sizeof(struct m_ext_);
 		q->fl[1].zone = zone_jumbop;
 		q->fl[1].type = EXT_JUMBOP;
 	}
@@ -2567,9 +2563,32 @@
 }
 
 #else
+static void
+init_cluster_mbuf(caddr_t cl, int flags, int type)
+{
+	struct mbuf *m;
+	int header_size;
+	
+	header_size = sizeof(struct m_hdr) + sizeof(struct pkthdr) + sizeof(struct m_ext_) + sizeof(uint32_t);
+	
+	bzero(cl, header_size);
+	m = (struct mbuf *)cl;
+
+	SLIST_INIT(&m->m_pkthdr.tags);
+	m->m_type = MT_DATA;
+	m->m_flags = flags | M_NOFREE | M_EXT;
+	m->m_data = cl + header_size;
+	m->m_ext.ext_buf = cl;
+	m->m_ext.ref_cnt = (uint32_t *)(cl + header_size - sizeof(uint32_t));
+	m->m_ext.ext_size = m_getsizefromtype(type);
+	m->m_ext.ext_type = type;
+	*(m->m_ext.ref_cnt) = 1;
+	DPRINTF("data=%p ref_cnt=%p\n", m->m_data, m->m_ext.ref_cnt); 
+}
+
 static int
 get_packet(adapter_t *adap, unsigned int drop_thres, struct sge_qset *qs,
-    struct mbuf *m, struct rsp_desc *r)
+    struct mbuf **m, struct rsp_desc *r)
 {
 	
 	unsigned int len_cq =  ntohl(r->len_cq);
@@ -2579,8 +2598,8 @@
 	uint32_t flags = ntohl(r->flags);
 	uint8_t sopeop = G_RSPD_SOP_EOP(flags);
 	void *cl;
-	uint32_t *ref = NULL;
 	int ret = 0;
+	struct mbuf *m0;
 
 	prefetch((sd + 1)->rxsd_cl);
 	prefetch((sd + 2)->rxsd_cl);
@@ -2590,43 +2609,46 @@
 	bus_dmamap_sync(fl->entry_tag, sd->map, BUS_DMASYNC_POSTREAD);
 
 	if (recycle_enable && len <= SGE_RX_COPY_THRES && sopeop == RSPQ_SOP_EOP) {
-		cl = mtod(m, void *);
+		if ((m0 = m_gethdr(M_DONTWAIT, MT_DATA)) == NULL)
+			goto skip_recycle;
+		cl = mtod(m0, void *);
 		memcpy(cl, sd->data, len);
 		recycle_rx_buf(adap, fl, fl->cidx);
+		*m = m0;
 	} else {
+	skip_recycle:
 		bus_dmamap_unload(fl->entry_tag, sd->map);
 		cl = sd->rxsd_cl;
-		ref = sd->rxsd_ref;
+		*m = m0 = (struct mbuf *)cl;
 	}
 
 	switch(sopeop) {
 	case RSPQ_SOP_EOP:
 		DBG(DBG_RX, ("get_packet: SOP-EOP m %p\n", m));
-		if (cl == sd->rxsd_cl) {
-			m_cljset(m, cl, fl->type, ref);
-			*ref = 1; 
-			m->m_data = sd->data;
-		}
-		m->m_len = m->m_pkthdr.len = len;
+		if (cl == sd->rxsd_cl)
+			init_cluster_mbuf(cl, M_PKTHDR, fl->type);
+		m0->m_len = m0->m_pkthdr.len = len;
 		ret = 1;
 		goto done;
 		break;
 	case RSPQ_NSOP_NEOP:
 		DBG(DBG_RX, ("get_packet: NO_SOP-NO_EOP m %p\n", m));
+		panic("chaining unsupported");
 		ret = 0;
 		break;
 	case RSPQ_SOP:
 		DBG(DBG_RX, ("get_packet: SOP m %p\n", m));
-		m_iovinit(m);
+		panic("chaining unsupported");
+		m_iovinit(m0);
 		ret = 0;
 		break;
 	case RSPQ_EOP:
 		DBG(DBG_RX, ("get_packet: EOP m %p\n", m));
+		panic("chaining unsupported");
 		ret = 1;
 		break;
 	}
-	m_iovappend(m, cl, fl->buf_size, len, sizeof(uint32_t), ref);
-	*ref = 1;
+	m_iovappend(m0, cl, fl->buf_size, len, sizeof(uint32_t), sd->rxsd_ref);
 done:	
 	if (++fl->cidx == fl->size)
 		fl->cidx = 0;
@@ -2779,17 +2801,9 @@
 
 			eop = get_packet(adap, drop_thresh, qs, &rspq->rspq_mh, r, m);
 #else
-			if (rspq->rspq_mbuf == NULL) 
-				rspq->rspq_mbuf = m_gethdr(M_DONTWAIT, MT_DATA);
-			if (rspq->rspq_mbuf == NULL) { 
-				rspq->next_holdoff = NOMEM_INTR_DELAY;
-				log(LOG_WARNING, "failed to get mbuf for packet\n"); 
-				break;
-			} else {
-				rspq->rspq_mbuf->m_pkthdr.rss_hash = rss_hash;
-				rspq->rspq_mbuf->m_next = rspq->rspq_mbuf->m_nextpkt = NULL;
-			}
-			eop = get_packet(adap, drop_thresh, qs, rspq->rspq_mbuf, r);
+			eop = get_packet(adap, drop_thresh, qs, &rspq->rspq_mbuf, r);
+			rspq->rspq_mbuf->m_pkthdr.rss_hash = rss_hash;
+
 #endif
 			ethpad = 2;
 		} else {