PERFORCE change 125525 for review

Kip Macy kmacy at FreeBSD.org
Tue Aug 21 18:40:01 PDT 2007


http://perforce.freebsd.org/chv.cgi?CH=125525

Change 125525 by kmacy at kmacy_home:ethng on 2007/08/22 01:39:39

	- change the t3_free_tx_desc interface to account for the fact that
	  we may free more mbuf chains than descriptors reclaimed (a caller
	  sketch of the new interface follows this list)
	- make fetching the VLAN tag generic (the GET_VTAG macro), since we
	  can no longer grab it just once
	- add encapsulation of "batch" packets when t3_encap is given more
	  than one mbuf (still untested with coalescing enabled; a note on
	  the batch work-request layout follows the diff)
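
	A minimal caller sketch of the revised reclaim interface, modeled on
	the cxgb_pcpu_reclaim_tx changes below.  The wrapper example_reclaim()
	and the inline m_freem() loop are illustrative only; locking,
	statistics, and the txq_stopped handling are omitted.

	static void
	example_reclaim(struct sge_txq *txq)
	{
		struct mbuf *m_vec[TX_CLEAN_MAX_DESC];
		int i, n, reclaimable, reclaimed;

		while ((reclaimable = desc_reclaimable(txq)) > 0) {
			/*
			 * m_vec can fill up before 'reclaimable' descriptors
			 * are retired, so the mbuf-chain count (return value)
			 * and the descriptor count (*desc_reclaimed) may
			 * differ.
			 */
			n = t3_free_tx_desc(txq, reclaimable, m_vec,
			    TX_CLEAN_MAX_DESC, &reclaimed);

			txq->cleaned += reclaimed;	/* descriptors retired */
			txq->in_use -= reclaimed;

			/* free the mbuf chains handed back in m_vec */
			for (i = 0; i < n; i++)
				m_freem(m_vec[i]);
		}
	}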

Affected files ...

.. //depot/projects/ethng/src/sys/dev/cxgb/cxgb_adapter.h#12 edit
.. //depot/projects/ethng/src/sys/dev/cxgb/cxgb_multiq.c#14 edit
.. //depot/projects/ethng/src/sys/dev/cxgb/cxgb_sge.c#16 edit

Differences ...

==== //depot/projects/ethng/src/sys/dev/cxgb/cxgb_adapter.h#12 (text+ko) ====

@@ -529,7 +529,7 @@
 int t3_sge_init_adapter(adapter_t *);
 int t3_sge_init_port(struct port_info *);
 void t3_sge_deinit_sw(adapter_t *);
-int t3_free_tx_desc(struct sge_txq *q, int n, struct mbuf **m_vec);
+int t3_free_tx_desc(struct sge_txq *q, int n, struct mbuf **m_vec, int m_vec_size, int *desc_reclaimed);
 
 void t3_rx_eth_lro(adapter_t *adap, struct sge_rspq *rq, struct mbuf *m,
     int ethpad, uint32_t rss_hash, uint32_t rss_csum, int lro);

==== //depot/projects/ethng/src/sys/dev/cxgb/cxgb_multiq.c#14 (text+ko) ====

@@ -389,19 +389,19 @@
 static int
 cxgb_pcpu_reclaim_tx(struct sge_txq *txq)
 {
-	int reclaimable, reclaimed, freed, i, j, n;
+	int reclaimable, total_reclaimed, reclaimed, freed, i, j, n;
 	struct mbuf *m_vec[TX_CLEAN_MAX_DESC];
 	struct sge_qset *qs = txq_to_qset(txq, TXQ_ETH);
 		
 	KASSERT(qs->qs_cpuid == curcpu, ("cpu qset mismatch cpuid=%d curcpu=%d",
 			qs->qs_cpuid, curcpu));
 	
-	freed = reclaimed = j = 0;
-	reclaimable = min(desc_reclaimable(txq), TX_CLEAN_MAX_DESC);
-	while (reclaimable > 0) {
-		n = t3_free_tx_desc(txq, reclaimable, m_vec);
+	freed = total_reclaimed = j = 0;
+	
+	while ((reclaimable = desc_reclaimable(txq)) > 0) {
+		n = t3_free_tx_desc(txq, reclaimable, m_vec, TX_CLEAN_MAX_DESC, &reclaimed);
 		
-		reclaimed += reclaimable;
+		total_reclaimed += reclaimed;
 		
 		if (j > 10 || cxgb_debug)
 			printf("n=%d reclaimable=%d txq->processed=%d txq->cleaned=%d txq->in_use=%d\n",
@@ -412,11 +412,10 @@
 		freed += n;
 		j++;
 		
-		txq->cleaned += reclaimable;
-		txq->in_use -= reclaimable;
+		txq->cleaned += reclaimed;
+		txq->in_use -= reclaimed;
 		if (isset(&qs->txq_stopped, TXQ_ETH))
 			clrbit(&qs->txq_stopped, TXQ_ETH);
-		reclaimable = min(desc_reclaimable(txq), TX_CLEAN_MAX_DESC);
 	}
 
 	txq->txq_frees += freed;

==== //depot/projects/ethng/src/sys/dev/cxgb/cxgb_sge.c#16 (text+ko) ====

@@ -64,6 +64,12 @@
 #include <dev/cxgb/cxgb_include.h>
 #endif
 
+#include <vm/vm.h>
+#include <vm/pmap.h>
+
+
+
+
 uint32_t collapse_free = 0;
 uint32_t mb_free_vec_free = 0;
 int      txq_fills = 0;
@@ -207,8 +213,7 @@
 
 	mtx_assert(&q->lock, MA_OWNED);
 	if (reclaim > 0) {
-		n = t3_free_tx_desc(q, min(reclaim, nbufs), mvec);
-		reclaimed = min(reclaim, nbufs);
+		n = t3_free_tx_desc(q, reclaim, mvec, nbufs, &reclaimed);
 		q->cleaned += reclaimed;
 		q->in_use -= reclaimed;
 	} 
@@ -1178,13 +1183,23 @@
 /* sizeof(*eh) + sizeof(*vhdr) + sizeof(*ip) + sizeof(*tcp) */
 #define TCPPKTHDRSIZE (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + 20 + 20)
 
+#ifdef VLAN_SUPPORTED
+#define GET_VTAG(cntrl, m) \
+do { \
+	if ((m)->m_flags & M_VLANTAG)					            \
+		cntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN((m)->m_pkthdr.ether_vtag); \
+} while (0)
+#else
+#define GET_VTAG(cntrl, m)
+#endif
+
+
 int
 t3_encap(struct sge_qset *qs, struct mbuf **m, int count)
 {
 	adapter_t *sc;
 	struct mbuf *m0;
 	struct sge_txq *txq;
-	struct tx_sw_desc *stx;
 	struct txq_state txqs;
 	struct port_info *p;
 	unsigned int ndesc, flits, cntrl, mlen;
@@ -1197,23 +1212,18 @@
 	uint32_t wr_hi, wr_lo, sgl_flits; 
 
 	struct tx_desc *txd;
-	struct cpl_tx_pkt *cpl;
 	
 #if defined(IFNET_MULTIQUEUE) && defined(STRICT_AFFINITY)
 	KASSERT(qs->qs_cpuid == curcpu, ("cpu qset mismatch cpuid=%d curcpu=%d", qs->qs_cpuid, curcpu));
 #endif	
 	DPRINTF("t3_encap cpu=%d ", curcpu);
-	m0 = *m;
+
 	p = qs->port;
 	sc = p->adapter;
 	txq = &qs->txq[TXQ_ETH];
-	stx = &txq->sdesc[txq->pidx];
+	txsd = &txq->sdesc[txq->pidx];
 	txd = &txq->desc[txq->pidx];
-	cpl = (struct cpl_tx_pkt *)txd;
-	mlen = m0->m_pkthdr.len;
-	cpl->len = htonl(mlen | 0x80000000);
 	
-	DPRINTF("mlen=%d\n", mlen);
 	/*
 	 * XXX handle checksum, TSO, and VLAN here
 	 *	 
@@ -1224,22 +1234,60 @@
 	 * XXX need to add VLAN support for 6.x
 	 */
 #ifdef VLAN_SUPPORTED
-	if (m0->m_flags & M_VLANTAG) 
-		cntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN(m0->m_pkthdr.ether_vtag);
-	if  (m0->m_pkthdr.csum_flags & (CSUM_TSO))
-		tso_info = V_LSO_MSS(m0->m_pkthdr.tso_segsz);
-#endif		
-	if (tso_info) {
+	if  (m[0]->m_pkthdr.csum_flags & (CSUM_TSO))
+		tso_info = V_LSO_MSS(m[0]->m_pkthdr.tso_segsz);
+#endif
+	txsd->count = count;
+	
+	if (count > 1) {
+		struct cpl_tx_pkt_batch *cpl_batch = (struct cpl_tx_pkt_batch *)txd;
+		int i;
+		
+		wrp = (struct work_request_hdr *)txd;
+		
+		flits = count*2 + 1;
+		txq_prod(txq, 1, &txqs);
+
+		for (i = 0; i < count; i++) {
+			struct cpl_tx_pkt_batch_entry *cbe = &cpl_batch->pkt_entry[i];
+
+			cntrl = V_TXPKT_INTF(p->port_id);
+			GET_VTAG(cntrl, m[i]);
+			cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT) | (1 << 24);
+			cbe->cntrl = htonl(cntrl);
+			cbe->len = htonl(m[i]->m_pkthdr.len | 0x80000000);
+			m_set_priority(m[i], txqs.pidx); 
+			txsd->m[i] = m[i];
+			/*
+			 * XXX - NOT PORTABLE outside of x86
+			 */
+			cbe->addr = htobe64(pmap_kextract(mtod(m[i], vm_offset_t)));
+		}
+
+		wrp->wr_hi = htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) |
+		    V_WR_SGLSFLT(flits)) | htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl);
+		wmb();
+		wrp->wr_lo = htonl(V_WR_LEN(flits) |
+		    V_WR_GEN(txqs.gen)) | htonl(V_WR_TID(txq->token));
+		/* XXX gen? */
+		wr_gen2(txd, txqs.gen);
+
+		return (0);
+	} else if (tso_info) {
 		int eth_type;
-		struct cpl_tx_pkt_lso *hdr = (struct cpl_tx_pkt_lso *) cpl;
+		struct cpl_tx_pkt_lso *hdr = (struct cpl_tx_pkt_lso *)txd;
 		struct ip *ip;
 		struct tcphdr *tcp;
 		char *pkthdr, tmp[TCPPKTHDRSIZE]; /* is this too large for the stack? */
 		
 		txd->flit[2] = 0;
+		m0 = m[0];
+		GET_VTAG(cntrl, m0);
 		cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT_LSO);
 		hdr->cntrl = htonl(cntrl);
-		
+		mlen = m0->m_pkthdr.len;
+		hdr->len = htonl(mlen | 0x80000000);
+
 		if (__predict_false(m0->m_len < TCPPKTHDRSIZE)) {
 			pkthdr = &tmp[0];
 			m_copydata(m0, 0, TCPPKTHDRSIZE, pkthdr);
@@ -1264,20 +1312,25 @@
 		hdr->lso_info = htonl(tso_info);
 		flits = 3;	
 	} else {
+		struct cpl_tx_pkt *cpl = (struct cpl_tx_pkt *)txd;
+		
+		m0 = m[0];
+		GET_VTAG(cntrl, m0);
 		cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT);
 		cpl->cntrl = htonl(cntrl);
-		
+		mlen = m0->m_pkthdr.len;
+		cpl->len = htonl(mlen | 0x80000000);
+
 		if (mlen <= WR_LEN - sizeof(*cpl)) {
 			txq_prod(txq, 1, &txqs);
-			txq->sdesc[txqs.pidx].m[0] = m0;
-			txq->sdesc[txqs.pidx].count = 1;
-			m_set_priority(m0, txqs.pidx);
+			txq->sdesc[txqs.pidx].count = 0;
 			
 			if (m0->m_len == m0->m_pkthdr.len)
 				memcpy(&txd->flit[2], mtod(m0, uint8_t *), mlen);
 			else
 				m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[2]);
 
+			m_freem(m0);
 			flits = (mlen + 7) / 8 + 2;
 			cpl->wr.wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) |
 					  V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) |
@@ -1292,10 +1345,9 @@
 		}
 		flits = 2;
 	}
-
 	wrp = (struct work_request_hdr *)txd;
 	
-	if ((err = busdma_map_mbufs(m, txq, stx, segs, &nsegs)) != 0) {
+	if ((err = busdma_map_mbufs(m, txq, txsd, segs, &nsegs)) != 0) {
 		return (err);
 	}
 	m0 = *m;
@@ -1311,8 +1363,8 @@
 	txsd = &txq->sdesc[txqs.pidx];
 	wr_hi = htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl);
 	wr_lo = htonl(V_WR_TID(txq->token));
+	txsd->count = count;
 	txsd->m[0] = m0;
-	txsd->count = 1;
 	m_set_priority(m0, txqs.pidx); 
 
 	write_wr_hdr_sgl(ndesc, txd, &txqs, txq, sgl, flits, sgl_flits, wr_hi, wr_lo);
@@ -1671,37 +1723,44 @@
  *	t3_free_tx_desc - reclaims Tx descriptors and their buffers
  *	@adapter: the adapter
  *	@q: the Tx queue to reclaim descriptors from
- *	@n: the number of descriptors to reclaim
+ *	@reclaimable: the number of descriptors to reclaim
+ *      @m_vec_size: maximum number of buffers to reclaim
+ *      @desc_reclaimed: returns the number of descriptors reclaimed
  *
  *	Reclaims Tx descriptors from an SGE Tx queue and frees the associated
  *	Tx buffers.  Called with the Tx queue lock held.
+ *
+ *      Returns the number of buffers reclaimed.
  */
 int
-t3_free_tx_desc(struct sge_txq *q, int n, struct mbuf **m_vec)
+t3_free_tx_desc(struct sge_txq *q, int reclaimable, struct mbuf **m_vec,
+    int m_vec_size, int *desc_reclaimed)
 {
 	struct tx_sw_desc *txsd;
 	unsigned int cidx;
-	int nbufs = 0;
+	int i, reclaimed, nbufs;
 	
 #ifdef T3_TRACE
 	T3_TRACE2(sc->tb[q->cntxt_id & 7],
-		  "reclaiming %u Tx descriptors at cidx %u", n, cidx);
+		  "reclaiming %u Tx descriptors at cidx %u", reclaimable, cidx);
 #endif
 	cidx = q->cidx;
 	txsd = &q->sdesc[cidx];
+	reclaimed = nbufs = 0;
 	
-	while (n-- > 0) {
+	for (reclaimed = 0; reclaimed < reclaimable;) {
 		DPRINTF("cidx=%d d=%p\n", cidx, d);
 		if (txsd->count > 0) {
+			if (nbufs + txsd->count > m_vec_size)
+				break;
 			if (txsd->flags & TX_SW_DESC_MAPPED) {
 				bus_dmamap_unload(q->entry_tag, txsd->map);
 				txsd->flags &= ~TX_SW_DESC_MAPPED;
 			}
 			if (m_get_priority(txsd->m[0]) == cidx) {
-				m_vec[nbufs] = txsd->m[0];
-				txsd->m[0] = NULL;
+				for (i = 0; i < txsd->count; i++, nbufs++)
+					m_vec[nbufs] = txsd->m[i];
 				txsd->count = 0;
-				nbufs++;
 			} else {
 				printf("pri=%d cidx=%d\n", (int)m_get_priority(txsd->m[0]), cidx);
 			}
@@ -1713,7 +1772,9 @@
 			cidx = 0;
 			txsd = q->sdesc;
 		}
+		reclaimed++;
 	}
+	*desc_reclaimed = reclaimed;
 	q->cidx = cidx;
 
 	return (nbufs);
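
For reference, a sketch of the batch work request assembled in t3_encap()
when count > 1, inferred from the code above rather than copied from the
hardware/CPL headers:

	/*
	 * Each cpl_tx_pkt_batch_entry occupies two 64-bit flits (cntrl and
	 * len are 32 bits each, addr is 64 bits):
	 *
	 *   flit 0         work_request_hdr       wr_hi | wr_lo
	 *   flit 2*i+1     pkt_entry[i].cntrl     CPL_TX_PKT opcode, intf, VLAN
	 *                  pkt_entry[i].len       m_pkthdr.len | 0x80000000
	 *   flit 2*i+2     pkt_entry[i].addr      physical address of the data
	 *                                         (pmap_kextract; x86-only XXX)
	 *
	 * which is why flits = count * 2 + 1, the value handed to
	 * V_WR_SGLSFLT() and V_WR_LEN().
	 */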

