PERFORCE change 118707 for review

Tue Apr 24 06:30:38 UTC 2007

http://perforce.freebsd.org/chv.cgi?CH=118707

Change 118707 by kmacy at kmacy_vt-x:opentoe_init on 2007/04/24 06:30:12

	factor out writing of the WR header and SGL into write_wr_hdr_sgl
	implement write_ofld_wr
	call rx_offload when receiving offloaded packets

Affected files ...

.. //depot/projects/opentoe/sys/dev/cxgb/cxgb_sge.c#14 edit

Differences ...

==== //depot/projects/opentoe/sys/dev/cxgb/cxgb_sge.c#14 (text+ko) ====

@@ -998,6 +998,95 @@
 #endif
 }
 
+
+
+/**
+ *	write_wr_hdr_sgl - write a WR header and, optionally, SGL
+ *	@ndesc: number of Tx descriptors spanned by the SGL
+ *	@txd: first Tx descriptor to be written
+ *	@txqs: txq state; pidx and gen are updated in place on the multi-desc path
+ *	@txq: the SGE Tx queue
+ *	@sgl: the SGL (only read when ndesc > 1)
+ *	@flits: number of flits to the start of the SGL in the first descriptor
+ *	@sgl_flits: the SGL size in flits
+ *	@wr_hi: top 32 bits of WR header based on WR type (big endian)
+ *	@wr_lo: low 32 bits of WR header based on WR type (big endian)
+ *
+ *	Write a work request header and an associated SGL.  If the SGL is
+ *	small enough to fit into one Tx descriptor it has already been written
+ *	and we just need to write the WR header.  Otherwise we distribute the
+ *	SGL across the number of descriptors it spans.
+ */
+
+static void
+write_wr_hdr_sgl(unsigned int ndesc, struct tx_desc *txd, struct txq_state *txqs,
+    const struct sge_txq *txq, const struct sg_ent *sgl, unsigned int flits,
+    unsigned int sgl_flits, unsigned int wr_hi, unsigned int wr_lo)
+{
+
+	struct work_request_hdr *wrp = (struct work_request_hdr *)txd;
+	struct tx_sw_desc *txsd = &txq->sdesc[txqs->pidx];
+	/* Common case: one descriptor, so SOP and EOP share the header. */
+	if (__predict_true(ndesc == 1)) {
+		wrp->wr_hi = htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) |
+		    V_WR_SGLSFLT(flits)) | wr_hi;
+		wmb();
+		wrp->wr_lo = htonl(V_WR_LEN(flits + sgl_flits) |
+		    V_WR_GEN(txqs->gen)) | wr_lo;
+		/* XXX gen? */
+		wr_gen2(txd, txqs->gen);
+	} else {
+		unsigned int ogen = txqs->gen;
+		const uint64_t *fp = (const uint64_t *)sgl;
+		struct work_request_hdr *wp = wrp;
+		/* First descriptor: SOP only; its wr_lo is deferred to the end. */
+		wrp->wr_hi = htonl(F_WR_SOP | V_WR_DATATYPE(1) |
+		    V_WR_SGLSFLT(flits)) | wr_hi;
+		/* Copy the SGL out, at most one descriptor's worth per pass. */
+		while (sgl_flits) {
+			unsigned int avail = WR_FLITS - flits;
+
+			if (avail > sgl_flits)
+				avail = sgl_flits;
+			memcpy(&txd->flit[flits], fp, avail * sizeof(*fp));
+			sgl_flits -= avail;
+			ndesc--;
+			if (!sgl_flits)
+				break;
+			/* More SGL left: step to the next descriptor. */
+			fp += avail;
+			txd++;
+			txsd++;
+			if (++txqs->pidx == txq->size) {
+				txqs->pidx = 0;
+				txqs->gen ^= 1;
+				txd = txq->desc;
+				txsd = txq->sdesc;
+			}
+			
+			/*
+			 * when the head of the mbuf chain
+			 * is freed all clusters will be freed
+			 * with it
+			 */
+			txsd->m = NULL;
+			wrp = (struct work_request_hdr *)txd;
+			wrp->wr_hi = htonl(V_WR_DATATYPE(1) |
+			    V_WR_SGLSFLT(1)) | wr_hi;
+			wrp->wr_lo = htonl(V_WR_LEN(min(WR_FLITS,
+				    sgl_flits + 1)) |
+			    V_WR_GEN(txqs->gen)) | wr_lo;
+			wr_gen2(txd, txqs->gen);
+			flits = 1;	/* later descriptors take SGL from flit 1 */
+		}
+		wrp->wr_hi |= htonl(F_WR_EOP);	/* last descriptor written */
+		wmb();	/* head wr_lo/gen (ogen) is written last */
+		wp->wr_lo = htonl(V_WR_LEN(WR_FLITS) | V_WR_GEN(ogen)) | wr_lo;
+		wr_gen2((struct tx_desc *)wp, ogen);
+	}
+}
+
+	
 /* sizeof(*eh) + sizeof(*vhdr) + sizeof(*ip) + sizeof(*tcp) */
 #define TCPPKTHDRSIZE (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + 20 + 20)
 
@@ -1120,7 +1209,7 @@
 	m0 = *m;
 	ndesc = calc_tx_descs(m0, nsegs);
 	
-	sgp = (ndesc == 1) ? (struct sg_ent *)&txd->flit[flits] : &sgl[0];
+	sgp = (ndesc == 1) ? (struct sg_ent *)&txd->flit[flits] : sgl;
 	make_sgl(sgp, segs, nsegs);
 
 	sgl_flits = sgl_len(nsegs);
@@ -1132,65 +1221,8 @@
 	wr_lo = htonl(V_WR_TID(txq->token));
 	txsd->m = m0;
 	m0->m_priority = txqs.pidx; 
-	
-	if (__predict_true(ndesc == 1)) {
-		wrp->wr_hi = htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) |
-		    V_WR_SGLSFLT(flits)) | wr_hi;
-		wmb();
-		wrp->wr_lo = htonl(V_WR_LEN(flits + sgl_flits) |
-		    V_WR_GEN(txqs.gen)) | wr_lo;
-		/* XXX gen? */
-		wr_gen2(txd, txqs.gen);
-	} else {
-		unsigned int ogen = txqs.gen;
-		const uint64_t *fp = (const uint64_t *)sgl;
-		struct work_request_hdr *wp = wrp;
-		
-		/* XXX - CHECK ME */
-		wrp->wr_hi = htonl(F_WR_SOP | V_WR_DATATYPE(1) |
-		    V_WR_SGLSFLT(flits)) | wr_hi;
-		
-		while (sgl_flits) {
-			unsigned int avail = WR_FLITS - flits;
 
-			if (avail > sgl_flits)
-				avail = sgl_flits;
-			memcpy(&txd->flit[flits], fp, avail * sizeof(*fp));
-			sgl_flits -= avail;
-			ndesc--;
-			if (!sgl_flits)
-				break;
-			
-			fp += avail;
-			txd++;
-			txsd++;
-			if (++txqs.pidx == txq->size) {
-				txqs.pidx = 0;
-				txqs.gen ^= 1;
-				txd = txq->desc;
-				txsd = txq->sdesc;
-			}
-			
-			/*
-			 * when the head of the mbuf chain
-			 * is freed all clusters will be freed
-			 * with it
-			 */
-			txsd->m = NULL;
-			wrp = (struct work_request_hdr *)txd;
-			wrp->wr_hi = htonl(V_WR_DATATYPE(1) |
-			    V_WR_SGLSFLT(1)) | wr_hi;
-			wrp->wr_lo = htonl(V_WR_LEN(min(WR_FLITS,
-				    sgl_flits + 1)) |
-			    V_WR_GEN(txqs.gen)) | wr_lo;
-			wr_gen2(txd, txqs.gen);
-			flits = 1;
-		}
-		wrp->wr_hi |= htonl(F_WR_EOP);
-		wmb();
-		wp->wr_lo = htonl(V_WR_LEN(WR_FLITS) | V_WR_GEN(ogen)) | wr_lo;
-		wr_gen2((struct tx_desc *)wp, ogen);
-	}
+	write_wr_hdr_sgl(ndesc, txd, &txqs, txq, sgl, flits, sgl_flits, wr_hi, wr_lo);
 	check_ring_tx_db(p->adapter, txq);
 
 	return (0);
@@ -1788,15 +1820,16 @@
  */
 static void
 write_ofld_wr(adapter_t *adap, struct mbuf *m,
-	  struct sge_txq *q, unsigned int pidx,
-	  unsigned int gen, unsigned int ndesc)
+    struct sge_txq *q, unsigned int pidx,
+    unsigned int gen, unsigned int ndesc,
+    bus_dma_segment_t *segs, unsigned int nsegs)
 {
-#ifdef notyet
 	unsigned int sgl_flits, flits;
 	struct work_request_hdr *from;
 	struct sg_ent *sgp, sgl[TX_MAX_SEGS / 2 + 1];
 	struct tx_desc *d = &q->desc[pidx];
-
+	struct txq_state txqs;
+	
 	if (immediate(m)) {
 		q->sdesc[pidx].m = NULL;
 		write_imm(d, m, m->m_len, gen);
@@ -1806,20 +1839,20 @@
 	/* Only TX_DATA builds SGLs */
 
 	from = mtod(m, struct work_request_hdr *);
-	memcpy(&d->flit[1], &from[1], skb->h.raw - skb->data - sizeof(*from));
+	memcpy(&d->flit[1], &from[1],
+	    (uint8_t *)m->m_pkthdr.header - mtod(m, uint8_t *) - sizeof(*from));
+
+	flits = ((uint8_t *)m->m_pkthdr.header - mtod(m, uint8_t *)) / 8;
+	sgp = (ndesc == 1) ? (struct sg_ent *)&d->flit[flits] : sgl;
+
+	make_sgl(sgp, segs, nsegs);
+	sgl_flits = sgl_len(nsegs);
 
-	flits = (skb->h.raw - skb->data) / 8;
-	sgp = ndesc == 1 ? (struct sg_ent *)&d->flit[flits] : sgl;
-	sgl_flits = make_sgl(skb, sgp, skb->h.raw, skb->tail - skb->h.raw,
-		       	     adap->pdev);
-	if (need_skb_unmap()) {
-		setup_deferred_unmapping(skb, adap->pdev, sgp, sgl_flits);
-		skb->destructor = deferred_unmap_destructor;
-		((struct unmap_info *)skb->cb)->len = skb->tail - skb->h.raw;
-	}
-	write_wr_hdr_sgl(ndesc, skb, d, pidx, q, sgl, flits, sgl_flits,
-			 gen, from->wr_hi, from->wr_lo);
-#endif
+	txqs.gen = q->gen;
+	txqs.pidx = q->pidx;
+	txqs.compl = (q->unacked & 8) << (S_WR_COMPL - 3);
+	write_wr_hdr_sgl(ndesc, d, &txqs, q, sgl, flits, sgl_flits,
+	    from->wr_hi, from->wr_lo);
 }
 
 /**
@@ -1830,7 +1863,7 @@
  * 	packet.  These packets are already fully constructed.
  */
 static __inline unsigned int
-calc_tx_descs_ofld(const struct mbuf *mbuf)
+calc_tx_descs_ofld(const struct mbuf *m, unsigned int nsegs)
 {
 #ifdef notyet	
 	unsigned int flits, cnt = skb_shinfo(skb)->nr_frags;
@@ -1860,12 +1893,19 @@
 ofld_xmit(adapter_t *adap, struct sge_txq *q, struct mbuf *m)
 {
 	int ret;
-	unsigned int pidx, gen;
-	unsigned int ndesc = calc_tx_descs_ofld(m);
+	unsigned int pidx, gen, nsegs;
+	unsigned int ndesc;
 	struct mbuf *m_vec[TX_CLEAN_MAX_DESC];
+	bus_dma_segment_t segs[TX_MAX_SEGS];
 	int i, cleaned;
-	
+	struct tx_sw_desc *stx = &q->sdesc[q->pidx];
+
 	mtx_lock(&q->lock);
+	if ((ret = busdma_map_mbufs(&m, q, stx, segs, &nsegs)) != 0) {
+		mtx_unlock(&q->lock);
+		return (ret);
+	}
+	ndesc = calc_tx_descs_ofld(m, nsegs);
 again:	cleaned = reclaim_completed_tx(adap, q, TX_CLEAN_MAX_DESC, m_vec);
 
 	ret = check_desc_avail(adap, q, m, ndesc, TXQ_OFLD);
@@ -1894,7 +1934,7 @@
 #endif
 	mtx_unlock(&q->lock);
 
-	write_ofld_wr(adap, m, q, pidx, gen, ndesc);
+	write_ofld_wr(adap, m, q, pidx, gen, ndesc, segs, nsegs);
 	check_ring_tx_db(adap, q);
 	
 	for (i = 0; i < cleaned; i++) {
@@ -1952,7 +1992,7 @@
 		__skb_unlink(skb, &q->sendq);
 #endif		
 		mtx_unlock(&q->lock);
-		write_ofld_wr(adap, skb, q, pidx, gen, ndesc);
+		write_ofld_wr(adap, skb, q, pidx, gen, ndesc, segs, nsegs);
 		mtx_lock(&q->lock);
 	}
 #endif	
@@ -2360,16 +2400,14 @@
 
 				rspq->m = NULL;
 			} else {
-#ifdef notyet
-				m->pkthdr.csum_data = rss_csum;
+				rspq->m->m_pkthdr.csum_data = rss_csum;
 				/*
 				 * XXX size mismatch
 				 */
-				m->m_priority = rss_hash;
+				rspq->m->m_priority = rss_hash;
 				
-				ngathered = rx_offload(&adap->tdev, rspq, m,
+				ngathered = rx_offload(&adap->tdev, rspq, rspq->m,
 				    offload_mbufs, ngathered);
-#endif
 			}
 #ifdef notyet			
 			taskqueue_enqueue(adap->tq, &adap->timer_reclaim_task);