PERFORCE change 125128 for review

Kip Macy kmacy at FreeBSD.org
Mon Aug 13 17:44:06 PDT 2007


http://perforce.freebsd.org/chv.cgi?CH=125128

Change 125128 by kmacy at kmacy_home:ethng on 2007/08/14 00:43:10

	Allow any cpu to use any tx queue by protecting the txq with its lock as opposed to cpu binding
	- it isn't clear how much the strong affinity buys us without complicating the scheduler and it 
	  doesn't really reduce locking because we still have to go through the motions of waking up the 
	  bound service thread
	+ this greatly simplifies the logic because we don't have to special-case the local queue any more:
	  the txq lock is either free and the queue is not stalled, in which case the packet is directly 
	  transmitted, or the queue is busy, in which case the packet is enqueued on the packet ring buffer
	+ this also eliminates the need for the rss_hash to be "correct" as an ack can always be transmitted
	  directly without any queueing delay, the rss_hash simply ends up being there for load balancing
	  and as an affinity hint
	+ the one interesting thing that this approach brings up is the possibility of adding an interface
	  that allows the TCP stack to tell the driver to put a packet at the *head* of a queue so that an 
	  inbound stream is not slowed down by an outbound stream

Affected files ...

.. //depot/projects/ethng/src/sys/dev/cxgb/cxgb_multiq.c#8 edit

Differences ...

==== //depot/projects/ethng/src/sys/dev/cxgb/cxgb_multiq.c#8 (text+ko) ====

@@ -396,11 +396,9 @@
 			qs->qs_cpuid, curcpu));
 	
 	reclaimed = j = 0;
-	while  ((reclaimable = desc_reclaimable(txq)) > 0) {
-		critical_enter();
-		
+	reclaimable = desc_reclaimable(txq);
+	while (reclaimable > 0) {
 		n = t3_free_tx_desc(txq, min(reclaimable, TX_CLEAN_MAX_DESC), m_vec);
-		critical_exit();
 		
 		reclaimed += min(reclaimable, TX_CLEAN_MAX_DESC);
 		
@@ -412,12 +410,11 @@
 			m_freem_vec(m_vec[i]);
 		j++;
 		
-		critical_enter();
 		txq->cleaned += reclaimed;
 		txq->in_use -= reclaimed;
 		if (isset(&qs->txq_stopped, TXQ_ETH))
 			clrbit(&qs->txq_stopped, TXQ_ETH);
-		critical_exit();
+		reclaimable = desc_reclaimable(txq);
 	}
 
 	return (reclaimed);
@@ -455,7 +452,6 @@
 		return (err);
 	}
 	
-	immpkt = NULL;
 
 	if (desc_reclaimable(txq) > 0) {
 		int reclaimed = 0;
@@ -495,70 +491,35 @@
 	return (err);
 }
 
-static int
-cxgb_pcpu_txq_trylock(struct sge_txq *txq)
-{
-	critical_enter();
-	if (txq->flags & TXQ_TRANSMITTING) {
-		critical_exit();
-		DPRINTF("transmit in progress\n");
-		return (0);
-	}
-	txq->flags |= TXQ_TRANSMITTING;
-	critical_exit();
-	return (1);
-}
-
-static void
-cxgb_pcpu_txq_unlock(struct sge_txq *txq)
-{
-	critical_enter();
-	txq->flags &= ~TXQ_TRANSMITTING;
-	critical_exit();
-}
 int
 cxgb_pcpu_start(struct ifnet *ifp, struct mbuf *immpkt)
 {
 	uint32_t cookie;
 	int err, qidx, locked;
 	struct port_info *pi;
-	struct sge_qset *immqs, *curqs;
+	struct sge_qset *qs;
 	struct sge_txq *txq = NULL /* gcc is dumb */;
 
 	
 	pi = ifp->if_softc;
-	immqs = curqs = NULL;
+	qs = NULL;
 	err = cookie = locked = 0;
-	sched_pin();
+
 	if (immpkt && (immpkt->m_pkthdr.rss_hash != 0)) {
 		cookie = immpkt->m_pkthdr.rss_hash;
 		qidx = cxgb_pcpu_cookie_to_qidx(pi, cookie);
 		DPRINTF("hash=0x%x qidx=%d cpu=%d\n", immpkt->m_pkthdr.rss_hash, qidx, curcpu);
-		immqs = &pi->adapter->sge.qs[qidx];
-		if (immqs->qs_cpuid != curcpu) {
-			cxgb_pcpu_enqueue_packet_(immqs, immpkt);
-			immpkt = NULL;
-		}
-	}
-	if (curcpu < pi->first_qset || curcpu >= (pi->first_qset + pi->nqsets)) {
-		/*
-		 * If packet isn't tagged and there is no queue for this cpu
-		 */
-		if (immpkt) {
-			immqs = &pi->adapter->sge.qs[pi->first_qset];
-			cxgb_pcpu_enqueue_packet_(immqs, immpkt);
-		}
-		goto done;
-	}
-	curqs = &pi->adapter->sge.qs[curcpu];
-	txq = &curqs->txq[TXQ_ETH];
-	if (cxgb_pcpu_txq_trylock(txq)) {
-		err = cxgb_pcpu_start_(curqs, immpkt, FALSE);
-		cxgb_pcpu_txq_unlock(txq);
+		qs = &pi->adapter->sge.qs[qidx];
+	} else 
+		qs = &pi->adapter->sge.qs[pi->first_qset];
+	
+	txq = &qs->txq[TXQ_ETH];
+	
+	if (mtx_trylock(&txq->lock)) {
+		err = cxgb_pcpu_start_(qs, immpkt, FALSE);
+		mtx_unlock(&txq->lock);
 	} else if (immpkt)
-		err = cxgb_pcpu_enqueue_packet_(curqs, immpkt);
-done:
-	sched_unpin();
+		err = cxgb_pcpu_enqueue_packet_(qs, immpkt);
 
 	return ((err == ENOSPC) ? 0 : err);
 }
@@ -584,7 +545,6 @@
 	IFQ_UNLOCK(&ifp->if_snd);
 	printf("dequeued %d packets\n", i);
 	lhead = ltail = NULL;
-	sched_pin();
 	for (m = head; m != NULL; m = head->m_nextpkt) {
 		calc_cookie = cxgb_pcpu_calc_cookie(ifp, m);
 		qidx = cxgb_pcpu_cookie_to_qidx(pi, calc_cookie);
@@ -608,18 +568,12 @@
 		 * Assume one-to-one mapping of qset to CPU for now XXX 
 		 */
 
-		if (cxgb_pcpu_txq_trylock(&qs->txq[TXQ_ETH])) {
-			(void)cxgb_pcpu_start_(qs, NULL, TRUE);
-			cxgb_pcpu_txq_unlock(&qs->txq[TXQ_ETH]);
-		} else {
-			/*
+		(void)cxgb_pcpu_start_(qs, NULL, TRUE);
+		/*
 			 * XXX multiple packets
 			 */
 			cxgb_pcpu_enqueue_packet_(qs, lhead);
-
-		}
 	}
-	sched_unpin();
 }
 
 static void
@@ -638,15 +592,16 @@
 	thread_unlock(td);
 
 	DELAY(qs->qs_cpuid*100000);
-	printf("bound to %d running on %d\n", qs->qs_cpuid, curcpu);
+	if (bootverbose)
+		printf("bound to %d running on %d\n", qs->qs_cpuid, curcpu);
 	
 	for (;;) {
 		if (qs->qs_flags & QS_EXITING)
 			break;
 		
-		if (cxgb_pcpu_txq_trylock(&qs->txq[TXQ_ETH])) {
+		if (mtx_trylock(&qs->txq[TXQ_ETH].lock)) {
 			err = cxgb_pcpu_start_(qs, NULL, TRUE);
-			cxgb_pcpu_txq_unlock(&qs->txq[TXQ_ETH]);
+			mtx_unlock(&qs->txq[TXQ_ETH].lock);
 		} else
 			err = EINPROGRESS;
 	 	


More information about the p4-projects mailing list