PERFORCE change 125095 for review

Kip Macy kmacy at FreeBSD.org
Sun Aug 12 18:24:37 PDT 2007


http://perforce.freebsd.org/chv.cgi?CH=125095

Change 125095 by kmacy at kmacy_home:ethng on 2007/08/13 01:24:15

	- only explicitly bind the interrupt for a qset when using multiple queues
	- set the queue's stopped bit when the hardware queue is full and clear it
	  once descriptors have been reclaimed (see the sketch below)
	- remove use of the IFF_DRV_OACTIVE flag
	- return ENOBUFS when the queues are full
	- only coalesce packets from the ring onto the sendq while the thread has
	  exclusive access to the txq
	- make sure to free the mbuf when enqueueing is not possible
	- free remaining mbufs on the sendq and ring when the service thread exits
	- reclaim as many descriptors as possible in reclaim_tx
	- simplify the logic in cxgb_pcpu_start_ to ensure that a packet is always
	  either queued or freed
		- don't start tx when running on a CPU for which the ifnet has no
		  active qset
	- save the rss_hash in the incoming mbuf
	- avoid a panic by disallowing changes to intr_coal until initialization
	  has completed
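
For reference, a minimal sketch of the stopped-bit protocol described above.
Names (sge_qset, sge_txq, TXQ_ETH, TX_MAX_DESC, setbit/clrbit/isset) follow the
cxgb driver; the surrounding logic is simplified and illustrative, not the
exact code from this change:

static int
txq_maybe_stop(struct sge_qset *qs, struct sge_txq *txq)
{
	/* Hardware ring is nearly full: mark the qset's eth queue stopped. */
	if (txq->size <= txq->in_use + TX_MAX_DESC) {
		setbit(&qs->txq_stopped, TXQ_ETH);
		return (ENOBUFS);	/* caller must free or requeue */
	}
	return (0);
}

static void
txq_reclaim_restart(struct sge_qset *qs, struct sge_txq *txq, int reclaimed)
{
	/* Descriptors were reclaimed: adjust accounting, clear stopped bit. */
	txq->cleaned += reclaimed;
	txq->in_use -= reclaimed;
	if (isset(&qs->txq_stopped, TXQ_ETH))
		clrbit(&qs->txq_stopped, TXQ_ETH);
}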

Affected files ...

.. //depot/projects/ethng/src/sys/dev/cxgb/cxgb_adapter.h#7 edit
.. //depot/projects/ethng/src/sys/dev/cxgb/cxgb_main.c#10 edit
.. //depot/projects/ethng/src/sys/dev/cxgb/cxgb_multiq.c#5 edit
.. //depot/projects/ethng/src/sys/dev/cxgb/cxgb_sge.c#8 edit
.. //depot/projects/ethng/src/sys/dev/cxgb/sys/mvec.h#2 edit

Differences ...

==== //depot/projects/ethng/src/sys/dev/cxgb/cxgb_adapter.h#7 (text+ko) ====

@@ -573,16 +573,16 @@
 }
 
 #ifdef IFNET_MULTIQUEUE
-int cxgb_pcpu_enqueue_packet(struct ifnet *ifp, int32_t cpuid, struct mbuf *m);
-int cxgb_pcpu_start(struct ifnet *ifp, int32_t cpuid, struct mbuf *m);
+int cxgb_pcpu_enqueue_packet(struct ifnet *ifp, struct mbuf *m);
+int cxgb_pcpu_start(struct ifnet *ifp, struct mbuf *m);
 int32_t cxgb_pcpu_get_cookie(struct ifnet *ifp, struct in6_addr *lip, uint16_t lport,
     struct in6_addr *rip, uint16_t rport, int ipv6);
 void cxgb_pcpu_shutdown_threads(struct adapter *sc);
 void cxgb_pcpu_startup_threads(struct adapter *sc);
+int cxgb_tx_common(struct ifnet *ifp, struct sge_qset  *qs, uint32_t txmax);
 #endif
 
 void t3_free_qset(adapter_t *sc, struct sge_qset *q);
-int cxgb_tx_common(struct ifnet *ifp, struct sge_qset  *qs, uint32_t txmax);
 struct mbuf *cxgb_dequeue_packet(struct ifnet *ifp, struct sge_txq *unused);
 void cxgb_start(struct ifnet *ifp);
 void refill_fl_service(adapter_t *adap, struct sge_fl *fl);
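
The cpuid/cookie argument is dropped from the transmit entry points above:
a caller now passes only the ifnet and mbuf, and queue selection happens
internally from the RSS hash recorded in the mbuf header. A minimal sketch
of a caller under the new interface (hypothetical function, for illustration;
in this change the enqueue paths free the mbuf on failure):

static int
example_transmit(struct ifnet *ifp, struct mbuf *m)
{
	int err;

	/*
	 * cxgb_pcpu_start() derives the target qset from
	 * m->m_pkthdr.rss_hash; on error the driver's enqueue
	 * paths have already freed m, so it must not be reused.
	 */
	err = cxgb_pcpu_start(ifp, m);
	return (err);
}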

==== //depot/projects/ethng/src/sys/dev/cxgb/cxgb_main.c#10 (text+ko) ====

@@ -91,7 +91,6 @@
 static void cxgb_stop_locked(struct port_info *);
 static void cxgb_set_rxmode(struct port_info *);
 static int cxgb_ioctl(struct ifnet *, unsigned long, caddr_t);
-static void cxgb_start_proc(void *, int ncount);
 static int cxgb_media_change(struct ifnet *);
 static void cxgb_media_status(struct ifnet *, struct ifmediareq *);
 static int setup_sge_qsets(adapter_t *);
@@ -102,6 +101,10 @@
 static void cxgb_tick(void *);
 static void setup_rss(adapter_t *sc);
 
+#ifndef IFNET_MULTIQUEUE
+static void cxgb_start_proc(void *, int ncount);
+#endif
+
 /* Attachment glue for the PCI controller end of the device.  Each port of
  * the device is attached separately, as defined later.
  */
@@ -118,7 +121,6 @@
 static int offload_close(struct toedev *tdev);
 #endif
 
-
 static device_method_t cxgb_controller_methods[] = {
 	DEVMETHOD(device_probe,		cxgb_controller_probe),
 	DEVMETHOD(device_attach,	cxgb_controller_attach),
@@ -860,7 +862,7 @@
 static int
 cxgb_setup_msix(adapter_t *sc, int msix_count)
 {
-	int i, j, k, nqsets, rid, vector;
+	int i, j, k, nqsets, rid;
 
 	/* The first message indicates link changes and error conditions */
 	sc->irq_rid = 1;
@@ -904,12 +906,14 @@
 				    "interrupt for message %d\n", rid);
 				return (EINVAL);
 			}
-			if (singleq) {
-				vector = rman_get_start(sc->msix_irq_res[k]);
+#ifdef IFNET_MULTIQUEUE			
+			if (singleq == 0) {
+				int vector = rman_get_start(sc->msix_irq_res[k]);
 				if (bootverbose)
 					device_printf(sc->dev, "binding vector=%d to cpu=%d\n", vector, k % mp_ncpus);
 				intr_bind(vector, k % mp_ncpus);
 			}
+#endif			
 		}
 	}
 
@@ -1073,15 +1077,16 @@
 	p->tq = taskqueue_create_fast(buf, M_NOWAIT,
 	    taskqueue_thread_enqueue, &p->tq);
 #endif	
-
+#ifndef IFNET_MULTIQUEUE
 	if (p->tq == NULL) {
 		device_printf(dev, "failed to allocate port task queue\n");
 		return (ENOMEM);
 	}	
 	taskqueue_start_threads(&p->tq, 1, PI_NET, "%s taskq",
 	    device_get_nameunit(dev));
+	
 	TASK_INIT(&p->start_task, 0, cxgb_start_proc, ifp);
-
+#endif
 	t3_sge_init_port(p);
 
 	return (0);
@@ -1925,10 +1930,11 @@
 
 	txq = &qs->txq[TXQ_ETH];
 	in_use_init = txq->in_use;
+	err = 0;
 	while ((txq->in_use - in_use_init < txmax) &&
 	    (txq->size > txq->in_use + TX_MAX_DESC)) {
 		m = cxgb_dequeue_packet(ifp, txq);
-		if (m == NULL)
+		if (m == NULL) 
 			break;
 		/*
 		 * Convert chain to M_IOVEC
@@ -1978,10 +1984,16 @@
 		ifp->if_drv_flags |= IFF_DRV_OACTIVE;
 		err = ENOSPC;
 	}
+#else
+	if ((err == 0) &&  (txq->size <= txq->in_use + TX_MAX_DESC)) {
+		err = ENOSPC;
+		setbit(&qs->txq_stopped, TXQ_ETH);
+	}
 #endif
 	return (err);
 }
 
+#ifndef IFNET_MULTIQUEUE
 static int
 cxgb_start_tx(struct ifnet *ifp, uint32_t txmax)
 {
@@ -2002,7 +2014,7 @@
 
 	if (txq->flags & TXQ_TRANSMITTING)
 		return (EINPROGRESS);
-	
+
 	mtx_lock(&txq->lock);
 	txq->flags |= TXQ_TRANSMITTING;
 	cxgb_tx_common(ifp, qs, txmax);
@@ -2032,7 +2044,6 @@
 	} while (error == 0);
 }
 
-#ifndef IFNET_MULTIQUEUE
 struct mbuf *
 cxgb_dequeue_packet(struct ifnet *ifp, struct sge_txq *unused)
 {

==== //depot/projects/ethng/src/sys/dev/cxgb/cxgb_multiq.c#5 (text+ko) ====

@@ -97,7 +97,7 @@
 SYSCTL_UINT(_hw_cxgb, OID_AUTO, sleep_ticks, CTLFLAG_RDTUN, &sleep_ticks, 0,
     "ticks to sleep between checking pcpu queues");
 
-int cxgb_txq_mbuf_ring_size = 2048;
+int cxgb_txq_mbuf_ring_size = 8192;
 TUNABLE_INT("hw.cxgb.txq_mr_size", &cxgb_txq_mbuf_ring_size);
 SYSCTL_UINT(_hw_cxgb, OID_AUTO, txq_mr_size, CTLFLAG_RDTUN, &cxgb_txq_mbuf_ring_size, 0,
     "size of per-queue mbuf ring");
@@ -105,7 +105,7 @@
 
 static inline int32_t cxgb_pcpu_calc_cookie(struct ifnet *ifp, struct mbuf *immpkt);
 static void cxgb_pcpu_start_proc(void *arg);
-static int cxgb_pcpu_cookie_to_qidx(struct port_info *, int32_t cookie);
+static int cxgb_pcpu_cookie_to_qidx(struct port_info *, uint32_t cookie);
 
 static inline int
 cxgb_pcpu_enqueue_packet_(struct sge_qset *qs, struct mbuf *m)
@@ -113,11 +113,11 @@
 	struct sge_txq *txq;
 	struct mbuf_ring *mr;
 	struct mbuf_head *mbq;
-	int dropped = 0;
+	int err = 0;
 	
 	if (qs->qs_flags & QS_EXITING) {
-		m_freem(m);
-		return (0);
+		m_freem_vec(m);
+		return (ENXIO);
 	}
 
 	txq = &qs->txq[TXQ_ETH];
@@ -131,7 +131,7 @@
 		critical_exit();
 	} else {
 		int prod, cons, mask;
-		
+
 		mr = &txq->txq_mr;
 		mtx_lock(&txq->lock);
 		cons = mr->mr_cons;
@@ -142,23 +142,20 @@
 			mr->mr_prod = (prod + 1) & mask;
 		} else {
 			txq->txq_drops++;
-			atomic_set_acq_int(&qs->port->ifp->if_drv_flags, IFF_DRV_OACTIVE);
-			dropped = 1;
+			err = ENOBUFS;
 		}
 		mtx_unlock(&txq->lock);
-
 		if ((qs->txq[TXQ_ETH].flags & TXQ_TRANSMITTING) == 0)
 			wakeup(qs);
-
-		if (dropped) 
+		if (err) 
 			m_freem(m);
 	}
 
-	return (0);
+	return (err);
 }
 	
 int
-cxgb_pcpu_enqueue_packet(struct ifnet *ifp, int32_t cookie, struct mbuf *m)
+cxgb_pcpu_enqueue_packet(struct ifnet *ifp, struct mbuf *m)
 {
 	struct port_info *pi;
 	struct sge_qset *qs;
@@ -168,7 +165,7 @@
 	pi = ifp->if_softc;
 	err = 0;
 
-	calc_cookie = (cookie != -1) ? cookie : cxgb_pcpu_calc_cookie(ifp, m);
+	calc_cookie = m->m_pkthdr.rss_hash;
 	qidx = cxgb_pcpu_cookie_to_qidx(pi, calc_cookie);
 	qs = &pi->adapter->sge.qs[qidx];
 
@@ -222,6 +219,7 @@
 	int count;
 	int32_t cookie;
 
+	critical_enter();
 	/* 
 	 * Can definitely bypass bcopy XXX
 	 */
@@ -249,7 +247,8 @@
 	 * -> RSS map maps queue to CPU
 	 */
 	cookie = (base & (RSS_TABLE_SIZE-1));
-
+	critical_exit();
+	
 	return (cookie);
 }
 
@@ -267,10 +266,16 @@
 	struct sctphdr *sh;
 	uint8_t *next, proto;
 	int etype;
-	
+
 	if (immpkt == NULL)
 		return -1;
 
+#if 1	
+	/*
+	 * XXX perf test
+	 */
+	return (0);
+#endif	
 	rport = lport = 0;
 	cookie = -1;
 	next = NULL;
@@ -337,23 +342,24 @@
 static inline int
 cxgb_pcpu_pkt_coalesce(struct sge_txq *txq, struct mbuf *imm, int *complete)
 {
-	int prod, cons, mask, transferred;
+	int prod, cons, mask, err;
 	struct mbuf_head *mbq;
 	struct mbuf_ring *mr;
 	struct mbuf **ring, *m;
-
 	
 	mbq = &txq->sendq;
 	mr = &txq->txq_mr;
 	ring = mr->mr_ring;
 	mask = mr->mr_size - 1;
-
+	err = 0;
 	/*
 	 * Arbitrary threshold at which to apply backpressure
 	 */
 	if (mbufq_len(mbq) > cxgb_txq_mbuf_ring_size) {
+		if (imm)
+			m_freem_vec(imm);
 		*complete = 1;
-		return (0); 
+		return (ENOBUFS); 
 	}
 	
 	critical_enter();
@@ -363,157 +369,206 @@
 		m = ring[cons];
 		cons = (cons + 1) & mask;
 		mbufq_tail(mbq, m);
-		transferred++;
 	}
 	mr->mr_cons = cons;
 	if (imm)
 		mbufq_tail(mbq, imm);
+
 	*complete = ((mbufq_size(mbq) > TX_WR_SIZE_MAX) || (mbufq_len(mbq) >= TX_WR_COUNT_MAX));
 	critical_exit();
 
-	return (transferred);
+	return (err);
 }
 
 static void
+cxgb_pcpu_free(struct sge_qset *qs)
+{
+	struct mbuf *m;
+	struct sge_txq *txq = &qs->txq[TXQ_ETH];
+	int complete;
+	
+	while ((m = mbufq_dequeue(&txq->sendq)) != NULL) 
+		m_freem_vec(m);
+	cxgb_pcpu_pkt_coalesce(txq, NULL, &complete);
+	while ((m = mbufq_dequeue(&txq->sendq)) != NULL) 
+		m_freem_vec(m);
+}
+
+static int
 cxgb_pcpu_reclaim_tx(struct sge_txq *txq)
 {
-	int reclaimable, i, j, n;
+	int reclaimable, reclaimed, i, j, n;
 	struct mbuf *m_vec[TX_CLEAN_MAX_DESC];
-
-	reclaimable = desc_reclaimable(txq);
-	j = 0;
-	while (reclaimable > 0) {
-
+	struct sge_qset *qs = txq_to_qset(txq, TXQ_ETH);
+		
+	KASSERT(qs->qs_cpuid == curcpu, ("cpu qset mismatch cpuid=%d curcpu=%d",
+			qs->qs_cpuid, curcpu));
+	
+	reclaimed = j = 0;
+	while  ((reclaimable = desc_reclaimable(txq)) > 0) {
 		critical_enter();
+		
 		n = t3_free_tx_desc(txq, min(reclaimable, TX_CLEAN_MAX_DESC), m_vec);
-
-		txq->cleaned += n;
-		txq->in_use -= n;
-		/*
-		 * In case this is called while mbufs are being freed 
-		 */
-		reclaimable = desc_reclaimable(txq);
 		critical_exit();
-
-		if (n == 0)
-			return;
+		
+		reclaimed += min(reclaimable, TX_CLEAN_MAX_DESC);
+		
 		if (j > 5 || cxgb_debug)
-			printf("n=%d reclaimable=%d txq->cleaned=%d txq->in_use=%d\n",
-			    n, reclaimable, txq->cleaned, txq->in_use);
+			printf("n=%d reclaimable=%d txq->processed=%d txq->cleaned=%d txq->in_use=%d\n",
+			    n, reclaimable, txq->processed, txq->cleaned, txq->in_use);
 		
-		for (i = 0; i < n; i++) 
+		for (i = 0; i < n; i++)  
 			m_freem_vec(m_vec[i]);
-		j++;    
+		j++;
+		
+		critical_enter();
+		txq->cleaned += reclaimed;
+		txq->in_use -= reclaimed;
+		if (isset(&qs->txq_stopped, TXQ_ETH))
+			clrbit(&qs->txq_stopped, TXQ_ETH);
+		critical_exit();
 	}
+
+	return (reclaimed);
 }
 
 static int
-cxgb_pcpu_start_(struct port_info *pi, int32_t cookie, struct mbuf *immpkt, int tx_flush)
+cxgb_pcpu_start_(struct sge_qset *qs, struct mbuf *immpkt, int tx_flush)
 {
-	int32_t calc_cookie;
-	int err, flush, complete, qidx, i = 0;
-	struct sge_qset *immqs, *curqs;
+	int i, err, flush, complete, reclaimed, stopped;
+	struct port_info *pi; 
 	struct sge_txq *txq;
 	adapter_t *sc;
 	uint32_t max_desc;
+
+	pi = qs->port;
+	err = 0;
+	sc = pi->adapter;
+	i = reclaimed = 0;
 	
-	if (!pi->link_config.link_ok) {
-		return (ENXIO);
+ retry:	
+	if (!pi->link_config.link_ok)
+		err = ENXIO;
+	else if (qs->qs_flags & QS_EXITING)
+		err = ENXIO;
+	else {
+		txq = &qs->txq[TXQ_ETH];
+		err = cxgb_pcpu_pkt_coalesce(txq, immpkt, &complete);
 	}
-	sc = pi->adapter;
-	calc_cookie = ((cookie == -1) && immpkt) ? cxgb_pcpu_calc_cookie(pi->ifp, immpkt) : cookie;
-	qidx = cxgb_pcpu_cookie_to_qidx(pi, calc_cookie);
-	immqs = &pi->adapter->sge.qs[qidx];
-	curqs = (curcpu < SGE_QSETS) ? &pi->adapter->sge.qs[curcpu] : NULL;
-	
-	if (immqs->qs_flags & QS_EXITING) {
-		printf("exting\n");
+	if (err) {
+		if (cxgb_debug)
+			printf("cxgb link down\n");
 		if (immpkt)
-			m_freem(immpkt);
-		return (0);
-	}
-	if (immpkt != NULL && tx_flush == 0)
-		DPRINTF("calc_cookie=%d\n", calc_cookie);
-
-	err = 0;
-	/*
-	 * If we're passed a packet and it isn't outbound from this cpu
-	 * we need to enqueue it be transmitted from the appropriate CPU
-	 */
-	if (immpkt && immqs != curqs) {
-		err = cxgb_pcpu_enqueue_packet_(immqs, immpkt);
-		immpkt = NULL;
-
+			m_freem_vec(immpkt);
+		return (err);
 	}
-	if (err || curqs == NULL)
-		return (err);
 	
-	txq = &curqs->txq[TXQ_ETH];
+	immpkt = NULL;
 
-	if (curqs->qs_flags & QS_EXITING)
-		return (0);
+	if (desc_reclaimable(txq) > 0) {
+		int reclaimed = 0;
 
-	/*
-	 * A transmitter is already running on the current cpu
-	 */
-	critical_enter();
-	if (txq->flags & TXQ_TRANSMITTING) {
-		critical_exit();
-		DPRINTF("transmit in progress\n");
-		return (0);
+		if (cxgb_debug) {
+			device_printf(qs->port->adapter->dev,
+			    "cpuid=%d curcpu=%d reclaimable=%d txq=%p txq->cidx=%d txq->pidx=%d ",
+			    qs->qs_cpuid, curcpu, desc_reclaimable(txq),
+			    txq, txq->cidx, txq->pidx);
+		}
+		reclaimed = cxgb_pcpu_reclaim_tx(txq);
+		if (cxgb_debug)
+			printf("reclaimed=%d\n", reclaimed);
 	}
-	txq->flags |= TXQ_TRANSMITTING;
-	critical_exit();
-	if (immpkt != NULL || tx_flush == 0) {
-		DPRINTF("immpkt=%p qidx=%d\n", immpkt, qidx);
-		DPRINTF("curcpu=%d sc->sge.qs[%d].cpuid=%d\n", curcpu,
-		    qidx, sc->sge.qs[qidx].cpuid);
-	}
-retry:
-	cxgb_pcpu_pkt_coalesce(txq, immpkt, &complete);
-	/*
+		/*
 	 * If coalescing is disabled OR a complete packet is ready OR we're being called from the flush thread
 	 */
-	flush = (!mbufq_empty(&txq->sendq)) && ((cxgb_pcpu_tx_coalesce == 0) || complete || tx_flush); 
+	stopped = isset(&qs->txq_stopped, TXQ_ETH);
+	flush = (!mbufq_empty(&txq->sendq)) && !stopped && ((cxgb_pcpu_tx_coalesce == 0) || complete || tx_flush); 
 	max_desc = tx_flush ? 0xffffff : TX_START_MAX_DESC;
+	err = flush ? cxgb_tx_common(qs->port->ifp, qs, max_desc) : ENOSPC;
 
-	err = flush ? cxgb_tx_common(curqs->port->ifp, curqs, max_desc) : 0;
-
-	if (desc_reclaimable(txq) > 0) {
-		if (cxgb_debug) {
-			device_printf(curqs->port->adapter->dev,
-			    "cpuid=%d reclaimable=%d\n", curcpu, desc_reclaimable(txq));
-		}
-		cxgb_pcpu_reclaim_tx(txq);
-		if (curqs->port->ifp->if_drv_flags & IFF_DRV_OACTIVE) 
-			atomic_clear_acq_int(&curqs->port->ifp->if_drv_flags, IFF_DRV_OACTIVE);
-
-	}
-	if (tx_flush && err == 0 && !mbufq_empty(&txq->sendq)) {
+	if ((tx_flush && flush && err == 0) && !mbufq_empty(&txq->sendq)) {
+#if 0		
+		struct thread *td = curthread;
+		thread_lock(td);
+		sched_prio(td, PRI_MIN_TIMESHARE);
+		thread_unlock(td);
+#endif		
 		if (i++ > 5000)
-			device_printf(curqs->port->adapter->dev,
+			device_printf(qs->port->adapter->dev,
 			    "mbuf head=%p qsize=%d qlen=%d\n",
 			    txq->sendq.head, txq->sendq.qsize, txq->sendq.qlen);
 
 		goto retry;
 	}
+	return (err);
+}
 
+static int
+cxgb_pcpu_txq_trylock(struct sge_txq *txq)
+{
+	critical_enter();
+	if (txq->flags & TXQ_TRANSMITTING) {
+		critical_exit();
+		DPRINTF("transmit in progress\n");
+		return (0);
+	}
+	txq->flags |= TXQ_TRANSMITTING;
+	critical_exit();
+	return (1);
+}
+
+static void
+cxgb_pcpu_txq_unlock(struct sge_txq *txq)
+{
+	critical_enter();
 	txq->flags &= ~TXQ_TRANSMITTING;
-	return (err);
+	critical_exit();
 }
-
 int
-cxgb_pcpu_start(struct ifnet *ifp, int32_t cookie, struct mbuf *immpkt)
+cxgb_pcpu_start(struct ifnet *ifp, struct mbuf *immpkt)
 {
-	int err;
+	uint32_t cookie;
+	int err, qidx, locked;
 	struct port_info *pi;
+	struct sge_qset *immqs, *curqs;
+	struct sge_txq *txq = NULL /* gcc is dumb */;
 
+	
 	pi = ifp->if_softc;
+	immqs = curqs = NULL;
+	err = cookie = locked = 0;
 	sched_pin();
-	err = cxgb_pcpu_start_(pi, cookie, immpkt, FALSE);
+	if (immpkt && (immpkt->m_pkthdr.rss_hash != 0)) {
+		cookie = immpkt->m_pkthdr.rss_hash;
+		qidx = cxgb_pcpu_cookie_to_qidx(pi, cookie);
+		DPRINTF("hash=0x%x qidx=%d cpu=%d\n", immpkt->m_pkthdr.rss_hash, qidx, curcpu);
+		immqs = &pi->adapter->sge.qs[qidx];
+		if (immqs->qs_cpuid != curcpu) {
+			cxgb_pcpu_enqueue_packet_(immqs, immpkt);
+			immpkt = NULL;
+		}
+	}
+	if (curcpu < pi->first_qset || curcpu >= (pi->first_qset + pi->nqsets)) {
+		/*
+		 * If packet isn't tagged and there is no queue for this cpu
+		 */
+		if (immpkt) {
+			immqs = &pi->adapter->sge.qs[pi->first_qset];
+			cxgb_pcpu_enqueue_packet_(immqs, immpkt);
+		}
+		goto done;
+	}
+	curqs = &pi->adapter->sge.qs[curcpu];
+	txq = &curqs->txq[TXQ_ETH];
+	if (cxgb_pcpu_txq_trylock(txq)) {
+		err = cxgb_pcpu_start_(curqs, immpkt, FALSE);
+		cxgb_pcpu_txq_unlock(txq);
+	} else if (immpkt)
+		err = cxgb_pcpu_enqueue_packet_(curqs, immpkt);
+done:
 	sched_unpin();
 
-	return (err);
+	return ((err == ENOSPC) ? 0 : err);
 }
 
 void
@@ -556,13 +611,21 @@
 		}
 	}
 	if (curcpu < SGE_QSETS) {
-		int32_t cookie;
-
+		qs = &pi->adapter->sge.qs[curcpu]; 
 		/*
 		 * Assume one-to-one mapping of qset to CPU for now XXX 
 		 */
-		cookie = pi->adapter->rrss_map[curcpu];
-		(void)cxgb_pcpu_start_(pi, cookie, lhead, 0);
+
+		if (cxgb_pcpu_txq_trylock(&qs->txq[TXQ_ETH])) {
+			(void)cxgb_pcpu_start_(qs, NULL, TRUE);
+			cxgb_pcpu_txq_unlock(&qs->txq[TXQ_ETH]);
+		} else {
+			/*
+			 * XXX multiple packets
+			 */
+			cxgb_pcpu_enqueue_packet_(qs, lhead);
+
+		}
 	}
 	sched_unpin();
 }
@@ -572,7 +635,9 @@
 {
 	struct sge_qset *qs = arg;
 	struct thread *td;
-
+	struct adapter *sc = qs->port->adapter;
+	int err = 0;
+	
 	td = curthread;
 
 	qs->qs_flags |= QS_RUNNING;
@@ -580,38 +645,64 @@
 	sched_bind(td, qs->qs_cpuid);
 	thread_unlock(td);
 
+	DELAY(qs->qs_cpuid*100000);
+	printf("bound to %d running on %d\n", qs->qs_cpuid, curcpu);
+	
 	for (;;) {
 		if (qs->qs_flags & QS_EXITING)
 			break;
 		
-		cxgb_pcpu_start_(qs->port, qs->qs_cpuid, NULL, TRUE);
-		
-		refill_fl_service(qs->port->adapter, &qs->fl[0]);
-		refill_fl_service(qs->port->adapter, &qs->fl[1]);
+		if (cxgb_pcpu_txq_trylock(&qs->txq[TXQ_ETH])) {
+			err = cxgb_pcpu_start_(qs, NULL, TRUE);
+			cxgb_pcpu_txq_unlock(&qs->txq[TXQ_ETH]);
+		} else
+			err = EINPROGRESS;
+	 	
+		if (mtx_trylock(&qs->rspq.lock)) {
+			process_responses(sc, qs, -1);
+
+			refill_fl_service(sc, &qs->fl[0]);
+			refill_fl_service(sc, &qs->fl[1]);
+			t3_write_reg(sc, A_SG_GTS, V_RSPQ(qs->rspq.cntxt_id) |
+			    V_NEWTIMER(qs->rspq.next_holdoff) | V_NEWINDEX(qs->rspq.cidx));
+
+			mtx_unlock(&qs->rspq.lock);
+		}
+		if ((!mbufq_empty(&qs->txq[TXQ_ETH].sendq) ||
+			(qs->txq[TXQ_ETH].txq_mr.mr_cons != qs->txq[TXQ_ETH].txq_mr.mr_prod)) &&
+		    err == 0) {
+			if (cxgb_debug)
+				printf("head=%p cons=%d prod=%d\n",
+				    qs->txq[TXQ_ETH].sendq.head, qs->txq[TXQ_ETH].txq_mr.mr_cons,
+				    qs->txq[TXQ_ETH].txq_mr.mr_prod);
+			continue;
+		}
 		tsleep(qs, 1, "cxgbidle", sleep_ticks);
 	}
-	thread_lock(td);
-	sched_unbind(td);
-	thread_unlock(td);
 
 	if (bootverbose)
 		device_printf(qs->port->adapter->dev, "exiting thread for cpu%d\n", qs->qs_cpuid);
 
+
+	cxgb_pcpu_free(qs);
 	t3_free_qset(qs->port->adapter, qs);
+
 	qs->qs_flags &= ~QS_RUNNING;
 	kthread_exit(0);
 }
 
 static int
-cxgb_pcpu_cookie_to_qidx(struct port_info *pi, int32_t cookie)
+cxgb_pcpu_cookie_to_qidx(struct port_info *pi, uint32_t cookie)
 {
-	int qidx, tmp;
-
+	int qidx;
+	uint32_t tmp;
+	
 	 /*
 	 * Will probably need to be changed for 4-port XXX
 	 */
 	tmp = pi->tx_chan ? cookie : cookie & ((RSS_TABLE_SIZE>>1)-1);
-	qidx = pi->adapter->rspq_map[tmp];
+	DPRINTF(" tmp=%d ", tmp);
+	qidx = (tmp & (pi->nqsets -1)) + pi->first_qset;
 
 	return (qidx);
 }
@@ -654,11 +745,11 @@
 
 			qs->qs_flags |= QS_EXITING;
 			wakeup(qs);
-			tsleep(&sc, 0, "cxgb unload 0", hz>>2);
+			tsleep(&sc, PRI_MIN_TIMESHARE, "cxgb unload 0", hz>>2);
 			while (qs->qs_flags & QS_RUNNING) {
 				qs->qs_flags |= QS_EXITING;
 				device_printf(sc->dev, "qset thread %d still running - sleeping\n", first + j);
-				tsleep(&sc, 0, "cxgb unload 1", 2*hz);
+				tsleep(&sc, PRI_MIN_TIMESHARE, "cxgb unload 1", 2*hz);
 			}
 		}
 	}
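
The reworked cxgb_pcpu_cookie_to_qidx() above maps the hash directly onto the
port's queue sets instead of indirecting through rspq_map[]. A distilled
sketch of that mapping (hypothetical helper; note it implicitly assumes
pi->nqsets is a power of two, so the mask distributes hashes evenly):

static int
hash_to_qidx(const struct port_info *pi, uint32_t hash)
{
	/* Mask selects one of nqsets queues; offset by the port's first. */
	return ((int)(hash & (pi->nqsets - 1)) + pi->first_qset);
}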

==== //depot/projects/ethng/src/sys/dev/cxgb/cxgb_sge.c#8 (text+ko) ====

@@ -1197,7 +1197,10 @@
 	struct tx_desc *txd;
 	struct cpl_tx_pkt *cpl;
 	
-	DPRINTF("t3_encap ");
+#ifdef IFNET_MULTIQUEUE
+	KASSERT(qs->qs_cpuid == curcpu, ("cpu qset mismatch cpuid=%d curcpu=%d", qs->qs_cpuid, curcpu));
+#endif	
+	DPRINTF("t3_encap cpu=%d ", curcpu);
 	m0 = *m;
 	p = qs->port;
 	sc = p->adapter;
@@ -1672,13 +1675,14 @@
 t3_free_tx_desc(struct sge_txq *q, int n, struct mbuf **m_vec)
 {
 	struct tx_sw_desc *d;
-	unsigned int cidx = q->cidx;
+	unsigned int cidx;
 	int nbufs = 0;
 	
 #ifdef T3_TRACE
 	T3_TRACE2(sc->tb[q->cntxt_id & 7],
 		  "reclaiming %u Tx descriptors at cidx %u", n, cidx);
 #endif
+	cidx = q->cidx;
 	d = &q->sdesc[cidx];
 	
 	while (n-- > 0) {
@@ -2298,6 +2302,7 @@
 	
 	prefetch(sd->cl);
 
+	DPRINTF("rx cpu=%d\n", curcpu);
 	fl->credits--;
 	bus_dmamap_sync(fl->entry_tag, sd->map, BUS_DMASYNC_POSTREAD);
 
@@ -2366,6 +2371,8 @@
 		if (desc_reclaimable(&qs->txq[TXQ_ETH]) > TX_START_MAX_DESC)
 			taskqueue_enqueue(qs->port->adapter->tq,
 			    &qs->port->timer_reclaim_task);
+#else
+		wakeup(qs);
 #endif		
 	}
 	
@@ -2404,7 +2411,7 @@
  *	on this queue.  If the system is under memory shortage use a fairly
  *	long delay to help recovery.
  */
-static int
+int
 process_responses(adapter_t *adap, struct sge_qset *qs, int budget)
 {
 	struct sge_rspq *rspq = &qs->rspq;
@@ -2427,7 +2434,7 @@
 		int eth, eop = 0, ethpad = 0;
 		uint32_t flags = ntohl(r->flags);
 		uint32_t rss_csum = *(const uint32_t *)r;
-		uint32_t rss_hash = r->rss_hdr.rss_hash_val;
+		uint32_t rss_hash = be32toh(r->rss_hdr.rss_hash_val);
 		
 		eth = (r->rss_hdr.opcode == CPL_RX_PKT);
 		
@@ -2460,13 +2467,14 @@
 		} else if (r->len_cq) {			
 			int drop_thresh = eth ? SGE_RX_DROP_THRES : 0;
 
-                        if (rspq->m == NULL)  
-				rspq->m = m_gethdr(M_DONTWAIT, MT_DATA);
-			if (rspq->m == NULL) { 
-				log(LOG_WARNING, "failed to get mbuf for packet\n"); 
-				break; 
+                        if (rspq->m == NULL) {
+				if ((rspq->m = m_gethdr(M_DONTWAIT, MT_DATA)) == NULL) {
+					DPRINTF("0x%x:%d\n", rss_hash & ((1<<7)-1), curcpu);
+					rspq->next_holdoff = NOMEM_INTR_DELAY;
+					break;
+				} else 
+					rspq->m->m_pkthdr.rss_hash = rss_hash;
 			}
-
 			ethpad = 2;
 			eop = get_packet(adap, drop_thresh, qs, rspq->m, r);
 		} else {
@@ -2625,10 +2633,11 @@
 	adapter_t *adap = qs->port->adapter;
 	struct sge_rspq *rspq = &qs->rspq;
 
-	mtx_lock(&rspq->lock);
-	if (process_responses_gts(adap, rspq) == 0)
-		rspq->unhandled_irqs++;
-	mtx_unlock(&rspq->lock);
+	if (mtx_trylock(&rspq->lock)) {
+		if (process_responses_gts(adap, rspq) == 0)
+			rspq->unhandled_irqs++;
+		mtx_unlock(&rspq->lock);
+	}
 }
 
 /* 
@@ -2672,7 +2681,10 @@
 	struct sge_qset *qs;
 	int i, j, err, nqsets = 0;
 	struct mtx *lock;
-	
+
+	if ((sc->flags & FULL_INIT_DONE) == 0)
+		return (ENXIO);
+		
 	coalesce_nsecs = qsp->coalesce_nsecs;
         err = sysctl_handle_int(oidp, &coalesce_nsecs, arg2, req);
 

==== //depot/projects/ethng/src/sys/dev/cxgb/sys/mvec.h#2 (text+ko) ====

@@ -128,6 +128,8 @@
 {
 	struct mbuf *n = m->m_next;
 
+	m->m_pkthdr.rss_hash = 0;
+	
 	if (m->m_flags & M_IOVEC)
 		mb_free_vec(m);
 	else if (m->m_flags & M_EXT)
