PERFORCE change 124803 for review

Kip Macy kmacy at FreeBSD.org
Mon Aug 6 22:01:35 PDT 2007


http://perforce.freebsd.org/chv.cgi?CH=124803

Change 124803 by kmacy at kmacy_home:ethng on 2007/08/07 05:01:15

	add multiqueue tunables
	set up producer/consumer rings for relaying mbufs between CPUs
	in the multiqueue case:
		disable the periodic tx cleaner and freelist refill
		bind MSI-X ithreads to their corresponding CPUs
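
For context, the producer/consumer ring mentioned above pairs a producer
index with a consumer index over a fixed-size array of mbuf pointers
(the txq_mr fields added in cxgb_sge.c below).  A minimal sketch of the
scheme, assuming exactly one producer CPU and one consumer CPU per ring
(names are illustrative, not the driver's exact code):

	struct mbuf_ring {
		int		mr_prod;	/* next slot to fill (producer) */
		int		mr_cons;	/* next slot to drain (consumer) */
		int		mr_size;	/* number of slots */
		struct mbuf	**mr_ring;	/* slot array */
	};

	static int
	mr_enqueue(struct mbuf_ring *mr, struct mbuf *m)
	{
		int prod = mr->mr_prod;
		int next = (prod + 1) % mr->mr_size;

		if (next == mr->mr_cons)
			return (ENOBUFS);	/* ring full */
		mr->mr_ring[prod] = m;
		mr->mr_prod = next;		/* publish the filled slot */
		return (0);
	}

	static struct mbuf *
	mr_dequeue(struct mbuf_ring *mr)
	{
		int cons = mr->mr_cons;
		struct mbuf *m;

		if (cons == mr->mr_prod)
			return (NULL);		/* ring empty */
		m = mr->mr_ring[cons];
		mr->mr_cons = (cons + 1) % mr->mr_size;
		return (m);
	}

A lockless version additionally needs memory barriers between filling
the slot and publishing mr_prod; the sketch elides that and would
otherwise rely on the queue lock.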

Affected files ...

.. //depot/projects/ethng/src/sys/dev/cxgb/cxgb_main.c#3 edit
.. //depot/projects/ethng/src/sys/dev/cxgb/cxgb_sge.c#3 edit

Differences ...

==== //depot/projects/ethng/src/sys/dev/cxgb/cxgb_main.c#3 (text+ko) ====

@@ -208,11 +208,34 @@
  * The driver uses an auto-queue algorithm by default.
  * To disable it and force a single queue-set per port, use singleq = 1.
  */
+#ifdef IFNET_MULTIQUEUE
+static int singleq = 0;
+#else
 static int singleq = 1;
+#endif
 TUNABLE_INT("hw.cxgb.singleq", &singleq);
 SYSCTL_UINT(_hw_cxgb, OID_AUTO, singleq, CTLFLAG_RDTUN, &singleq, 0,
     "use a single queue-set per port");
 
+#ifdef IFNET_MULTIQUEUE
+static int cxgb_pcpu_tx_coalesce = 0;
+TUNABLE_INT("hw.cxgb.tx_coalesce", &cxgb_pcpu_tx_coalesce);
+SYSCTL_UINT(_hw_cxgb, OID_AUTO, tx_coalesce, CTLFLAG_RDTUN, &cxgb_pcpu_tx_coalesce, 0,
+    "coalesce small packets into a single work request");
+
+static int sleep_ticks = 1;
+TUNABLE_INT("hw.cxgb.sleep_ticks", &sleep_ticks);
+SYSCTL_UINT(_hw_cxgb, OID_AUTO, sleep_ticks, CTLFLAG_RDTUN, &sleep_ticks, 0,
+    "ticks to sleep between checking pcpu queues");
+
+int cxgb_txq_mbuf_ring_size = 2048;
+TUNABLE_INT("hw.cxgb.txq_mr_size", &cxgb_txq_mbuf_ring_size);
+SYSCTL_UINT(_hw_cxgb, OID_AUTO, txq_mr_size, CTLFLAG_RDTUN, &cxgb_txq_mbuf_ring_size, 0,
+    "size of per-queue mbuf ring");
+#else
+int cxgb_txq_mbuf_ring_size = 0;
+#endif
+
 enum {
 	MAX_TXQ_ENTRIES      = 16384,
 	MAX_CTRL_TXQ_ENTRIES = 1024,
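
These are boot-time tunables: TUNABLE_INT fetches the value from the
kernel environment when the driver is loaded, and CTLFLAG_RDTUN makes
the sysctl read-only afterwards.  To change them, set the values in
/boot/loader.conf and reboot, e.g.:

	# /boot/loader.conf
	hw.cxgb.txq_mr_size="4096"
	hw.cxgb.tx_coalesce="1"

	# read back after boot
	sysctl hw.cxgb.txq_mr_size

(Minor nit: the new variables are plain ints exported with SYSCTL_UINT,
though that matches the existing singleq handling above.)
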
@@ -558,6 +581,7 @@
 		sc->port[i].nqsets = port_qsets;
 		sc->port[i].first_qset = i*port_qsets;
 		sc->port[i].port_id = i;
+		sc->port[i].tx_chan = i >= ai->nports0;
 		sc->portdev[i] = child;
 		device_set_softc(child, &sc->port[i]);
 	}
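
A relational expression in C evaluates to 0 or 1, so this assigns
tx_chan 0 to the first nports0 ports and tx_chan 1 to the remainder,
which appears to be how ports map onto the T3's two TX channels.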

==== //depot/projects/ethng/src/sys/dev/cxgb/cxgb_sge.c#3 (text+ko) ====

@@ -69,6 +69,7 @@
 int      txq_fills = 0;
 int      collapse_mbufs = 0;
 static int recycle_enable = 1;
+extern int cxgb_txq_mbuf_ring_size;
 static int bogus_imm = 0;
 
 /*
@@ -708,12 +709,13 @@
 sge_timer_cb(void *arg)
 {
 	adapter_t *sc = arg;
+#ifndef IFNET_MULTIQUEUE
 	struct port_info *p;
 	struct sge_qset *qs;
 	struct sge_txq  *txq;
 	int i, j;
 	int reclaim_eth, reclaim_ofl, refill_rx;
-	
+
 	for (i = 0; i < sc->params.nports; i++) 
 		for (j = 0; j < sc->port[i].nqsets; j++) {
 			qs = &sc->sge.qs[i + j];
@@ -728,6 +730,7 @@
 				break;
 			}
 		}
+#endif
 	if (sc->params.nports > 2) {
 		int i;
 
@@ -845,6 +848,11 @@
 	struct sge_txq *txq;
 	struct mtx *lock;
 
+#ifdef IFNET_MULTIQUEUE
+	/* the periodic reclaim path is not used with multiqueue */
+	panic("%s should not be called with multiqueue support", __func__);
+#endif
+
 	for (i = 0; i < nqsets; i++) {
 		qs = &sc->sge.qs[i];
 		txq = &qs->txq[TXQ_ETH];
@@ -897,6 +905,10 @@
 	qs->txq[TXQ_OFLD].cntxt_id = FW_OFLD_SGEEC_START + id;
 	qs->txq[TXQ_CTRL].cntxt_id = FW_CTRL_SGEEC_START + id;
 	qs->txq[TXQ_CTRL].token = FW_CTRL_TID_START + id;
+
+	mbufq_init(&qs->txq[TXQ_ETH].sendq);
+	mbufq_init(&qs->txq[TXQ_OFLD].sendq);
+	mbufq_init(&qs->txq[TXQ_CTRL].sendq);
 }
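
The sendq added here is a plain software FIFO sitting in front of the
hardware ring.  The mbufq helpers are defined elsewhere in the driver;
assuming they chain packets through m_nextpkt, they look roughly like
the following (illustrative, not the actual definitions):

	struct mbuf_head {
		struct mbuf *head;
		struct mbuf *tail;
	};

	static __inline void
	mbufq_init(struct mbuf_head *q)
	{
		q->head = q->tail = NULL;
	}

	/* enqueue at the tail */
	static __inline void
	mbufq_tail(struct mbuf_head *q, struct mbuf *m)
	{
		m->m_nextpkt = NULL;
		if (q->tail == NULL)
			q->head = m;
		else
			q->tail->m_nextpkt = m;
		q->tail = m;
	}

	/* dequeue from the head */
	static __inline struct mbuf *
	mbufq_dequeue(struct mbuf_head *q)
	{
		struct mbuf *m = q->head;

		if (m != NULL) {
			q->head = m->m_nextpkt;
			if (q->head == NULL)
				q->tail = NULL;
			m->m_nextpkt = NULL;
		}
		return (m);
	}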
 
 
@@ -1519,7 +1531,11 @@
 {
 	int i;
 
+	for (i = 0; i < SGE_TXQ_PER_SET; i++)
+		if (q->txq[i].txq_mr.mr_ring != NULL)
+			free(q->txq[i].txq_mr.mr_ring, M_DEVBUF);
+
 	for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
 		if (q->fl[i].desc) {
 			mtx_lock(&sc->sge.reg_lock);
 			t3_sge_disable_fl(sc, q->fl[i].cntxt_id);
@@ -2034,6 +2050,16 @@
 	struct sge_qset *q = &sc->sge.qs[id];
 	int i, ret = 0;
 
+	for (i = 0; i < SGE_TXQ_PER_SET; i++) {
+		/* M_WAITOK allocations sleep rather than fail, so no
+		 * NULL check is needed here. */
+		q->txq[i].txq_mr.mr_ring = malloc(
+		    cxgb_txq_mbuf_ring_size * sizeof(struct mbuf *),
+		    M_DEVBUF, M_WAITOK|M_ZERO);
+		q->txq[i].txq_mr.mr_prod = q->txq[i].txq_mr.mr_cons = 0;
+		q->txq[i].txq_mr.mr_size = cxgb_txq_mbuf_ring_size;
+	}
+
 	init_qset_cntxt(q, id);
 	
 	if ((ret = alloc_ring(sc, p->fl_size, sizeof(struct rx_desc),
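
Note that the default txq_mr_size of 2048 is a power of two; if the
tunable is required to stay one, the ring indices can wrap with a mask
(idx & (mr_size - 1)) instead of a modulo on the hot path.  Also note
that in the non-multiqueue build cxgb_txq_mbuf_ring_size is 0, so the
rings are allocated empty there; any arithmetic on mr_size would be
unsafe if the ring were ever touched in that configuration.
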
@@ -2094,9 +2120,6 @@
 	TASK_INIT(&q->txq[TXQ_ETH].qreclaim_task, 0, sge_txq_reclaim_handler, &q->txq[TXQ_ETH]);
 	TASK_INIT(&q->txq[TXQ_OFLD].qreclaim_task, 0, sge_txq_reclaim_handler, &q->txq[TXQ_OFLD]);
 
-	
-
-	
 	q->fl[0].gen = q->fl[1].gen = 1;
 	q->fl[0].size = p->fl_size;
 	q->fl[1].size = p->jumbo_size;
@@ -2328,9 +2351,11 @@
 	credits = G_RSPD_TXQ0_CR(flags);
 	if (credits) {
 		qs->txq[TXQ_ETH].processed += credits;
+#ifndef IFNET_MULTIQUEUE
 		if (desc_reclaimable(&qs->txq[TXQ_ETH]) > TX_START_MAX_DESC)
 			taskqueue_enqueue(qs->port->adapter->tq,
 			    &qs->port->timer_reclaim_task);
+#endif
 	}
 	
 	credits = G_RSPD_TXQ2_CR(flags);
@@ -2582,6 +2607,20 @@
 		taskqueue_enqueue(adap->tq, &adap->slow_intr_task);
 }
 
+#ifdef IFNET_MULTIQUEUE
+static void
+bind_qs_thread(struct sge_qset *qs)
+{
+	struct thread *td = curthread;
+	thread_lock(td);
+	sched_bind(td, qs->cpuid);
+	thread_unlock(td);
+	critical_enter();
+	qs->flags |= QS_BOUND;
+	critical_exit();
+}
+#endif
+
 void
 t3_intr_msix(void *data)
 {
@@ -2590,11 +2629,16 @@
 	struct sge_rspq *rspq = &qs->rspq;
 
 	mtx_lock(&rspq->lock);
+#ifdef IFNET_MULTIQUEUE
+	if ((qs->flags & QS_BOUND) == 0)
+		bind_qs_thread(qs);
+#endif
+
 	if (process_responses_gts(adap, rspq) == 0)
 		rspq->unhandled_irqs++;
 	mtx_unlock(&rspq->lock);
 }
 
 /* 
  * broken by recent mbuf changes 
  */ 
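
On the binding scheme: sched_bind() operates on the current thread, so
a queue's ithread cannot easily be bound from the setup path; instead
each MSI-X ithread binds itself to its queue's CPU on the first
interrupt it takes, and the QS_BOUND flag ensures this happens only
once.  The thread must hold its own thread lock across sched_bind(),
hence the thread_lock()/thread_unlock() pair in bind_qs_thread().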

