PERFORCE change 125128 for review
Kip Macy
kmacy at FreeBSD.org
Mon Aug 13 17:44:06 PDT 2007
http://perforce.freebsd.org/chv.cgi?CH=125128
Change 125128 by kmacy at kmacy_home:ethng on 2007/08/14 00:43:10
Allow any cpu to use any tx queue by protecting the txq with its lock as opposed to cpu binding
- it isn't clear how much the strong affinity buys us without complicating the scheduler and it
doesn't really reduce locking because we still have to go through the motions of waking up the
bound service thread
+ this greatly simplifies the logic because we don't have to special case the local queue any more
the txq lock is either free and the queue is not stalled, in which case the packet is directly
transmitted, or the queue is busy, in which case the packet is enqueued on the packet ring buffer
+ this also eliminates the need for the rss_hash to be "correct" as an ack can always be transmitted
directly without any queueing delay, the rss_hash simply ends up being there for load balancing
and as an affinity hint
+ the one interesting thing that this approach brings up is the possibility of adding an interface
that allows the TCP stack to tell the driver to put a packet at the *head* of a queue so that an
inbound stream is not slowed down by an outbound stream
Affected files ...
.. //depot/projects/ethng/src/sys/dev/cxgb/cxgb_multiq.c#8 edit
Differences ...
==== //depot/projects/ethng/src/sys/dev/cxgb/cxgb_multiq.c#8 (text+ko) ====
@@ -396,11 +396,9 @@
qs->qs_cpuid, curcpu));
reclaimed = j = 0;
- while ((reclaimable = desc_reclaimable(txq)) > 0) {
- critical_enter();
-
+ reclaimable = desc_reclaimable(txq);
+ while (reclaimable > 0) {
n = t3_free_tx_desc(txq, min(reclaimable, TX_CLEAN_MAX_DESC), m_vec);
- critical_exit();
reclaimed += min(reclaimable, TX_CLEAN_MAX_DESC);
@@ -412,12 +410,11 @@
m_freem_vec(m_vec[i]);
j++;
- critical_enter();
txq->cleaned += reclaimed;
txq->in_use -= reclaimed;
if (isset(&qs->txq_stopped, TXQ_ETH))
clrbit(&qs->txq_stopped, TXQ_ETH);
- critical_exit();
+ reclaimable = desc_reclaimable(txq);
}
return (reclaimed);
@@ -455,7 +452,6 @@
return (err);
}
- immpkt = NULL;
if (desc_reclaimable(txq) > 0) {
int reclaimed = 0;
@@ -495,70 +491,35 @@
return (err);
}
-static int
-cxgb_pcpu_txq_trylock(struct sge_txq *txq)
-{
- critical_enter();
- if (txq->flags & TXQ_TRANSMITTING) {
- critical_exit();
- DPRINTF("transmit in progress\n");
- return (0);
- }
- txq->flags |= TXQ_TRANSMITTING;
- critical_exit();
- return (1);
-}
-
-static void
-cxgb_pcpu_txq_unlock(struct sge_txq *txq)
-{
- critical_enter();
- txq->flags &= ~TXQ_TRANSMITTING;
- critical_exit();
-}
int
cxgb_pcpu_start(struct ifnet *ifp, struct mbuf *immpkt)
{
uint32_t cookie;
int err, qidx, locked;
struct port_info *pi;
- struct sge_qset *immqs, *curqs;
+ struct sge_qset *qs;
struct sge_txq *txq = NULL /* gcc is dumb */;
pi = ifp->if_softc;
- immqs = curqs = NULL;
+ qs = NULL;
err = cookie = locked = 0;
- sched_pin();
+
if (immpkt && (immpkt->m_pkthdr.rss_hash != 0)) {
cookie = immpkt->m_pkthdr.rss_hash;
qidx = cxgb_pcpu_cookie_to_qidx(pi, cookie);
DPRINTF("hash=0x%x qidx=%d cpu=%d\n", immpkt->m_pkthdr.rss_hash, qidx, curcpu);
- immqs = &pi->adapter->sge.qs[qidx];
- if (immqs->qs_cpuid != curcpu) {
- cxgb_pcpu_enqueue_packet_(immqs, immpkt);
- immpkt = NULL;
- }
- }
- if (curcpu < pi->first_qset || curcpu >= (pi->first_qset + pi->nqsets)) {
- /*
- * If packet isn't tagged and there is no queue for this cpu
- */
- if (immpkt) {
- immqs = &pi->adapter->sge.qs[pi->first_qset];
- cxgb_pcpu_enqueue_packet_(immqs, immpkt);
- }
- goto done;
- }
- curqs = &pi->adapter->sge.qs[curcpu];
- txq = &curqs->txq[TXQ_ETH];
- if (cxgb_pcpu_txq_trylock(txq)) {
- err = cxgb_pcpu_start_(curqs, immpkt, FALSE);
- cxgb_pcpu_txq_unlock(txq);
+ qs = &pi->adapter->sge.qs[qidx];
+ } else
+ qs = &pi->adapter->sge.qs[pi->first_qset];
+
+ txq = &qs->txq[TXQ_ETH];
+
+ if (mtx_trylock(&txq->lock)) {
+ err = cxgb_pcpu_start_(qs, immpkt, FALSE);
+ mtx_unlock(&txq->lock);
} else if (immpkt)
- err = cxgb_pcpu_enqueue_packet_(curqs, immpkt);
-done:
- sched_unpin();
+ err = cxgb_pcpu_enqueue_packet_(qs, immpkt);
return ((err == ENOSPC) ? 0 : err);
}
@@ -584,7 +545,6 @@
IFQ_UNLOCK(&ifp->if_snd);
printf("dequeued %d packets\n", i);
lhead = ltail = NULL;
- sched_pin();
for (m = head; m != NULL; m = head->m_nextpkt) {
calc_cookie = cxgb_pcpu_calc_cookie(ifp, m);
qidx = cxgb_pcpu_cookie_to_qidx(pi, calc_cookie);
@@ -608,18 +568,12 @@
* Assume one-to-one mapping of qset to CPU for now XXX
*/
- if (cxgb_pcpu_txq_trylock(&qs->txq[TXQ_ETH])) {
- (void)cxgb_pcpu_start_(qs, NULL, TRUE);
- cxgb_pcpu_txq_unlock(&qs->txq[TXQ_ETH]);
- } else {
- /*
+ (void)cxgb_pcpu_start_(qs, NULL, TRUE);
+ /*
* XXX multiple packets
*/
cxgb_pcpu_enqueue_packet_(qs, lhead);
-
- }
}
- sched_unpin();
}
static void
@@ -638,15 +592,16 @@
thread_unlock(td);
DELAY(qs->qs_cpuid*100000);
- printf("bound to %d running on %d\n", qs->qs_cpuid, curcpu);
+ if (bootverbose)
+ printf("bound to %d running on %d\n", qs->qs_cpuid, curcpu);
for (;;) {
if (qs->qs_flags & QS_EXITING)
break;
- if (cxgb_pcpu_txq_trylock(&qs->txq[TXQ_ETH])) {
+ if (mtx_trylock(&qs->txq[TXQ_ETH].lock)) {
err = cxgb_pcpu_start_(qs, NULL, TRUE);
- cxgb_pcpu_txq_unlock(&qs->txq[TXQ_ETH]);
+ mtx_unlock(&qs->txq[TXQ_ETH].lock);
} else
err = EINPROGRESS;
More information about the p4-projects
mailing list