PERFORCE change 122896 for review
Kip Macy
kmacy at FreeBSD.org
Thu Jul 5 00:58:57 UTC 2007
http://perforce.freebsd.org/chv.cgi?CH=122896
Change 122896 by kmacy at kmacy_vt-x:opentoe_init on 2007/07/05 00:58:37
- reduce cpu usage by as much as 25% (40%->30%) by doing txq reclaim
more efficiently
- use mtx_trylock when trying to grab the lock to avoid spinning
during long encap loop
- add per-txq reclaim task
- if mbufs were successfully reclaimed, try another pass
- reduce contention on txq lock in if_start by returning right away
if another thread is already transmitting
- set/clear flag in txq when running in the t3_encap loop
- fix null pointer panic caused by bad merge
- track txq overruns with sysctl
MFP4 after: 2 days
Affected files ...
.. //depot/projects/opentoe/sys/dev/cxgb/cxgb_adapter.h#20 edit
.. //depot/projects/opentoe/sys/dev/cxgb/cxgb_main.c#13 edit
.. //depot/projects/opentoe/sys/dev/cxgb/cxgb_osdep.h#11 edit
.. //depot/projects/opentoe/sys/dev/cxgb/cxgb_sge.c#23 edit
Differences ...
==== //depot/projects/opentoe/sys/dev/cxgb/cxgb_adapter.h#20 (text+ko) ====
@@ -239,6 +239,8 @@
struct tx_desc;
struct tx_sw_desc;
+#define TXQ_TRANSMITTING 0x1
+
struct sge_txq {
uint64_t flags;
uint32_t in_use;
@@ -254,7 +256,9 @@
struct tx_sw_desc *sdesc;
uint32_t token;
bus_addr_t phys_addr;
- struct task qresume_tsk;
+ struct task qreclaim_task;
+ struct task qresume_task;
+ struct port_info *port;
uint32_t cntxt_id;
uint64_t stops;
uint64_t restarts;
==== //depot/projects/opentoe/sys/dev/cxgb/cxgb_main.c#13 (text+ko) ====
@@ -1613,7 +1613,11 @@
txq = &qs->txq[TXQ_ETH];
err = 0;
+ if (txq->flags & TXQ_TRANSMITTING)
+ return (EINPROGRESS);
+
mtx_lock(&txq->lock);
+ txq->flags |= TXQ_TRANSMITTING;
in_use_init = txq->in_use;
while ((txq->in_use - in_use_init < txmax) &&
(txq->size > txq->in_use + TX_MAX_DESC)) {
@@ -1651,6 +1655,7 @@
break;
BPF_MTAP(ifp, m);
}
+ txq->flags &= ~TXQ_TRANSMITTING;
mtx_unlock(&txq->lock);
if (__predict_false(err)) {
@@ -1685,8 +1690,7 @@
do {
if (desc_reclaimable(txq) > TX_CLEAN_MAX_DESC)
- taskqueue_enqueue(pi->adapter->tq,
- &pi->timer_reclaim_task);
+ taskqueue_enqueue(pi->adapter->tq, &txq->qreclaim_task);
error = cxgb_start_tx(ifp, TX_START_MAX_DESC);
} while (error == 0);
@@ -1704,9 +1708,8 @@
txq = &qs->txq[TXQ_ETH];
if (desc_reclaimable(txq) > TX_CLEAN_MAX_DESC)
- taskqueue_enqueue(pi->adapter->tq,
- &pi->timer_reclaim_task);
-
+ taskqueue_enqueue(pi->adapter->tq, &txq->qreclaim_task);
+
err = cxgb_start_tx(ifp, TX_START_MAX_DESC);
if (err == 0)
==== //depot/projects/opentoe/sys/dev/cxgb/cxgb_osdep.h#11 (text+ko) ====
@@ -95,7 +95,7 @@
#define TX_MAX_SIZE (1 << 16) /* 64KB */
#define TX_MAX_SEGS 36 /* maximum supported by card */
#define TX_MAX_DESC 4 /* max descriptors per packet */
-#define TX_START_MAX_DESC (TX_MAX_DESC << 2) /* maximum number of descriptors
+#define TX_START_MAX_DESC (TX_MAX_DESC << 3) /* maximum number of descriptors
* call to start used per */
#define TX_CLEAN_MAX_DESC (TX_MAX_DESC << 4) /* maximum tx descriptors
* to clean per iteration */
==== //depot/projects/opentoe/sys/dev/cxgb/cxgb_sge.c#23 (text+ko) ====
@@ -67,6 +67,7 @@
uint32_t collapse_free = 0;
uint32_t mb_free_vec_free = 0;
+int txq_fills = 0;
int collapse_mbufs = 0;
static int recycle_enable = 1;
@@ -187,7 +188,8 @@
static void t3_free_qset(adapter_t *sc, struct sge_qset *q);
static void sge_timer_cb(void *arg);
static void sge_timer_reclaim(void *arg, int ncount);
-static int free_tx_desc(adapter_t *sc, struct sge_txq *q, int n, struct mbuf **m_vec);
+static void sge_txq_reclaim_handler(void *arg, int ncount);
+static int free_tx_desc(struct sge_txq *q, int n, struct mbuf **m_vec);
/**
* reclaim_completed_tx - reclaims completed Tx descriptors
@@ -199,14 +201,14 @@
* queue's lock held.
*/
static __inline int
-reclaim_completed_tx(adapter_t *adapter, struct sge_txq *q, int nbufs, struct mbuf **mvec)
+reclaim_completed_tx(struct sge_txq *q, int nbufs, struct mbuf **mvec)
{
int reclaimed, reclaim = desc_reclaimable(q);
int n = 0;
mtx_assert(&q->lock, MA_OWNED);
if (reclaim > 0) {
- n = free_tx_desc(adapter, q, min(reclaim, nbufs), mvec);
+ n = free_tx_desc(q, min(reclaim, nbufs), mvec);
reclaimed = min(reclaim, nbufs);
q->cleaned += reclaimed;
q->in_use -= reclaimed;
@@ -482,7 +484,7 @@
struct refill_fl_cb_arg cb_arg;
void *cl;
int err;
-
+
cb_arg.error = 0;
while (n--) {
/*
@@ -743,7 +745,47 @@
V_RSPQ(q->cntxt_id) | V_CREDITS(credits));
}
+static __inline void
+sge_txq_reclaim_(struct sge_txq *txq)
+{
+ int reclaimable, i, n;
+ struct mbuf *m_vec[TX_CLEAN_MAX_DESC];
+ struct port_info *p;
+ p = txq->port;
+reclaim_more:
+ n = 0;
+ reclaimable = desc_reclaimable(txq);
+ if (reclaimable > 0 && mtx_trylock(&txq->lock)) {
+ n = reclaim_completed_tx(txq, TX_CLEAN_MAX_DESC, m_vec);
+ mtx_unlock(&txq->lock);
+ }
+ if (n == 0)
+ return;
+
+ for (i = 0; i < n; i++) {
+ m_freem_vec(m_vec[i]);
+ }
+ if (p && p->ifp->if_drv_flags & IFF_DRV_OACTIVE &&
+ txq->size - txq->in_use >= TX_START_MAX_DESC) {
+ txq_fills++;
+ p->ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
+ taskqueue_enqueue(p->tq, &p->start_task);
+ }
+
+ if (n)
+ goto reclaim_more;
+}
+
+static void
+sge_txq_reclaim_handler(void *arg, int ncount)
+{
+ struct sge_txq *q = arg;
+
+ sge_txq_reclaim_(q);
+}
+
+
static void
sge_timer_reclaim(void *arg, int ncount)
{
@@ -753,39 +795,15 @@
struct sge_qset *qs;
struct sge_txq *txq;
struct mtx *lock;
- struct mbuf *m_vec[TX_CLEAN_MAX_DESC];
- int n, reclaimable;
for (i = 0; i < nqsets; i++) {
qs = &sc->sge.qs[i];
txq = &qs->txq[TXQ_ETH];
- reclaimable = desc_reclaimable(txq);
- if (reclaimable > 0) {
- mtx_lock(&txq->lock);
- n = reclaim_completed_tx(sc, txq, TX_CLEAN_MAX_DESC, m_vec);
- mtx_unlock(&txq->lock);
-
- for (i = 0; i < n; i++)
- m_freem_vec(m_vec[i]);
-
- if (p->ifp->if_drv_flags & IFF_DRV_OACTIVE &&
- txq->size - txq->in_use >= TX_START_MAX_DESC) {
- p->ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
- taskqueue_enqueue(p->tq, &p->start_task);
- }
- }
+ sge_txq_reclaim_(txq);
txq = &qs->txq[TXQ_OFLD];
- reclaimable = desc_reclaimable(txq);
- if (reclaimable > 0) {
- mtx_lock(&txq->lock);
- n = reclaim_completed_tx(sc, txq, TX_CLEAN_MAX_DESC, m_vec);
- mtx_unlock(&txq->lock);
+ sge_txq_reclaim_(txq);
- for (i = 0; i < n; i++)
- m_freem_vec(m_vec[i]);
- }
-
lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock :
&sc->sge.qs[0].rspq.lock;
@@ -1223,7 +1241,6 @@
wr_lo = htonl(V_WR_TID(txq->token));
txsd->m = m0;
m_set_priority(m0, txqs.pidx);
- m_set_priority(m0, txqs.pidx);
write_wr_hdr_sgl(ndesc, txd, &txqs, txq, sgl, flits, sgl_flits, wr_hi, wr_lo);
check_ring_tx_db(p->adapter, txq);
@@ -1563,8 +1580,8 @@
for (i = 0; i < nqsets; ++i) {
struct sge_qset *qs = &sc->sge.qs[i];
- taskqueue_drain(sc->tq, &qs->txq[TXQ_OFLD].qresume_tsk);
- taskqueue_drain(sc->tq, &qs->txq[TXQ_CTRL].qresume_tsk);
+ taskqueue_drain(sc->tq, &qs->txq[TXQ_OFLD].qresume_task);
+ taskqueue_drain(sc->tq, &qs->txq[TXQ_CTRL].qresume_task);
}
}
@@ -1579,18 +1596,18 @@
* Tx buffers. Called with the Tx queue lock held.
*/
int
-free_tx_desc(adapter_t *sc, struct sge_txq *q, int n, struct mbuf **m_vec)
+free_tx_desc(struct sge_txq *q, int n, struct mbuf **m_vec)
{
struct tx_sw_desc *d;
- unsigned int cidx = q->cidx;
+ unsigned int cidx;
int nbufs = 0;
#ifdef T3_TRACE
T3_TRACE2(sc->tb[q->cntxt_id & 7],
"reclaiming %u Tx descriptors at cidx %u", n, cidx);
#endif
+ cidx = q->cidx;
d = &q->sdesc[cidx];
-
while (n-- > 0) {
DPRINTF("cidx=%d d=%p\n", cidx, d);
if (d->m) {
@@ -1599,21 +1616,13 @@
bus_dmamap_destroy(q->entry_tag, d->map);
d->flags &= ~TX_SW_DESC_MAPPED;
}
-
if (m_get_priority(d->m) == cidx) {
m_vec[nbufs] = d->m;
d->m = NULL;
nbufs++;
} else {
- printf("pri=%d cidx=%d\n", (int)m_get_priority(d->m), cidx);
+ printf("pri=%jd cidx=%d\n", m_get_priority(d->m), cidx);
}
- if (m_get_priority(d->m) == cidx) {
- m_vec[nbufs] = d->m;
- d->m = NULL;
- nbufs++;
- } else {
- printf("pri=%d cidx=%d\n", m_get_priority(d->m), cidx);
- }
}
++d;
if (++cidx == q->size) {
@@ -1622,7 +1631,7 @@
}
}
q->cidx = cidx;
-
+
return (nbufs);
}
@@ -1748,7 +1757,7 @@
return (ret);
}
ndesc = calc_tx_descs_ofld(m, nsegs);
-again: cleaned = reclaim_completed_tx(adap, q, TX_CLEAN_MAX_DESC, m_vec);
+again: cleaned = reclaim_completed_tx(q, TX_CLEAN_MAX_DESC, m_vec);
ret = check_desc_avail(adap, q, m, ndesc, TXQ_OFLD);
if (__predict_false(ret)) {
@@ -1805,7 +1814,7 @@
struct tx_sw_desc *stx = &q->sdesc[q->pidx];
mtx_lock(&q->lock);
-again: cleaned = reclaim_completed_tx(adap, q, TX_CLEAN_MAX_DESC, m_vec);
+again: cleaned = reclaim_completed_tx(q, TX_CLEAN_MAX_DESC, m_vec);
while ((m = mbufq_peek(&q->sendq)) != NULL) {
unsigned int gen, pidx;
@@ -1944,13 +1953,13 @@
should_restart_tx(&qs->txq[TXQ_OFLD]) &&
test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped)) {
qs->txq[TXQ_OFLD].restarts++;
- taskqueue_enqueue(sc->tq, &qs->txq[TXQ_OFLD].qresume_tsk);
+ taskqueue_enqueue(sc->tq, &qs->txq[TXQ_OFLD].qresume_task);
}
if (isset(&qs->txq_stopped, TXQ_CTRL) &&
should_restart_tx(&qs->txq[TXQ_CTRL]) &&
test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped)) {
qs->txq[TXQ_CTRL].restarts++;
- taskqueue_enqueue(sc->tq, &qs->txq[TXQ_CTRL].qresume_tsk);
+ taskqueue_enqueue(sc->tq, &qs->txq[TXQ_CTRL].qresume_task);
}
}
@@ -2028,9 +2037,14 @@
device_get_unit(sc->dev), irq_vec_idx, i);
MTX_INIT(&q->txq[i].lock, q->txq[i].lockbuf, NULL, MTX_DEF);
}
+
+ q->txq[TXQ_ETH].port = pi;
- TASK_INIT(&q->txq[TXQ_OFLD].qresume_tsk, 0, restart_offloadq, q);
- TASK_INIT(&q->txq[TXQ_CTRL].qresume_tsk, 0, restart_ctrlq, q);
+ TASK_INIT(&q->txq[TXQ_ETH].qreclaim_task, 0, sge_txq_reclaim_handler, &q->txq[TXQ_ETH]);
+ TASK_INIT(&q->txq[TXQ_OFLD].qreclaim_task, 0, sge_txq_reclaim_handler, &q->txq[TXQ_OFLD]);
+ TASK_INIT(&q->txq[TXQ_OFLD].qresume_task, 0, restart_offloadq, q);
+ TASK_INIT(&q->txq[TXQ_CTRL].qresume_task, 0, restart_ctrlq, q);
+
q->fl[0].gen = q->fl[1].gen = 1;
q->fl[0].size = p->fl_size;
@@ -2646,6 +2660,10 @@
"collapse_mbufs",
CTLFLAG_RW, &collapse_mbufs,
0, "collapse mbuf chains into iovecs");
+ SYSCTL_ADD_INT(ctx, children, OID_AUTO,
+ "txq_overrun",
+ CTLFLAG_RD, &txq_fills,
+ 0, "#times txq overrun");
}
/**
More information about the p4-projects
mailing list