PERFORCE change 126446 for review
Kip Macy
kmacy at FreeBSD.org
Sat Sep 15 14:02:44 PDT 2007
http://perforce.freebsd.org/chv.cgi?CH=126446
Change 126446 by kmacy at kmacy_home:ethng on 2007/09/15 21:02:32
move refcnt into cluster and tune prefetching slightly
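
For reviewers, a minimal sketch of the cluster layout this change assumes (illustrative only, not part of the diff; names mirror refill_fl() below): the first uint32_t of each rx cluster now holds the reference count, the payload begins immediately after it, and fl buf_size is reduced by sizeof(uint32_t) to match.

	/*
	 * Sketch only -- not part of the submitted diff.  Mirrors refill_fl():
	 * the refcount occupies the first 4 bytes of the cluster and the DMA
	 * mapping covers only the payload that follows it.
	 */
	void *cl = m_cljget(NULL, M_DONTWAIT, q->zone);	/* whole cluster        */
	uint32_t *ref = cl;				/* refcnt at offset 0   */
	uint8_t *data = (uint8_t *)(ref + 1);		/* payload begins here  */
	err = bus_dmamap_load(q->entry_tag, sd->map, data, q->buf_size,
	    refill_fl_cb, &cb_arg, 0);			/* buf_size excludes the refcnt word */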
Affected files ...
.. //depot/projects/ethng/src/sys/dev/cxgb/cxgb_sge.c#19 edit
Differences ...
==== //depot/projects/ethng/src/sys/dev/cxgb/cxgb_sge.c#19 (text+ko) ====
@@ -141,11 +141,18 @@
};
struct rx_sw_desc { /* SW state per Rx descriptor */
- void *cl;
- bus_dmamap_t map;
- int flags;
+ union {
+ void *cl;
+ uint32_t *ref;
+ } u;
+ uint8_t *data;
+ bus_dmamap_t map;
+ int flags;
};
+#define rxsd_cl u.cl
+#define rxsd_ref u.ref
+
struct txq_state {
unsigned int compl;
unsigned int gen;
@@ -356,8 +363,12 @@
memcpy(mtod(m, uint8_t *), resp->imm_data, len);
break;
case RSPQ_EOP:
- memcpy(cl, resp->imm_data, len);
- m_iovappend(m, cl, MSIZE, len, 0);
+ memcpy(cl, resp->imm_data, len);
+ /*
+ * XXX
+ */
+ panic("bad append");
+ m_iovappend(m, cl, MSIZE, len, 0, NULL);
break;
default:
bogus_imm++;
@@ -534,6 +545,7 @@
struct rx_desc *d = &q->desc[q->pidx];
struct refill_fl_cb_arg cb_arg;
void *cl;
+ uint32_t *ref;
int err;
cb_arg.error = 0;
@@ -541,10 +553,12 @@
/*
* We only allocate a cluster, mbuf allocation happens after rx
*/
- if ((cl = m_cljget(NULL, M_DONTWAIT, q->buf_size)) == NULL) {
+ if ((cl = m_cljget(NULL, M_DONTWAIT, q->zone)) == NULL) {
log(LOG_WARNING, "Failed to allocate cluster\n");
goto done;
}
+ ref = cl;
+
if ((sd->flags & RX_SW_DESC_MAP_CREATED) == 0) {
if ((err = bus_dmamap_create(q->entry_tag, 0, &sd->map))) {
log(LOG_WARNING, "bus_dmamap_create failed %d\n", err);
@@ -553,7 +567,8 @@
}
sd->flags |= RX_SW_DESC_MAP_CREATED;
}
- err = bus_dmamap_load(q->entry_tag, sd->map, cl, q->buf_size,
+
+ err = bus_dmamap_load(q->entry_tag, sd->map, (uint32_t *)cl + 1, q->buf_size,
refill_fl_cb, &cb_arg, 0);
if (err != 0 || cb_arg.error) {
@@ -565,7 +580,8 @@
}
sd->flags |= RX_SW_DESC_INUSE;
- sd->cl = cl;
+ sd->rxsd_cl = cl;
+ sd->data = (uint8_t *)(sd->rxsd_ref + 1);
d->addr_lo = htobe32(cb_arg.seg.ds_addr & 0xffffffff);
d->addr_hi = htobe32(((uint64_t)cb_arg.seg.ds_addr >>32) & 0xffffffff);
d->len_gen = htobe32(V_FLD_GEN1(q->gen));
@@ -607,9 +623,9 @@
if (d->flags & RX_SW_DESC_INUSE) {
bus_dmamap_unload(q->entry_tag, d->map);
bus_dmamap_destroy(q->entry_tag, d->map);
- uma_zfree(q->zone, d->cl);
+ uma_zfree(q->zone, d->rxsd_cl);
}
- d->cl = NULL;
+ d->rxsd_cl = NULL;
if (++cidx == q->size)
cidx = 0;
}
@@ -1250,7 +1266,7 @@
uint32_t wr_hi, wr_lo, sgl_flits;
struct tx_desc *txd;
- DPRINTF("t3_encap port_id=%d qsidx=%d ", p->port_id, p->first_qset);
+
#if defined(IFNET_MULTIQUEUE) && defined(STRICT_AFFINITY)
KASSERT(qs->qs_cpuid == curcpu, ("cpu qset mismatch cpuid=%d curcpu=%d", qs->qs_cpuid, curcpu));
@@ -1263,7 +1279,8 @@
txsd = &txq->sdesc[txq->pidx];
txd = &txq->desc[txq->pidx];
- DPRINTF("mlen=%d txpkt_intf=%d tx_chan=%d\n", mlen, p->txpkt_intf, p->tx_chan);
+ DPRINTF("t3_encap port_id=%d qsidx=%d ", p->port_id, p->first_qset);
+ DPRINTF("mlen=%d txpkt_intf=%d tx_chan=%d\n", m[0]->m_pkthdr.len, p->txpkt_intf, p->tx_chan);
/*
* XXX handle checksum, TSO, and VLAN here
*
@@ -1779,7 +1796,7 @@
{
struct tx_sw_desc *txsd;
unsigned int cidx;
- int i, reclaimed, nbufs;
+ int i, iter, reclaimed, nbufs;
#ifdef T3_TRACE
T3_TRACE2(sc->tb[q->cntxt_id & 7],
@@ -1787,10 +1804,16 @@
#endif
cidx = q->cidx;
txsd = &q->sdesc[cidx];
+ prefetch(txsd);
reclaimed = nbufs = 0;
-
- for (reclaimed = 0; reclaimed < reclaimable;) {
- DPRINTF("cidx=%d d=%p\n", cidx, d);
+ for (iter = reclaimed = 0; reclaimed < reclaimable; iter++) {
+ if ((iter & 0x1) == 0) {
+ prefetch(txsd + 1);
+ prefetch(txsd + 2);
+ prefetch(txsd + 3);
+ prefetch(txsd + 4);
+ }
+ DPRINTF("cidx=%d d=%p\n", cidx, txsd);
if (txsd->count > 0) {
if (nbufs + txsd->count > m_vec_size)
break;
@@ -1798,13 +1821,17 @@
bus_dmamap_unload(q->entry_tag, txsd->map);
txsd->flags &= ~TX_SW_DESC_MAPPED;
}
- if (m_get_priority(txsd->m[0]) == cidx) {
- for (i = 0; i < txsd->count; i++, nbufs++)
- m_vec[nbufs] = txsd->m[i];
- txsd->count = 0;
- } else {
+ for (i = 0; i < txsd->count; i++, nbufs++) {
+ prefetch(txsd->m[i]);
+ m_vec[nbufs] = txsd->m[i];
+ }
+ txsd->count = 0;
+
+#ifdef DIAGNOSTIC
+ if (m_get_priority(txsd->m[0]) != cidx)
printf("pri=%d cidx=%d\n", (int)m_get_priority(txsd->m[0]), cidx);
- }
+#endif
+
} else
q->txq_skipped++;
@@ -2254,13 +2281,18 @@
q->txq[TXQ_ETH].stop_thres = nports *
flits_to_desc(sgl_len(TX_MAX_SEGS + 1) + 3);
- q->fl[0].buf_size = MCLBYTES;
+ q->fl[0].buf_size = MCLBYTES - sizeof(uint32_t);
q->fl[0].zone = zone_clust;
q->fl[0].type = EXT_CLUSTER;
- q->fl[1].buf_size = MJUMPAGESIZE;
- q->fl[1].zone = zone_jumbop;
- q->fl[1].type = EXT_JUMBOP;
-
+ if (jumbo_phys_contig) {
+ q->fl[1].buf_size = MJUM9BYTES - sizeof(uint32_t);
+ q->fl[1].zone = zone_jumbo9;
+ q->fl[1].type = EXT_JUMBO9;
+ } else {
+ q->fl[1].buf_size = MJUMPAGESIZE - sizeof(uint32_t);
+ q->fl[1].zone = zone_jumbop;
+ q->fl[1].type = EXT_JUMBOP;
+ }
q->lro.enabled = lro_default;
mtx_lock(&sc->sge.reg_lock);
@@ -2372,7 +2404,9 @@
/*
* adjust after conversion to mbuf chain
*/
- m_adj(m, sizeof(*cpl) + ethpad);
+ m->m_pkthdr.len -= (sizeof(*cpl) + ethpad);
+ m->m_len -= (sizeof(*cpl) + ethpad);
+ m->m_data += (sizeof(*cpl) + ethpad);
(*ifp->if_input)(ifp, m);
}
@@ -2406,17 +2440,24 @@
uint32_t len = G_RSPD_LEN(len_cq);
uint32_t flags = ntohl(r->flags);
uint8_t sopeop = G_RSPD_SOP_EOP(flags);
+ uint32_t *ref;
int ret = 0;
- prefetch(sd->cl);
+ prefetch(sd->rxsd_cl);
fl->credits--;
bus_dmamap_sync(fl->entry_tag, sd->map, BUS_DMASYNC_POSTREAD);
bus_dmamap_unload(fl->entry_tag, sd->map);
- m_cljset(m, sd->cl, fl->type);
+ ref = sd->rxsd_ref;
+ m_cljset(m, sd->rxsd_cl, fl->type, sd->rxsd_ref);
+ *ref = 1;
m->m_len = len;
-
+ /*
+ * bump past the refcnt address
+ */
+ m->m_data = sd->data;
+
switch(sopeop) {
case RSPQ_SOP_EOP:
DBG(DBG_RX, ("get_packet: SOP-EOP m %p\n", m));
@@ -2474,9 +2515,11 @@
uint32_t flags = ntohl(r->flags);
uint8_t sopeop = G_RSPD_SOP_EOP(flags);
void *cl;
+ uint32_t *ref = NULL;
int ret = 0;
-
- prefetch(sd->cl);
+
+ prefetch((sd + 1)->rxsd_cl);
+ prefetch((sd + 2)->rxsd_cl);
DPRINTF("rx cpu=%d\n", curcpu);
fl->credits--;
@@ -2484,17 +2527,22 @@
if (recycle_enable && len <= SGE_RX_COPY_THRES && sopeop == RSPQ_SOP_EOP) {
cl = mtod(m, void *);
- memcpy(cl, sd->cl, len);
+ memcpy(cl, sd->data, len);
recycle_rx_buf(adap, fl, fl->cidx);
} else {
- cl = sd->cl;
bus_dmamap_unload(fl->entry_tag, sd->map);
+ cl = sd->rxsd_cl;
+ ref = sd->rxsd_ref;
}
+
switch(sopeop) {
case RSPQ_SOP_EOP:
DBG(DBG_RX, ("get_packet: SOP-EOP m %p\n", m));
- if (cl == sd->cl)
- m_cljset(m, cl, fl->type);
+ if (cl == sd->rxsd_cl) {
+ m_cljset(m, cl, fl->type, ref);
+ *ref = 1;
+ m->m_data = sd->data;
+ }
m->m_len = m->m_pkthdr.len = len;
ret = 1;
goto done;
@@ -2513,8 +2561,8 @@
ret = 1;
break;
}
- m_iovappend(m, cl, fl->buf_size, len, 0);
-
+ m_iovappend(m, cl, fl->buf_size, len, sizeof(uint32_t), ref);
+ *ref = 1;
done:
if (++fl->cidx == fl->size)
fl->cidx = 0;
@@ -2710,7 +2758,7 @@
if (eop) {
prefetch(mtod(rspq->rspq_mh.mh_head, uint8_t *));
- prefetch(mtod(rspq->rspq_mh.mh_head, uint8_t *) + L1_CACHE_BYTES);
+ prefetch(mtod(rspq->rspq_mh.mh_head, uint8_t *) + L1_CACHE_BYTES);
if (eth) {
t3_rx_eth_lro(adap, rspq, rspq->rspq_mh.mh_head, ethpad,