PERFORCE change 126763 for review
Kip Macy
kmacy at FreeBSD.org
Sun Sep 23 22:02:22 PDT 2007
http://perforce.freebsd.org/chv.cgi?CH=126763
Change 126763 by kmacy at kmacy_home:ethng on 2007/09/24 05:01:29
Avoid extra cache/TLB misses and allocations by embedding the
mbuf in the cluster on receive.
Affected files ...
.. //depot/projects/ethng/src/sys/dev/cxgb/cxgb_sge.c#23 edit
Differences ...
==== //depot/projects/ethng/src/sys/dev/cxgb/cxgb_sge.c#23 (text+ko) ====
@@ -140,18 +140,13 @@
};
struct rx_sw_desc { /* SW state per Rx descriptor */
- union {
- void *cl;
- uint32_t *ref;
- } u;
- uint8_t *data;
+ caddr_t rxsd_cl;
+ uint32_t *rxsd_ref;
+ caddr_t data;
bus_dmamap_t map;
int flags;
};
-#define rxsd_cl u.cl
-#define rxsd_ref u.ref
-
struct txq_state {
unsigned int compl;
unsigned int gen;
@@ -547,8 +542,7 @@
struct rx_sw_desc *sd = &q->sdesc[q->pidx];
struct rx_desc *d = &q->desc[q->pidx];
struct refill_fl_cb_arg cb_arg;
- void *cl;
- uint32_t *ref;
+ caddr_t cl;
int err;
cb_arg.error = 0;
@@ -560,7 +554,6 @@
log(LOG_WARNING, "Failed to allocate cluster\n");
goto done;
}
- ref = cl;
if ((sd->flags & RX_SW_DESC_MAP_CREATED) == 0) {
if ((err = bus_dmamap_create(q->entry_tag, 0, &sd->map))) {
@@ -571,7 +564,8 @@
sd->flags |= RX_SW_DESC_MAP_CREATED;
}
#if !defined(__i386__) && !defined(__amd64__)
- err = bus_dmamap_load(q->entry_tag, sd->map, (uint32_t *)cl + 1, q->buf_size,
+ err = bus_dmamap_load(q->entry_tag, sd->map,
+ cl + sizeof(struct m_hdr) + sizeof(struct pkthdr) + sizeof(struct m_ext_) + sizeof(uint32_t), q->buf_size,
refill_fl_cb, &cb_arg, 0);
if (err != 0 || cb_arg.error) {
@@ -582,11 +576,13 @@
return;
}
#else
- cb_arg.seg.ds_addr = pmap_kextract((vm_offset_t)((uint32_t *)cl + 1));
+ cb_arg.seg.ds_addr = pmap_kextract((vm_offset_t)(cl + sizeof(struct m_hdr) +
+ sizeof(struct pkthdr) + sizeof(struct m_ext_) + sizeof(uint32_t)));
#endif
sd->flags |= RX_SW_DESC_INUSE;
sd->rxsd_cl = cl;
- sd->data = (uint8_t *)(sd->rxsd_ref + 1);
+ sd->rxsd_ref = (uint32_t *)(cl + sizeof(struct m_hdr) + sizeof(struct pkthdr) + sizeof(struct m_ext_));
+ sd->data = cl + sizeof(struct m_hdr) + sizeof(struct pkthdr) + sizeof(struct m_ext_) + sizeof(uint32_t);
d->addr_lo = htobe32(cb_arg.seg.ds_addr & 0xffffffff);
d->addr_hi = htobe32(((uint64_t)cb_arg.seg.ds_addr >>32) & 0xffffffff);
d->len_gen = htobe32(V_FLD_GEN1(q->gen));
@@ -1026,7 +1022,7 @@
* packet. Ethernet packets require addition of WR and CPL headers.
*/
static __inline unsigned int
-calc_tx_descs(const struct mbuf *m, int nsegs, int tsoinfo)
+calc_tx_descs(const struct mbuf *m, int nsegs)
{
unsigned int flits;
@@ -1035,7 +1031,7 @@
flits = sgl_len(nsegs) + 2;
#ifdef TSO_SUPPORTED
- if (tsoinfo)
+ if (m->m_pkthdr.csum_flags & CSUM_TSO)
flits++;
#endif
return flits_to_desc(flits);
@@ -1057,7 +1053,7 @@
goto done;
} else
#endif
- err = bus_dmamap_load_mvec_sg(txq->entry_tag, txsd->map, m0, segs, nsegs, 0);
+ err = bus_dmamap_load_mbuf_sg(txq->entry_tag, txsd->map, m0, segs, nsegs, 0);
if (err == 0) {
goto done;
@@ -1473,7 +1469,7 @@
return (err);
m0 = *m;
#endif
- ndesc = calc_tx_descs(mi, nsegs, tso_info);
+ ndesc = calc_tx_descs(m0, nsegs);
sgp = (ndesc == 1) ? (struct sg_ent *)&txd->flit[flits] : sgl;
make_sgl(sgp, segs, nsegs);
@@ -1487,7 +1483,7 @@
write_wr_hdr_sgl(ndesc, txd, &txqs, txq, sgl, flits, sgl_flits, wr_hi, wr_lo);
check_ring_tx_db(pi->adapter, txq);
- if ((m0->m_type == MT_DATA) && (m0->m_flags & M_EXT)) {
+ if ((m0->m_type == MT_DATA) && ((m0->m_flags & (M_EXT|M_NOFREE)) == M_EXT)) {
m0->m_flags = 0;
m_free(m0);
}
@@ -2343,15 +2339,15 @@
q->txq[TXQ_ETH].stop_thres = nports *
flits_to_desc(sgl_len(TX_MAX_SEGS + 1) + 3);
- q->fl[0].buf_size = MCLBYTES - sizeof(uint32_t);
+ q->fl[0].buf_size = (MCLBYTES - sizeof(uint32_t) - sizeof(struct m_hdr) - sizeof(struct pkthdr) - sizeof(struct m_ext_));
q->fl[0].zone = zone_clust;
q->fl[0].type = EXT_CLUSTER;
if (jumbo_phys_contig) {
- q->fl[1].buf_size = MJUM9BYTES - sizeof(uint32_t);
+ q->fl[1].buf_size = MJUM9BYTES - sizeof(uint32_t) - sizeof(struct m_hdr) - sizeof(struct pkthdr) - sizeof(struct m_ext_);
q->fl[1].zone = zone_jumbo9;
q->fl[1].type = EXT_JUMBO9;
} else {
- q->fl[1].buf_size = MJUMPAGESIZE - sizeof(uint32_t);
+ q->fl[1].buf_size = MJUMPAGESIZE - sizeof(uint32_t) - sizeof(struct m_hdr) - sizeof(struct pkthdr) - sizeof(struct m_ext_);
q->fl[1].zone = zone_jumbop;
q->fl[1].type = EXT_JUMBOP;
}
@@ -2567,9 +2563,32 @@
}
#else
+static void
+init_cluster_mbuf(caddr_t cl, int flags, int type)
+{
+ struct mbuf *m;
+ int header_size;
+
+ header_size = sizeof(struct m_hdr) + sizeof(struct pkthdr) + sizeof(struct m_ext_) + sizeof(uint32_t);
+
+ bzero(cl, header_size);
+ m = (struct mbuf *)cl;
+
+ SLIST_INIT(&m->m_pkthdr.tags);
+ m->m_type = MT_DATA;
+ m->m_flags = flags | M_NOFREE | M_EXT;
+ m->m_data = cl + header_size;
+ m->m_ext.ext_buf = cl;
+ m->m_ext.ref_cnt = (uint32_t *)(cl + header_size - sizeof(uint32_t));
+ m->m_ext.ext_size = m_getsizefromtype(type);
+ m->m_ext.ext_type = type;
+ *(m->m_ext.ref_cnt) = 1;
+ DPRINTF("data=%p ref_cnt=%p\n", m->m_data, m->m_ext.ref_cnt);
+}
+
static int
get_packet(adapter_t *adap, unsigned int drop_thres, struct sge_qset *qs,
- struct mbuf *m, struct rsp_desc *r)
+ struct mbuf **m, struct rsp_desc *r)
{
unsigned int len_cq = ntohl(r->len_cq);
@@ -2579,8 +2598,8 @@
uint32_t flags = ntohl(r->flags);
uint8_t sopeop = G_RSPD_SOP_EOP(flags);
void *cl;
- uint32_t *ref = NULL;
int ret = 0;
+ struct mbuf *m0;
prefetch((sd + 1)->rxsd_cl);
prefetch((sd + 2)->rxsd_cl);
@@ -2590,43 +2609,46 @@
bus_dmamap_sync(fl->entry_tag, sd->map, BUS_DMASYNC_POSTREAD);
if (recycle_enable && len <= SGE_RX_COPY_THRES && sopeop == RSPQ_SOP_EOP) {
- cl = mtod(m, void *);
+ if ((m0 = m_gethdr(M_DONTWAIT, MT_DATA)) == NULL)
+ goto skip_recycle;
+ cl = mtod(m0, void *);
memcpy(cl, sd->data, len);
recycle_rx_buf(adap, fl, fl->cidx);
+ *m = m0;
} else {
+ skip_recycle:
bus_dmamap_unload(fl->entry_tag, sd->map);
cl = sd->rxsd_cl;
- ref = sd->rxsd_ref;
+ *m = m0 = (struct mbuf *)cl;
}
switch(sopeop) {
case RSPQ_SOP_EOP:
DBG(DBG_RX, ("get_packet: SOP-EOP m %p\n", m));
- if (cl == sd->rxsd_cl) {
- m_cljset(m, cl, fl->type, ref);
- *ref = 1;
- m->m_data = sd->data;
- }
- m->m_len = m->m_pkthdr.len = len;
+ if (cl == sd->rxsd_cl)
+ init_cluster_mbuf(cl, M_PKTHDR, fl->type);
+ m0->m_len = m0->m_pkthdr.len = len;
ret = 1;
goto done;
break;
case RSPQ_NSOP_NEOP:
DBG(DBG_RX, ("get_packet: NO_SOP-NO_EOP m %p\n", m));
+ panic("chaining unsupported");
ret = 0;
break;
case RSPQ_SOP:
DBG(DBG_RX, ("get_packet: SOP m %p\n", m));
- m_iovinit(m);
+ panic("chaining unsupported");
+ m_iovinit(m0);
ret = 0;
break;
case RSPQ_EOP:
DBG(DBG_RX, ("get_packet: EOP m %p\n", m));
+ panic("chaining unsupported");
ret = 1;
break;
}
- m_iovappend(m, cl, fl->buf_size, len, sizeof(uint32_t), ref);
- *ref = 1;
+ m_iovappend(m0, cl, fl->buf_size, len, sizeof(uint32_t), sd->rxsd_ref);
done:
if (++fl->cidx == fl->size)
fl->cidx = 0;
@@ -2779,17 +2801,9 @@
eop = get_packet(adap, drop_thresh, qs, &rspq->rspq_mh, r, m);
#else
- if (rspq->rspq_mbuf == NULL)
- rspq->rspq_mbuf = m_gethdr(M_DONTWAIT, MT_DATA);
- if (rspq->rspq_mbuf == NULL) {
- rspq->next_holdoff = NOMEM_INTR_DELAY;
- log(LOG_WARNING, "failed to get mbuf for packet\n");
- break;
- } else {
- rspq->rspq_mbuf->m_pkthdr.rss_hash = rss_hash;
- rspq->rspq_mbuf->m_next = rspq->rspq_mbuf->m_nextpkt = NULL;
- }
- eop = get_packet(adap, drop_thresh, qs, rspq->rspq_mbuf, r);
+ eop = get_packet(adap, drop_thresh, qs, &rspq->rspq_mbuf, r);
+ rspq->rspq_mbuf->m_pkthdr.rss_hash = rss_hash;
+
#endif
ethpad = 2;
} else {
More information about the p4-projects
mailing list