PERFORCE change 109499 for review
Sam Leffler
sam@FreeBSD.org
Wed Nov 8 00:40:57 UTC 2006
http://perforce.freebsd.org/chv.cgi?CH=109499
Change 109499 by sam@sam_ebb on 2006/11/08 00:39:22
Add multi-segment tx:
o change ix_ne in npebuf to an NPE_MAXSEG array of
descriptors (3 for now, based on tracing traffic for
an NFS root mount and normal traffic patterns while
running diskless)
o bring in defrag code from ath to handle the case
where the mbuf chain doesn't fit
Gets us a >20% improvement for upstream TCP netperf on a
WITNESS+INVARIANTS kernel.
Note: each rx buffer gets NPE_MAXSEG-1 unused descriptors
(~8 Kbytes right now); this can easily be reclaimed.
Note: npebuf setup in the tx path can be optimized a bit by
unrolling the loop and eliminating the extraneous write to
the uncached npebuf.
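
In outline, the new transmit path works as follows. This is a condensed
sketch assembled from the npestart() hunks below; locking, dmamap syncs,
BPF tap, and queue bookkeeping are omitted, and npe_tx_sketch is a
hypothetical name used here for illustration, not a function in the
driver.

static int
npe_tx_sketch(struct npe_softc *sc, struct npebuf *npe, struct mbuf *m)
{
	struct npedma *dma = &sc->txdma;
	struct mbuf *n;
	bus_dma_segment_t segs[NPE_MAXSEG];
	uint32_t next;
	int error, i, len, nseg;

	/* DMA-load the chain; EFBIG means more than NPE_MAXSEG segments. */
	error = bus_dmamap_load_mbuf_sg(dma->mtag, npe->ix_map,
	    m, segs, &nseg, 0);
	if (error == EFBIG) {
		/* Compact the chain toward the front and retry once. */
		n = npe_defrag(m, M_DONTWAIT, NPE_MAXSEG);
		if (n == NULL) {
			m_freem(m);		/* cannot fit; drop the frame */
			return (ENOBUFS);
		}
		m = n;
		error = bus_dmamap_load_mbuf_sg(dma->mtag, npe->ix_map,
		    m, segs, &nseg, 0);
	}
	if (error != 0 || nseg == 0)
		return (error);			/* caller frees m */
	npe->ix_m = m;
	/*
	 * One hardware descriptor per DMA segment: the total frame
	 * length rides in the low 16 bits of the first descriptor's
	 * len word only, each next field holds the physical address
	 * of the following descriptor, and the last next is zeroed
	 * to terminate the chain.
	 */
	len = m->m_pkthdr.len;
	next = npe->ix_neaddr + sizeof(npe->ix_ne[0]);
	for (i = 0; i < nseg; i++) {
		npe->ix_ne[i].data = htobe32(segs[i].ds_addr);
		npe->ix_ne[i].len = htobe32((segs[i].ds_len << 16) | len);
		npe->ix_ne[i].next = htobe32(next);
		len = 0;			/* zero for segments > 1 */
		next += sizeof(npe->ix_ne[0]);
	}
	npe->ix_ne[i - 1].next = 0;
	/* Hand the descriptor chain to the NPE via the queue manager. */
	ixpqmgr_qwrite(sc->tx_qid, npe->ix_neaddr);
	return (0);
}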
Affected files ...
.. //depot/projects/arm/src/sys/arm/xscale/ixp425/if_npe.c#15 edit
.. //depot/projects/arm/src/sys/arm/xscale/ixp425/if_npereg.h#3 edit
Differences ...
==== //depot/projects/arm/src/sys/arm/xscale/ixp425/if_npe.c#15 (text+ko) ====
@@ -380,7 +380,7 @@
static int
npe_dma_setup(struct npe_softc *sc, struct npedma *dma,
- const char *name, int nbuf)
+ const char *name, int nbuf, int maxseg)
{
int error, i;
@@ -391,7 +391,7 @@
/* DMA tag for mapped mbufs */
error = bus_dma_tag_create(NULL, 1, 0, BUS_SPACE_MAXADDR_32BIT,
- BUS_SPACE_MAXADDR, NULL, NULL, MCLBYTES, 1, MCLBYTES, 0,
+ BUS_SPACE_MAXADDR, NULL, NULL, MCLBYTES, maxseg, MCLBYTES, 0,
busdma_lock_mutex, &sc->sc_mtx, &dma->mtag);
if (error != 0) {
device_printf(sc->sc_dev, "unable to create %s mbuf dma tag, "
@@ -508,10 +508,11 @@
}
} else
sc->sc_miih = sc->sc_ioh;
- error = npe_dma_setup(sc, &sc->txdma, "tx", NPE_MAX_TX_BUFFERS);
+ error = npe_dma_setup(sc, &sc->txdma, "tx", NPE_MAX_TX_BUFFERS,
+ NPE_MAXSEG);
if (error != 0)
return error;
- error = npe_dma_setup(sc, &sc->rxdma, "rx", NPE_MAX_RX_BUFFERS);
+ error = npe_dma_setup(sc, &sc->rxdma, "rx", NPE_MAX_RX_BUFFERS, 1);
if (error != 0)
return error;
@@ -753,6 +754,7 @@
uint32_t entry;
NPE_LOCK(sc);
+ /* XXX max # at a time? */
while (ixpqmgr_qread(qid, &entry) == 0) {
struct npebuf *npe = P2V(NPE_QM_Q_ADDR(entry));
@@ -786,7 +788,6 @@
m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
if (m == NULL)
return ENOBUFS;
- m->m_len = MCLBYTES;
}
KASSERT(m->m_ext.ext_size >= 1536 + ETHER_ALIGN,
("ext_size %d", m->m_ext.ext_size));
@@ -799,11 +800,11 @@
m_freem(m);
return error;
}
- npe->ix_ne_data = htobe32(segs[0].ds_addr);
+ npe->ix_ne[0].data = htobe32(segs[0].ds_addr);
/* NB: NPE requires length be a multiple of 64 */
/* NB: buffer length is shifted in word */
- npe->ix_ne_len = htobe32(segs[0].ds_len << 16);
- npe->ix_ne_next = 0;
+ npe->ix_ne[0].len = htobe32(segs[0].ds_len << 16);
+ npe->ix_ne[0].next = 0;
npe->ix_m = m;
/* Flush the memory in the mbuf */
bus_dmamap_sync(dma->mtag, npe->ix_map, BUS_DMASYNC_PREREAD);
@@ -830,7 +831,7 @@
struct mbuf *m;
DPRINTF(sc, "%s: entry 0x%x neaddr 0x%x ne_len 0x%x\n",
- __func__, entry, npe->ix_neaddr, npe->ix_ne_len);/*XXX*/
+ __func__, entry, npe->ix_neaddr, npe->ix_ne[0].len);
/*
* Allocate a new mbuf to replenish the rx buffer.
* If doing so fails we drop the rx'd frame so we
@@ -848,7 +849,7 @@
BUS_DMASYNC_POSTREAD);
/* set m_len etc. per rx frame size */
- mrx->m_len = be32toh(npe->ix_ne_len) & 0xffff;
+ mrx->m_len = be32toh(npe->ix_ne[0].len) & 0xffff;
mrx->m_pkthdr.len = mrx->m_len;
mrx->m_pkthdr.rcvif = ifp;
mrx->m_flags |= M_HASFCS;
@@ -862,8 +863,8 @@
}
} else {
m = npe->ix_m;
- npe->ix_ne_len = htobe32(m->m_len << 16);
- npe->ix_ne_next = 0;
+ npe->ix_ne[0].len = htobe32(m->m_len << 16);
+ npe->ix_ne[0].next = 0;
/* XXX? sync? */
}
bus_dmamap_sync(dma->buf_tag, dma->buf_map,
@@ -1000,27 +1001,88 @@
NPE_UNLOCK(sc);
}
+/*
+ * Defragment an mbuf chain, returning at most maxfrags separate
+ * mbufs+clusters. If this is not possible, NULL is returned and
+ * the original mbuf chain is left in its present (potentially
+ * modified) state. We use two techniques: collapsing consecutive
+ * mbufs and replacing consecutive mbufs by a cluster.
+ */
static struct mbuf *
-npe_linearize(struct mbuf *m0, int how)
+npe_defrag(struct mbuf *m0, int how, int maxfrags)
{
- struct mbuf *m, *n;
+ struct mbuf *m, *n, *n2, **prev;
+ u_int curfrags;
- if (m0->m_pkthdr.len > MHLEN)
- n = m_getcl(how, MT_DATA, M_PKTHDR);
- else
- n = m_gethdr(how, MT_DATA);
- if (n != NULL) {
- n->m_len = 0; /* NB: not initialized on alloc */
- for (m = m0; m != NULL; m = m->m_next) {
- bcopy(mtod(m, void *), mtod(n, char *) + n->m_len,
- m->m_len);
- n->m_len += m->m_len;
+ /*
+ * Calculate the current number of frags.
+ */
+ curfrags = 0;
+ for (m = m0; m != NULL; m = m->m_next)
+ curfrags++;
+ /*
+ * First, try to collapse mbufs. Note that we always collapse
+ * towards the front so we don't need to deal with moving the
+ * pkthdr. This may be suboptimal if the first mbuf has much
+ * less data than the following.
+ */
+ m = m0;
+again:
+ for (;;) {
+ n = m->m_next;
+ if (n == NULL)
+ break;
+ if ((m->m_flags & M_RDONLY) == 0 &&
+ n->m_len < M_TRAILINGSPACE(m)) {
+ bcopy(mtod(n, void *), mtod(m, char *) + m->m_len,
+ n->m_len);
+ m->m_len += n->m_len;
+ m->m_next = n->m_next;
+ m_free(n);
+ if (--curfrags <= maxfrags)
+ return m0;
+ } else
+ m = n;
+ }
+ KASSERT(maxfrags > 1,
+ ("maxfrags %u, but normal collapse failed", maxfrags));
+ /*
+ * Collapse consecutive mbufs to a cluster.
+ */
+ prev = &m0->m_next; /* NB: not the first mbuf */
+ while ((n = *prev) != NULL) {
+ if ((n2 = n->m_next) != NULL &&
+ n->m_len + n2->m_len < MCLBYTES) {
+ m = m_getcl(how, MT_DATA, 0);
+ if (m == NULL)
+ goto bad;
+ bcopy(mtod(n, void *), mtod(m, void *), n->m_len);
+ bcopy(mtod(n2, void *), mtod(m, char *) + n->m_len,
+ n2->m_len);
+ m->m_len = n->m_len + n2->m_len;
+ m->m_next = n2->m_next;
+ *prev = m;
+ m_free(n);
+ m_free(n2);
+ if (--curfrags <= maxfrags) /* +1 cl -2 mbufs */
+ return m0;
+ /*
+ * Still not there, try the normal collapse
+ * again before we allocate another cluster.
+ */
+ goto again;
}
- /* NB: this works because we never change m_final->m_data */
- m_move_pkthdr(n, m0);
+ prev = &n->m_next;
}
- m_freem(m0);
- return n;
+ /*
+ * No place where we can collapse to a cluster; punt.
+ * This can occur if, for example, you request 2 frags
+ * but the packet requires that both be clusters (we
+ * never reallocate the first mbuf to avoid moving the
+ * packet header).
+ */
+bad:
+ return NULL;
}
/*
@@ -1031,10 +1093,11 @@
{
struct npe_softc *sc = ifp->if_softc;
struct npebuf *npe;
- struct mbuf *m;
+ struct mbuf *m, *n;
struct npedma *dma = &sc->txdma;
- bus_dma_segment_t segs[1];
- int nseg, len;
+ bus_dma_segment_t segs[NPE_MAXSEG];
+ int nseg, len, error, i;
+ uint32_t next;
NPE_ASSERT_LOCKED(sc);
/* XXX can this happen? */
@@ -1049,19 +1112,27 @@
return;
}
npe = sc->tx_free;
- if (m->m_next != NULL) {
- m = npe_linearize(m, M_DONTWAIT);
- if (m == NULL)
- return;
+ error = bus_dmamap_load_mbuf_sg(dma->mtag, npe->ix_map,
+ m, segs, &nseg, 0);
+ if (error == EFBIG) {
+ n = npe_defrag(m, M_DONTWAIT, NPE_MAXSEG);
+ if (n == NULL) {
+ if_printf(ifp, "%s: too many fragments %u\n",
+ __func__, nseg);
+ m_freem(m);
+ return; /* XXX? */
+ }
+ m = n;
+ error = bus_dmamap_load_mbuf_sg(dma->mtag, npe->ix_map,
+ m, segs, &nseg, 0);
}
- if (bus_dmamap_load_mbuf_sg(dma->mtag, npe->ix_map,
- m, segs, &nseg, 0) != 0) {
+ if (error != 0 || nseg == 0) {
+ if_printf(ifp, "%s: error %u nseg %u\n",
+ __func__, error, nseg);
m_freem(m);
- continue;
+ return; /* XXX? */
}
sc->tx_free = npe->ix_next;
- if (sc->tx_free == NULL)
- ifp->if_drv_flags |= IFF_DRV_OACTIVE;
bus_dmamap_sync(dma->mtag, npe->ix_map, BUS_DMASYNC_PREWRITE);
@@ -1071,22 +1142,30 @@
BPF_MTAP(ifp, m);
npe->ix_m = m;
- npe->ix_ne_data = htobe32(segs[0].ds_addr);
- len = segs[0].ds_len;
- /* NB: this sets both frame and buffer lengths */
- npe->ix_ne_len = htobe32((len<<16) | len);
- npe->ix_ne_next = 0; /* NB: no chaining (yet) */
+ len = m->m_pkthdr.len;
+ next = npe->ix_neaddr + sizeof(npe->ix_ne[0]);
+ for (i = 0; i < nseg; i++) {
+ npe->ix_ne[i].data = htobe32(segs[i].ds_addr);
+ npe->ix_ne[i].len = htobe32((segs[i].ds_len<<16) | len);
+ npe->ix_ne[i].next = htobe32(next);
+
+ len = 0; /* zero for segments > 1 */
+ next += sizeof(npe->ix_ne[0]);
+ }
+ npe->ix_ne[i-1].next = 0; /* zero last in chain */
/* XXX flush descriptor instead of using uncached memory */
DPRINTF(sc, "%s: qwrite(%u, 0x%x) ne_data %x ne_len 0x%x\n",
__func__, sc->tx_qid, npe->ix_neaddr,
- npe->ix_ne_data, npe->ix_ne_len);
+ npe->ix_ne[0].data, npe->ix_ne[0].len);
/* stick it on the tx q */
/* XXX add vlan priority */
ixpqmgr_qwrite(sc->tx_qid, npe->ix_neaddr);
ifp->if_timer = 5;
}
+ if (sc->tx_free == NULL)
+ ifp->if_drv_flags |= IFF_DRV_OACTIVE;
}
void
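
For context on the npe_dma_setup() change above: the nsegments argument
to bus_dma_tag_create() caps how many segments a single mapping may
produce, which is what makes bus_dmamap_load_mbuf_sg() fail with EFBIG
for over-long mbuf chains in npestart(). The same call as in the hunk
above, annotated parameter by parameter:

	error = bus_dma_tag_create(
	    NULL,				/* parent tag */
	    1, 0,				/* alignment, boundary */
	    BUS_SPACE_MAXADDR_32BIT,		/* lowaddr */
	    BUS_SPACE_MAXADDR,			/* highaddr */
	    NULL, NULL,				/* filter, filterarg */
	    MCLBYTES,				/* maxsize of one mapping */
	    maxseg,				/* nsegments: NPE_MAXSEG tx, 1 rx */
	    MCLBYTES,				/* maxsegsz */
	    0,					/* flags */
	    busdma_lock_mutex, &sc->sc_mtx,	/* lockfunc, lockfuncarg */
	    &dma->mtag);			/* returned tag */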
==== //depot/projects/arm/src/sys/arm/xscale/ixp425/if_npereg.h#3 (text+ko) ====
@@ -67,16 +67,20 @@
* of the Intel code all the s/w area is free for us to use as we
* choose--only the npe area layout and alignment must be honored.
*/
+#define NPE_MAXSEG 3 /* empirically selected */
+
struct npebuf {
struct npebuf *ix_next; /* chain to next buffer */
void *ix_m; /* backpointer to mbuf */
uint32_t ix_neaddr; /* phys address of ix_ne */
bus_dmamap_t ix_map; /* bus dma map for associated data */
uint32_t ix_reserved[4];
- uint32_t ix_ne[8]; /* NPE shared area, cacheline aligned */
-#define ix_ne_next ix_ne[0] /* phys addr of next buffer */
-#define ix_ne_len ix_ne[1] /* buffer length (bytes) */
-#define ix_ne_data ix_ne[2] /* phys addr of data buffer */
+ struct { /* NPE shared area, cacheline aligned */
+ uint32_t next; /* phys addr of next segment */
+ uint32_t len; /* buffer/segment length (bytes) */
+ uint32_t data; /* phys addr of data segment */
+ uint32_t pad[5]; /* pad to cacheline */
+ } ix_ne[NPE_MAXSEG];
};
#define NPE_PORTS_MAX 3
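
Layout note: under the 32-bit ARM ABI the four pointer/word fields plus
ix_reserved[4] occupy exactly 32 bytes, so ix_ne[] starts on an XScale
cacheline boundary, and pad[5] rounds each descriptor out to one
32-byte line. Hypothetical compile-time checks (not part of this
change) that would pin the assumption down:

	#include <sys/param.h>
	#include <sys/systm.h>		/* CTASSERT */

	CTASSERT(sizeof(((struct npebuf *)0)->ix_ne[0]) == 32);
	CTASSERT(__offsetof(struct npebuf, ix_ne) == 32);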