PERFORCE change 109763 for review
Sam Leffler
sam at FreeBSD.org
Sun Nov 12 00:19:27 UTC 2006
http://perforce.freebsd.org/chv.cgi?CH=109763
Change 109763 by sam at sam_ebb on 2006/11/12 00:17:27
Revamp descriptor/buffer handling to try and improve
performance and to get second port closer to working:
o split per-packet h/w state from s/w state so the s/w state
is in cached memory
o add array of backpointers to the softc structs for npe_txdone
to locate state given an NPE ID
o assign separate rx qid's for each NPE; this allows us to
avoid switching between softc's on each packet but means
we lose interleaving of rx packets; may want to revisit this
o correct macro that extracts the NPE ID from the qmgr entry
as returned by the NPE
o change txdone processing to empty the h/w q and link done
buffers together for each device; then post them back to
the tx_free list at the very end
o eliminate sc_portid; this is no longer important; the NPE
ID is used, so we can just reference the value from
the npeconfig structure when needed
Affected files ...
.. //depot/projects/arm/src/sys/arm/xscale/ixp425/if_npe.c#20 edit
.. //depot/projects/arm/src/sys/arm/xscale/ixp425/if_npereg.h#6 edit
Differences ...
==== //depot/projects/arm/src/sys/arm/xscale/ixp425/if_npe.c#20 (text+ko) ====
@@ -68,14 +68,23 @@
#include "miibus_if.h"
+struct npebuf {
+ struct npebuf *ix_next; /* chain to next buffer */
+ void *ix_m; /* backpointer to mbuf */
+ bus_dmamap_t ix_map; /* bus dma map for associated data */
+ struct npehwbuf *ix_hw; /* associated h/w block */
+ uint32_t ix_neaddr; /* phys address of ix_hw */
+};
+
struct npedma {
const char* name;
int nbuf; /* # npebuf's allocated */
bus_dma_tag_t mtag; /* bus dma tag for mbuf data */
- struct npebuf *buf; /* NPE buffers */
+ struct npehwbuf *hwbuf; /* NPE h/w buffers */
bus_dma_tag_t buf_tag; /* tag+map for NPE buffers */
bus_dmamap_t buf_map;
bus_addr_t buf_phys; /* phys addr of buffers */
+ struct npebuf *buf; /* s/w buffers (1-1 w/ h/w) */
};
struct npe_softc {
@@ -88,7 +97,6 @@
device_t sc_mii; /* child miibus */
bus_space_handle_t sc_miih; /* MII register window */
struct ixpnpe_softc *sc_npe; /* NPE support */
- int sc_portid; /* NPE port identification */
int sc_debug; /* DPRINTF* control */
int sc_tickinterval;
struct callout tick_ch; /* Tick callout */
@@ -112,6 +120,7 @@
*/
static const struct {
const char *desc; /* device description */
+ int npeid; /* NPE assignment */
int portid; /* NPE Ethernet port */
uint32_t imageid; /* NPE firmware image id */
uint32_t regbase;
@@ -122,8 +131,9 @@
uint8_t rx_freeqid;
uint8_t tx_qid;
uint8_t tx_doneqid;
-} npeconfig[] = {
+} npeconfig[NPE_PORTS_MAX] = {
{ .desc = "IXP NPE-B",
+ .npeid = NPE_B,
.portid = 0,
.imageid = IXP425_NPE_B_IMAGEID,
.regbase = IXP425_MAC_A_HWBASE,
@@ -136,18 +146,20 @@
.tx_doneqid = 31
},
{ .desc = "IXP NPE-C",
+ .npeid = NPE_C,
.portid = 1,
.imageid = IXP425_NPE_C_IMAGEID,
.regbase = IXP425_MAC_B_HWBASE,
.regsize = IXP425_MAC_B_SIZE,
.miibase = IXP425_MAC_A_HWBASE,
.miisize = IXP425_MAC_A_SIZE,
- .rx_qid = 4,
+ .rx_qid = 12,
.rx_freeqid = 28,
.tx_qid = 25,
.tx_doneqid = 31
},
};
+static struct npe_softc *npes[NPE_MAX]; /* NB: indexed by npeid */
static __inline uint32_t
RD4(struct npe_softc *sc, bus_size_t off)
@@ -198,9 +210,8 @@
static int npe_setloopback(struct npe_softc *, int ena);
#endif
-/* NB: all tx+rx traffic goes through one queue */
+/* NB: all tx done processing goes through one queue */
static int tx_doneqid = -1;
-static int rx_qid = -1;
SYSCTL_NODE(_hw, OID_AUTO, npe, CTLFLAG_RD, 0, "IXP425 NPE driver parameters");
@@ -231,17 +242,15 @@
static int
npe_probe(device_t dev)
{
-#define N(a) (sizeof(a)/sizeof(a[0]))
int unit = device_get_unit(dev);
- if (unit >= N(npeconfig)) {
+ if (unit >= NPE_PORTS_MAX) {
device_printf(dev, "unit %d not supported\n", unit);
return EINVAL;
}
/* XXX check feature register to see if enabled */
device_set_desc(dev, npeconfig[unit].desc);
return 0;
-#undef N
}
static int
@@ -398,6 +407,7 @@
npe_dma_setup(struct npe_softc *sc, struct npedma *dma,
const char *name, int nbuf, int maxseg)
{
+ int portid = npeconfig[device_get_unit(sc->sc_dev)].portid;
int error, i;
memset(dma, 0, sizeof(dma));
@@ -406,8 +416,9 @@
dma->nbuf = nbuf;
/* DMA tag for mapped mbufs */
- error = bus_dma_tag_create(NULL, 1, 0, BUS_SPACE_MAXADDR_32BIT,
- BUS_SPACE_MAXADDR, NULL, NULL, MCLBYTES, maxseg, MCLBYTES, 0,
+ error = bus_dma_tag_create(NULL, 1, 0,
+ BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL,
+ MCLBYTES, maxseg, MCLBYTES, 0,
busdma_lock_mutex, &sc->sc_mtx, &dma->mtag);
if (error != 0) {
device_printf(sc->sc_dev, "unable to create %s mbuf dma tag, "
@@ -416,11 +427,11 @@
}
/* DMA tag and map for the NPE buffers */
- error = bus_dma_tag_create(NULL, sizeof(struct npebuf), 0,
+ error = bus_dma_tag_create(NULL, sizeof(uint32_t), 0,
BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL,
- nbuf * sizeof(struct npebuf), 1,
- nbuf * sizeof(struct npebuf), 0, busdma_lock_mutex,
- &sc->sc_mtx, &dma->buf_tag);
+ nbuf * sizeof(struct npehwbuf), 1,
+ nbuf * sizeof(struct npehwbuf), 0,
+ busdma_lock_mutex, &sc->sc_mtx, &dma->buf_tag);
if (error != 0) {
device_printf(sc->sc_dev,
"unable to create %s npebuf dma tag, error %u\n",
@@ -428,29 +439,37 @@
return error;
}
/* XXX COHERENT for now */
- if (bus_dmamem_alloc(dma->buf_tag, (void **)&dma->buf,
+ if (bus_dmamem_alloc(dma->buf_tag, (void **)&dma->hwbuf,
BUS_DMA_NOWAIT | BUS_DMA_ZERO | BUS_DMA_COHERENT,
&dma->buf_map) != 0) {
device_printf(sc->sc_dev,
- "unable to allocate memory for %s npebuf's, error %u\n",
+ "unable to allocate memory for %s h/w buffers, error %u\n",
dma->name, error);
return error;
}
+ /* XXX M_TEMP */
+ dma->buf = malloc(nbuf * sizeof(struct npebuf), M_TEMP, M_NOWAIT | M_ZERO);
+ if (dma->buf == NULL) {
+ device_printf(sc->sc_dev,
+ "unable to allocate memory for %s s/w buffers\n",
+ dma->name);
+ return error;
+ }
if (bus_dmamap_load(dma->buf_tag, dma->buf_map,
- dma->buf, nbuf * sizeof(struct npebuf), npe_getaddr, sc, 0) != 0) {
+ dma->hwbuf, nbuf*sizeof(struct npehwbuf), npe_getaddr, sc, 0) != 0) {
device_printf(sc->sc_dev,
- "unable to load memory for %s npebuf's, error %u\n",
+ "unable to map memory for %s h/w buffers, error %u\n",
dma->name, error);
return error;
}
dma->buf_phys = sc->buf_phys;
for (i = 0; i < dma->nbuf; i++) {
struct npebuf *npe = &dma->buf[i];
+ struct npehwbuf *hw = &dma->hwbuf[i];
/* calculate offset to shared area */
- npe->ix_neaddr = dma->buf_phys
- + (i * sizeof(struct npebuf))
- + offsetof(struct npebuf, ix_ne);
+ npe->ix_neaddr = dma->buf_phys +
+ ((uintptr_t)hw - (uintptr_t)dma->hwbuf);
KASSERT((npe->ix_neaddr & 0x1f) == 0,
("ixpbuf misaligned, PA 0x%x", npe->ix_neaddr));
error = bus_dmamap_create(dma->mtag, BUS_DMA_NOWAIT,
@@ -462,7 +481,8 @@
return error;
}
/* add port id once */
- npe->ix_neaddr |= sc->sc_portid << 3;
+ npe->ix_neaddr |= portid << 3;
+ npe->ix_hw = hw;
}
bus_dmamap_sync(dma->buf_tag, dma->buf_map, BUS_DMASYNC_PREWRITE);
return 0;
@@ -473,15 +493,17 @@
{
int i;
- if (dma->buf != NULL) {
+ if (dma->hwbuf != NULL) {
for (i = 0; i < dma->nbuf; i++) {
struct npebuf *npe = &dma->buf[i];
bus_dmamap_destroy(dma->mtag, npe->ix_map);
}
bus_dmamap_unload(dma->buf_tag, dma->buf_map);
- bus_dmamem_free(dma->buf_tag, dma->buf, dma->buf_map);
+ bus_dmamem_free(dma->buf_tag, dma->hwbuf, dma->buf_map);
bus_dmamap_destroy(dma->buf_tag, dma->buf_map);
}
+ if (dma->buf != NULL)
+ free(dma->buf, M_TEMP);
if (dma->buf_tag)
bus_dma_tag_destroy(dma->buf_tag);
if (dma->mtag)
@@ -496,12 +518,12 @@
int unit = device_get_unit(dev);
int error, i;
+
/* load NPE firmware and start it running */
error = ixpnpe_init(sc->sc_npe, "npe_fw", npeconfig[unit].imageid);
if (error != 0)
return error;
- sc->sc_portid = npeconfig[unit].portid;
if (bus_space_map(sc->sc_iot, npeconfig[unit].regbase,
npeconfig[unit].regsize, 0, &sc->sc_ioh)) {
device_printf(dev, "Cannot map registers 0x%x:0x%x\n",
@@ -532,9 +554,9 @@
return error;
/* setup statistics block */
- error = bus_dma_tag_create(NULL, 4, 0, BUS_SPACE_MAXADDR_32BIT,
- BUS_SPACE_MAXADDR, NULL, NULL, sizeof(struct npestats), 1,
- sizeof(struct npestats), 0,
+ error = bus_dma_tag_create(NULL, sizeof(uint32_t), 0,
+ BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL,
+ sizeof(struct npestats), 1, sizeof(struct npestats), 0,
busdma_lock_mutex, &sc->sc_mtx, &sc->sc_stats_tag);
if (error != 0) {
device_printf(sc->sc_dev, "unable to create stats tag, "
@@ -576,12 +598,9 @@
* changed at the time the q is configured.
*/
sc->rx_qid = npeconfig[unit].rx_qid;
+ ixpqmgr_qconfig(sc->rx_qid, npe_rxbuf, 0, 1,
+ IX_QMGR_Q_SOURCE_ID_NOT_E, npe_rxdone, sc);
sc->rx_freeqid = npeconfig[unit].rx_freeqid;
- if (rx_qid == -1) {
- ixpqmgr_qconfig(sc->rx_qid, npe_rxbuf, 0, 1,
- IX_QMGR_Q_SOURCE_ID_NOT_E, npe_rxdone, sc);
- rx_qid = sc->rx_qid;
- }
ixpqmgr_qconfig(sc->rx_freeqid, npe_rxbuf, 0, npe_rxbuf/2, 0, NULL, sc);
/* tell the NPE to direct all traffic to rx_qid */
#if 0
@@ -601,6 +620,10 @@
tx_doneqid = sc->tx_doneqid;
}
+ KASSERT(npes[npeconfig[unit].npeid] == NULL,
+ ("npe %u already setup", npeconfig[unit].npeid));
+ npes[npeconfig[unit].npeid] = sc;
+
return 0;
}
@@ -608,7 +631,10 @@
npe_deactivate(device_t dev)
{
struct npe_softc *sc = device_get_softc(dev);
+ int unit = device_get_unit(dev);
+ npes[npeconfig[unit].npeid] = NULL;
+
/* XXX disable q's */
if (sc->sc_npe != NULL)
ixpnpe_stop(sc->sc_npe);
@@ -756,43 +782,73 @@
eaddr[5] = RD4(sc, NPE_MAC_UNI_ADDR_6) & 0xff;
}
+struct txdone {
+ struct npebuf *head;
+ struct npebuf **tail;
+ int count;
+};
+
+static __inline void
+npe_txdone_finish(struct npe_softc *sc, const struct txdone *td)
+{
+ struct ifnet *ifp = sc->sc_ifp;
+
+ NPE_LOCK(sc);
+ *td->tail = sc->tx_free;
+ sc->tx_free = td->head;
+ /*
+ * We're no longer busy, so clear the busy flag and call the
+ * start routine to xmit more packets.
+ */
+ ifp->if_opackets += td->count;
+ ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
+ ifp->if_timer = 0;
+ npestart_locked(ifp);
+ NPE_UNLOCK(sc);
+}
+
/*
* Q manager callback on tx done queue. Reap mbufs
* and return tx buffers to the free list. Finally
- * restart output.
- * XXX multiple NPE's
+ * restart output. Note the microcode has only one
+ * txdone q wired into it so we must use the port id
+ * returned with each npebuf to decide where to send
+ * buffers.
*/
static void
npe_txdone(int qid, void *arg)
{
-/* NB: the / handles the offset to ix_ne */
-#define P2V(a) &dma->buf[((a) - dma->buf_phys) / sizeof(struct npebuf)]
- struct npe_softc *sc = arg;
- struct ifnet *ifp = sc->sc_ifp;
- struct npedma *dma = &sc->txdma;
+#define P2V(a, dma) \
+ &(dma)->buf[((a) - (dma)->buf_phys) / sizeof(struct npehwbuf)]
+ struct npe_softc *sc0 = arg;
+ struct npe_softc *sc;
+ struct npebuf *npe;
+ struct txdone *td, q[NPE_MAX];
uint32_t entry;
- NPE_LOCK(sc);
+ /* XXX no NPE-A support */
+ q[NPE_B].tail = &q[NPE_B].head; q[NPE_B].count = 0;
+ q[NPE_C].tail = &q[NPE_C].head; q[NPE_C].count = 0;
/* XXX max # at a time? */
while (ixpqmgr_qread(qid, &entry) == 0) {
- struct npebuf *npe = P2V(NPE_QM_Q_ADDR(entry));
+ DPRINTF(sc0, "%s: entry 0x%x NPE %u port %u\n",
+ __func__, entry, NPE_QM_Q_NPE(entry), NPE_QM_Q_PORT(entry));
- DPRINTF(sc, "%s: entry 0x%x ne_addr 0x%x\n",
- __func__, entry, npe->ix_neaddr);
+ sc = npes[NPE_QM_Q_NPE(entry)];
+ npe = P2V(NPE_QM_Q_ADDR(entry), &sc->txdma);
m_freem(npe->ix_m);
npe->ix_m = NULL;
- npe->ix_next = sc->tx_free;
- sc->tx_free = npe;
- ifp->if_opackets++;
+
+ td = &q[NPE_QM_Q_NPE(entry)];
+ *td->tail = npe;
+ td->tail = &npe->ix_next;
+ td->count++;
}
- /*
- * We're no longer busy, so clear the busy flag and call the
- * start routine to xmit more packets.
- */
- ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
- ifp->if_timer = 0;
- npestart_locked(ifp);
- NPE_UNLOCK(sc);
+
+ if (q[NPE_B].count)
+ npe_txdone_finish(npes[NPE_B], &q[NPE_B]);
+ if (q[NPE_C].count)
+ npe_txdone_finish(npes[NPE_C], &q[NPE_C]);
#undef P2V
}
@@ -801,6 +857,7 @@
{
bus_dma_segment_t segs[1];
struct npedma *dma = &sc->rxdma;
+ struct npehwbuf *hw;
int error, nseg;
if (m == NULL) {
@@ -819,11 +876,12 @@
m_freem(m);
return error;
}
- npe->ix_ne[0].data = htobe32(segs[0].ds_addr);
+ hw = npe->ix_hw;
+ hw->ix_ne[0].data = htobe32(segs[0].ds_addr);
/* NB: NPE requires length be a multiple of 64 */
/* NB: buffer length is shifted in word */
- npe->ix_ne[0].len = htobe32(segs[0].ds_len << 16);
- npe->ix_ne[0].next = 0;
+ hw->ix_ne[0].len = htobe32(segs[0].ds_len << 16);
+ hw->ix_ne[0].next = 0;
npe->ix_m = m;
/* Flush the memory in the mbuf */
bus_dmamap_sync(dma->mtag, npe->ix_map, BUS_DMASYNC_PREREAD);
@@ -831,26 +889,25 @@
}
/*
- * Q manager callback on rx. Claim entries from the
- * hardware queue and pass the frames up the stack.
- * Pass the rx buffers to the free list.
+ * RX q processing for a specific NPE. Claim entries
+ * from the hardware queue and pass the frames up the
+ * stack. Pass the rx buffers to the free list.
*/
static void
npe_rxdone(int qid, void *arg)
{
-#define P2V(a) &dma->buf[((a) - dma->buf_phys) / sizeof(struct npebuf)]
+#define P2V(a, dma) \
+ &(dma)->buf[((a) - (dma)->buf_phys) / sizeof(struct npehwbuf)]
struct npe_softc *sc = arg;
- struct ifnet *ifp = sc->sc_ifp;
struct npedma *dma = &sc->rxdma;
uint32_t entry;
- bus_dmamap_sync(dma->buf_tag, dma->buf_map, BUS_DMASYNC_POSTREAD);
while (ixpqmgr_qread(qid, &entry) == 0) {
- struct npebuf *npe = P2V(NPE_QM_Q_ADDR(entry));
+ struct npebuf *npe = P2V(NPE_QM_Q_ADDR(entry), dma);
struct mbuf *m;
DPRINTF(sc, "%s: entry 0x%x neaddr 0x%x ne_len 0x%x\n",
- __func__, entry, npe->ix_neaddr, npe->ix_ne[0].len);
+ __func__, entry, npe->ix_neaddr, npe->ix_hw->ix_ne[0].len);
/*
* Allocate a new mbuf to replenish the rx buffer.
* If doing so fails we drop the rx'd frame so we
@@ -862,13 +919,16 @@
m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
if (m != NULL) {
struct mbuf *mrx = npe->ix_m;
+ struct npehwbuf *hw = npe->ix_hw;
+ struct ifnet *ifp = sc->sc_ifp;
/* Flush mbuf memory for rx'd data */
bus_dmamap_sync(dma->mtag, npe->ix_map,
BUS_DMASYNC_POSTREAD);
+ /* XXX flush hw buffer; works now 'cuz coherent */
/* set m_len etc. per rx frame size */
- mrx->m_len = be32toh(npe->ix_ne[0].len) & 0xffff;
+ mrx->m_len = be32toh(hw->ix_ne[0].len) & 0xffff;
mrx->m_pkthdr.len = mrx->m_len;
mrx->m_pkthdr.rcvif = ifp;
mrx->m_flags |= M_HASFCS;
@@ -1100,6 +1160,7 @@
{
struct npe_softc *sc = ifp->if_softc;
struct npebuf *npe;
+ struct npehwbuf *hw;
struct mbuf *m, *n;
struct npedma *dma = &sc->txdma;
bus_dma_segment_t segs[NPE_MAXSEG];
@@ -1149,22 +1210,23 @@
BPF_MTAP(ifp, m);
npe->ix_m = m;
+ hw = npe->ix_hw;
len = m->m_pkthdr.len;
- next = npe->ix_neaddr + sizeof(npe->ix_ne[0]);
+ next = npe->ix_neaddr + sizeof(hw->ix_ne[0]);
for (i = 0; i < nseg; i++) {
- npe->ix_ne[i].data = htobe32(segs[i].ds_addr);
- npe->ix_ne[i].len = htobe32((segs[i].ds_len<<16) | len);
- npe->ix_ne[i].next = htobe32(next);
+ hw->ix_ne[i].data = htobe32(segs[i].ds_addr);
+ hw->ix_ne[i].len = htobe32((segs[i].ds_len<<16) | len);
+ hw->ix_ne[i].next = htobe32(next);
len = 0; /* zero for segments > 1 */
- next += sizeof(npe->ix_ne[0]);
+ next += sizeof(hw->ix_ne[0]);
}
- npe->ix_ne[i-1].next = 0; /* zero last in chain */
+ hw->ix_ne[i-1].next = 0; /* zero last in chain */
/* XXX flush descriptor instead of using uncached memory */
DPRINTF(sc, "%s: qwrite(%u, 0x%x) ne_data %x ne_len 0x%x\n",
__func__, sc->tx_qid, npe->ix_neaddr,
- npe->ix_ne[0].data, npe->ix_ne[0].len);
+ hw->ix_ne[0].data, hw->ix_ne[0].len);
/* stick it on the tx q */
/* XXX add vlan priority */
ixpqmgr_qwrite(sc->tx_qid, npe->ix_neaddr);
@@ -1320,9 +1382,10 @@
static int
npe_setrxqosentry(struct npe_softc *sc, int classix, int trafclass, int qid)
{
+ int portid = npeconfig[device_get_unit(sc->sc_dev)].portid;
uint32_t msg[2];
- msg[0] = (NPE_SETRXQOSENTRY << 24) | (sc->sc_portid << 16) | classix;
+ msg[0] = (NPE_SETRXQOSENTRY << 24) | (portid << 16) | classix;
msg[1] = (trafclass << 24) | (1 << 23) | (qid << 16) | (qid << 4);
return ixpnpe_sendandrecvmsg(sc->sc_npe, msg, msg);
}
==== //depot/projects/arm/src/sys/arm/xscale/ixp425/if_npereg.h#6 (text+ko) ====
@@ -73,12 +73,7 @@
*/
#define NPE_MAXSEG 3 /* empirically selected */
-struct npebuf {
- struct npebuf *ix_next; /* chain to next buffer */
- void *ix_m; /* backpointer to mbuf */
- uint32_t ix_neaddr; /* phys address of ix_ne */
- bus_dmamap_t ix_map; /* bus dma map for associated data */
- uint32_t ix_reserved[4];
+struct npehwbuf {
struct { /* NPE shared area, cacheline aligned */
uint32_t next; /* phys addr of next segment */
uint32_t len; /* buffer/segment length (bytes) */
@@ -87,7 +82,13 @@
} ix_ne[NPE_MAXSEG];
};
-#define NPE_PORTS_MAX 3
+/* NPE ID's */
+#define NPE_A 0
+#define NPE_B 1
+#define NPE_C 2
+#define NPE_MAX (NPE_C+1)
+
+#define NPE_PORTS_MAX 2 /* logical ports */
#define NPE_FRAME_SIZE_DEFAULT 1536
#define NPE_FRAME_SIZE_MAX (65536-64)
#define NPE_FRAME_SIZE_MIN 64
@@ -98,7 +99,7 @@
* These define the layout of 32-bit Q entries passed
* between the host cpu and the NPE's.
*/
-#define NPE_QM_Q_NPE(e) (((e)>>0)&0x1) /* NPE ID */
+#define NPE_QM_Q_NPE(e) (((e)>>0)&0x3) /* NPE ID */
#define NPE_QM_Q_PORT(e) (((e)>>3)&0x1) /* Port ID */
#define NPE_QM_Q_PRIO(e) (((e)>>0)&0x3) /* 802.1d priority */
#define NPE_QM_Q_ADDR(e) ((e)&0xfffffffe0) /* phys address */
More information about the p4-projects
mailing list