svn commit: r310860 - stable/11/sys/arm/ti/cpsw

Luiz Otavio O Souza loos at FreeBSD.org
Fri Dec 30 20:43:01 UTC 2016


Author: loos
Date: Fri Dec 30 20:43:00 2016
New Revision: 310860
URL: https://svnweb.freebsd.org/changeset/base/310860

Log:
  MFC r306717:
  
  if_cpsw overhaul:
  - Fix RX and TX teardown:
    . TX teardown would not reclaim the abandoned descriptors;
    . Interrupt storms in RX teardown;
    . Fixed the acknowledge of the teardown completion interrupt.
  
  - Remove temporary lists for the descriptors;
  
  - Simplified the descriptor handling (less writes and reads from
    descriptors where possible);
  
  - Better debug;
  
  - Add support for the RX threshold interrupts:
    With interrupt moderation only, an RX overrun is likely to happen.  The
    RX threshold is set to trigger a non-paced interrupt every time the RX
    free buffers are under the minimum threshold, helping to prevent the RX
    overrun.
  
  The NIC now survives when pushed over its limits (where previously it would
  lock up in a few seconds).
  
  uFW (600MHz SoC) can now forward up to 560Mb/s of UDP traffic (netmap
  pkt-gen as source and sink).  TCP forwarding rate is over 350Mb/s.
  
  No difference (other than CPU use) was seen on Beaglebone Black (1GHz SoC)
  with its Fast Ethernet.
  
  Tested on:	uFW, BBB
  Sponsored by:	Rubicon Communications, LLC (Netgate)

Modified:
  stable/11/sys/arm/ti/cpsw/if_cpsw.c
  stable/11/sys/arm/ti/cpsw/if_cpswreg.h
  stable/11/sys/arm/ti/cpsw/if_cpswvar.h
Directory Properties:
  stable/11/   (props changed)

Modified: stable/11/sys/arm/ti/cpsw/if_cpsw.c
==============================================================================
--- stable/11/sys/arm/ti/cpsw/if_cpsw.c	Fri Dec 30 20:40:25 2016	(r310859)
+++ stable/11/sys/arm/ti/cpsw/if_cpsw.c	Fri Dec 30 20:43:00 2016	(r310860)
@@ -336,6 +336,8 @@ cpsw_debugf(const char *fmt, ...)
 	bus_write_region_4(sc->mem_res, slot->bd_offset, (uint32_t *) val, 4)
 #define	cpsw_cpdma_write_bd_next(sc, slot, next_slot)			\
 	cpsw_write_4(sc, slot->bd_offset, cpsw_cpdma_bd_paddr(sc, next_slot))
+#define	cpsw_cpdma_write_bd_flags(sc, slot, val)			\
+	bus_write_2(sc->mem_res, slot->bd_offset + 14, val)
 #define	cpsw_cpdma_read_bd_flags(sc, slot)				\
 	bus_read_2(sc->mem_res, slot->bd_offset + 14)
 #define	cpsw_write_hdp_slot(sc, queue, slot)				\
@@ -383,7 +385,8 @@ cpsw_dump_slot(struct cpsw_softc *sc, st
 	int i;
 
 	cpsw_cpdma_read_bd(sc, slot, &bd);
-	printf("BD Addr: 0x%08x   Next: 0x%08x\n", cpsw_cpdma_bd_paddr(sc, slot), bd.next);
+	printf("BD Addr : 0x%08x   Next  : 0x%08x\n",
+	    cpsw_cpdma_bd_paddr(sc, slot), bd.next);
 	printf("  BufPtr: 0x%08x   BufLen: 0x%08x\n", bd.bufptr, bd.buflen);
 	printf("  BufOff: 0x%08x   PktLen: 0x%08x\n", bd.bufoff, bd.pktlen);
 	printf("  Flags: ");
@@ -417,7 +420,7 @@ cpsw_dump_queue(struct cpsw_softc *sc, s
 	int others = 0;
 
 	STAILQ_FOREACH(slot, q, next) {
-		if (i > 4)
+		if (i > CPSW_TXFRAGS)
 			++others;
 		else
 			cpsw_dump_slot(sc, slot);
@@ -581,16 +584,14 @@ cpsw_init(struct cpsw_softc *sc)
 	/* Enable statistics for ports 0, 1 and 2 */
 	cpsw_write_4(sc, CPSW_SS_STAT_PORT_EN, 7);
 
-	/* Experiment:  Turn off flow control */
-	/* This seems to fix the watchdog resets that have plagued
-	   earlier versions of this driver; I'm not yet sure if there
-	   are negative effects yet. */
+	/* Turn off flow control. */
 	cpsw_write_4(sc, CPSW_SS_FLOW_CONTROL, 0);
 
 	/* Make IP hdr aligned with 4 */
 	cpsw_write_4(sc, CPSW_CPDMA_RX_BUFFER_OFFSET, 2);
 
 	/* Initialize RX Buffer Descriptors */
+	cpsw_write_4(sc, CPSW_CPDMA_RX_PENDTHRESH(0), 0);
 	cpsw_write_4(sc, CPSW_CPDMA_RX_FREEBUFFER(0), 0);
 
 	/* Enable TX & RX DMA */
@@ -607,7 +608,8 @@ cpsw_init(struct cpsw_softc *sc)
 	cpsw_write_4(sc, CPSW_CPDMA_DMA_INTMASK_SET, 3);
 
 	/* Enable interrupts for RX and TX on Channel 0 */
-	cpsw_write_4(sc, CPSW_CPDMA_RX_INTMASK_SET, 1);
+	cpsw_write_4(sc, CPSW_CPDMA_RX_INTMASK_SET,
+	    CPSW_CPDMA_RX_INT(0) | CPSW_CPDMA_RX_INT_THRESH(0));
 	cpsw_write_4(sc, CPSW_CPDMA_TX_INTMASK_SET, 1);
 
 	/* Initialze MDIO - ENABLE, PREAMBLE=0, FAULTENB, CLKDIV=0xFF */
@@ -625,6 +627,8 @@ cpsw_init(struct cpsw_softc *sc)
 	if (slot != NULL)
 		cpsw_write_hdp_slot(sc, &sc->rx, slot);
 	cpsw_rx_enqueue(sc);
+	cpsw_write_4(sc, CPSW_CPDMA_RX_FREEBUFFER(0), sc->rx.active_queue_len);
+	cpsw_write_4(sc, CPSW_CPDMA_RX_PENDTHRESH(0), CPSW_TXFRAGS);
 
 	/* Activate network interface. */
 	sc->rx.running = 1;
@@ -1154,58 +1158,52 @@ cpsw_shutdown(device_t dev)
 }
 
 static void
-cpsw_rx_teardown_locked(struct cpsw_softc *sc)
+cpsw_rx_teardown(struct cpsw_softc *sc)
 {
-	struct ifnet *ifp;
-	struct mbuf *received, *next;
 	int i = 0;
 
+	CPSW_RX_LOCK(sc);
 	CPSW_DEBUGF(sc, ("starting RX teardown"));
+	sc->rx.teardown = 1;
 	cpsw_write_4(sc, CPSW_CPDMA_RX_TEARDOWN, 0);
-	for (;;) {
-		received = cpsw_rx_dequeue(sc);
-		CPSW_RX_UNLOCK(sc);
-		while (received != NULL) {
-			next = received->m_nextpkt;
-			received->m_nextpkt = NULL;
-			ifp = received->m_pkthdr.rcvif;
-			(*ifp->if_input)(ifp, received);
-			if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
-			received = next;
-		}
-		CPSW_RX_LOCK(sc);
-		if (!sc->rx.running) {
-			CPSW_DEBUGF(sc,
-			    ("finished RX teardown (%d retries)", i));
-			return;
-		}
+	CPSW_RX_UNLOCK(sc);
+	while (sc->rx.running) {
 		if (++i > 10) {
 			device_printf(sc->dev,
 			    "Unable to cleanly shutdown receiver\n");
 			return;
 		}
-		DELAY(10);
+		DELAY(200);
 	}
+	if (!sc->rx.running)
+		CPSW_DEBUGF(sc, ("finished RX teardown (%d retries)", i));
 }
 
 static void
-cpsw_tx_teardown_locked(struct cpsw_softc *sc)
+cpsw_tx_teardown(struct cpsw_softc *sc)
 {
 	int i = 0;
 
+	CPSW_TX_LOCK(sc);
 	CPSW_DEBUGF(sc, ("starting TX teardown"));
-	cpsw_write_4(sc, CPSW_CPDMA_TX_TEARDOWN, 0);
+	/* Start the TX queue teardown if queue is not empty. */
+	if (STAILQ_FIRST(&sc->tx.active) != NULL)
+		cpsw_write_4(sc, CPSW_CPDMA_TX_TEARDOWN, 0);
+	else
+		sc->tx.teardown = 1;
 	cpsw_tx_dequeue(sc);
 	while (sc->tx.running && ++i < 10) {
-		DELAY(10);
+		DELAY(200);
 		cpsw_tx_dequeue(sc);
 	}
 	if (sc->tx.running) {
 		device_printf(sc->dev,
 		    "Unable to cleanly shutdown transmitter\n");
 	}
-	CPSW_DEBUGF(sc, ("finished TX teardown (%d retries, %d idle buffers)",
-	    i, sc->tx.active_queue_len));
+	CPSW_DEBUGF(sc,
+	    ("finished TX teardown (%d retries, %d idle buffers)", i,
+	     sc->tx.active_queue_len));
+	CPSW_TX_UNLOCK(sc);
 }
 
 static void
@@ -1230,12 +1228,8 @@ cpswp_stop_locked(struct cpswp_softc *sc
 
 	/* Tear down the RX/TX queues. */
 	if (cpsw_ports_down(sc->swsc)) {
-		CPSW_RX_LOCK(sc->swsc);
-		cpsw_rx_teardown_locked(sc->swsc);
-		CPSW_RX_UNLOCK(sc->swsc);
-		CPSW_TX_LOCK(sc->swsc);
-		cpsw_tx_teardown_locked(sc->swsc);
-		CPSW_TX_UNLOCK(sc->swsc);
+		cpsw_rx_teardown(sc->swsc);
+		cpsw_tx_teardown(sc->swsc);
 	}
 
 	/* Stop MAC RX/TX modules. */
@@ -1350,12 +1344,11 @@ cpswp_ioctl(struct ifnet *ifp, u_long co
 					    ifp->if_flags & IFF_ALLMULTI);
 			} else {
 				CPSW_DEBUGF(sc->swsc,
-				    ("SIOCSIFFLAGS: UP but not RUNNING; starting up"));
+				    ("SIOCSIFFLAGS: starting up"));
 				cpswp_init_locked(sc);
 			}
 		} else if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
-			CPSW_DEBUGF(sc->swsc,
-			    ("SIOCSIFFLAGS: not UP but RUNNING; shutting down"));
+			CPSW_DEBUGF(sc->swsc, ("SIOCSIFFLAGS: shutting down"));
 			cpswp_stop_locked(sc);
 		}
 
@@ -1498,16 +1491,22 @@ cpswp_miibus_statchg(device_t dev)
 static void
 cpsw_intr_rx(void *arg)
 {
-	struct cpsw_softc *sc = arg;
+	struct cpsw_softc *sc;
 	struct ifnet *ifp;
 	struct mbuf *received, *next;
 
+	sc = (struct cpsw_softc *)arg;
 	CPSW_RX_LOCK(sc);
+	if (sc->rx.teardown) {
+		sc->rx.running = 0;
+		sc->rx.teardown = 0;
+		cpsw_write_cp(sc, &sc->rx, 0xfffffffc);
+	}
 	received = cpsw_rx_dequeue(sc);
 	cpsw_rx_enqueue(sc);
 	cpsw_write_4(sc, CPSW_CPDMA_CPDMA_EOI_VECTOR, 1);
 	CPSW_RX_UNLOCK(sc);
-	
+
 	while (received != NULL) {
 		next = received->m_nextpkt;
 		received->m_nextpkt = NULL;
@@ -1522,20 +1521,28 @@ static struct mbuf *
 cpsw_rx_dequeue(struct cpsw_softc *sc)
 {
 	struct cpsw_cpdma_bd bd;
-	struct cpsw_slot *slot;
+	struct cpsw_slot *last, *slot;
 	struct cpswp_softc *psc;
 	struct mbuf *mb_head, *mb_tail;
 	int port, removed = 0;
 
+	last = NULL;
 	mb_head = mb_tail = NULL;
 
 	/* Pull completed packets off hardware RX queue. */
 	while ((slot = STAILQ_FIRST(&sc->rx.active)) != NULL) {
 		cpsw_cpdma_read_bd(sc, slot, &bd);
-		if (bd.flags & CPDMA_BD_OWNER)
-			break; /* Still in use by hardware */
 
-		CPSW_DEBUGF(sc, ("Removing received packet from RX queue"));
+		/*
+		 * Stop on packets still in use by hardware, but do not stop
+		 * on packets with the teardown complete flag, they will be
+		 * discarded later.
+		 */
+		if ((bd.flags & (CPDMA_BD_OWNER | CPDMA_BD_TDOWNCMPLT)) ==
+		    CPDMA_BD_OWNER)
+			break;
+
+		last = slot;
 		++removed;
 		STAILQ_REMOVE_HEAD(&sc->rx.active, next);
 		STAILQ_INSERT_TAIL(&sc->rx.avail, slot, next);
@@ -1544,16 +1551,14 @@ cpsw_rx_dequeue(struct cpsw_softc *sc)
 		bus_dmamap_unload(sc->mbuf_dtag, slot->dmamap);
 
 		if (bd.flags & CPDMA_BD_TDOWNCMPLT) {
-			CPSW_DEBUGF(sc, ("RX teardown in progress"));
+			CPSW_DEBUGF(sc, ("RX teardown is complete"));
 			m_freem(slot->mbuf);
 			slot->mbuf = NULL;
-			cpsw_write_cp(sc, &sc->rx, 0xfffffffc);
 			sc->rx.running = 0;
+			sc->rx.teardown = 0;
 			break;
 		}
 
-		cpsw_write_cp_slot(sc, &sc->rx, slot);
-
 		port = (bd.flags & CPDMA_BD_PORT_MASK) - 1;
 		KASSERT(port >= 0 && port <= 1,
 		    ("patcket received with invalid port: %d", port));
@@ -1586,15 +1591,20 @@ cpsw_rx_dequeue(struct cpsw_softc *sc)
 		}
 		mb_tail = slot->mbuf;
 		slot->mbuf = NULL;
+		if (sc->rx_batch > 0 && sc->rx_batch == removed)
+			break;
 	}
 
 	if (removed != 0) {
+		cpsw_write_cp_slot(sc, &sc->rx, last);
 		sc->rx.queue_removes += removed;
-		sc->rx.active_queue_len -= removed;
 		sc->rx.avail_queue_len += removed;
+		sc->rx.active_queue_len -= removed;
 		if (sc->rx.avail_queue_len > sc->rx.max_avail_queue_len)
 			sc->rx.max_avail_queue_len = sc->rx.avail_queue_len;
+		CPSW_DEBUGF(sc, ("Removed %d received packet(s) from RX queue", removed));
 	}
+
 	return (mb_head);
 }
 
@@ -1603,13 +1613,16 @@ cpsw_rx_enqueue(struct cpsw_softc *sc)
 {
 	bus_dma_segment_t seg[1];
 	struct cpsw_cpdma_bd bd;
-	struct cpsw_slots tmpqueue = STAILQ_HEAD_INITIALIZER(tmpqueue);
-	struct cpsw_slot *slot, *prev_slot = NULL;
-	struct cpsw_slot *last_old_slot, *first_new_slot;
+	struct cpsw_slot *first_new_slot, *last_old_slot, *next, *slot;
 	int error, nsegs, added = 0;
+	uint32_t flags;
 
 	/* Register new mbufs with hardware. */
+	first_new_slot = NULL;
+	last_old_slot = STAILQ_LAST(&sc->rx.active, cpsw_slot, next);
 	while ((slot = STAILQ_FIRST(&sc->rx.avail)) != NULL) {
+		if (first_new_slot == NULL)
+			first_new_slot = slot;
 		if (slot->mbuf == NULL) {
 			slot->mbuf = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
 			if (slot->mbuf == NULL) {
@@ -1639,8 +1652,11 @@ cpsw_rx_enqueue(struct cpsw_softc *sc)
 
 		bus_dmamap_sync(sc->mbuf_dtag, slot->dmamap, BUS_DMASYNC_PREREAD);
 
-		/* Create and submit new rx descriptor*/
-		bd.next = 0;
+		/* Create and submit new rx descriptor. */
+		if ((next = STAILQ_NEXT(slot, next)) != NULL)
+			bd.next = cpsw_cpdma_bd_paddr(sc, next);
+		else
+			bd.next = 0;
 		bd.bufptr = seg->ds_addr;
 		bd.bufoff = 0;
 		bd.buflen = MCLBYTES - 1;
@@ -1649,38 +1665,35 @@ cpsw_rx_enqueue(struct cpsw_softc *sc)
 		cpsw_cpdma_write_bd(sc, slot, &bd);
 		++added;
 
-		if (prev_slot != NULL)
-			cpsw_cpdma_write_bd_next(sc, prev_slot, slot);
-		prev_slot = slot;
 		STAILQ_REMOVE_HEAD(&sc->rx.avail, next);
-		sc->rx.avail_queue_len--;
-		STAILQ_INSERT_TAIL(&tmpqueue, slot, next);
+		STAILQ_INSERT_TAIL(&sc->rx.active, slot, next);
 	}
 
-	if (added == 0)
+	if (added == 0 || first_new_slot == NULL)
 		return;
 
 	CPSW_DEBUGF(sc, ("Adding %d buffers to RX queue", added));
 
 	/* Link new entries to hardware RX queue. */
-	last_old_slot = STAILQ_LAST(&sc->rx.active, cpsw_slot, next);
-	first_new_slot = STAILQ_FIRST(&tmpqueue);
-	STAILQ_CONCAT(&sc->rx.active, &tmpqueue);
-	if (first_new_slot == NULL) {
-		return;
-	} else if (last_old_slot == NULL) {
+	if (last_old_slot == NULL) {
 		/* Start a fresh queue. */
 		cpsw_write_hdp_slot(sc, &sc->rx, first_new_slot);
 	} else {
 		/* Add buffers to end of current queue. */
 		cpsw_cpdma_write_bd_next(sc, last_old_slot, first_new_slot);
 		/* If underrun, restart queue. */
-		if (cpsw_cpdma_read_bd_flags(sc, last_old_slot) & CPDMA_BD_EOQ) {
+		if ((flags = cpsw_cpdma_read_bd_flags(sc, last_old_slot)) &
+		    CPDMA_BD_EOQ) {
+			flags &= ~CPDMA_BD_EOQ;
+			cpsw_cpdma_write_bd_flags(sc, last_old_slot, flags);
 			cpsw_write_hdp_slot(sc, &sc->rx, first_new_slot);
+			sc->rx.queue_restart++;
 		}
 	}
 	sc->rx.queue_adds += added;
+	sc->rx.avail_queue_len -= added;
 	sc->rx.active_queue_len += added;
+	cpsw_write_4(sc, CPSW_CPDMA_RX_FREEBUFFER(0), sc->rx.active_queue_len);
 	if (sc->rx.active_queue_len > sc->rx.max_active_queue_len) {
 		sc->rx.max_active_queue_len = sc->rx.active_queue_len;
 	}
@@ -1689,13 +1702,16 @@ cpsw_rx_enqueue(struct cpsw_softc *sc)
 static void
 cpswp_start(struct ifnet *ifp)
 {
-	struct cpswp_softc *sc = ifp->if_softc;
+	struct cpswp_softc *sc;
 
-	CPSW_TX_LOCK(sc->swsc);
-	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) && sc->swsc->tx.running) {
-		cpswp_tx_enqueue(sc);
-		cpsw_tx_dequeue(sc->swsc);
+	sc = ifp->if_softc;
+	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
+	    sc->swsc->tx.running == 0) {
+		return;
 	}
+	CPSW_TX_LOCK(sc->swsc);
+	cpswp_tx_enqueue(sc);
+	cpsw_tx_dequeue(sc->swsc);
 	CPSW_TX_UNLOCK(sc->swsc);
 }
 
@@ -1706,6 +1722,8 @@ cpsw_intr_tx(void *arg)
 
 	sc = (struct cpsw_softc *)arg;
 	CPSW_TX_LOCK(sc);
+	if (cpsw_read_4(sc, CPSW_CPDMA_TX_CP(0)) == 0xfffffffc)
+		cpsw_write_cp(sc, &sc->tx, 0xfffffffc);
 	cpsw_tx_dequeue(sc);
 	cpsw_write_4(sc, CPSW_CPDMA_CPDMA_EOI_VECTOR, 2);
 	CPSW_TX_UNLOCK(sc);
@@ -1716,9 +1734,7 @@ cpswp_tx_enqueue(struct cpswp_softc *sc)
 {
 	bus_dma_segment_t segs[CPSW_TXFRAGS];
 	struct cpsw_cpdma_bd bd;
-	struct cpsw_slots tmpqueue = STAILQ_HEAD_INITIALIZER(tmpqueue);
-	struct cpsw_slot *slot, *prev_slot = NULL;
-	struct cpsw_slot *last_old_slot, *first_new_slot;
+	struct cpsw_slot *first_new_slot, *last, *last_old_slot, *next, *slot;
 	struct mbuf *m0;
 	int error, flags, nsegs, seg, added = 0, padlen;
 
@@ -1728,6 +1744,9 @@ cpswp_tx_enqueue(struct cpswp_softc *sc)
 		    ((sc->unit + 1) & CPDMA_BD_PORT_MASK);
 	}
 	/* Pull pending packets from IF queue and prep them for DMA. */
+	last = NULL;
+	first_new_slot = NULL;
+	last_old_slot = STAILQ_LAST(&sc->swsc->tx.active, cpsw_slot, next);
 	while ((slot = STAILQ_FIRST(&sc->swsc->tx.avail)) != NULL) {
 		IF_DEQUEUE(&sc->ifp->if_snd, m0);
 		if (m0 == NULL)
@@ -1779,12 +1798,24 @@ cpswp_tx_enqueue(struct cpswp_softc *sc)
 		    ("Queueing TX packet: %d segments + %d pad bytes",
 		    nsegs, padlen));
 
+		if (first_new_slot == NULL)
+			first_new_slot = slot;
+
+		/* Link from the previous descriptor. */
+		if (last != NULL)
+			cpsw_cpdma_write_bd_next(sc->swsc, last, slot);
+
 		slot->ifp = sc->ifp;
+
 		/* If there is only one segment, the for() loop
 		 * gets skipped and the single buffer gets set up
 		 * as both SOP and EOP. */
-		/* Start by setting up the first buffer */
-		bd.next = 0;
+		if (nsegs > 1) {
+			next = STAILQ_NEXT(slot, next);
+			bd.next = cpsw_cpdma_bd_paddr(sc->swsc, next);
+		} else
+			bd.next = 0;
+		/* Start by setting up the first buffer. */
 		bd.bufptr = segs[0].ds_addr;
 		bd.bufoff = 0;
 		bd.buflen = segs[0].ds_len;
@@ -1793,19 +1824,16 @@ cpswp_tx_enqueue(struct cpswp_softc *sc)
 		for (seg = 1; seg < nsegs; ++seg) {
 			/* Save the previous buffer (which isn't EOP) */
 			cpsw_cpdma_write_bd(sc->swsc, slot, &bd);
-			if (prev_slot != NULL) {
-				cpsw_cpdma_write_bd_next(sc->swsc, prev_slot,
-				    slot);
-			}
-			prev_slot = slot;
 			STAILQ_REMOVE_HEAD(&sc->swsc->tx.avail, next);
-			sc->swsc->tx.avail_queue_len--;
-			STAILQ_INSERT_TAIL(&tmpqueue, slot, next);
-			++added;
+			STAILQ_INSERT_TAIL(&sc->swsc->tx.active, slot, next);
 			slot = STAILQ_FIRST(&sc->swsc->tx.avail);
 
 			/* Setup next buffer (which isn't SOP) */
-			bd.next = 0;
+			if (nsegs > seg + 1) {
+				next = STAILQ_NEXT(slot, next);
+				bd.next = cpsw_cpdma_bd_paddr(sc->swsc, next);
+			} else
+				bd.next = 0;
 			bd.bufptr = segs[seg].ds_addr;
 			bd.bufoff = 0;
 			bd.buflen = segs[seg].ds_len;
@@ -1815,25 +1843,18 @@ cpswp_tx_enqueue(struct cpswp_softc *sc)
 		/* Save the final buffer. */
 		if (padlen <= 0)
 			bd.flags |= CPDMA_BD_EOP;
+		else {
+			next = STAILQ_NEXT(slot, next);
+			bd.next = cpsw_cpdma_bd_paddr(sc->swsc, next);
+		}
 		cpsw_cpdma_write_bd(sc->swsc, slot, &bd);
-		if (prev_slot != NULL)
-			cpsw_cpdma_write_bd_next(sc->swsc, prev_slot, slot);
-		prev_slot = slot;
 		STAILQ_REMOVE_HEAD(&sc->swsc->tx.avail, next);
-		sc->swsc->tx.avail_queue_len--;
-		STAILQ_INSERT_TAIL(&tmpqueue, slot, next);
-		++added;
+		STAILQ_INSERT_TAIL(&sc->swsc->tx.active, slot, next);
 
 		if (padlen > 0) {
 			slot = STAILQ_FIRST(&sc->swsc->tx.avail);
-			STAILQ_REMOVE_HEAD(&sc->swsc->tx.avail, next);
-			sc->swsc->tx.avail_queue_len--;
-			STAILQ_INSERT_TAIL(&tmpqueue, slot, next);
-			++added;
-
-			/* Setup buffer of null pad bytes (definitely EOP) */
-			cpsw_cpdma_write_bd_next(sc->swsc, prev_slot, slot);
-			prev_slot = slot;
+
+			/* Setup buffer of null pad bytes (definitely EOP). */
 			bd.next = 0;
 			bd.bufptr = sc->swsc->null_mbuf_paddr;
 			bd.bufoff = 0;
@@ -1842,8 +1863,14 @@ cpswp_tx_enqueue(struct cpswp_softc *sc)
 			bd.flags = CPDMA_BD_EOP | CPDMA_BD_OWNER | flags;
 			cpsw_cpdma_write_bd(sc->swsc, slot, &bd);
 			++nsegs;
+
+			STAILQ_REMOVE_HEAD(&sc->swsc->tx.avail, next);
+			STAILQ_INSERT_TAIL(&sc->swsc->tx.active, slot, next);
 		}
 
+		last = slot;
+
+		added += nsegs;
 		if (nsegs > sc->swsc->tx.longest_chain)
 			sc->swsc->tx.longest_chain = nsegs;
 
@@ -1852,33 +1879,27 @@ cpswp_tx_enqueue(struct cpswp_softc *sc)
 		BPF_MTAP(sc->ifp, m0);
 	}
 
-	/* Attach the list of new buffers to the hardware TX queue. */
-	last_old_slot = STAILQ_LAST(&sc->swsc->tx.active, cpsw_slot, next);
-	first_new_slot = STAILQ_FIRST(&tmpqueue);
-	STAILQ_CONCAT(&sc->swsc->tx.active, &tmpqueue);
-	if (first_new_slot == NULL) {
+	if (first_new_slot == NULL)
 		return;
-	} else if (last_old_slot == NULL) {
-		/* Start a fresh queue. */
-		sc->swsc->last_hdp = cpsw_cpdma_bd_paddr(sc->swsc, first_new_slot);
-		cpsw_write_hdp_slot(sc->swsc, &sc->swsc->tx, first_new_slot);
-	} else {
+
+	/* Attach the list of new buffers to the hardware TX queue. */
+	if (last_old_slot != NULL &&
+	    (cpsw_cpdma_read_bd_flags(sc->swsc, last_old_slot) &
+	     CPDMA_BD_EOQ) == 0) {
 		/* Add buffers to end of current queue. */
 		cpsw_cpdma_write_bd_next(sc->swsc, last_old_slot,
 		    first_new_slot);
-		/* If underrun, restart queue. */
-		if (cpsw_cpdma_read_bd_flags(sc->swsc, last_old_slot) &
-		    CPDMA_BD_EOQ) {
-			sc->swsc->last_hdp = cpsw_cpdma_bd_paddr(sc->swsc, first_new_slot);
-			cpsw_write_hdp_slot(sc->swsc, &sc->swsc->tx,
-			    first_new_slot);
-		}
+	} else {
+		/* Start a fresh queue. */
+		cpsw_write_hdp_slot(sc->swsc, &sc->swsc->tx, first_new_slot);
 	}
 	sc->swsc->tx.queue_adds += added;
+	sc->swsc->tx.avail_queue_len -= added;
 	sc->swsc->tx.active_queue_len += added;
 	if (sc->swsc->tx.active_queue_len > sc->swsc->tx.max_active_queue_len) {
 		sc->swsc->tx.max_active_queue_len = sc->swsc->tx.active_queue_len;
 	}
+	CPSW_DEBUGF(sc->swsc, ("Queued %d TX packet(s)", added));
 }
 
 static int
@@ -1888,27 +1909,31 @@ cpsw_tx_dequeue(struct cpsw_softc *sc)
 	struct cpsw_cpdma_bd bd;
 	uint32_t flags, removed = 0;
 
-	slot = STAILQ_FIRST(&sc->tx.active);
-	if (slot == NULL && cpsw_read_cp(sc, &sc->tx) == 0xfffffffc) {
-		CPSW_DEBUGF(sc, ("TX teardown of an empty queue"));
-		cpsw_write_cp(sc, &sc->tx, 0xfffffffc);
-		sc->tx.running = 0;
-		return (0);
-	}
-
 	/* Pull completed buffers off the hardware TX queue. */
+	slot = STAILQ_FIRST(&sc->tx.active);
 	while (slot != NULL) {
 		flags = cpsw_cpdma_read_bd_flags(sc, slot);
-		if (flags & CPDMA_BD_OWNER)
+
+		/* TearDown complete is only marked on the SOP for the packet. */
+		if ((flags & (CPDMA_BD_SOP | CPDMA_BD_TDOWNCMPLT)) ==
+		    (CPDMA_BD_SOP | CPDMA_BD_TDOWNCMPLT)) {
+			sc->tx.teardown = 1;
+		}
+
+		if ((flags & CPDMA_BD_OWNER) != 0 && sc->tx.teardown == 0)
 			break; /* Hardware is still using this packet. */
 
-		CPSW_DEBUGF(sc, ("TX removing completed packet"));
 		bus_dmamap_sync(sc->mbuf_dtag, slot->dmamap, BUS_DMASYNC_POSTWRITE);
 		bus_dmamap_unload(sc->mbuf_dtag, slot->dmamap);
 		m_freem(slot->mbuf);
 		slot->mbuf = NULL;
-		if (slot->ifp)
-			if_inc_counter(slot->ifp, IFCOUNTER_OPACKETS, 1);
+
+		if (slot->ifp) {
+			if (sc->tx.teardown == 0)
+				if_inc_counter(slot->ifp, IFCOUNTER_OPACKETS, 1);
+			else
+				if_inc_counter(slot->ifp, IFCOUNTER_OQDROPS, 1);
+		}
 
 		/* Dequeue any additional buffers used by this packet. */
 		while (slot != NULL && slot->mbuf == NULL) {
@@ -1919,37 +1944,34 @@ cpsw_tx_dequeue(struct cpsw_softc *sc)
 			slot = STAILQ_FIRST(&sc->tx.active);
 		}
 
-		/* TearDown complete is only marked on the SOP for the packet. */
-		if ((flags & (CPDMA_BD_SOP | CPDMA_BD_TDOWNCMPLT)) ==
-		    (CPDMA_BD_SOP | CPDMA_BD_TDOWNCMPLT)) {
-			CPSW_DEBUGF(sc, ("TX teardown in progress"));
-			cpsw_write_cp(sc, &sc->tx, 0xfffffffc);
-			// TODO: Increment a count of dropped TX packets
-			sc->tx.running = 0;
-			break;
-		}
+		cpsw_write_cp_slot(sc, &sc->tx, last_removed_slot);
 
-		if ((flags & CPDMA_BD_EOP) == 0)
-			flags = cpsw_cpdma_read_bd_flags(sc, last_removed_slot);
-		if ((flags & (CPDMA_BD_EOP | CPDMA_BD_EOQ)) ==
+		/* Restart the TX queue if necessary. */
+		cpsw_cpdma_read_bd(sc, last_removed_slot, &bd);
+		if (slot != NULL && bd.next != 0 && (bd.flags &
+		    (CPDMA_BD_EOP | CPDMA_BD_OWNER | CPDMA_BD_EOQ)) ==
 		    (CPDMA_BD_EOP | CPDMA_BD_EOQ)) {
-			cpsw_cpdma_read_bd(sc, last_removed_slot, &bd);
-			if (bd.next != 0 && bd.next != sc->last_hdp) {
-				/* Restart the queue. */
-				sc->last_hdp = bd.next;
-				cpsw_write_4(sc, sc->tx.hdp_offset, bd.next);
-			}
+			cpsw_write_hdp_slot(sc, &sc->tx, slot);
+			sc->tx.queue_restart++;
+			break;
 		}
 	}
 
 	if (removed != 0) {
-		cpsw_write_cp_slot(sc, &sc->tx, last_removed_slot);
 		sc->tx.queue_removes += removed;
 		sc->tx.active_queue_len -= removed;
 		sc->tx.avail_queue_len += removed;
 		if (sc->tx.avail_queue_len > sc->tx.max_avail_queue_len)
 			sc->tx.max_avail_queue_len = sc->tx.avail_queue_len;
+		CPSW_DEBUGF(sc, ("TX removed %d completed packet(s)", removed));
+	}
+
+	if (sc->tx.teardown && STAILQ_EMPTY(&sc->tx.active)) {
+		CPSW_DEBUGF(sc, ("TX teardown is complete"));
+		sc->tx.teardown = 0;
+		sc->tx.running = 0;
 	}
+
 	return (removed);
 }
 
@@ -1962,11 +1984,25 @@ cpsw_tx_dequeue(struct cpsw_softc *sc)
 static void
 cpsw_intr_rx_thresh(void *arg)
 {
-	struct cpsw_softc *sc = arg;
-	uint32_t stat = cpsw_read_4(sc, CPSW_WR_C_RX_THRESH_STAT(0));
+	struct cpsw_softc *sc;
+	struct ifnet *ifp;
+	struct mbuf *received, *next;
 
-	CPSW_DEBUGF(sc, ("stat=%x", stat));
+	sc = (struct cpsw_softc *)arg;
+	CPSW_RX_LOCK(sc);
+	received = cpsw_rx_dequeue(sc);
+	cpsw_rx_enqueue(sc);
 	cpsw_write_4(sc, CPSW_CPDMA_CPDMA_EOI_VECTOR, 0);
+	CPSW_RX_UNLOCK(sc);
+
+	while (received != NULL) {
+		next = received->m_nextpkt;
+		received->m_nextpkt = NULL;
+		ifp = received->m_pkthdr.rcvif;
+		(*ifp->if_input)(ifp, received);
+		if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
+		received = next;
+	}
 }
 
 static void
@@ -2138,6 +2174,11 @@ cpsw_tx_watchdog_full_reset(struct cpsw_
 
 	cpsw_debugf_head("CPSW watchdog");
 	device_printf(sc->dev, "watchdog timeout\n");
+	printf("CPSW_CPDMA_TX%d_HDP=0x%x\n", 0,
+	    cpsw_read_4(sc, CPSW_CPDMA_TX_HDP(0)));
+	printf("CPSW_CPDMA_TX%d_CP=0x%x\n", 0,
+	    cpsw_read_4(sc, CPSW_CPDMA_TX_CP(0)));
+	cpsw_dump_queue(sc, &sc->tx.active);
 	for (i = 0; i < CPSW_PORTS; i++) {
 		if (!sc->dualemac && i != sc->active_slave)
 			continue;
@@ -2569,6 +2610,9 @@ cpsw_add_queue_sysctls(struct sysctl_ctx
 	SYSCTL_ADD_UINT(ctx, parent, OID_AUTO, "totalDequeued",
 	    CTLFLAG_RD, &queue->queue_removes, 0,
 	    "Total buffers removed from queue");
+	SYSCTL_ADD_UINT(ctx, parent, OID_AUTO, "queueRestart",
+	    CTLFLAG_RD, &queue->queue_restart, 0,
+	    "Total times the queue has been restarted");
 	SYSCTL_ADD_UINT(ctx, parent, OID_AUTO, "longestChain",
 	    CTLFLAG_RD, &queue->longest_chain, 0,
 	    "Max buffers used for a single packet");
@@ -2602,6 +2646,9 @@ cpsw_add_sysctls(struct cpsw_softc *sc)
 	SYSCTL_ADD_INT(ctx, parent, OID_AUTO, "debug",
 	    CTLFLAG_RW, &sc->debug, 0, "Enable switch debug messages");
 
+	SYSCTL_ADD_INT(ctx, parent, OID_AUTO, "rx_batch",
+	    CTLFLAG_RW, &sc->rx_batch, 0, "Set the rx batch size");
+
 	SYSCTL_ADD_PROC(ctx, parent, OID_AUTO, "attachedSecs",
 	    CTLTYPE_UINT | CTLFLAG_RD, sc, 0, cpsw_stat_attached, "IU",
 	    "Time since driver attach");

Modified: stable/11/sys/arm/ti/cpsw/if_cpswreg.h
==============================================================================
--- stable/11/sys/arm/ti/cpsw/if_cpswreg.h	Fri Dec 30 20:40:25 2016	(r310859)
+++ stable/11/sys/arm/ti/cpsw/if_cpswreg.h	Fri Dec 30 20:43:00 2016	(r310860)
@@ -64,10 +64,13 @@
 #define	CPSW_CPDMA_RX_INTSTAT_MASKED	(CPSW_CPDMA_OFFSET + 0xA4)
 #define	CPSW_CPDMA_RX_INTMASK_SET	(CPSW_CPDMA_OFFSET + 0xA8)
 #define	CPSW_CPDMA_RX_INTMASK_CLEAR	(CPSW_CPDMA_OFFSET + 0xAc)
+#define	 CPSW_CPDMA_RX_INT_THRESH(_ch)	(1 << (8 + ((_ch) & 7)))
+#define	 CPSW_CPDMA_RX_INT(_ch)		(1 << (0 + ((_ch) & 7)))
 #define	CPSW_CPDMA_DMA_INTSTAT_RAW	(CPSW_CPDMA_OFFSET + 0xB0)
 #define	CPSW_CPDMA_DMA_INTSTAT_MASKED	(CPSW_CPDMA_OFFSET + 0xB4)
 #define	CPSW_CPDMA_DMA_INTMASK_SET	(CPSW_CPDMA_OFFSET + 0xB8)
 #define	CPSW_CPDMA_DMA_INTMASK_CLEAR	(CPSW_CPDMA_OFFSET + 0xBC)
+#define	CPSW_CPDMA_RX_PENDTHRESH(p)	(CPSW_CPDMA_OFFSET + 0x0c0 + ((p) * 0x04))
 #define	CPSW_CPDMA_RX_FREEBUFFER(p)	(CPSW_CPDMA_OFFSET + 0x0e0 + ((p) * 0x04))
 
 #define	CPSW_STATS_OFFSET		0x0900

Modified: stable/11/sys/arm/ti/cpsw/if_cpswvar.h
==============================================================================
--- stable/11/sys/arm/ti/cpsw/if_cpswvar.h	Fri Dec 30 20:40:25 2016	(r310859)
+++ stable/11/sys/arm/ti/cpsw/if_cpswvar.h	Fri Dec 30 20:43:00 2016	(r310860)
@@ -52,11 +52,13 @@ STAILQ_HEAD(cpsw_slots, cpsw_slot);
 struct cpsw_queue {
 	struct mtx	lock;
 	int		running;
+	int		teardown;
 	struct cpsw_slots active;
 	struct cpsw_slots avail;
 	uint32_t	queue_adds; /* total bufs added */
 	uint32_t	queue_removes; /* total bufs removed */
 	uint32_t	queue_removes_at_last_tick; /* Used by watchdog */
+	uint32_t	queue_restart;
 	int		queue_slots;
 	int		active_queue_len;
 	int		max_active_queue_len;
@@ -77,6 +79,7 @@ struct cpsw_softc {
 	int		active_slave;
 	int		debug;
 	int		dualemac;
+	int		rx_batch;
 	phandle_t	node;
 	struct bintime	attach_uptime; /* system uptime when attach happened. */
 	struct cpsw_port port[2];
@@ -84,7 +87,6 @@ struct cpsw_softc {
 
 	/* RX and TX buffer tracking */
 	struct cpsw_queue rx, tx;
-	uint32_t	last_hdp;
 
 	/* We expect 1 memory resource and 4 interrupts from the device tree. */
 	int		mem_rid;


More information about the svn-src-stable-11 mailing list