svn commit: r289671 - head/sys/mips/atheros

Wed Oct 21 01:41:20 UTC 2015

Author: adrian
Date: Wed Oct 21 01:41:18 2015
New Revision: 289671
URL: https://svnweb.freebsd.org/changeset/base/289671

Log:
  arge: don't do the rx fixup copy and just offset the mbuf by 2 bytes
  
  The existing code meets the "alignment" requirement for the L3 payload
  by offsetting the mbuf by sizeof(uint64_t) and then calling an rx fixup
  routine to copy the frame backwards by 2 bytes.  This DWORD aligns the
  L3 payload so TCP, etc. don't panic on unaligned access.
  
  This is .. slow.
  
  For arge MACs that support 1-byte TX/RX address alignment, we can do
  the "other" hack: offset the RX address of the mbuf by 2 bytes so that
  the L3 payload is, hopefully, again DWORD aligned.
  
  This is much cheaper - since TX and RX are both 1-byte-alignment ready
  (thanks to the previous commit), there's no bounce buffering going on
  and there is no rx fixup copying.
  
  This gets bridging performance up from 180 Mbit/sec to 410 Mbit/sec.
  Around 10% of CPU cycles are still spent in _bus_dmamap_sync(); I'll
  investigate that later.
  
  Tested:
  
  * QCA955x SoC (AP135 reference board), bridging arge0/arge1
    by programming the switch to have two vlangroups in dot1q mode:
  
  # ifconfig bridge0 inet 192.168.2.20/24
  # etherswitchcfg config vlan_mode dot1q
  # etherswitchcfg vlangroup0 members 0,1,2,3,4
  # etherswitchcfg vlangroup1 vlan 2 members 5,6
  # etherswitchcfg port5 pvid 2
  # etherswitchcfg port6 pvid 2
  # ifconfig arge1 up
  # ifconfig bridge0 addm arge1

Modified:
  head/sys/mips/atheros/if_arge.c

Modified: head/sys/mips/atheros/if_arge.c
==============================================================================

--- head/sys/mips/atheros/if_arge.c	Wed Oct 21 01:34:51 2015	(r289670)
+++ head/sys/mips/atheros/if_arge.c	Wed Oct 21 01:41:18 2015	(r289671)
@@ -2165,6 +2165,7 @@ arge_newbuf(struct arge_softc *sc, int i
 	bus_dmamap_t		map;
 	int			nsegs;
 
+	/* XXX TODO: should just allocate an explicit 2KiB buffer */
 	m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
 	if (m == NULL)
 		return (ENOBUFS);
@@ -2174,7 +2175,15 @@ arge_newbuf(struct arge_softc *sc, int i
 	 * Add extra space to "adjust" (copy) the packet back to be aligned
 	 * for purposes of IPv4/IPv6 header contents.
 	 */
-	m_adj(m, sizeof(uint64_t));
+	if (sc->arge_hw_flags & ARGE_HW_FLG_RX_DESC_ALIGN_4BYTE)
+		m_adj(m, sizeof(uint64_t));
+	/*
+	 * If it's a 1-byte aligned buffer, then just offset it two bytes
+	 * and that will give us a hopefully correctly DWORD aligned
+	 * L3 payload - and we won't have to undo it afterwards.
+	 */
+	else if (sc->arge_hw_flags & ARGE_HW_FLG_RX_DESC_ALIGN_1BYTE)
+		m_adj(m, sizeof(uint16_t));
 
 	if (bus_dmamap_load_mbuf_sg(sc->arge_cdata.arge_rx_tag,
 	    sc->arge_cdata.arge_rx_sparemap, m, segs, &nsegs, 0) != 0) {
@@ -2186,6 +2195,11 @@ arge_newbuf(struct arge_softc *sc, int i
 	rxd = &sc->arge_cdata.arge_rxdesc[idx];
 	if (rxd->rx_m != NULL) {
 		bus_dmamap_unload(sc->arge_cdata.arge_rx_tag, rxd->rx_dmamap);
+		/* XXX TODO: free rx_m? */
+		device_printf(sc->arge_dev,
+		    "%s: ring[%d] rx_m wasn't free?\n",
+		    __func__,
+		    idx);
 	}
 	map = rxd->rx_dmamap;
 	rxd->rx_dmamap = sc->arge_cdata.arge_rx_sparemap;
@@ -2205,6 +2219,13 @@ arge_newbuf(struct arge_softc *sc, int i
 	return (0);
 }
 
+/*
+ * Move the data backwards 16 bits to (hopefully!) ensure the
+ * IPv4/IPv6 payload is aligned.
+ *
+ * This is required for earlier hardware where the RX path
+ * requires DWORD aligned buffers.
+ */
 static __inline void
 arge_fixup_rx(struct mbuf *m)
 {
@@ -2344,7 +2365,13 @@ arge_rx_locked(struct arge_softc *sc)
 		    BUS_DMASYNC_POSTREAD);
 		m = rxd->rx_m;
 
-		arge_fixup_rx(m);
+		/*
+		 * If the MAC requires 4 byte alignment then the RX setup
+		 * routine will have pre-offset things; so un-offset it here.
+		 */
+		if (sc->arge_hw_flags & ARGE_HW_FLG_RX_DESC_ALIGN_4BYTE)
+			arge_fixup_rx(m);
+
 		m->m_pkthdr.rcvif = ifp;
 		/* Skip 4 bytes of CRC */
 		m->m_pkthdr.len = m->m_len = packet_len - ETHER_CRC_LEN;