svn commit: r201707 - stable/7/sys/dev/bge

Pyun YongHyeon yongari at FreeBSD.org
Thu Jan 7 00:57:40 UTC 2010


Author: yongari
Date: Thu Jan  7 00:57:40 2010
New Revision: 201707
URL: http://svn.freebsd.org/changeset/base/201707

Log:
  MFC r200088,200227-200228,200246,200264,201446
  
  r200088:
    Add a workaround to overcome a hardware limitation which allows only
    a single outstanding DMA read operation. Most controllers targeted
    at clients with a PCIe bus interface (e.g. BCM5761) may have this
    limitation. Controllers for servers do not have this limitation.
    Collapsing mbuf chains to reduce the number of memory reads before
    transmitting was the most effective way to work around this.
    Throughput improved from about 850Mbps to about 940Mbps with mbuf
    collapsing on BCM5761. However, it takes a lot of CPU cycles to
    collapse mbuf chains, so add a tunable to control the number of
    allowed TX buffers before collapsing. The default value is 0, which
    effectively disables the forced collapsing.
    For most cases 2 would yield the best performance (about 930Mbps)
    without sacrificing many CPU cycles.
    Note the collapsing is only activated when the controller is on
    PCIe bus and the frame does not need TSO operation. TSO does not
    seem to suffer from the hardware limitation because the payload
    size is much bigger than normal IP datagram.
    Thanks to davidch@ who told me the limitation of client controllers
    and actually gave possible workarounds to mitigate the limitation.
  
  r200227:
    Remove PHY isolate/power down code in bge_stop(). The isolation
    handler in brgphy(4) does not exist and brgphy(4) just resets the
    PHY and returns EINVAL as it has no isolation handler. I also agree
    with Marius's opinion that the stop handler of every NIC driver
    seems to be the wrong place for implementing PHY isolate/power down.
    If we need PHY isolate/power down it should be implemented in
    brgphy(4) and users should administratively down the PHY.
  
  r200228:
    Don't access jumbo frame related registers if the controller lacks
    the feature. These registers are reserved on controllers that have
    no support for jumbo frames.
    Only BCM5700 has a mini ring, so do not poke mini ring related
    registers if the controller is not BCM5700.
  
  r200246:
    Partially revert r200228. For the mini RCB case, bge(4) still has to
    disable the mini ring without regard to mini ring support.
  
  r200264:
    Create a per-device sysctl node (dev.bge.%d.forced_collapse) instead
    of hw.bge.forced_collapse. hw.bge.forced_collapse affects all bge(4)
    controllers on the system, which may not be the desired behavior of
    the sysctl node. Also allow the sysctl node to be modified at any
    time.
  
  r201446:
    Fix a regression introduced in r198318. BCM5754/BCM5754M use the
    same ASIC ID as BCM5787, such that r198318 incorrectly enabled TSO
    on BCM5754/BCM5754M controllers. BCM5754/BCM5754M need special
    firmware to enable TSO, and bge(4) does not support firmware based
    TSO.

Modified:
  stable/7/sys/dev/bge/if_bge.c
  stable/7/sys/dev/bge/if_bgereg.h
Directory Properties:
  stable/7/sys/   (props changed)
  stable/7/sys/cddl/contrib/opensolaris/   (props changed)
  stable/7/sys/contrib/dev/acpica/   (props changed)
  stable/7/sys/contrib/pf/   (props changed)

Modified: stable/7/sys/dev/bge/if_bge.c
==============================================================================
--- stable/7/sys/dev/bge/if_bge.c	Thu Jan  7 00:55:07 2010	(r201706)
+++ stable/7/sys/dev/bge/if_bge.c	Thu Jan  7 00:57:40 2010	(r201707)
@@ -1625,7 +1625,9 @@ bge_blockinit(struct bge_softc *sc)
 	else
 		val = BGE_STD_RX_RING_CNT / 8;
 	CSR_WRITE_4(sc, BGE_RBDI_STD_REPL_THRESH, val);
-	CSR_WRITE_4(sc, BGE_RBDI_JUMBO_REPL_THRESH, BGE_JUMBO_RX_RING_CNT/8);
+	if (BGE_IS_JUMBO_CAPABLE(sc))
+		CSR_WRITE_4(sc, BGE_RBDI_JUMBO_REPL_THRESH,
+		    BGE_JUMBO_RX_RING_CNT/8);
 
 	/*
 	 * Disable all unused send rings by setting the 'ring disabled'
@@ -1667,8 +1669,10 @@ bge_blockinit(struct bge_softc *sc)
 
 	/* Initialize RX ring indexes */
 	bge_writembx(sc, BGE_MBX_RX_STD_PROD_LO, 0);
-	bge_writembx(sc, BGE_MBX_RX_JUMBO_PROD_LO, 0);
-	bge_writembx(sc, BGE_MBX_RX_MINI_PROD_LO, 0);
+	if (BGE_IS_JUMBO_CAPABLE(sc))
+		bge_writembx(sc, BGE_MBX_RX_JUMBO_PROD_LO, 0);
+	if (sc->bge_asicrev == BGE_ASICREV_BCM5700)
+		bge_writembx(sc, BGE_MBX_RX_MINI_PROD_LO, 0);
 
 	/*
 	 * Set up RX return ring 0
@@ -2638,8 +2642,15 @@ bge_attach(device_t dev)
 	 * the TSO to the controllers that are not affected TSO issues
 	 * (e.g. 5755 or higher).
 	 */
-	if (BGE_IS_5755_PLUS(sc))
-		sc->bge_flags |= BGE_FLAG_TSO;
+	if (BGE_IS_5755_PLUS(sc)) {
+		/*
+		 * BCM5754 and BCM5787 shares the same ASIC id so
+		 * explicit device id check is required.
+		 */
+		if (pci_get_device(dev) != BCOM_DEVICEID_BCM5754 &&
+		    pci_get_device(dev) != BCOM_DEVICEID_BCM5754M)
+			sc->bge_flags |= BGE_FLAG_TSO;
+	}
 
   	/*
 	 * Check if this is a PCI-X or PCI Express device.
@@ -3911,6 +3922,26 @@ bge_encap(struct bge_softc *sc, struct m
 			csum_flags |= BGE_TXBDFLAG_IP_FRAG;
 	}
 
+	if ((m->m_pkthdr.csum_flags & CSUM_TSO) == 0 &&
+	    sc->bge_forced_collapse > 0 &&
+	    (sc->bge_flags & BGE_FLAG_PCIE) != 0 && m->m_next != NULL) {
+		/*
+		 * Forcedly collapse mbuf chains to overcome hardware
+		 * limitation which only support a single outstanding
+		 * DMA read operation.
+		 */
+		if (sc->bge_forced_collapse == 1)
+			m = m_defrag(m, M_DONTWAIT);
+		else
+			m = m_collapse(m, M_DONTWAIT, sc->bge_forced_collapse);
+		if (m == NULL) {
+			m_freem(*m_head);
+			*m_head = NULL;
+			return (ENOBUFS);
+		}
+		*m_head = m;
+	}
+
 	map = sc->bge_cdata.bge_tx_dmamap[idx];
 	error = bus_dmamap_load_mbuf_sg(sc->bge_cdata.bge_tx_mtag, map, m, segs,
 	    &nsegs, BUS_DMA_NOWAIT);
@@ -4547,17 +4578,11 @@ static void
 bge_stop(struct bge_softc *sc)
 {
 	struct ifnet *ifp;
-	struct ifmedia_entry *ifm;
-	struct mii_data *mii = NULL;
-	int mtmp, itmp;
 
 	BGE_LOCK_ASSERT(sc);
 
 	ifp = sc->bge_ifp;
 
-	if ((sc->bge_flags & BGE_FLAG_TBI) == 0)
-		mii = device_get_softc(sc->bge_miibus);
-
 	callout_stop(&sc->bge_stat_ch);
 
 	/* Disable host interrupts. */
@@ -4631,27 +4656,6 @@ bge_stop(struct bge_softc *sc)
 	/* Free TX buffers. */
 	bge_free_tx_ring(sc);
 
-	/*
-	 * Isolate/power down the PHY, but leave the media selection
-	 * unchanged so that things will be put back to normal when
-	 * we bring the interface back up.
-	 */
-	if ((sc->bge_flags & BGE_FLAG_TBI) == 0) {
-		itmp = ifp->if_flags;
-		ifp->if_flags |= IFF_UP;
-		/*
-		 * If we are called from bge_detach(), mii is already NULL.
-		 */
-		if (mii != NULL) {
-			ifm = mii->mii_media.ifm_cur;
-			mtmp = ifm->ifm_media;
-			ifm->ifm_media = IFM_ETHER | IFM_NONE;
-			mii_mediachg(mii);
-			ifm->ifm_media = mtmp;
-		}
-		ifp->if_flags = itmp;
-	}
-
 	sc->bge_tx_saved_considx = BGE_TXCONS_UNSET;
 
 	/* Clear MAC's link state (PHY may still have link UP). */
@@ -4857,6 +4861,26 @@ bge_add_sysctls(struct bge_softc *sc)
 
 #endif
 
+	/*
+	 * A common design characteristic for many Broadcom client controllers
+	 * is that they only support a single outstanding DMA read operation
+	 * on the PCIe bus. This means that it will take twice as long to fetch
+	 * a TX frame that is split into header and payload buffers as it does
+	 * to fetch a single, contiguous TX frame (2 reads vs. 1 read). For
+	 * these controllers, coalescing buffers to reduce the number of memory
+	 * reads is effective way to get maximum performance(about 940Mbps).
+	 * Without collapsing TX buffers the maximum TCP bulk transfer
+	 * performance is about 850Mbps. However forcing coalescing mbufs
+	 * consumes a lot of CPU cycles, so leave it off by default.
+	 */
+	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "forced_collapse",
+	    CTLFLAG_RW, &sc->bge_forced_collapse, 0,
+	    "Number of fragmented TX buffers of a frame allowed before "
+	    "forced collapsing");
+	resource_int_value(device_get_name(sc->bge_dev),
+	    device_get_unit(sc->bge_dev), "forced_collapse",
+	    &sc->bge_forced_collapse);
+
 	if (BGE_IS_5705_PLUS(sc))
 		return;
 

Modified: stable/7/sys/dev/bge/if_bgereg.h
==============================================================================
--- stable/7/sys/dev/bge/if_bgereg.h	Thu Jan  7 00:55:07 2010	(r201706)
+++ stable/7/sys/dev/bge/if_bgereg.h	Thu Jan  7 00:57:40 2010	(r201707)
@@ -2647,6 +2647,7 @@ struct bge_softc {
 	int			bge_link;	/* link state */
 	int			bge_link_evt;	/* pending link event */
 	int			bge_timer;
+	int			bge_forced_collapse;
 	struct callout		bge_stat_ch;
 	uint32_t		bge_rx_discards;
 	uint32_t		bge_tx_discards;


More information about the svn-src-stable mailing list