Under heavy load internet gets killed, only a reboot can bring it back up

Pyun YongHyeon pyunyh at gmail.com
Wed Oct 15 16:58:44 PDT 2008


On Wed, Oct 15, 2008 at 04:24:21PM +0200, Aniruddha wrote:
 > On Wed, 2008-10-15 at 21:09 +0900, PYUN Yong-Hyeon wrote:
 > > This controller is known to be a buggy one. See below.
 > > 
 > > [...]
 > > 
 > >  > > Trying to mount root from ufs:/dev/ad16s3a
 > >  > > WARNING: / was not properly dismounted
 > >  > > GEOM_LABEL: Label ext2fs/home removed.
 > >  > > GEOM_LABEL: Label ext2fs/data removed.
 > >  > > mskc0: Uncorrectable PCI Express error
 > >  > > mskc0: Uncorrectable PCI Express error
 > >  > 
 > >  > Those errors at the end of your dmesg don't look good; could be the sign
 > >  > of a NIC or motherboard that's going bad, or possibly a very strange
 > >  > driver problem.
 > > 
 > > I guess the message above could be safely ignored.
 > > 
 > >  > 
 > >  > Adding Yong-Hyeon PYUN to this thread, since he helps maintain the
 > >  > msk(4) driver.  Yong-Hyeon, do you know of any conditions where heavy
 > >  > network I/O could cause msk(4) to lock up or stop transmitting traffic,
 > >  > or possibly hard-lock on ifconfig down/up?
 > >  > 
 > > 
 > > I think workaround for the controller bug was committed to HEAD(SVN
 > > r183346). To original poster, would you try latest if_msk.c from
 > > HEAD?(Just copy if_msk.c/if_mskreg.h from HEAD to your box.)
 > > 
 > 
 > You got to help me a little bit here. How do I achieve this? Btw I am
 > running FreeBSD 7.1 BETA. Doesn't that mean the fix is already applied?
 > 

It seems that msk(4) in HEAD does not build correctly on RELENG_7.
Please try the attached patch instead.

Save the attached patch as /path/to/patch/msk.watchdog.diff, then run:
#cd /usr/src/sys/dev/msk
#patch -p0 < /path/to/patch/msk.watchdog.diff
and rebuild your kernel.

-- 
Regards,
Pyun YongHyeon
-------------- next part --------------
Index: if_msk.c
===================================================================
--- if_msk.c	(revision 183165)
+++ if_msk.c	(working copy)
@@ -244,6 +244,9 @@
 static int msk_handle_events(struct msk_softc *);
 static void msk_handle_hwerr(struct msk_if_softc *, uint32_t);
 static void msk_intr_hwerr(struct msk_softc *);
+#ifndef __NO_STRICT_ALIGNMENT
+static __inline void msk_fixup_rx(struct mbuf *);
+#endif
 static void msk_rxeof(struct msk_if_softc *, uint32_t, int);
 static void msk_jumbo_rxeof(struct msk_if_softc *, uint32_t, int);
 static void msk_txeof(struct msk_if_softc *, int);
@@ -783,7 +786,12 @@
 		return (ENOBUFS);
 
 	m->m_len = m->m_pkthdr.len = MCLBYTES;
-	m_adj(m, ETHER_ALIGN);
+	if ((sc_if->msk_flags & MSK_FLAG_RAMBUF) == 0)
+		m_adj(m, ETHER_ALIGN);
+#ifndef __NO_STRICT_ALIGNMENT
+	else
+		m_adj(m, MSK_RX_BUF_ALIGN);
+#endif
 
 	if (bus_dmamap_load_mbuf_sg(sc_if->msk_cdata.msk_rx_tag,
 	    sc_if->msk_cdata.msk_rx_sparemap, m, segs, &nsegs,
@@ -840,7 +848,12 @@
 		return (ENOBUFS);
 	}
 	m->m_pkthdr.len = m->m_len = MSK_JLEN;
-	m_adj(m, ETHER_ALIGN);
+	if ((sc_if->msk_flags & MSK_FLAG_RAMBUF) == 0)
+		m_adj(m, ETHER_ALIGN);
+#ifndef __NO_STRICT_ALIGNMENT
+	else
+		m_adj(m, MSK_RX_BUF_ALIGN);
+#endif
 
 	if (bus_dmamap_load_mbuf_sg(sc_if->msk_cdata.msk_jumbo_rx_tag,
 	    sc_if->msk_cdata.msk_jumbo_rx_sparemap, m, segs, &nsegs,
@@ -1041,14 +1054,16 @@
 {
 	int next;
 	int i;
-	uint8_t val;
 
 	/* Get adapter SRAM size. */
-	val = CSR_READ_1(sc, B2_E_0);
-	sc->msk_ramsize = (val == 0) ? 128 : val * 4;
+	sc->msk_ramsize = CSR_READ_1(sc, B2_E_0) * 4;
 	if (bootverbose)
 		device_printf(sc->msk_dev,
 		    "RAM buffer size : %dKB\n", sc->msk_ramsize);
+	if (sc->msk_ramsize == 0)
+		return (0);
+
+	sc->msk_pflags |= MSK_FLAG_RAMBUF;
 	/*
 	 * Give receiver 2/3 of memory and round down to the multiple
 	 * of 1024. Tx/Rx RAM buffer size of Yukon II shoud be multiple
@@ -1412,6 +1427,7 @@
 	sc_if->msk_if_dev = dev;
 	sc_if->msk_port = port;
 	sc_if->msk_softc = sc;
+	sc_if->msk_flags = sc->msk_pflags;
 	sc->msk_if[port] = sc_if;
 	/* Setup Tx/Rx queue register offsets. */
 	if (port == MSK_PORT_A) {
@@ -1976,6 +1992,7 @@
 	struct msk_rxdesc *jrxd;
 	struct msk_jpool_entry *entry;
 	uint8_t *ptr;
+	bus_size_t rxalign;
 	int error, i;
 
 	mtx_init(&sc_if->msk_jlist_mtx, "msk_jlist_mtx", NULL, MTX_DEF);
@@ -2107,9 +2124,16 @@
 		goto fail;
 	}
 
+	rxalign = 1;
+	/*
+	 * Workaround hardware hang which seems to happen when Rx buffer
+	 * is not aligned on multiple of FIFO word(8 bytes).
+	 */
+	if ((sc_if->msk_flags & MSK_FLAG_RAMBUF) != 0)
+		rxalign = MSK_RX_BUF_ALIGN;
 	/* Create tag for Rx buffers. */
 	error = bus_dma_tag_create(sc_if->msk_cdata.msk_parent_tag,/* parent */
-		    1, 0,			/* alignment, boundary */
+		    rxalign, 0,			/* alignment, boundary */
 		    BUS_SPACE_MAXADDR,		/* lowaddr */
 		    BUS_SPACE_MAXADDR,		/* highaddr */
 		    NULL, NULL,			/* filter, filterarg */
@@ -2918,6 +2942,23 @@
 	return (0);
 }
 
+#ifndef __NO_STRICT_ALIGNMENT
+static __inline void
+msk_fixup_rx(struct mbuf *m)
+{
+        int i;
+        uint16_t *src, *dst;
+
+	src = mtod(m, uint16_t *);
+	dst = src - 3;
+
+	for (i = 0; i < (m->m_len / sizeof(uint16_t) + 1); i++)
+		*dst++ = *src++;
+
+	m->m_data -= (MSK_RX_BUF_ALIGN - ETHER_ALIGN);
+}
+#endif
+
 static void
 msk_rxeof(struct msk_if_softc *sc_if, uint32_t status, int len)
 {
@@ -2955,6 +2996,10 @@
 		}
 		m->m_pkthdr.rcvif = ifp;
 		m->m_pkthdr.len = m->m_len = len;
+#ifndef __NO_STRICT_ALIGNMENT
+		if ((sc_if->msk_flags & MSK_FLAG_RAMBUF) != 0)
+			msk_fixup_rx(m);
+#endif
 		ifp->if_ipackets++;
 		/* Check for VLAN tagged packets. */
 		if ((status & GMR_FS_VLAN) != 0 &&
@@ -3008,6 +3053,10 @@
 		}
 		m->m_pkthdr.rcvif = ifp;
 		m->m_pkthdr.len = m->m_len = len;
+#ifndef __NO_STRICT_ALIGNMENT
+		if ((sc_if->msk_flags & MSK_FLAG_RAMBUF) != 0)
+			msk_fixup_rx(m);
+#endif
 		ifp->if_ipackets++;
 		/* Check for VLAN tagged packets. */
 		if ((status & GMR_FS_VLAN) != 0 &&
@@ -3677,7 +3726,7 @@
 	/* Configure hardware VLAN tag insertion/stripping. */
 	msk_setvlan(sc_if, ifp);
 
-	if (sc->msk_hw_id == CHIP_ID_YUKON_EC_U) {
+	if ((sc_if->msk_flags & MSK_FLAG_RAMBUF) == 0) {
 		/* Set Rx Pause threshould. */
 		CSR_WRITE_1(sc, MR_ADDR(sc_if->msk_port, RX_GMF_LP_THR),
 		    MSK_ECU_LLPP);
@@ -3790,6 +3839,8 @@
 	int ltpp, utpp;
 
 	sc = sc_if->msk_softc;
+	if ((sc_if->msk_flags & MSK_FLAG_RAMBUF) == 0)
+		return;
 
 	/* Setup Rx Queue. */
 	CSR_WRITE_1(sc, RB_ADDR(sc_if->msk_rxq, RB_CTRL), RB_RST_CLR);
Index: if_mskreg.h
===================================================================
--- if_mskreg.h	(revision 183165)
+++ if_mskreg.h	(working copy)
@@ -2158,6 +2158,7 @@
 
 #define MSK_TX_RING_CNT		256
 #define MSK_RX_RING_CNT		256
+#define	MSK_RX_BUF_ALIGN	8
 #define MSK_JUMBO_RX_RING_CNT	MSK_RX_RING_CNT
 #define	MSK_STAT_RING_CNT	((1 + 3) * (MSK_TX_RING_CNT + MSK_RX_RING_CNT))
 #define MSK_MAXTXSEGS		32
@@ -2307,6 +2308,7 @@
 	uint32_t		msk_coppertype;
 	uint32_t		msk_intrmask;
 	uint32_t		msk_intrhwemask;
+	uint32_t		msk_pflags;
 	int			msk_suspended;
 	int			msk_clock;
 	int			msk_msi;
@@ -2348,6 +2350,8 @@
 	int			msk_phytype;
 	int			msk_phyaddr;
 	int			msk_link;
+	uint32_t		msk_flags;
+#define	MSK_FLAG_RAMBUF		0x0010
 	struct callout		msk_tick_ch;
 	int			msk_watchdog_timer;
 	uint32_t		msk_txq;	/* Tx. Async Queue offset */


More information about the freebsd-questions mailing list