re(4) driver dropping packets when reading NFS files

Pyun YongHyeon pyunyh at gmail.com
Mon Nov 8 00:27:01 UTC 2010


On Sun, Nov 07, 2010 at 07:06:44PM -0500, Rick Macklem wrote:
> > 
> > I highly doubt it could be a hardware issue.
> > 
> Looks like the hardware guys may be off the hook. See below.
> > 
> > It's the job of bus_dma(9), and I don't think barrier instructions would
> > be helpful here, as I don't see out-of-order execution in the RX
> > handler.
> > 
> My current hunch is that something that changed between June 7 and
> June 15 in head/sys has caused the chip to have difficulties doing
> DMA, resulting in the Fifo overflows and approx. 10% "missed frames".
> 
> > 
> > Let's kill the driver bug. No one has reported this kind of issue so far,
> > and I guess most users took the poor performance for granted
> > because they are using a low-end consumer-grade controller.
> >
> I think your driver is off the hook, too.
> 
> > 
> > > re0 statistics:
> > > Transmit good frames : 101346
> > > Receive good frames : 133390
> > > Tx errors : 0
> > > Rx errors : 0
> > > Rx missed frames : 14394
> > > Rx frame alignment errs : 0
> > > Tx single collisions : 0
> > > Tx multiple collisions : 0
> > > Rx unicast frames : 133378
> > > Rx broadcast frames : 0
> > > Rx multicast frames : 12
> > > Tx aborts : 0
> > > Tx underruns : 0
> > > rxe did 0: 14359
> > 
> Seeing that someone thought it had worked ok a while back, I decided to
> try some old kernels I had lying about from head/-current. I found that
> the one I svn'd on June 7 works well (about 7Mbytes per sec read rate) whereas the one
> svn'd on June 15 had the problem (about 500Kbytes per sec read rate).
> 
> So what is different between these kernels:
> - if_re.c is identical
> - subr_dma.c has a simple change and porting the June 7 one over didn't make
>   the June 15 one work better
> - amd64's busdma_machdep.c is identical
> 
> so it must be something else. There are a bunch of changes to amd64's pmap.c,
> which is why I've cc'd Alan, in case he might know if those changes could affect
> PCIe DMA or similar.
> 
> Other than that, maybe someone else familiar with the PCIe DMA could look and see
> if a change done to head between June 7 and 15 might explain it. (and it could
> be something else, a DMA problem for the chip is just a guess)
> 

If that made a difference, all other Ethernet controllers would have
suffered from similar issues.

> rick
> ps: Unfortunately I'll be on the road for the next month, so I won't be able
>     to test patches until early Dec.

If you have some spare time, please try the attached patch. I guess a fast
Ethernet controller has a smaller FIFO than a GigE controller, so the
issue is triggered more frequently on fast Ethernet controllers than
on GigE controllers. I still guess that there are cases where an
interrupt is not correctly serviced, so that the driver misses a lot
of frames.
-------------- next part --------------
Index: sys/pci/if_rlreg.h
===================================================================
--- sys/pci/if_rlreg.h	(revision 214897)
+++ sys/pci/if_rlreg.h	(working copy)
@@ -218,9 +218,10 @@
 #define RL_ISR_TX_OK		0x0004
 #define RL_ISR_TX_ERR		0x0008
 #define RL_ISR_RX_OVERRUN	0x0010
+#define RL_ISR_RX_DESC_UNAVAIL	0x0010	/* C+ only */
 #define RL_ISR_PKT_UNDERRUN	0x0020
 #define RL_ISR_LINKCHG		0x0020	/* 8169 only */
-#define RL_ISR_FIFO_OFLOW	0x0040	/* 8139 only */
+#define RL_ISR_FIFO_OFLOW	0x0040
 #define RL_ISR_TX_DESC_UNAVAIL	0x0080	/* C+ only */
 #define RL_ISR_SWI		0x0100	/* C+ only */
 #define RL_ISR_CABLE_LEN_CHGD	0x2000
@@ -236,12 +237,12 @@
 #ifdef RE_TX_MODERATION
 #define RL_INTRS_CPLUS	\
 	(RL_ISR_RX_OK|RL_ISR_RX_ERR|RL_ISR_TX_ERR|			\
-	RL_ISR_RX_OVERRUN|RL_ISR_PKT_UNDERRUN|RL_ISR_FIFO_OFLOW|	\
+	RL_ISR_RX_DESC_UNAVAIL|RL_ISR_PKT_UNDERRUN|RL_ISR_FIFO_OFLOW|	\
 	RL_ISR_PCS_TIMEOUT|RL_ISR_SYSTEM_ERR|RL_ISR_TIMEOUT_EXPIRED)
 #else
 #define RL_INTRS_CPLUS	\
 	(RL_ISR_RX_OK|RL_ISR_RX_ERR|RL_ISR_TX_ERR|RL_ISR_TX_OK|		\
-	RL_ISR_RX_OVERRUN|RL_ISR_PKT_UNDERRUN|RL_ISR_FIFO_OFLOW|	\
+	RL_ISR_RX_DESC_UNAVAIL|RL_ISR_PKT_UNDERRUN|RL_ISR_FIFO_OFLOW|	\
 	RL_ISR_PCS_TIMEOUT|RL_ISR_SYSTEM_ERR|RL_ISR_TIMEOUT_EXPIRED)
 #endif
 
@@ -873,9 +874,7 @@
 	int			rl_twist_row;
 	int			rl_twist_col;
 	int			suspended;	/* 0 = normal  1 = suspended */
-#ifdef DEVICE_POLLING
 	int			rxcycles;
-#endif
 
 	struct task		rl_txtask;
 	struct task		rl_inttask;
Index: sys/dev/re/if_re.c
===================================================================
--- sys/dev/re/if_re.c	(revision 214897)
+++ sys/dev/re/if_re.c	(working copy)
@@ -1860,7 +1860,7 @@
 	int			i, total_len;
 	struct rl_desc		*cur_rx;
 	u_int32_t		rxstat, rxvlan;
-	int			maxpkt = 16, rx_npkts = 0;
+	int			rx_npkts = 0;
 
 	RL_LOCK_ASSERT(sc);
 
@@ -1872,7 +1872,7 @@
 	    sc->rl_ldata.rl_rx_list_map,
 	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
 
-	for (i = sc->rl_ldata.rl_rx_prodidx; maxpkt > 0;
+	for (i = sc->rl_ldata.rl_rx_prodidx; sc->rxcycles > 0;
 	    i = RL_RX_DESC_NXT(sc, i)) {
 		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
 			break;
@@ -2036,7 +2036,7 @@
 				}
 			}
 		}
-		maxpkt--;
+		sc->rxcycles--;
 		if (rxvlan & RL_RDESC_VLANCTL_TAG) {
 			m->m_pkthdr.ether_vtag =
 			    bswap16((rxvlan & RL_RDESC_VLANCTL_DATA));
@@ -2058,10 +2058,10 @@
 
 	if (rx_npktsp != NULL)
 		*rx_npktsp = rx_npkts;
-	if (maxpkt)
-		return (EAGAIN);
+	if (sc->rxcycles)
+		return (0);
 
-	return (0);
+	return (EAGAIN);
 }
 
 static void
@@ -2243,6 +2243,8 @@
 	RL_LOCK(sc);
 
 	status = CSR_READ_2(sc, RL_ISR);
+	if (status & RL_ISR_FIFO_OFLOW)
+		status |= RL_ISR_RX_DESC_UNAVAIL;
         CSR_WRITE_2(sc, RL_ISR, status);
 
 	if (sc->suspended ||
@@ -2258,8 +2260,11 @@
 	}
 #endif
 
-	if (status & (RL_ISR_RX_OK|RL_ISR_RX_ERR|RL_ISR_FIFO_OFLOW))
+	if (status & (RL_ISR_RX_OK | RL_ISR_RX_ERR | RL_ISR_FIFO_OFLOW |
+	    RL_ISR_RX_DESC_UNAVAIL)) {
+		sc->rxcycles = sc->rl_ldata.rl_rx_desc_cnt / 2;
 		rval = re_rxeof(sc, NULL);
+	}
 
 	/*
 	 * Some chips will ignore a second TX request issued


More information about the freebsd-current mailing list