[PATCH] TX algorithms, missetting IFF_OACTIVE and if_timer

Ruslan Ermilov ru at FreeBSD.org
Fri Apr 2 04:06:32 PST 2004


Gang,

I've been wading through several network drivers recently,
learning Bill Paul's code.  Specifically, I examined the
following drivers: ste(4), rl(4), dc(4), nge(4), and vr(4).

They all use the consumer/producer approach for handling TX.
if_start() works with the producer, and txeof() works with
the consumer.  The condition (consumer == producer) is when
the TX ring is empty.  To differentiate the case of an empty
ring from the full ring, some drivers (ste(4), dc(4), and
nge(4)) have the threshold (6 for dc(4), 3 for ste(4), and
2 for nge(4)) to assert the gap between producer and consumer,
thus not allow the producer to catch the consumer.  (The
vr(4) is hairier, and I will not discuss it in detail here.)

First, could you please explain these magic numbers?

Also, some drivers use indexes for consumer and producer,
where they could use "next" pointers, which should be faster.

I also think that using the gap between producer/consumer is
suboptimal, but this gap is part of the existing algorithm.

Attached is the patch for ste(4) that converts its producer
and consumer to be pointers rather than indexes (like in vr(4),
but slightly differently, only using two pointers), drops the
tx_cnt and removes the gap.  The condition of an empty ring
becomes "the mbuf pointer of the consumer is NULL".  The patch
also fixes the resetting of IFF_OACTIVE and if_timer.  Before,
it was possible for IFF_OACTIVE to be reset even if we didn't
free any mbufs in ste_txeof() (this is most apparent in the
DEVICE_POLLING case, when we call ste_txeof() periodically),
and if_timer could be reset to zero even if we have some more
TX work to do later.

Attached also the patch for nge(4) that fixes resetting of
IFF_OACTIVE and if_itimer in nge_txeof().  Previously, the
if_timer was always bogusly reset to 0, and we could reset
IFF_OACTIVE when we didn't free any mbufs (again, this is
most apparent with DEVICE_POLLING -- we break out the loop
if NGE_OWNDESC(cur_tx) bit is set, meaning that the current
descriptor wasn't yet DMA'ed, and still treated this as if
we did some TX work, and reset IFF_OACTIVE).  (I didn't
convert it to replace indexes with the pointers for consumer
and producer, although this is possible, and should probably
be faster.)

The rl(4) is buggier: the TX ring is only 4 elements, and
the driver does not have a producer/consumer gap, so the
producer can catch the consumer, which itself is normal.
So rl_txeof() always loops through TX list, but doesn't
detect a case when the TX list is empty, resulting in
PR kern/64975, visible under DEVICE_POLLING.  Also, when
TX list is full, we could break out the loop without
freeing any mbufs, and still resetting if_timer to zero
(Luigi attempted to fix this in rev. 1.71, but the fix
is incomplete).  Attached is the patch that should fix
both issues.

The dc(4) driver looks the most correct dealing with
IFF_OACTIVE and if_timer.

As an aside, lot of drivers that support auto-padding
still have useless xx_pad[XX_MIN_FRAMELEN] element in
their xx_chain_data structure, namely, dc(4), ste(4),
and tl(4) have it.  I think these were copied from
some template driver, is it safe to remove them?


Cheers,
-- 
Ruslan Ermilov
ru at FreeBSD.org
FreeBSD committer
-------------- next part --------------
Index: sys/pci/if_ste.c
===================================================================
RCS file: /home/ncvs/src/sys/pci/if_ste.c,v
retrieving revision 1.67
diff -u -r1.67 if_ste.c
--- sys/pci/if_ste.c	1 Apr 2004 12:55:38 -0000	1.67
+++ sys/pci/if_ste.c	2 Apr 2004 08:13:26 -0000
@@ -893,35 +893,23 @@
 ste_txeof(sc)
 	struct ste_softc	*sc;
 {
-	struct ste_chain	*cur_tx = NULL;
+	struct ste_chain	*cur_tx;
 	struct ifnet		*ifp;
-	int			idx;
 
 	ifp = &sc->arpcom.ac_if;
 
-	idx = sc->ste_cdata.ste_tx_cons;
-	while(idx != sc->ste_cdata.ste_tx_prod) {
-		cur_tx = &sc->ste_cdata.ste_tx_chain[idx];
-
+	cur_tx = sc->ste_cdata.ste_tx_cons;
+	while (cur_tx->ste_mbuf != NULL) {
 		if (!(cur_tx->ste_ptr->ste_ctl & STE_TXCTL_DMADONE))
 			break;
-
-		if (cur_tx->ste_mbuf != NULL) {
-			m_freem(cur_tx->ste_mbuf);
-			cur_tx->ste_mbuf = NULL;
-		}
-
+		m_freem(cur_tx->ste_mbuf);
+		cur_tx->ste_mbuf = NULL;
 		ifp->if_opackets++;
-
-		sc->ste_cdata.ste_tx_cnt--;
-		STE_INC(idx, STE_TX_LIST_CNT);
-		ifp->if_timer = 0;
-	}
-
-	sc->ste_cdata.ste_tx_cons = idx;
-
-	if (cur_tx != NULL)
 		ifp->if_flags &= ~IFF_OACTIVE;
+		cur_tx = cur_tx->ste_next;
+	}
+	sc->ste_cdata.ste_tx_cons = cur_tx;
+	ifp->if_timer = (cur_tx->ste_mbuf == NULL) ? 0 : 5;
 
 	return;
 }
@@ -1280,22 +1268,13 @@
 		cd->ste_tx_chain[i].ste_phys = vtophys(&ld->ste_tx_list[i]);
 		if (i == (STE_TX_LIST_CNT - 1))
 			cd->ste_tx_chain[i].ste_next =
+			cd->ste_tx_prod = cd->ste_tx_cons =
 			    &cd->ste_tx_chain[0];
 		else
 			cd->ste_tx_chain[i].ste_next =
 			    &cd->ste_tx_chain[i + 1];
-		if (i == 0)
-			cd->ste_tx_chain[i].ste_prev =
-			     &cd->ste_tx_chain[STE_TX_LIST_CNT - 1];
-		else
-			cd->ste_tx_chain[i].ste_prev =
-			     &cd->ste_tx_chain[i - 1];
 	}
 
-	cd->ste_tx_prod = 0;
-	cd->ste_tx_cons = 0;
-	cd->ste_tx_cnt = 0;
-
 	return;
 }
 
@@ -1379,7 +1358,7 @@
 	STE_SETBIT4(sc, STE_DMACTL, STE_DMACTL_TXDMA_UNSTALL);
 	STE_SETBIT4(sc, STE_DMACTL, STE_DMACTL_TXDMA_UNSTALL);
 	ste_wait(sc);
-	sc->ste_tx_prev_idx=-1;
+	sc->ste_tx_prev = NULL;
 
 	/* Enable receiver and transmitter */
 	CSR_WRITE_2(sc, STE_MACCTL0, 0);
@@ -1610,8 +1589,7 @@
 {
 	struct ste_softc	*sc;
 	struct mbuf		*m_head = NULL;
-	struct ste_chain	*cur_tx = NULL;
-	int			idx;
+	struct ste_chain	*cur_tx;
 
 	sc = ifp->if_softc;
 	STE_LOCK(sc);
@@ -1626,27 +1604,19 @@
 		return;
 	}
 
-	idx = sc->ste_cdata.ste_tx_prod;
-
-	while(sc->ste_cdata.ste_tx_chain[idx].ste_mbuf == NULL) {
-
-		if ((STE_TX_LIST_CNT - sc->ste_cdata.ste_tx_cnt) < 3) {
-			ifp->if_flags |= IFF_OACTIVE;
-			break;
-		}
+	cur_tx = sc->ste_cdata.ste_tx_prod;
+	while (cur_tx->ste_mbuf == NULL) {
 
 		IF_DEQUEUE(&ifp->if_snd, m_head);
 		if (m_head == NULL)
 			break;
 
-		cur_tx = &sc->ste_cdata.ste_tx_chain[idx];
-
 		if (ste_encap(sc, cur_tx, m_head) != 0)
 			break;
 
 		cur_tx->ste_ptr->ste_next = 0;
 
-		if(sc->ste_tx_prev_idx < 0){
+		if (sc->ste_tx_prev == NULL) {
 			cur_tx->ste_ptr->ste_ctl = STE_TXCTL_DMAINTR | 1;
 			/* Load address of the TX list */
 			STE_SETBIT4(sc, STE_DMACTL, STE_DMACTL_TXDMA_STALL);
@@ -1662,12 +1632,11 @@
 			ste_wait(sc);
 		}else{
 			cur_tx->ste_ptr->ste_ctl = STE_TXCTL_DMAINTR | 1;
-			sc->ste_cdata.ste_tx_chain[
-			    sc->ste_tx_prev_idx].ste_ptr->ste_next
+			sc->ste_tx_prev->ste_ptr->ste_next
 				= cur_tx->ste_phys;
 		}
 
-		sc->ste_tx_prev_idx=idx;
+		sc->ste_tx_prev = cur_tx;
 
 		/*
 		 * If there's a BPF listener, bounce a copy of this frame
@@ -1675,11 +1644,13 @@
 	 	 */
 		BPF_MTAP(ifp, cur_tx->ste_mbuf);
 
-		STE_INC(idx, STE_TX_LIST_CNT);
-		sc->ste_cdata.ste_tx_cnt++;
 		ifp->if_timer = 5;
-		sc->ste_cdata.ste_tx_prod = idx;
+		cur_tx = cur_tx->ste_next;
 	}
+
+	if (cur_tx->ste_mbuf != NULL)
+		ifp->if_flags |= IFF_OACTIVE;
+	sc->ste_cdata.ste_tx_prod = cur_tx;
 
 	STE_UNLOCK(sc);
 
Index: sys/pci/if_stereg.h
===================================================================
RCS file: /home/ncvs/src/sys/pci/if_stereg.h,v
retrieving revision 1.13
diff -u -r1.13 if_stereg.h
--- sys/pci/if_stereg.h	31 Mar 2004 20:39:20 -0000	1.13
+++ sys/pci/if_stereg.h	2 Apr 2004 07:11:41 -0000
@@ -467,8 +467,6 @@
 #define ETHER_ALIGN		2
 #define STE_RX_LIST_CNT		64
 #define STE_TX_LIST_CNT		64
-#define STE_INC(x, y)		(x) = (x + 1) % y
-#define STE_NEXT(x, y)		(x + 1) % y
 
 struct ste_type {
 	u_int16_t		ste_vid;
@@ -486,7 +484,6 @@
 	struct ste_desc		*ste_ptr;
 	struct mbuf		*ste_mbuf;
 	struct ste_chain	*ste_next;
-	struct ste_chain	*ste_prev;
 	u_int32_t		ste_phys;
 };
 
@@ -501,9 +498,8 @@
 	struct ste_chain	 ste_tx_chain[STE_TX_LIST_CNT];
 	struct ste_chain_onefrag *ste_rx_head;
 
-	int			ste_tx_prod;
-	int			ste_tx_cons;
-	int			ste_tx_cnt;
+	struct ste_chain	*ste_tx_prod;
+	struct ste_chain	*ste_tx_cons;
 };
 
 struct ste_softc {
@@ -520,7 +516,7 @@
 	int			ste_tx_thresh;
 	u_int8_t		ste_link;
 	int			ste_if_flags;
-	int			ste_tx_prev_idx;
+	struct ste_chain	*ste_tx_prev;
 	struct ste_list_data	*ste_ldata;
 	struct ste_chain_data	ste_cdata;
 	struct callout_handle	ste_stat_ch;
-------------- next part --------------
Index: sys/dev/nge/if_nge.c
===================================================================
RCS file: /home/ncvs/src/sys/dev/nge/if_nge.c,v
retrieving revision 1.55
diff -u -r1.55 if_nge.c
--- sys/dev/nge/if_nge.c	30 Mar 2004 10:24:52 -0000	1.55
+++ sys/dev/nge/if_nge.c	2 Apr 2004 11:20:16 -0000
@@ -1418,9 +1418,6 @@
 
 	ifp = &sc->arpcom.ac_if;
 
-	/* Clear the timeout timer. */
-	ifp->if_timer = 0;
-
 	/*
 	 * Go through our tx list and free mbufs for those
 	 * frames that have been transmitted.
@@ -1457,13 +1454,14 @@
 
 		sc->nge_cdata.nge_tx_cnt--;
 		NGE_INC(idx, NGE_TX_LIST_CNT);
-		ifp->if_timer = 0;
 	}
 
-	sc->nge_cdata.nge_tx_cons = idx;
-
-	if (cur_tx != NULL)
+	if (idx != sc->nge_cdata.nge_tx_cons) {
+		/* Some buffers have been freed. */
+		sc->nge_cdata.nge_tx_cons = idx;
 		ifp->if_flags &= ~IFF_OACTIVE;
+	}
+	ifp->if_timer = (idx == sc->nge_cdata.nge_tx_prod) ? 0 : 5;
 
 	return;
 }
-------------- next part --------------
Index: sys/pci/if_rl.c
===================================================================
RCS file: /home/ncvs/src/sys/pci/if_rl.c,v
retrieving revision 1.133
diff -u -r1.133 if_rl.c
--- sys/pci/if_rl.c	17 Mar 2004 17:50:53 -0000	1.133
+++ sys/pci/if_rl.c	2 Apr 2004 09:57:11 -0000
@@ -1359,6 +1359,8 @@
 	 * frames that have been uploaded.
 	 */
 	do {
+		if (RL_LAST_TXMBUF(sc) == NULL)
+			break;
 		txstat = CSR_READ_4(sc, RL_LAST_TXSTAT(sc));
 		if (!(txstat & (RL_TXSTAT_TX_OK|
 		    RL_TXSTAT_TX_UNDERRUN|RL_TXSTAT_TXABRT)))
@@ -1366,12 +1368,10 @@
 
 		ifp->if_collisions += (txstat & RL_TXSTAT_COLLCNT) >> 24;
 
-		if (RL_LAST_TXMBUF(sc) != NULL) {
-			bus_dmamap_unload(sc->rl_tag, RL_LAST_DMAMAP(sc));
-			bus_dmamap_destroy(sc->rl_tag, RL_LAST_DMAMAP(sc));
-			m_freem(RL_LAST_TXMBUF(sc));
-			RL_LAST_TXMBUF(sc) = NULL;
-		}
+		bus_dmamap_unload(sc->rl_tag, RL_LAST_DMAMAP(sc));
+		bus_dmamap_destroy(sc->rl_tag, RL_LAST_DMAMAP(sc));
+		m_freem(RL_LAST_TXMBUF(sc));
+		RL_LAST_TXMBUF(sc) = NULL;
 		if (txstat & RL_TXSTAT_TX_OK)
 			ifp->if_opackets++;
 		else {
@@ -1396,8 +1396,7 @@
 		ifp->if_flags &= ~IFF_OACTIVE;
 	} while (sc->rl_cdata.last_tx != sc->rl_cdata.cur_tx);
 
-	ifp->if_timer =
-	    (sc->rl_cdata.last_tx == sc->rl_cdata.cur_tx) ? 0 : 5;
+	ifp->if_timer = (RL_LAST_TXMBUF(sc) == NULL) ? 0 : 5;
 
 	return;
 }
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 187 bytes
Desc: not available
Url : http://lists.freebsd.org/pipermail/freebsd-net/attachments/20040402/ff3a3f7b/attachment.bin


More information about the freebsd-net mailing list