TCP/UDP cksum offload on hme(4)
Pyun YongHyeon
yongari at kt-is.co.kr
Thu Jun 17 06:30:59 GMT 2004
On Thu, Jun 17, 2004 at 04:48:00AM +0200, Thomas Moestl wrote:
> Hello,
>
> On Wed, 2004/06/16 at 12:45:20 +0900, Pyun YongHyeon wrote:
> > 1. UDP TX cksum offload has an issue. The hardware doesn't flip the
> > cksum bits when the computed cksum is 0x0000. I have no idea this
> > is the reason why STP2002QFP says it supports only TCP RX/TX cksum.
> > (pp. 29, pp. 40, pp. 42)
>
> It would probably be better not to advertise CSUM_UDP by default then,
> so that the checksum will never be unset unless that is desired. One
> of the IFF_LINK* flags or a sysctl could be used to allow the user to
> enable it anyway.
>
Ok. The patch was modified to use IFF_LINK0. UDP TX cksumming is now
disabled by default.
> > +
> > + for(; m && m->m_len == 0; m = m->m_next)
> > + ;
> > + if (m == NULL || m->m_len < ETHER_HDR_LEN) {
> > + printf("hme_txcksum: m_len < ETHER_HDR_LEN\n");
> > + return; /* cksum will be corrupted */
> > + }
>
> It would probably be best to try as hard as possible to not fail
> here. Since the conditions in which we would fail because of mbuf
> sizes should rarely be true, maybe a good option would be to just
> sacrifice some CPU time and do an m_pullup() then to rectify the mbuf
> chain according to our needs (hme_txcksum would need to return a
> struct mbuf * for that)?
>
Because we will have the ALTQ feature in the near future, the driver
can't call IF_PREPEND anymore. So if m_pullup() failed, the packet would
be dropped. I thought we may not encounter such mbuf fragmentation
in real environments. (I didn't see any of these fragmented conditions
on my setup.)
If we used m_pullup(), it would make the ALTQ-enabled hme driver useless.
> > +
> > + hlen = ip->ip_hl << 2;
> > + pktlen -= sizeof(struct ether_header);
> > + if (hlen < sizeof(struct ip))
> > + return;
> > + if (ntohs(ip->ip_len) < hlen)
> > + return;
>
> This test is a bit redundant with the TCP/UDP ones below.
>
When we have a packet that contains an IP header with options plus a
corrupted TCP/UDP header (i.e. one shorter than the size of the header),
shouldn't the check still be performed?
> > + if (ntohs(ip->ip_len) != pktlen)
> > + return;
> > + if (ip->ip_off & htons(IP_MF | IP_OFFMASK))
> > + return; /* can't handle fragmented packet */
> > +
> > + switch (ip->ip_p) {
> > + case IPPROTO_TCP:
> > + if (pktlen < (hlen + sizeof(struct tcphdr)))
> > + return;
> > + break;
> > + case IPPROTO_UDP:
> > + if (pktlen < (hlen + sizeof(struct udphdr)))
> > + return;
> > + uh = (struct udphdr *)((caddr_t)ip + hlen);
> > + if (uh->uh_sum == 0)
> > + return; /* no checksum */
> > + break;
> > + default:
> > + return;
> > + }
Thank you very much for your detailed explanation and suggestions.
New patch is available at:
http://www.kr.freebsd.org/~yongari/hme.freebsd.diff2
Thanks again.
Regards,
Pyun YongHyeon
--
Pyun YongHyeon <http://www.kr.freebsd.org/~yongari>
-------------- next part --------------
--- if_hme.c.orig Tue Jun 15 17:05:24 2004
+++ if_hme.c Thu Jun 17 14:55:36 2004
@@ -54,9 +54,15 @@
* maximum packet size (this is not verified). Buffers starting on odd
* boundaries must be mapped so that the burst can start on a natural boundary.
*
- * Checksumming is not yet supported.
+ * STP2002QFP-UG says that Ethernet hardware supports TCP checksum offload.
+ * In reality, we can do the same technique for UDP datagram too. However,
+ * the hardware doesn't compensate the cksum for UDP datagram which can yield
+ * to 0x0.
+ * When the UDP datagram with cksum 0 sent to a system, it would think it
+ * received a datagram with 'no cksum'. I don't know this is compulsory reason
+ * to disable UDP cksum offload capability.
*/
-
+#define HME_CSUM_FEATURES (CSUM_TCP)
#define HMEDEBUG
#define KTR_HME KTR_CT2 /* XXX */
@@ -80,6 +86,12 @@
#include <net/if_media.h>
#include <net/if_vlan_var.h>
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#include <netinet/tcp.h>
+#include <netinet/udp.h>
+
#include <dev/mii/mii.h>
#include <dev/mii/miivar.h>
@@ -106,10 +118,12 @@
static void hme_mediastatus(struct ifnet *, struct ifmediareq *);
static int hme_load_txmbuf(struct hme_softc *, struct mbuf *);
-static void hme_read(struct hme_softc *, int, int);
+static void hme_read(struct hme_softc *, int, int, u_int32_t);
static void hme_eint(struct hme_softc *, u_int);
static void hme_rint(struct hme_softc *);
static void hme_tint(struct hme_softc *);
+static void hme_txcksum(struct mbuf *, u_int32_t *);
+static void hme_rxcksum(struct mbuf *, u_int32_t);
static void hme_cdma_callback(void *, bus_dma_segment_t *, int, int);
static void hme_rxdma_callback(void *, bus_dma_segment_t *, int,
@@ -120,6 +134,7 @@
devclass_t hme_devclass;
static int hme_nerr;
+static int hme_csum_features = HME_CSUM_FEATURES;
DRIVER_MODULE(miibus, hme, miibus_driver, miibus_devclass, 0, 0);
MODULE_DEPEND(hme, miibus, 1, 1, 1);
@@ -316,11 +331,12 @@
ether_ifattach(ifp, sc->sc_arpcom.ac_enaddr);
/*
- * Tell the upper layer(s) we support long frames.
+ * Tell the upper layer(s) we support long frames and cksum offloads.
*/
ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
- ifp->if_capabilities |= IFCAP_VLAN_MTU;
- ifp->if_capenable |= IFCAP_VLAN_MTU;
+ ifp->if_capabilities |= IFCAP_VLAN_MTU | IFCAP_HWCSUM;
+ ifp->if_hwassist |= hme_csum_features;
+ ifp->if_capenable |= IFCAP_VLAN_MTU | IFCAP_HWCSUM;
callout_init(&sc->sc_tick_ch, 0);
return (0);
@@ -656,7 +672,7 @@
struct hme_softc *sc = (struct hme_softc *)xsc;
struct ifnet *ifp = &sc->sc_arpcom.ac_if;
u_int8_t *ea;
- u_int32_t v;
+ u_int32_t n, v;
/*
* Initialization sequence. The numbered steps below correspond
@@ -740,6 +756,15 @@
v = HME_SEB_CFG_BURST64;
break;
}
+ /*
+ * Blindly setting 64bit transfers may hang PCI cards(Cheerio?).
+ * Allowing 64bit transfers breaks TX checksum offload as well.
+ * Don't know this comes from hardware bug or driver's DMAing
+ * scheme.
+ *
+ * if (sc->sc_pci == 0)
+ * v |= HME_SEB_CFG_64BIT;
+ */
HME_SEB_WRITE_4(sc, HME_SEBI_CFG, v);
/* step 9. ETX Configuration: use mostly default values */
@@ -775,6 +800,10 @@
/* Enable DMA, fix RX first byte offset. */
v &= ~HME_ERX_CFG_FBO_MASK;
v |= HME_ERX_CFG_DMAENABLE | (HME_RXOFFS << HME_ERX_CFG_FBO_SHIFT);
+ /* RX TCP/UDP cksum offset */
+ n = (ETHER_HDR_LEN + sizeof(struct ip)) / 2;
+ n = (n << HME_ERX_CFG_CSUMSTART_SHIFT) & HME_ERX_CFG_CSUMSTART_MASK;
+ v |= n;
CTR1(KTR_HME, "hme_init: programming ERX_CFG to %x", (u_int)v);
HME_ERX_WRITE_4(sc, HME_ERXI_CFG, v);
@@ -893,6 +922,46 @@
("hme_txdma_callback: missed end of packet!"));
}
+/* TX TCP/UDP cksum */
+static void
+hme_txcksum(struct mbuf *m, u_int32_t *cflags)
+{
+ struct ip *ip;
+ u_int32_t offset, offset2;
+ caddr_t p;
+
+ if ((m->m_pkthdr.csum_flags & hme_csum_features) == 0)
+ return;
+
+ for(; m && m->m_len == 0; m = m->m_next)
+ ;
+ if (m == NULL || m->m_len < ETHER_HDR_LEN) {
+ printf("hme_txcksum: m_len < ETHER_HDR_LEN\n");
+ return; /* cksum will be corrupted */
+ }
+ if (m->m_len < ETHER_HDR_LEN + sizeof(u_int32_t)) {
+ if (m->m_len != ETHER_HDR_LEN) {
+ printf("hme_txcksum: m_len != ETHER_HDR_LEN\n");
+ return; /* cksum will be corrupted */
+ }
+ /* XXX */
+ for(m = m->m_next; m && m->m_len == 0; m = m->m_next)
+ ;
+ if (m == NULL)
+ return; /* cksum will be corrupted */
+ ip = mtod(m, struct ip *);
+ } else {
+ p = mtod(m, caddr_t);
+ p += ETHER_HDR_LEN;
+ ip = (struct ip *)p;
+ }
+ offset2 = m->m_pkthdr.csum_data;
+ offset = (ip->ip_hl << 2) + ETHER_HDR_LEN;
+ *cflags = offset << HME_XD_TXCKSUM_SSHIFT;
+ *cflags |= ((offset + offset2) << HME_XD_TXCKSUM_OSHIFT);
+ *cflags |= HME_XD_TXCKSUM;
+}
+
/*
* Routine to dma map an mbuf chain, set up the descriptor rings accordingly and
* start the transmission.
@@ -905,11 +974,12 @@
struct hme_txdma_arg cba;
struct hme_txdesc *td;
int error, si, ri;
- u_int32_t flags;
+ u_int32_t flags, cflags = 0;
si = sc->sc_rb.rb_tdhead;
if ((td = STAILQ_FIRST(&sc->sc_rb.rb_txfreeq)) == NULL)
return (-1);
+ hme_txcksum(m0, &cflags);
td->htx_m = m0;
cba.hta_sc = sc;
cba.hta_htx = td;
@@ -933,7 +1003,7 @@
do {
ri = (ri + HME_NTXDESC - 1) % HME_NTXDESC;
flags = HME_XD_GETFLAGS(sc->sc_pci, sc->sc_rb.rb_txd, ri) |
- HME_XD_OWN;
+ HME_XD_OWN | cflags;
CTR3(KTR_HME, "hme_load_mbuf: activating ri %d, si %d (%#x)",
ri, si, flags);
HME_XD_SETFLAGS(sc->sc_pci, sc->sc_rb.rb_txd, ri, flags);
@@ -951,7 +1021,7 @@
* Pass a packet to the higher levels.
*/
static void
-hme_read(struct hme_softc *sc, int ix, int len)
+hme_read(struct hme_softc *sc, int ix, int len, u_int32_t flags)
{
struct ifnet *ifp = &sc->sc_arpcom.ac_if;
struct mbuf *m;
@@ -986,6 +1056,9 @@
m->m_pkthdr.rcvif = ifp;
m->m_pkthdr.len = m->m_len = len + HME_RXOFFS;
m_adj(m, HME_RXOFFS);
+ /* RX TCP/UDP cksum */
+ if (ifp->if_capenable & IFCAP_RXCSUM)
+ hme_rxcksum(m, flags);
/* Pass the packet up. */
(*ifp->if_input)(ifp, m);
}
@@ -1108,6 +1181,71 @@
}
/*
+ * RX TCP/UDP cksumming
+ */
+static void
+hme_rxcksum(struct mbuf *m, u_int32_t flags)
+{
+ struct ether_header *eh;
+ struct ip *ip;
+ struct udphdr *uh;
+ int32_t hlen, len, pktlen;
+ u_int16_t cksum, *opts;
+ u_int32_t temp32;
+
+ pktlen = m->m_pkthdr.len;
+ if (pktlen < sizeof(struct ether_header) + sizeof(struct ip))
+ return;
+ eh = mtod(m, struct ether_header *);
+ if (eh->ether_type != htons(ETHERTYPE_IP))
+ return;
+ ip = (struct ip *)(eh + 1);
+ if (ip->ip_v != IPVERSION)
+ return;
+
+ hlen = ip->ip_hl << 2;
+ pktlen -= sizeof(struct ether_header);
+ if (hlen < sizeof(struct ip))
+ return;
+ if (ntohs(ip->ip_len) < hlen)
+ return;
+ if (ntohs(ip->ip_len) != pktlen)
+ return;
+ if (ip->ip_off & htons(IP_MF | IP_OFFMASK))
+ return; /* can't handle fragmented packet */
+
+ switch (ip->ip_p) {
+ case IPPROTO_TCP:
+ if (pktlen < (hlen + sizeof(struct tcphdr)))
+ return;
+ break;
+ case IPPROTO_UDP:
+ if (pktlen < (hlen + sizeof(struct udphdr)))
+ return;
+ uh = (struct udphdr *)((caddr_t)ip + hlen);
+ if (uh->uh_sum == 0)
+ return; /* no checksum */
+ break;
+ default:
+ return;
+ }
+
+ cksum = ~(flags & HME_XD_RXCKSUM);
+ /* cksum fixup for IP options */
+ len = hlen - sizeof(struct ip);
+ if (len > 0) {
+ opts = (u_int16_t *)(ip + 1);
+ for (; len > 0; len -= sizeof(u_int16_t), opts++) {
+ temp32 = cksum - *opts;
+ temp32 = (temp32 >> 16) + (temp32 & 65535);
+ cksum = temp32 & 65535;
+ }
+ }
+ m->m_pkthdr.csum_flags |= CSUM_DATA_VALID;
+ m->m_pkthdr.csum_data = cksum;
+}
+
+/*
* Receive interrupt.
*/
static void
@@ -1137,7 +1275,7 @@
hme_discard_rxbuf(sc, ri);
} else {
len = HME_XD_DECODE_RSIZE(flags);
- hme_read(sc, ri, len);
+ hme_read(sc, ri, len, flags);
}
}
if (progress) {
@@ -1373,6 +1511,10 @@
*/
hme_init(sc);
}
+ if ((ifp->if_flags & IFF_LINK0) != 0)
+ hme_csum_features |= CSUM_UDP;
+ else
+ hme_csum_features &= ~CSUM_UDP;
#ifdef HMEDEBUG
sc->sc_debug = (ifp->if_flags & IFF_DEBUG) != 0 ? 1 : 0;
#endif
@@ -1386,6 +1528,13 @@
case SIOCGIFMEDIA:
case SIOCSIFMEDIA:
error = ifmedia_ioctl(ifp, ifr, &sc->sc_mii->mii_media, cmd);
+ break;
+ case SIOCSIFCAP:
+ ifp->if_capenable = ifr->ifr_reqcap;
+ if (ifp->if_capenable & IFCAP_TXCSUM)
+ ifp->if_hwassist = hme_csum_features;
+ else
+ ifp->if_hwassist = 0;
break;
default:
error = ether_ioctl(ifp, cmd, data);
--- if_hme_sbus.c.orig Tue Jun 15 17:05:24 2004
+++ if_hme_sbus.c Tue Jun 15 17:05:24 2004
@@ -244,8 +244,14 @@
burst = sbus_get_burstsz(dev);
/* Translate into plain numerical format */
- sc->sc_burst = (burst & SBUS_BURST_32) ? 32 :
- (burst & SBUS_BURST_16) ? 16 : 0;
+ if ((burst & SBUS_BURST_64))
+ sc->sc_burst = 64;
+ else if ((burst & SBUS_BURST_32))
+ sc->sc_burst = 32;
+ else if ((burst & SBUS_BURST_16))
+ sc->sc_burst = 16;
+ else
+ sc->sc_burst = 0;
sc->sc_pci = 0; /* XXX: should all be done in bus_dma. */
sc->sc_dev = dev;
--- if_hmereg.h.orig Tue Jun 15 17:05:24 2004
+++ if_hmereg.h Thu Jun 17 14:23:02 2004
@@ -54,8 +54,8 @@
#define HME_SEB_CFG_BURST16 0x00000000 /* 16 byte bursts */
#define HME_SEB_CFG_BURST32 0x00000001 /* 32 byte bursts */
#define HME_SEB_CFG_BURST64 0x00000002 /* 64 byte bursts */
-#define HME_SEB_CFG_64BIT 0x00000004 /* ? */
-#define HME_SEB_CFG_PARITY 0x00000008 /* ? */
+#define HME_SEB_CFG_64BIT 0x00000004 /* extended transfer mode */
+#define HME_SEB_CFG_PARITY 0x00000008 /* parity check for DVMA/PIO */
#define HME_SEB_STAT_GOTFRAME 0x00000001 /* frame received */
#define HME_SEB_STAT_RCNTEXP 0x00000002 /* rx frame count expired */
@@ -154,7 +154,7 @@
#define HME_ERXI_FIFO_WPTR (3*4) /* FIFO write pointer */
#define HME_ERXI_FIFO_SWPTR (4*4) /* FIFO shadow write pointer */
#define HME_ERXI_FIFO_RPTR (5*4) /* FIFO read pointer */
-#define HME_ERXI_FIFO_SRPTR (6*4) /* FIFO shadow read pointer */
+#define HME_ERXI_FIFO_PKTCNT (6*4) /* FIFO packet counter */
#define HME_ERXI_STATEMACHINE (7*4) /* State machine */
/* RXI_CFG bits */
@@ -166,7 +166,8 @@
#define HME_ERX_CFG_RINGSIZE128 0x00000400 /* Descriptor ring size: 128 */
#define HME_ERX_CFG_RINGSIZE256 0x00000600 /* Descriptor ring size: 256 */
#define HME_ERX_CFG_RINGSIZEMSK 0x00000600 /* Descriptor ring size: 256 */
-#define HME_ERX_CFG_CSUMSTART 0x007f0000 /* cksum offset */
+#define HME_ERX_CFG_CSUMSTART_MASK 0x007f0000 /* cksum offset mask */
+#define HME_ERX_CFG_CSUMSTART_SHIFT 16
/*
* HME MAC-core register offsets
@@ -289,6 +290,8 @@
#define HME_XD_RXLENMSK 0x3fff0000 /* packet length mask (rx) */
#define HME_XD_RXLENSHIFT 16
#define HME_XD_TXLENMSK 0x00003fff /* packet length mask (tx) */
+#define HME_XD_TXCKSUM_SSHIFT 14
+#define HME_XD_TXCKSUM_OSHIFT 20
#define HME_XD_RXCKSUM 0x0000ffff /* packet checksum (rx) */
/* Macros to encode/decode the receive buffer size from the flags field */
More information about the freebsd-sparc64
mailing list