TCP/UDP cksum offload on hme(4)

Scott Long scottl at freebsd.org
Wed Jun 16 04:53:43 GMT 2004


Excellent work, thanks a lot!  Have you sent this to Bill Paul
<wpaul at freebsd.org> for review?

Scott

Pyun YongHyeon wrote:
> Hello All,
> 
> I made a patch that enables cksum offloads on hme(4). Originally
> the patch was made for OpenBSD due to FreeBSD's lack of FAS366
> support. Now that esp(4) has been ported by Scott, I could port the patch
> from my OpenBSD patch. During a simple test phase, I didn't notice any
> problems.
> 
>  1. UDP TX cksum offload has an issue. The hardware doesn't flip the
>     cksum bits when the computed cksum is 0x0000. I have no idea whether
>     this is the reason why STP2002QFP says it supports only TCP RX/TX cksum.
>     (pp. 29, pp. 40, pp. 42)
> 
>  2. The patch was tested on Ultra2(2x300MHz, FAS366). I'd like to
>     hear ok/nok results from PCI based sparc64 users.
>     The dmesg of the Ultra2 is available at:
>     http://www.kr.freebsd.org/~yognari/dmesg.u2.txt
> 
>  3. I couldn't feel a performance boost from the cksum offloads, but
>     enabling them reduced system load considerably.
> 
> The attached patch is for -CURRENT(2004.06.07), and is also available at:
> http://www.kr.freebsd.org/~yognari/hme.freebsd.diff
> 
> Corrections, suggestions welcome.
> 
> Thanks.
> 
> Regards,
> Pyun YongHyeon
> 
> 
> ------------------------------------------------------------------------
> 
> --- if_hme.c.orig	Wed Jun 16 12:16:56 2004
> +++ if_hme.c	Wed Jun 16 12:16:56 2004
> @@ -54,9 +54,15 @@
>   * maximum packet size (this is not verified). Buffers starting on odd
>   * boundaries must be mapped so that the burst can start on a natural boundary.
>   *
> - * Checksumming is not yet supported.
> + * STP2002QFP-UG says that Ethernet hardware supports TCP checksum offload.
> + * In reality, we can do the same technique for UDP datagram too. However,
> + * the hardware doesn't compensate the cksum for UDP datagram which can yield
> + * to 0x0.
> + * When the UDP datagram with cksum 0 sent to a system, it would think it
> + * received a datagram with 'no cksum'. I don't know this is compulsory reason
> + * to disable UDP cksum offload capability.
>   */
> -
> +#define HME_CSUM_FEATURES	(CSUM_TCP | CSUM_UDP)
>  #define HMEDEBUG
>  #define	KTR_HME		KTR_CT2		/* XXX */
>  
> @@ -80,6 +86,12 @@
>  #include <net/if_media.h>
>  #include <net/if_vlan_var.h>
>  
> +#include <netinet/in.h>
> +#include <netinet/in_systm.h>
> +#include <netinet/ip.h>
> +#include <netinet/tcp.h>
> +#include <netinet/udp.h>
> +
>  #include <dev/mii/mii.h>
>  #include <dev/mii/miivar.h>
>  
> @@ -106,10 +118,12 @@
>  static void	hme_mediastatus(struct ifnet *, struct ifmediareq *);
>  
>  static int	hme_load_txmbuf(struct hme_softc *, struct mbuf *);
> -static void	hme_read(struct hme_softc *, int, int);
> +static void	hme_read(struct hme_softc *, int, int, u_int32_t);
>  static void	hme_eint(struct hme_softc *, u_int);
>  static void	hme_rint(struct hme_softc *);
>  static void	hme_tint(struct hme_softc *);
> +static void	hme_txcksum(struct mbuf *, u_int32_t *);
> +static void	hme_rxcksum(struct mbuf *, u_int32_t);
>  
>  static void	hme_cdma_callback(void *, bus_dma_segment_t *, int, int);
>  static void	hme_rxdma_callback(void *, bus_dma_segment_t *, int,
> @@ -316,11 +330,12 @@
>  	ether_ifattach(ifp, sc->sc_arpcom.ac_enaddr);
>  
>  	/*
> -	 * Tell the upper layer(s) we support long frames.
> +	 * Tell the upper layer(s) we support long frames and cksum offloads.
>  	 */
>  	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
> -	ifp->if_capabilities |= IFCAP_VLAN_MTU;
> -	ifp->if_capenable |= IFCAP_VLAN_MTU;
> +	ifp->if_capabilities |= IFCAP_VLAN_MTU | IFCAP_HWCSUM;
> +	ifp->if_hwassist |= HME_CSUM_FEATURES;
> +	ifp->if_capenable |= IFCAP_VLAN_MTU | IFCAP_HWCSUM;
>  
>  	callout_init(&sc->sc_tick_ch, 0);
>  	return (0);
> @@ -656,7 +671,7 @@
>  	struct hme_softc *sc = (struct hme_softc *)xsc;
>  	struct ifnet *ifp = &sc->sc_arpcom.ac_if;
>  	u_int8_t *ea;
> -	u_int32_t v;
> +	u_int32_t n, v;
>  
>  	/*
>  	 * Initialization sequence. The numbered steps below correspond
> @@ -740,6 +755,15 @@
>  		v = HME_SEB_CFG_BURST64;
>  		break;
>  	}
> +	/*
> +	 * Blindly setting 64bit transfers may hang PCI cards(Cheerio?).
> +	 * Allowing 64bit transfers breaks TX checksum offload as well.
> +	 * Don't know this comes from hardware bug or driver's DMAing
> +	 * scheme.
> +	 *
> +	 * if (sc->sc_pci == 0)
> + 	 *	v |= HME_SEB_CFG_64BIT;
> +	 */
>  	HME_SEB_WRITE_4(sc, HME_SEBI_CFG, v);
>  
>  	/* step 9. ETX Configuration: use mostly default values */
> @@ -775,6 +799,12 @@
>  	/* Enable DMA, fix RX first byte offset. */
>  	v &= ~HME_ERX_CFG_FBO_MASK;
>  	v |= HME_ERX_CFG_DMAENABLE | (HME_RXOFFS << HME_ERX_CFG_FBO_SHIFT);
> +	/* RX TCP/UDP cksum offset */
> +	if (ifp->if_capenable & IFCAP_TXCSUM) {
> +		n = (ETHER_HDR_LEN + sizeof(struct ip)) / 2;
> +		n = (n << HME_ERX_CFG_CSUM_SHIFT) & HME_ERX_CFG_CSUMSTART;
> +		v |= n;
> +	}
>  	CTR1(KTR_HME, "hme_init: programming ERX_CFG to %x", (u_int)v);
>  	HME_ERX_WRITE_4(sc, HME_ERXI_CFG, v);
>  
> @@ -893,6 +923,55 @@
>  	    ("hme_txdma_callback: missed end of packet!"));
>  }
>  
> +/* TX TCP/UDP cksum */
> +static void
> +hme_txcksum(struct mbuf *m, u_int32_t *cflags)
> +{
> +	struct ip *ip;
> +	u_int32_t offset, offset2, csumflag;
> +	caddr_t p;
> +
> +	if ((m->m_pkthdr.csum_flags & CSUM_TCP)) {
> +		offset2 = offsetof(struct tcphdr, th_sum); 
> +		csumflag = HME_XD_TCPCKSUM;
> +	} else if((m->m_pkthdr.csum_flags & CSUM_UDP)) {
> +		offset2 = offsetof(struct udphdr, uh_sum);
> +		csumflag = HME_XD_UDPCKSUM;
> +	} else
> +		return;
> +
> +	for(; m && m->m_len == 0; m = m->m_next)
> +		;
> +	if (m == NULL || m->m_len < ETHER_HDR_LEN) {
> +		printf("hme_txcksum: m_len < ETHER_HDR_LEN\n");
> +		return; /* cksum will be corrupted */
> +	}
> +	if (m->m_len < ETHER_HDR_LEN + sizeof(u_int32_t)) {
> +		if (m->m_len != ETHER_HDR_LEN) {
> +			printf("hme_txcksum: m_len != ETHER_HDR_LEN\n");
> +			return;	/* cksum will be corrupted */
> +		}
> +		/* XXX */
> +		for(m = m->m_next; m && m->m_len == 0; m = m->m_next)
> +			;
> +		if (m == NULL)
> +			return; /* cksum will be corrupted */
> +		ip = mtod(m, struct ip *);
> +	} else {
> +		p = mtod(m, caddr_t);
> +		p += ETHER_HDR_LEN;
> +		ip = (struct ip *)p;
> +	}
> +	if ((ip->ip_hl << 2) == sizeof(*ip))
> +		*cflags = csumflag;
> +	else {
> +		offset = (ip->ip_hl << 2) + ETHER_HDR_LEN;
> +		*cflags = offset << HME_XD_TXCKSUM_SSHIFT;
> +		*cflags |= ((offset + offset2) << HME_XD_TXCKSUM_OSHIFT); 
> +		*cflags |= HME_XD_TXCKSUM;
> +	}
> +}
> +
>  /*
>   * Routine to dma map an mbuf chain, set up the descriptor rings accordingly and
>   * start the transmission.
> @@ -905,11 +984,12 @@
>  	struct hme_txdma_arg cba;
>  	struct hme_txdesc *td;
>  	int error, si, ri;
> -	u_int32_t flags;
> +	u_int32_t flags, cflags = 0;
>  
>  	si = sc->sc_rb.rb_tdhead;
>  	if ((td = STAILQ_FIRST(&sc->sc_rb.rb_txfreeq)) == NULL)
>  		return (-1);
> +	hme_txcksum(m0, &cflags);
>  	td->htx_m = m0;
>  	cba.hta_sc = sc;
>  	cba.hta_htx = td;
> @@ -933,7 +1013,7 @@
>  	do {
>  		ri = (ri + HME_NTXDESC - 1) % HME_NTXDESC;
>  		flags = HME_XD_GETFLAGS(sc->sc_pci, sc->sc_rb.rb_txd, ri) |
> -		    HME_XD_OWN;
> +		    HME_XD_OWN | cflags;
>  		CTR3(KTR_HME, "hme_load_mbuf: activating ri %d, si %d (%#x)",
>  		    ri, si, flags);
>  		HME_XD_SETFLAGS(sc->sc_pci, sc->sc_rb.rb_txd, ri, flags);
> @@ -951,7 +1031,7 @@
>   * Pass a packet to the higher levels.
>   */
>  static void
> -hme_read(struct hme_softc *sc, int ix, int len)
> +hme_read(struct hme_softc *sc, int ix, int len, u_int32_t flags)
>  {
>  	struct ifnet *ifp = &sc->sc_arpcom.ac_if;
>  	struct mbuf *m;
> @@ -986,6 +1066,9 @@
>  	m->m_pkthdr.rcvif = ifp;
>  	m->m_pkthdr.len = m->m_len = len + HME_RXOFFS;
>  	m_adj(m, HME_RXOFFS);
> +	/* RX TCP/UDP cksum */
> +	if (ifp->if_capenable & IFCAP_RXCSUM)
> +		hme_rxcksum(m, flags);
>  	/* Pass the packet up. */
>  	(*ifp->if_input)(ifp, m);
>  }
> @@ -1108,6 +1191,71 @@
>  }
>  
>  /*
> + * RX TCP/UDP cksumming 
> + */
> +static void
> +hme_rxcksum(struct mbuf *m, u_int32_t flags)
> +{
> +	struct ether_header *eh;
> +	struct ip *ip;
> +	struct udphdr *uh;
> +	int32_t hlen, len, pktlen;
> +	u_int16_t cksum, *opts;
> +	u_int32_t temp32;
> +
> +	pktlen = m->m_pkthdr.len;
> +	if (pktlen < sizeof(struct ether_header))
> +		return;
> +	eh = mtod(m, struct ether_header *);
> +	if (eh->ether_type != htons(ETHERTYPE_IP))
> +		return;
> +	ip = (struct ip *)(eh + 1);
> +	if (ip->ip_v != IPVERSION)
> +		return;
> +
> +	hlen = ip->ip_hl << 2;
> +	pktlen -= sizeof(struct ether_header);
> +	if (hlen < sizeof(struct ip))
> +		return;
> +	if (ntohs(ip->ip_len) < hlen)
> +		return;
> +	if (ntohs(ip->ip_len) != pktlen)
> +		return;
> +	if (ip->ip_off & htons(IP_MF | IP_OFFMASK))
> +		return;	/* can't handle fragmented packet */
> +
> +	switch (ip->ip_p) {
> +	case IPPROTO_TCP:
> +		if (pktlen < (hlen + sizeof(struct tcphdr)))
> +			return;
> +		break;
> +	case IPPROTO_UDP:
> +		if (pktlen < (hlen + sizeof(struct udphdr)))
> +			return;
> +		uh = (struct udphdr *)((caddr_t)ip + hlen);
> +		if (uh->uh_sum == 0)
> +			return; /* no checksum */
> +		break;
> +	default:
> +		return;
> +	}
> +
> +	cksum = ~(flags & HME_XD_RXCKSUM);
> +	/* cksum fixup for IP options */
> +	len = hlen - sizeof(struct ip);
> +	if (len > 0) {
> +		opts = (u_int16_t *)(ip + 1);
> +		for (; len > 0; len -= sizeof(u_int16_t), opts++) {
> +			temp32 = cksum - *opts;
> +			temp32 = (temp32 >> 16) + (temp32 & 65535);
> +			cksum = temp32 & 65535;
> +		}
> +	}
> +	m->m_pkthdr.csum_flags |= CSUM_DATA_VALID;
> +	m->m_pkthdr.csum_data = cksum;
> +}
> +
> +/*
>   * Receive interrupt.
>   */
>  static void
> @@ -1137,7 +1285,7 @@
>  			hme_discard_rxbuf(sc, ri);
>  		} else {
>  			len = HME_XD_DECODE_RSIZE(flags);
> -			hme_read(sc, ri, len);
> +			hme_read(sc, ri, len, flags);
>  		}
>  	}
>  	if (progress) {
> @@ -1386,6 +1534,15 @@
>  	case SIOCGIFMEDIA:
>  	case SIOCSIFMEDIA:
>  		error = ifmedia_ioctl(ifp, ifr, &sc->sc_mii->mii_media, cmd);
> +		break;
> +	case SIOCSIFCAP:
> +		ifp->if_capenable = ifr->ifr_reqcap;
> +		if (ifp->if_capenable & IFCAP_HWCSUM)
> +			ifp->if_hwassist = HME_CSUM_FEATURES;
> +		else
> +			ifp->if_hwassist = 0;
> +		if (ifp->if_flags & IFF_RUNNING)
> +			hme_init(sc);
>  		break;
>  	default:
>  		error = ether_ioctl(ifp, cmd, data);
> --- if_hme_sbus.c.orig	Wed Jun 16 12:16:56 2004
> +++ if_hme_sbus.c	Wed Jun 16 12:16:56 2004
> @@ -244,8 +244,14 @@
>  
>  	burst = sbus_get_burstsz(dev);
>  	/* Translate into plain numerical format */
> -	sc->sc_burst =  (burst & SBUS_BURST_32) ? 32 :
> -	    (burst & SBUS_BURST_16) ? 16 : 0;
> +	if ((burst & SBUS_BURST_64))
> +		sc->sc_burst = 64;
> +	else if ((burst & SBUS_BURST_32))
> +		sc->sc_burst = 32;
> +	else if ((burst & SBUS_BURST_16))
> +		sc->sc_burst = 16;
> +	else
> +		 sc->sc_burst = 0;
>  
>  	sc->sc_pci = 0;	/* XXX: should all be done in bus_dma. */
>  	sc->sc_dev = dev;
> --- if_hmereg.h.orig	Wed Jun 16 12:16:56 2004
> +++ if_hmereg.h	Wed Jun 16 12:16:56 2004
> @@ -54,8 +54,8 @@
>  #define HME_SEB_CFG_BURST16	0x00000000	/* 16 byte bursts */
>  #define HME_SEB_CFG_BURST32	0x00000001	/* 32 byte bursts */
>  #define HME_SEB_CFG_BURST64	0x00000002	/* 64 byte bursts */
> -#define HME_SEB_CFG_64BIT	0x00000004	/* ? */
> -#define HME_SEB_CFG_PARITY	0x00000008	/* ? */
> +#define HME_SEB_CFG_64BIT	0x00000004	/* extended transfer mode */
> +#define HME_SEB_CFG_PARITY	0x00000008	/* parity check for DVMA/PIO */
>  
>  #define HME_SEB_STAT_GOTFRAME	0x00000001	/* frame received */
>  #define HME_SEB_STAT_RCNTEXP	0x00000002	/* rx frame count expired */
> @@ -154,7 +154,7 @@
>  #define HME_ERXI_FIFO_WPTR	(3*4)		/* FIFO write pointer */
>  #define HME_ERXI_FIFO_SWPTR	(4*4)		/* FIFO shadow write pointer */
>  #define HME_ERXI_FIFO_RPTR	(5*4)		/* FIFO read pointer */
> -#define HME_ERXI_FIFO_SRPTR	(6*4)		/* FIFO shadow read pointer */
> +#define HME_ERXI_FIFO_PKTCNT	(6*4)		/* FIFO packet counter */
>  #define HME_ERXI_STATEMACHINE	(7*4)		/* State machine */
>  
>  /* RXI_CFG bits */
> @@ -167,6 +167,7 @@
>  #define HME_ERX_CFG_RINGSIZE256	0x00000600	/* Descriptor ring size: 256 */
>  #define HME_ERX_CFG_RINGSIZEMSK	0x00000600	/* Descriptor ring size: 256 */
>  #define HME_ERX_CFG_CSUMSTART	0x007f0000	/* cksum offset */
> +#define HME_ERX_CFG_CSUM_SHIFT	16
>  
>  /*
>   * HME MAC-core register offsets
> @@ -289,7 +290,11 @@
>  #define HME_XD_RXLENMSK	0x3fff0000	/* packet length mask (rx) */
>  #define HME_XD_RXLENSHIFT	16
>  #define HME_XD_TXLENMSK	0x00003fff	/* packet length mask (tx) */
> +#define HME_XD_TXCKSUM_SSHIFT	14
> +#define HME_XD_TXCKSUM_OSHIFT	20
>  #define HME_XD_RXCKSUM	0x0000ffff	/* packet checksum (rx) */
> +#define HME_XD_TCPCKSUM 0x13288000	/* precomputed tcp cksum */
> +#define HME_XD_UDPCKSUM 0x12888000	/* precomputed udp cksum */
>  
>  /* Macros to encode/decode the receive buffer size from the flags field */
>  #define HME_XD_ENCODE_RSIZE(sz)		\
> 
> 
> ------------------------------------------------------------------------
> 
> _______________________________________________
> freebsd-sparc64 at freebsd.org mailing list
> http://lists.freebsd.org/mailman/listinfo/freebsd-sparc64
> To unsubscribe, send any mail to "freebsd-sparc64-unsubscribe at freebsd.org"



More information about the freebsd-sparc64 mailing list