TCP/UDP cksum offload on hme(4)
Scott Long
scottl at freebsd.org
Wed Jun 16 04:53:43 GMT 2004
Excellent work, thanks a lot! Have you sent this to Bill Paul
<wpaul at freebsd.org> for review?
Scott
Pyun YongHyeon wrote:
> Hello All,
>
> I made a patch that enables cksum offloads on hme(4). Originally
> the patch was made for OpenBSD due to FreeBSD's lack of FAS366
> support. Now, we had esp(4) ported by Scott, I could port the patch
> from my OpenBSD patch. During simple test phase, I didn't notice
> problems.
>
> 1. UDP TX cksum offload has an issue. The hardware doesn't flip the
> cksum bits when the computed cksum is 0x0000. I have no idea this
> is the reason why STP2002QFP says it supports only TCP RX/TX cksum.
> (pp. 29, pp. 40, pp. 42)
>
> 2. The patch was tested on Ultra2(2x300MHz, FAS366). I'd like to
> hear ok/nok results from PCI based sparc64 users.
> The dmesg of the Ultra2 is available at:
> http:///www.kr.freebsd.org/~yognari/dmesg.u2.txt
>
> 3. I couldn't feel performance boost from the cksum offloads but
> enabling it reduced system loads considerably.
>
> The attached patch is for -CURRENT(2004.06.07), and is also available at:
> http://www.kr.freebsd.org/~yognari/hme.freebsd.diff
>
> Corrections, suggestions welcome.
>
> Thanks.
>
> Regards,
> Pyun YongHyeon
>
>
> ------------------------------------------------------------------------
>
> --- if_hme.c.orig Wed Jun 16 12:16:56 2004
> +++ if_hme.c Wed Jun 16 12:16:56 2004
> @@ -54,9 +54,15 @@
> * maximum packet size (this is not verified). Buffers starting on odd
> * boundaries must be mapped so that the burst can start on a natural boundary.
> *
> - * Checksumming is not yet supported.
> + * STP2002QFP-UG says that Ethernet hardware supports TCP checksum offload.
> + * In reality, we can do the same technique for UDP datagram too. However,
> + * the hardware doesn't compensate the cksum for UDP datagram which can yield
> + * to 0x0.
> + * When the UDP datagram with cksum 0 sent to a system, it would think it
> + * received a datagram with 'no cksum'. I don't know this is compulsory reason
> + * to disable UDP cksum offload capability.
> */
> -
> +#define HME_CSUM_FEATURES (CSUM_TCP | CSUM_UDP)
> #define HMEDEBUG
> #define KTR_HME KTR_CT2 /* XXX */
>
> @@ -80,6 +86,12 @@
> #include <net/if_media.h>
> #include <net/if_vlan_var.h>
>
> +#include <netinet/in.h>
> +#include <netinet/in_systm.h>
> +#include <netinet/ip.h>
> +#include <netinet/tcp.h>
> +#include <netinet/udp.h>
> +
> #include <dev/mii/mii.h>
> #include <dev/mii/miivar.h>
>
> @@ -106,10 +118,12 @@
> static void hme_mediastatus(struct ifnet *, struct ifmediareq *);
>
> static int hme_load_txmbuf(struct hme_softc *, struct mbuf *);
> -static void hme_read(struct hme_softc *, int, int);
> +static void hme_read(struct hme_softc *, int, int, u_int32_t);
> static void hme_eint(struct hme_softc *, u_int);
> static void hme_rint(struct hme_softc *);
> static void hme_tint(struct hme_softc *);
> +static void hme_txcksum(struct mbuf *, u_int32_t *);
> +static void hme_rxcksum(struct mbuf *, u_int32_t);
>
> static void hme_cdma_callback(void *, bus_dma_segment_t *, int, int);
> static void hme_rxdma_callback(void *, bus_dma_segment_t *, int,
> @@ -316,11 +330,12 @@
> ether_ifattach(ifp, sc->sc_arpcom.ac_enaddr);
>
> /*
> - * Tell the upper layer(s) we support long frames.
> + * Tell the upper layer(s) we support long frames and cksum offloads.
> */
> ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
> - ifp->if_capabilities |= IFCAP_VLAN_MTU;
> - ifp->if_capenable |= IFCAP_VLAN_MTU;
> + ifp->if_capabilities |= IFCAP_VLAN_MTU | IFCAP_HWCSUM;
> + ifp->if_hwassist |= HME_CSUM_FEATURES;
> + ifp->if_capenable |= IFCAP_VLAN_MTU | IFCAP_HWCSUM;
>
> callout_init(&sc->sc_tick_ch, 0);
> return (0);
> @@ -656,7 +671,7 @@
> struct hme_softc *sc = (struct hme_softc *)xsc;
> struct ifnet *ifp = &sc->sc_arpcom.ac_if;
> u_int8_t *ea;
> - u_int32_t v;
> + u_int32_t n, v;
>
> /*
> * Initialization sequence. The numbered steps below correspond
> @@ -740,6 +755,15 @@
> v = HME_SEB_CFG_BURST64;
> break;
> }
> + /*
> + * Blindly setting 64bit transfers may hang PCI cards(Cheerio?).
> + * Allowing 64bit transfers breaks TX checksum offload as well.
> + * Don't know this comes from hardware bug or driver's DMAing
> + * scheme.
> + *
> + * if (sc->sc_pci == 0)
> + * v |= HME_SEB_CFG_64BIT;
> + */
> HME_SEB_WRITE_4(sc, HME_SEBI_CFG, v);
>
> /* step 9. ETX Configuration: use mostly default values */
> @@ -775,6 +799,12 @@
> /* Enable DMA, fix RX first byte offset. */
> v &= ~HME_ERX_CFG_FBO_MASK;
> v |= HME_ERX_CFG_DMAENABLE | (HME_RXOFFS << HME_ERX_CFG_FBO_SHIFT);
> + /* RX TCP/UDP cksum offset */
> + if (ifp->if_capenable & IFCAP_TXCSUM) {
> + n = (ETHER_HDR_LEN + sizeof(struct ip)) / 2;
> + n = (n << HME_ERX_CFG_CSUM_SHIFT) & HME_ERX_CFG_CSUMSTART;
> + v |= n;
> + }
> CTR1(KTR_HME, "hme_init: programming ERX_CFG to %x", (u_int)v);
> HME_ERX_WRITE_4(sc, HME_ERXI_CFG, v);
>
> @@ -893,6 +923,55 @@
> ("hme_txdma_callback: missed end of packet!"));
> }
>
> +/* TX TCP/UDP cksum */
> +static void
> +hme_txcksum(struct mbuf *m, u_int32_t *cflags)
> +{
> + struct ip *ip;
> + u_int32_t offset, offset2, csumflag;
> + caddr_t p;
> +
> + if ((m->m_pkthdr.csum_flags & CSUM_TCP)) {
> + offset2 = offsetof(struct tcphdr, th_sum);
> + csumflag = HME_XD_TCPCKSUM;
> + } else if((m->m_pkthdr.csum_flags & CSUM_UDP)) {
> + offset2 = offsetof(struct udphdr, uh_sum);
> + csumflag = HME_XD_UDPCKSUM;
> + } else
> + return;
> +
> + for(; m && m->m_len == 0; m = m->m_next)
> + ;
> + if (m == NULL || m->m_len < ETHER_HDR_LEN) {
> + printf("hme_txcksum: m_len < ETHER_HDR_LEN\n");
> + return; /* cksum will be corrupted */
> + }
> + if (m->m_len < ETHER_HDR_LEN + sizeof(u_int32_t)) {
> + if (m->m_len != ETHER_HDR_LEN) {
> + printf("hme_txcksum: m_len != ETHER_HDR_LEN\n");
> + return; /* cksum will be corrupted */
> + }
> + /* XXX */
> + for(m = m->m_next; m && m->m_len == 0; m = m->m_next)
> + ;
> + if (m == NULL)
> + return; /* cksum will be corrupted */
> + ip = mtod(m, struct ip *);
> + } else {
> + p = mtod(m, caddr_t);
> + p += ETHER_HDR_LEN;
> + ip = (struct ip *)p;
> + }
> + if ((ip->ip_hl << 2) == sizeof(*ip))
> + *cflags = csumflag;
> + else {
> + offset = (ip->ip_hl << 2) + ETHER_HDR_LEN;
> + *cflags = offset << HME_XD_TXCKSUM_SSHIFT;
> + *cflags |= ((offset + offset2) << HME_XD_TXCKSUM_OSHIFT);
> + *cflags |= HME_XD_TXCKSUM;
> + }
> +}
> +
> /*
> * Routine to dma map an mbuf chain, set up the descriptor rings accordingly and
> * start the transmission.
> @@ -905,11 +984,12 @@
> struct hme_txdma_arg cba;
> struct hme_txdesc *td;
> int error, si, ri;
> - u_int32_t flags;
> + u_int32_t flags, cflags = 0;
>
> si = sc->sc_rb.rb_tdhead;
> if ((td = STAILQ_FIRST(&sc->sc_rb.rb_txfreeq)) == NULL)
> return (-1);
> + hme_txcksum(m0, &cflags);
> td->htx_m = m0;
> cba.hta_sc = sc;
> cba.hta_htx = td;
> @@ -933,7 +1013,7 @@
> do {
> ri = (ri + HME_NTXDESC - 1) % HME_NTXDESC;
> flags = HME_XD_GETFLAGS(sc->sc_pci, sc->sc_rb.rb_txd, ri) |
> - HME_XD_OWN;
> + HME_XD_OWN | cflags;
> CTR3(KTR_HME, "hme_load_mbuf: activating ri %d, si %d (%#x)",
> ri, si, flags);
> HME_XD_SETFLAGS(sc->sc_pci, sc->sc_rb.rb_txd, ri, flags);
> @@ -951,7 +1031,7 @@
> * Pass a packet to the higher levels.
> */
> static void
> -hme_read(struct hme_softc *sc, int ix, int len)
> +hme_read(struct hme_softc *sc, int ix, int len, u_int32_t flags)
> {
> struct ifnet *ifp = &sc->sc_arpcom.ac_if;
> struct mbuf *m;
> @@ -986,6 +1066,9 @@
> m->m_pkthdr.rcvif = ifp;
> m->m_pkthdr.len = m->m_len = len + HME_RXOFFS;
> m_adj(m, HME_RXOFFS);
> + /* RX TCP/UDP cksum */
> + if (ifp->if_capenable & IFCAP_RXCSUM)
> + hme_rxcksum(m, flags);
> /* Pass the packet up. */
> (*ifp->if_input)(ifp, m);
> }
> @@ -1108,6 +1191,71 @@
> }
>
> /*
> + * RX TCP/UDP cksumming
> + */
> +static void
> +hme_rxcksum(struct mbuf *m, u_int32_t flags)
> +{
> + struct ether_header *eh;
> + struct ip *ip;
> + struct udphdr *uh;
> + int32_t hlen, len, pktlen;
> + u_int16_t cksum, *opts;
> + u_int32_t temp32;
> +
> + pktlen = m->m_pkthdr.len;
> + if (pktlen < sizeof(struct ether_header))
> + return;
> + eh = mtod(m, struct ether_header *);
> + if (eh->ether_type != htons(ETHERTYPE_IP))
> + return;
> + ip = (struct ip *)(eh + 1);
> + if (ip->ip_v != IPVERSION)
> + return;
> +
> + hlen = ip->ip_hl << 2;
> + pktlen -= sizeof(struct ether_header);
> + if (hlen < sizeof(struct ip))
> + return;
> + if (ntohs(ip->ip_len) < hlen)
> + return;
> + if (ntohs(ip->ip_len) != pktlen)
> + return;
> + if (ip->ip_off & htons(IP_MF | IP_OFFMASK))
> + return; /* can't handle fragmented packet */
> +
> + switch (ip->ip_p) {
> + case IPPROTO_TCP:
> + if (pktlen < (hlen + sizeof(struct tcphdr)))
> + return;
> + break;
> + case IPPROTO_UDP:
> + if (pktlen < (hlen + sizeof(struct udphdr)))
> + return;
> + uh = (struct udphdr *)((caddr_t)ip + hlen);
> + if (uh->uh_sum == 0)
> + return; /* no checksum */
> + break;
> + default:
> + return;
> + }
> +
> + cksum = ~(flags & HME_XD_RXCKSUM);
> + /* cksum fixup for IP options */
> + len = hlen - sizeof(struct ip);
> + if (len > 0) {
> + opts = (u_int16_t *)(ip + 1);
> + for (; len > 0; len -= sizeof(u_int16_t), opts++) {
> + temp32 = cksum - *opts;
> + temp32 = (temp32 >> 16) + (temp32 & 65535);
> + cksum = temp32 & 65535;
> + }
> + }
> + m->m_pkthdr.csum_flags |= CSUM_DATA_VALID;
> + m->m_pkthdr.csum_data = cksum;
> +}
> +
> +/*
> * Receive interrupt.
> */
> static void
> @@ -1137,7 +1285,7 @@
> hme_discard_rxbuf(sc, ri);
> } else {
> len = HME_XD_DECODE_RSIZE(flags);
> - hme_read(sc, ri, len);
> + hme_read(sc, ri, len, flags);
> }
> }
> if (progress) {
> @@ -1386,6 +1534,15 @@
> case SIOCGIFMEDIA:
> case SIOCSIFMEDIA:
> error = ifmedia_ioctl(ifp, ifr, &sc->sc_mii->mii_media, cmd);
> + break;
> + case SIOCSIFCAP:
> + ifp->if_capenable = ifr->ifr_reqcap;
> + if (ifp->if_capenable & IFCAP_HWCSUM)
> + ifp->if_hwassist = HME_CSUM_FEATURES;
> + else
> + ifp->if_hwassist = 0;
> + if (ifp->if_flags & IFF_RUNNING)
> + hme_init(sc);
> break;
> default:
> error = ether_ioctl(ifp, cmd, data);
> --- if_hme_sbus.c.orig Wed Jun 16 12:16:56 2004
> +++ if_hme_sbus.c Wed Jun 16 12:16:56 2004
> @@ -244,8 +244,14 @@
>
> burst = sbus_get_burstsz(dev);
> /* Translate into plain numerical format */
> - sc->sc_burst = (burst & SBUS_BURST_32) ? 32 :
> - (burst & SBUS_BURST_16) ? 16 : 0;
> + if ((burst & SBUS_BURST_64))
> + sc->sc_burst = 64;
> + else if ((burst & SBUS_BURST_32))
> + sc->sc_burst = 32;
> + else if ((burst & SBUS_BURST_16))
> + sc->sc_burst = 16;
> + else
> + sc->sc_burst = 0;
>
> sc->sc_pci = 0; /* XXX: should all be done in bus_dma. */
> sc->sc_dev = dev;
> --- if_hmereg.h.orig Wed Jun 16 12:16:56 2004
> +++ if_hmereg.h Wed Jun 16 12:16:56 2004
> @@ -54,8 +54,8 @@
> #define HME_SEB_CFG_BURST16 0x00000000 /* 16 byte bursts */
> #define HME_SEB_CFG_BURST32 0x00000001 /* 32 byte bursts */
> #define HME_SEB_CFG_BURST64 0x00000002 /* 64 byte bursts */
> -#define HME_SEB_CFG_64BIT 0x00000004 /* ? */
> -#define HME_SEB_CFG_PARITY 0x00000008 /* ? */
> +#define HME_SEB_CFG_64BIT 0x00000004 /* extended transfer mode */
> +#define HME_SEB_CFG_PARITY 0x00000008 /* parity check for DVMA/PIO */
>
> #define HME_SEB_STAT_GOTFRAME 0x00000001 /* frame received */
> #define HME_SEB_STAT_RCNTEXP 0x00000002 /* rx frame count expired */
> @@ -154,7 +154,7 @@
> #define HME_ERXI_FIFO_WPTR (3*4) /* FIFO write pointer */
> #define HME_ERXI_FIFO_SWPTR (4*4) /* FIFO shadow write pointer */
> #define HME_ERXI_FIFO_RPTR (5*4) /* FIFO read pointer */
> -#define HME_ERXI_FIFO_SRPTR (6*4) /* FIFO shadow read pointer */
> +#define HME_ERXI_FIFO_PKTCNT (6*4) /* FIFO packet counter */
> #define HME_ERXI_STATEMACHINE (7*4) /* State machine */
>
> /* RXI_CFG bits */
> @@ -167,6 +167,7 @@
> #define HME_ERX_CFG_RINGSIZE256 0x00000600 /* Descriptor ring size: 256 */
> #define HME_ERX_CFG_RINGSIZEMSK 0x00000600 /* Descriptor ring size: 256 */
> #define HME_ERX_CFG_CSUMSTART 0x007f0000 /* cksum offset */
> +#define HME_ERX_CFG_CSUM_SHIFT 16
>
> /*
> * HME MAC-core register offsets
> @@ -289,7 +290,11 @@
> #define HME_XD_RXLENMSK 0x3fff0000 /* packet length mask (rx) */
> #define HME_XD_RXLENSHIFT 16
> #define HME_XD_TXLENMSK 0x00003fff /* packet length mask (tx) */
> +#define HME_XD_TXCKSUM_SSHIFT 14
> +#define HME_XD_TXCKSUM_OSHIFT 20
> #define HME_XD_RXCKSUM 0x0000ffff /* packet checksum (rx) */
> +#define HME_XD_TCPCKSUM 0x13288000 /* precomputed tcp cksum */
> +#define HME_XD_UDPCKSUM 0x12888000 /* precomputed udp cksum */
>
> /* Macros to encode/decode the receive buffer size from the flags field */
> #define HME_XD_ENCODE_RSIZE(sz) \
>
>
> ------------------------------------------------------------------------
>
> _______________________________________________
> freebsd-sparc64 at freebsd.org mailing list
> http://lists.freebsd.org/mailman/listinfo/freebsd-sparc64
> To unsubscribe, send any mail to "freebsd-sparc64-unsubscribe at freebsd.org"
More information about the freebsd-sparc64
mailing list