svn commit: r188998 - user/dfr/xenhvm/6/sys/dev/xen/netfront
Doug Rabson
dfr at FreeBSD.org
Tue Feb 24 08:39:59 PST 2009
Author: dfr
Date: Tue Feb 24 16:39:58 2009
New Revision: 188998
URL: http://svn.freebsd.org/changeset/base/188998
Log:
Merge TSO and LRO, mostly for diff-reduction purposes (it's not supported in
FreeBSD 6.x).
Modified:
user/dfr/xenhvm/6/sys/dev/xen/netfront/ (props changed)
user/dfr/xenhvm/6/sys/dev/xen/netfront/netfront.c
Modified: user/dfr/xenhvm/6/sys/dev/xen/netfront/netfront.c
==============================================================================
--- user/dfr/xenhvm/6/sys/dev/xen/netfront/netfront.c Tue Feb 24 16:23:34 2009 (r188997)
+++ user/dfr/xenhvm/6/sys/dev/xen/netfront/netfront.c Tue Feb 24 16:39:58 2009 (r188998)
@@ -28,6 +28,7 @@ __FBSDID("$FreeBSD$");
#include <sys/module.h>
#include <sys/kernel.h>
#include <sys/socket.h>
+#include <sys/sysctl.h>
#include <sys/queue.h>
#include <sys/lock.h>
#include <sys/sx.h>
@@ -47,6 +48,10 @@ __FBSDID("$FreeBSD$");
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/if_ether.h>
+#if __FreeBSD_version >= 700000
+#include <netinet/tcp.h>
+#include <netinet/tcp_lro.h>
+#endif
#include <vm/vm.h>
#include <vm/pmap.h>
@@ -76,13 +81,22 @@ __FBSDID("$FreeBSD$");
#include "xenbus_if.h"
-#define XN_CSUM_FEATURES (CSUM_TCP | CSUM_UDP)
+#define XN_CSUM_FEATURES (CSUM_TCP | CSUM_UDP | CSUM_TSO)
#define GRANT_INVALID_REF 0
#define NET_TX_RING_SIZE __RING_SIZE((netif_tx_sring_t *)0, PAGE_SIZE)
#define NET_RX_RING_SIZE __RING_SIZE((netif_rx_sring_t *)0, PAGE_SIZE)
+/*
+ * Should the driver do LRO on the RX end
+ * this can be toggled on the fly, but the
+ * interface must be reset (down/up) for it
+ * to take effect.
+ */
+static int xn_enable_lro = 1;
+TUNABLE_INT("hw.xn.enable_lro", &xn_enable_lro);
+
#ifdef CONFIG_XEN
static int MODPARM_rx_copy = 0;
module_param_named(rx_copy, MODPARM_rx_copy, bool, 0);
@@ -95,6 +109,7 @@ static const int MODPARM_rx_copy = 1;
static const int MODPARM_rx_flip = 0;
#endif
+#define MAX_SKB_FRAGS (65536/PAGE_SIZE + 2)
#define RX_COPY_THRESHOLD 256
#define net_ratelimit() 0
@@ -195,6 +210,9 @@ struct net_device_stats
struct netfront_info {
struct ifnet *xn_ifp;
+#if __FreeBSD_version >= 700000
+ struct lro_ctrl xn_lro;
+#endif
struct net_device_stats stats;
u_int tx_full;
@@ -339,28 +357,6 @@ xennet_get_rx_ref(struct netfront_info *
#define DPRINTK(fmt, args...)
#endif
-static __inline struct mbuf*
-makembuf (struct mbuf *buf)
-{
- struct mbuf *m = NULL;
-
- MGETHDR (m, M_DONTWAIT, MT_DATA);
-
- if (! m)
- return 0;
-
- M_MOVE_PKTHDR(m, buf);
-
- m_cljget(m, M_DONTWAIT, MJUMPAGESIZE);
- m->m_pkthdr.len = buf->m_pkthdr.len;
- m->m_len = buf->m_len;
- m_copydata(buf, 0, buf->m_pkthdr.len, mtod(m,caddr_t) );
-
- m->m_ext.ext_args = (caddr_t *)(uintptr_t)(vtophys(mtod(m,caddr_t)) >> PAGE_SHIFT);
-
- return m;
-}
-
/**
* Read the 'mac' node at the given device's node in the store, and parse that
* as colon-separated octets, placing result the given mac array. mac must be
@@ -420,6 +416,11 @@ netfront_attach(device_t dev)
return err;
}
+ SYSCTL_ADD_INT(device_get_sysctl_ctx(dev),
+ SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
+ OID_AUTO, "enable_lro", CTLTYPE_INT|CTLFLAG_RW,
+ &xn_enable_lro, 0, "Large Receive Offload");
+
return 0;
}
@@ -500,7 +501,7 @@ talk_to_backend(device_t dev, struct net
message = "writing feature-sg";
goto abort_transaction;
}
-#ifdef HAVE_TSO
+#if __FreeBSD_version >= 700000
err = xenbus_printf(xbt, node, "feature-gso-tcpv4", "%d", 1);
if (err) {
message = "writing feature-gso-tcpv4";
@@ -868,6 +869,10 @@ static void
xn_rxeof(struct netfront_info *np)
{
struct ifnet *ifp;
+#if __FreeBSD_version >= 700000
+ struct lro_ctrl *lro = &np->xn_lro;
+ struct lro_entry *queued;
+#endif
struct netfront_rx_info rinfo;
struct netif_rx_response *rx = &rinfo.rx;
struct netif_extra_info *extras = rinfo.extras;
@@ -962,13 +967,35 @@ xn_rxeof(struct netfront_info *np)
* Do we really need to drop the rx lock?
*/
XN_RX_UNLOCK(np);
- /* Pass it up. */
+#if __FreeBSD_version >= 700000
+ /* Use LRO if possible */
+ if ((ifp->if_capenable & IFCAP_LRO) == 0 ||
+ lro->lro_cnt == 0 || tcp_lro_rx(lro, m, 0)) {
+ /*
+ * If LRO fails, pass up to the stack
+ * directly.
+ */
+ (*ifp->if_input)(ifp, m);
+ }
+#else
(*ifp->if_input)(ifp, m);
+#endif
XN_RX_LOCK(np);
}
np->rx.rsp_cons = i;
+#if __FreeBSD_version >= 700000
+ /*
+ * Flush any outstanding LRO work
+ */
+ while (!SLIST_EMPTY(&lro->lro_active)) {
+ queued = SLIST_FIRST(&lro->lro_active);
+ SLIST_REMOVE_HEAD(&lro->lro_active, next);
+ tcp_lro_flush(lro, queued);
+ }
+#endif
+
#if 0
/* If we get a callback with very few responses, reduce fill target. */
/* NB. Note exponential increase, linear decrease. */
@@ -989,6 +1016,7 @@ xn_txeof(struct netfront_info *np)
RING_IDX i, prod;
unsigned short id;
struct ifnet *ifp;
+ netif_tx_response_t *txr;
struct mbuf *m;
XN_TX_LOCK_ASSERT(np);
@@ -1004,10 +1032,19 @@ xn_txeof(struct netfront_info *np)
rmb(); /* Ensure we see responses up to 'rp'. */
for (i = np->tx.rsp_cons; i != prod; i++) {
- id = RING_GET_RESPONSE(&np->tx, i)->id;
+ txr = RING_GET_RESPONSE(&np->tx, i);
+ if (txr->status == NETIF_RSP_NULL)
+ continue;
+
+ id = txr->id;
m = np->xn_cdata.xn_tx_chain[id];
- ifp->if_opackets++;
+ /*
+ * Increment packet count if this is the last
+ * mbuf of the chain.
+ */
+ if (!m->m_next)
+ ifp->if_opackets++;
KASSERT(m != NULL, ("mbuf not found in xn_tx_chain"));
M_ASSERTVALID(m);
if (unlikely(gnttab_query_foreign_access(
@@ -1025,7 +1062,7 @@ xn_txeof(struct netfront_info *np)
np->xn_cdata.xn_tx_chain[id] = NULL;
add_id_to_freelist(np->xn_cdata.xn_tx_chain, id);
- m_freem(m);
+ m_free(m);
}
np->tx.rsp_cons = prod;
@@ -1320,13 +1357,14 @@ xn_start_locked(struct ifnet *ifp)
{
int otherend_id;
unsigned short id;
- struct mbuf *m_head, *new_m;
+ struct mbuf *m_head, *m;
struct netfront_info *sc;
netif_tx_request_t *tx;
+ netif_extra_info_t *extra;
RING_IDX i;
grant_ref_t ref;
u_long mfn, tx_bytes;
- int notify;
+ int notify, nfrags;
sc = ifp->if_softc;
otherend_id = xenbus_get_otherend_id(sc->xbdev);
@@ -1346,36 +1384,96 @@ xn_start_locked(struct ifnet *ifp)
break;
}
- id = get_id_from_freelist(sc->xn_cdata.xn_tx_chain);
+
+ /*
+ * Defragment the mbuf if necessary.
+ */
+ for (m = m_head, nfrags = 0; m; m = m->m_next)
+ nfrags++;
+ if (nfrags > MAX_SKB_FRAGS) {
+ m = m_defrag(m_head, M_DONTWAIT);
+ if (!m) {
+ m_freem(m_head);
+ break;
+ }
+ m_head = m;
+ }
/*
* Start packing the mbufs in this chain into
* the fragment pointers. Stop when we run out
* of fragments or hit the end of the mbuf chain.
*/
- new_m = makembuf(m_head);
- tx = RING_GET_REQUEST(&sc->tx, i);
- tx->id = id;
- ref = gnttab_claim_grant_reference(&sc->gref_tx_head);
- KASSERT((short)ref >= 0, ("Negative ref"));
- mfn = virt_to_mfn(mtod(new_m, vm_offset_t));
- gnttab_grant_foreign_access_ref(ref, otherend_id,
- mfn, GNTMAP_readonly);
- tx->gref = sc->grant_tx_ref[id] = ref;
- tx->size = new_m->m_pkthdr.len;
- if (new_m->m_pkthdr.csum_flags & CSUM_DELAY_DATA)
- tx->flags = NETTXF_csum_blank | NETTXF_data_validated;
- else
+ m = m_head;
+ extra = NULL;
+ for (m = m_head; m; m = m->m_next) {
+ tx = RING_GET_REQUEST(&sc->tx, i);
+ id = get_id_from_freelist(sc->xn_cdata.xn_tx_chain);
+ sc->xn_cdata.xn_tx_chain[id] = m;
+ tx->id = id;
+ ref = gnttab_claim_grant_reference(&sc->gref_tx_head);
+ KASSERT((short)ref >= 0, ("Negative ref"));
+ mfn = virt_to_mfn(mtod(m, vm_offset_t));
+ gnttab_grant_foreign_access_ref(ref, otherend_id,
+ mfn, GNTMAP_readonly);
+ tx->gref = sc->grant_tx_ref[id] = ref;
+ tx->offset = mtod(m, vm_offset_t) & (PAGE_SIZE - 1);
tx->flags = 0;
- new_m->m_next = NULL;
- new_m->m_nextpkt = NULL;
+ if (m == m_head) {
+ /*
+ * The first fragment has the entire packet
+ * size, subsequent fragments have just the
+ * fragment size. The backend works out the
+ * true size of the first fragment by
+ * subtracting the sizes of the other
+ * fragments.
+ */
+ tx->size = m->m_pkthdr.len;
- m_freem(m_head);
+ /*
+ * The first fragment contains the
+ * checksum flags and is optionally
+ * followed by extra data for TSO etc.
+ */
+ if (m->m_pkthdr.csum_flags
+ & CSUM_DELAY_DATA) {
+ tx->flags |= (NETTXF_csum_blank
+ | NETTXF_data_validated);
+ }
+#if __FreeBSD_version >= 700000
+ if (m->m_pkthdr.csum_flags & CSUM_TSO) {
+ struct netif_extra_info *gso =
+ (struct netif_extra_info *)
+ RING_GET_REQUEST(&sc->tx, ++i);
+
+ if (extra)
+ extra->flags |= XEN_NETIF_EXTRA_FLAG_MORE;
+ else
+ tx->flags |= NETTXF_extra_info;
+
+ gso->u.gso.size = m->m_pkthdr.tso_segsz;
+ gso->u.gso.type =
+ XEN_NETIF_GSO_TYPE_TCPV4;
+ gso->u.gso.pad = 0;
+ gso->u.gso.features = 0;
+
+ gso->type = XEN_NETIF_EXTRA_TYPE_GSO;
+ gso->flags = 0;
+ extra = gso;
+ }
+#endif
+ } else {
+ tx->size = m->m_len;
+ }
+ if (m->m_next) {
+ tx->flags |= NETTXF_more_data;
+ i++;
+ }
+ }
- sc->xn_cdata.xn_tx_chain[id] = new_m;
- BPF_MTAP(ifp, new_m);
+ BPF_MTAP(ifp, m_head);
- sc->stats.tx_bytes += new_m->m_pkthdr.len;
+ sc->stats.tx_bytes += m_head->m_pkthdr.len;
sc->stats.tx_packets++;
}
@@ -1517,12 +1615,39 @@ xn_ioctl(struct ifnet *ifp, u_long cmd,
break;
case SIOCSIFCAP:
mask = ifr->ifr_reqcap ^ ifp->if_capenable;
- if (mask & IFCAP_HWCSUM) {
- if (IFCAP_HWCSUM & ifp->if_capenable)
- ifp->if_capenable &= ~IFCAP_HWCSUM;
- else
- ifp->if_capenable |= IFCAP_HWCSUM;
+ if (mask & IFCAP_TXCSUM) {
+ if (IFCAP_TXCSUM & ifp->if_capenable) {
+ ifp->if_capenable &= ~(IFCAP_TXCSUM|IFCAP_TSO4);
+ ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP
+ | CSUM_IP | CSUM_TSO);
+ } else {
+ ifp->if_capenable |= IFCAP_TXCSUM;
+ ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP
+ | CSUM_IP);
+ }
+ }
+ if (mask & IFCAP_RXCSUM) {
+ ifp->if_capenable ^= IFCAP_RXCSUM;
+ }
+#if __FreeBSD_version >= 700000
+ if (mask & IFCAP_TSO4) {
+ if (IFCAP_TSO4 & ifp->if_capenable) {
+ ifp->if_capenable &= ~IFCAP_TSO4;
+ ifp->if_hwassist &= ~CSUM_TSO;
+ } else if (IFCAP_TXCSUM & ifp->if_capenable) {
+ ifp->if_capenable |= IFCAP_TSO4;
+ ifp->if_hwassist |= CSUM_TSO;
+ } else {
+ DPRINTK("Xen requires tx checksum offload"
+ " be enabled to use TSO\n");
+ error = EINVAL;
+ }
+ }
+ if (mask & IFCAP_LRO) {
+ ifp->if_capenable ^= IFCAP_LRO;
+
}
+#endif
error = 0;
break;
case SIOCADDMULTI:
@@ -1733,6 +1858,18 @@ create_netdev(device_t dev)
ifp->if_hwassist = XN_CSUM_FEATURES;
ifp->if_capabilities = IFCAP_HWCSUM;
+#if __FreeBSD_version >= 700000
+ ifp->if_capabilities |= IFCAP_TSO4;
+ if (xn_enable_lro) {
+ int err = tcp_lro_init(&np->xn_lro);
+ if (err) {
+ device_printf(dev, "LRO initialization failed\n");
+ goto exit;
+ }
+ np->xn_lro.ifp = ifp;
+ ifp->if_capabilities |= IFCAP_LRO;
+ }
+#endif
ifp->if_capenable = ifp->if_capabilities;
ether_ifattach(ifp, np->mac);
More information about the svn-src-user
mailing list