svn commit: r248565 - user/bryanv/vtnetmq/sys/dev/virtio/network
Bryan Venteicher
bryanv at FreeBSD.org
Thu Mar 21 06:56:36 UTC 2013
Author: bryanv
Date: Thu Mar 21 06:56:35 2013
New Revision: 248565
URL: http://svnweb.freebsd.org/changeset/base/248565
Log:
Commit development snapshot of the multiqueue driver
This commit contains lots of cleanup, bug fixes, and enhancements
such as:
- improved Rx/Tx checksumming
- better handling of deferred transmit and interrupt
handlers
- per-queue statistics, exported via sysctl
A lot of work still remains.
Modified:
user/bryanv/vtnetmq/sys/dev/virtio/network/if_vtnet.c
user/bryanv/vtnetmq/sys/dev/virtio/network/if_vtnetvar.h
user/bryanv/vtnetmq/sys/dev/virtio/network/virtio_net.h
Modified: user/bryanv/vtnetmq/sys/dev/virtio/network/if_vtnet.c
==============================================================================
--- user/bryanv/vtnetmq/sys/dev/virtio/network/if_vtnet.c Wed Mar 20 21:47:05 2013 (r248564)
+++ user/bryanv/vtnetmq/sys/dev/virtio/network/if_vtnet.c Thu Mar 21 06:56:35 2013 (r248565)
@@ -62,6 +62,7 @@ __FBSDID("$FreeBSD$");
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
+#include <netinet6/ip6_var.h>
#include <netinet/udp.h>
#include <netinet/tcp.h>
#include <netinet/sctp.h>
@@ -78,6 +79,9 @@ __FBSDID("$FreeBSD$");
#include "virtio_if.h"
+#include "opt_inet.h"
+#include "opt_inet6.h"
+
static int vtnet_modevent(module_t, int, void *);
static int vtnet_probe(device_t);
@@ -110,7 +114,7 @@ static int vtnet_rxq_replace_lro_nomgr_b
static int vtnet_rxq_replace_buf(struct vtnet_rxq *, struct mbuf *, int);
static int vtnet_rxq_enqueue_buf(struct vtnet_rxq *, struct mbuf *);
static int vtnet_rxq_new_buf(struct vtnet_rxq *);
-static int vtnet_rx_csum(struct vtnet_softc *, struct mbuf *,
+static int vtnet_rxq_csum(struct vtnet_rxq *, struct mbuf *,
struct virtio_net_hdr *);
static void vtnet_rxq_discard_merged_bufs(struct vtnet_rxq *, int);
static void vtnet_rxq_discard_buf(struct vtnet_rxq *, struct mbuf *);
@@ -119,11 +123,15 @@ static void vtnet_rxq_input(struct vtnet
struct virtio_net_hdr *);
static int vtnet_rxq_eof(struct vtnet_rxq *);
static void vtnet_rx_vq_intr(void *);
-static void vtnet_rxq_taskqueue(void *, int);
+static void vtnet_rxq_tq_intr(void *, int);
static void vtnet_txq_free_mbufs(struct vtnet_txq *);
+static int vtnet_txq_offload_ctx(struct vtnet_txq *, struct mbuf *,
+ int *, int *, int *);
+static int vtnet_txq_offload_tso(struct vtnet_txq *, struct mbuf *, int,
+ int, struct virtio_net_hdr *);
static struct mbuf *
- vtnet_tx_offload(struct vtnet_softc *, struct mbuf *,
+ vtnet_txq_offload(struct vtnet_txq *, struct mbuf *,
struct virtio_net_hdr *);
static int vtnet_txq_enqueue_buf(struct vtnet_txq *, struct mbuf **,
struct vtnet_tx_header *);
@@ -134,9 +142,9 @@ static void vtnet_start(struct ifnet *);
#else
static int vtnet_txq_mq_start_locked(struct vtnet_txq *, struct mbuf *);
static int vtnet_txq_mq_start(struct ifnet *, struct mbuf *);
-static void vtnet_txq_taskqueue(void *, int);
-static void vtnet_txq_tq_start(struct vtnet_txq *);
+static void vtnet_txq_tq_deferred(void *, int);
#endif
+static void vtnet_txq_tq_intr(void *, int);
static void vtnet_txq_eof(struct vtnet_txq *);
static void vtnet_tx_vq_intr(void *);
@@ -155,13 +163,15 @@ static void vtnet_drain_rxtx_queues(stru
static void vtnet_stop_rendezvous(struct vtnet_softc *);
static void vtnet_stop(struct vtnet_softc *);
static int vtnet_virtio_reinit(struct vtnet_softc *);
+static void vtnet_init_rx_filters(struct vtnet_softc *);
static int vtnet_init_rx_queues(struct vtnet_softc *);
+static int vtnet_init_tx_queues(struct vtnet_softc *);
+static int vtnet_init_rxtx_queues(struct vtnet_softc *);
static void vtnet_set_active_vq_pairs(struct vtnet_softc *);
static int vtnet_reinit(struct vtnet_softc *);
static void vtnet_init_locked(struct vtnet_softc *);
static void vtnet_init(void *);
-static void vtnet_init_rx_filters(struct vtnet_softc *);
static void vtnet_free_ctrl_vq(struct vtnet_softc *);
static void vtnet_exec_ctrl_cmd(struct vtnet_softc *, void *,
struct sglist *, int, int);
@@ -186,6 +196,11 @@ static void vtnet_get_hwaddr(struct vtne
static void vtnet_set_hwaddr(struct vtnet_softc *);
static void vtnet_vlan_tag_remove(struct mbuf *);
+static void vtnet_setup_rxq_sysctl(struct sysctl_ctx_list *,
+ struct sysctl_oid_list *, struct vtnet_rxq *);
+static void vtnet_setup_txq_sysctl(struct sysctl_ctx_list *,
+ struct sysctl_oid_list *, struct vtnet_txq *);
+static void vtnet_setup_queue_sysctl(struct vtnet_softc *);
static void vtnet_setup_sysctl(struct vtnet_softc *);
static int vtnet_rxq_enable_intr(struct vtnet_rxq *);
@@ -214,12 +229,14 @@ static int vtnet_rx_process_limit = 256;
TUNABLE_INT("hw.vtnet.rx_process_limit", &vtnet_rx_process_limit);
/*
- * Reducing the number of transmit completed interrupts can
- * improve performance. To do so, the define below keeps the
- * Tx vq interrupt disabled and adds calls to vtnet_txeof()
- * in the start and watchdog paths. The price to pay for this
- * is the m_free'ing of transmitted mbufs may be delayed until
- * the watchdog fires.
+ * Reducing the number of transmit completed interrupts can improve
+ * performance. To do so, the define below keeps the Tx vq interrupt
+ * disabled and adds calls to vtnet_txeof() in the start and watchdog
+ * paths. The price to pay for this is the m_free'ing of transmitted
+ * mbufs may be delayed until the watchdog fires.
+ *
+ * BMV: Reintroduce this later as a run-time option, if it makes
+ * sense after the EVENT_IDX feature is supported.
*/
#define VTNET_TX_INTR_MODERATION
@@ -366,18 +383,15 @@ vtnet_attach(device_t dev)
error = virtio_setup_intr(dev, INTR_TYPE_NET);
if (error) {
device_printf(dev, "cannot setup virtqueue interrupts\n");
+ /* BMV: This will crash if during boot! */
ether_ifdetach(sc->vtnet_ifp);
goto fail;
}
- vtnet_start_taskqueues(sc);
-
- /*
- * Even though this is a polling operation, it must be done after
- * interrupts have been setup.
- */
vtnet_attach_disable_promisc(sc);
+ vtnet_start_taskqueues(sc);
+
fail:
if (error)
vtnet_detach(dev);
@@ -610,7 +624,7 @@ vtnet_init_rxq(struct vtnet_softc *sc, i
rxq->vtnrx_id = id;
rxq->vtnrx_process_limit = vtnet_rx_process_limit;
- TASK_INIT(&rxq->vtnrx_task, 0, vtnet_rxq_taskqueue, rxq);
+ TASK_INIT(&rxq->vtnrx_intrtask, 0, vtnet_rxq_tq_intr, rxq);
rxq->vtnrx_tq = taskqueue_create(rxq->vtnrx_name, M_NOWAIT,
taskqueue_thread_enqueue, &rxq->vtnrx_tq);
@@ -637,12 +651,13 @@ vtnet_init_txq(struct vtnet_softc *sc, i
if (txq->vtntx_br == NULL)
return (ENOMEM);
- TASK_INIT(&txq->vtntx_task, 0, vtnet_txq_taskqueue, txq);
+ TASK_INIT(&txq->vtntx_defrtask, 0, vtnet_txq_tq_deferred, txq);
+#endif
+ TASK_INIT(&txq->vtntx_intrtask, 0, vtnet_txq_tq_intr, txq);
txq->vtntx_tq = taskqueue_create(txq->vtntx_name, M_NOWAIT,
taskqueue_thread_enqueue, &txq->vtntx_tq);
if (txq->vtntx_tq == NULL)
return (ENOMEM);
-#endif
return (0);
}
@@ -650,18 +665,18 @@ vtnet_init_txq(struct vtnet_softc *sc, i
static int
vtnet_alloc_rxtx_queues(struct vtnet_softc *sc)
{
- int i, pairs, error;
+ int i, npairs, error;
- pairs = sc->vtnet_max_vq_pairs;
+ npairs = sc->vtnet_max_vq_pairs;
- sc->vtnet_rxqs = malloc(sizeof(struct vtnet_rxq) * pairs, M_DEVBUF,
+ sc->vtnet_rxqs = malloc(sizeof(struct vtnet_rxq) * npairs, M_DEVBUF,
M_NOWAIT | M_ZERO);
- sc->vtnet_txqs = malloc(sizeof(struct vtnet_txq) * pairs, M_DEVBUF,
+ sc->vtnet_txqs = malloc(sizeof(struct vtnet_txq) * npairs, M_DEVBUF,
M_NOWAIT | M_ZERO);
if (sc->vtnet_rxqs == NULL || sc->vtnet_txqs == NULL)
return (ENOMEM);
- for (i = 0; i < pairs; i++) {
+ for (i = 0; i < npairs; i++) {
error = vtnet_init_rxq(sc, i);
if (error)
return (error);
@@ -670,6 +685,8 @@ vtnet_alloc_rxtx_queues(struct vtnet_sof
return (error);
}
+ vtnet_setup_queue_sysctl(sc);
+
return (0);
}
@@ -1051,10 +1068,7 @@ vtnet_ioctl(struct ifnet *ifp, u_long cm
if (mask & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6 | IFCAP_LRO |
IFCAP_VLAN_HWFILTER)) {
- /*
- * These Rx features require us to renegotiate with
- * the host.
- */
+ /* These Rx features require us to renegotiate. */
reinit = 1;
/*
@@ -1152,6 +1166,9 @@ vtnet_rx_alloc_buf(struct vtnet_softc *s
clsize = sc->vtnet_rx_clsize;
+ KASSERT(nbufs == 1 || sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG,
+ ("%s: chained mbuf %d request without LRO_NOMRG", __func__, nbufs));
+
m_head = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, clsize);
if (m_head == NULL)
goto fail;
@@ -1159,19 +1176,15 @@ vtnet_rx_alloc_buf(struct vtnet_softc *s
m_head->m_len = clsize;
m_tail = m_head;
- if (nbufs > 1) {
- KASSERT(sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG,
- ("%s: chained mbuf request without LRO_NOMRG", __func__));
-
- for (i = 1; i < nbufs; i++) {
- m = m_getjcl(M_NOWAIT, MT_DATA, 0, clsize);
- if (m == NULL)
- goto fail;
-
- m->m_len = clsize;
- m_tail->m_next = m;
- m_tail = m;
- }
+ /* Allocate the rest of the chain. */
+ for (i = 1; i < nbufs; i++) {
+ m = m_getjcl(M_NOWAIT, MT_DATA, 0, clsize);
+ if (m == NULL)
+ goto fail;
+
+ m->m_len = clsize;
+ m_tail->m_next = m;
+ m_tail = m;
}
if (m_tailp != NULL)
@@ -1293,27 +1306,25 @@ vtnet_rxq_replace_buf(struct vtnet_rxq *
("%s: chained mbuf without LRO_NOMRG", __func__));
if (m->m_next == NULL) {
- /*
- * Simplified fast-path for the common case of just one mbuf.
- *
- * BMV: This is a lot like vtnet_rxq_new_buf().
- */
- m->m_len = MIN(m->m_len, len); /* BMV XXX */
+ /* Fast-path for the common case of just one mbuf. */
+ if (m->m_len < len)
+ return (EINVAL);
m_new = vtnet_rx_alloc_buf(sc, 1, NULL);
- if (m_new != NULL) {
- error = vtnet_rxq_enqueue_buf(rxq, m_new);
- if (error) {
- /*
- * The new mbuf is suppose to be an identical
- * copy of the one just dequeued so this is an
- * unexpected error.
- */
- m_freem(m_new);
- sc->vtnet_stats.rx_enq_replacement_failed++;
- }
+ if (m_new == NULL)
+ return (ENOBUFS);
+
+ error = vtnet_rxq_enqueue_buf(rxq, m_new);
+ if (error) {
+ /*
+ * The new mbuf is suppose to be an identical
+ * copy of the one just dequeued so this is an
+ * unexpected error.
+ */
+ m_freem(m_new);
+ sc->vtnet_stats.rx_enq_replacement_failed++;
} else
- error = ENOBUFS;
+ m->m_len = len;
} else
error = vtnet_rxq_replace_lro_nomgr_buf(rxq, m, len);
@@ -1327,7 +1338,6 @@ vtnet_rxq_enqueue_buf(struct vtnet_rxq *
struct sglist_seg segs[VTNET_MAX_RX_SEGS];
struct vtnet_softc *sc;
struct vtnet_rx_header *rxhdr;
- struct virtio_net_hdr *hdr;
uint8_t *mdata;
int offset, error;
@@ -1342,24 +1352,15 @@ vtnet_rxq_enqueue_buf(struct vtnet_rxq *
if ((sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) == 0) {
MPASS(sc->vtnet_hdr_size == sizeof(struct virtio_net_hdr));
rxhdr = (struct vtnet_rx_header *) mdata;
- hdr = &rxhdr->vrh_hdr;
+ sglist_append(&sg, &rxhdr->vrh_hdr, sc->vtnet_hdr_size);
offset = sizeof(struct vtnet_rx_header);
- sglist_append(&sg, hdr, sc->vtnet_hdr_size);
} else
offset = 0;
- /*
- * XXX BMV: Either sglist_append() should never fail here ...
- */
-
- error = sglist_append(&sg, mdata + offset, m->m_len - offset);
- if (error)
- return (error);
-
+ sglist_append(&sg, mdata + offset, m->m_len - offset);
if (m->m_next != NULL) {
error = sglist_append_mbuf(&sg, m->m_next);
- if (error)
- return (error);
+ MPASS(error == 0);
}
error = virtqueue_enqueue(rxq->vtnrx_vq, m, &sg, 0, sg.sg_nseg);
@@ -1388,39 +1389,35 @@ vtnet_rxq_new_buf(struct vtnet_rxq *rxq)
}
/*
- * Set the appropriate CSUM_* flags. Unfortunately, the information
- * provided is not directly useful to us. The VirtIO header gives the
- * offset of the checksum, which is all Linux needs, but this is not
- * how FreeBSD does things. We are forced to peek inside the packet
- * a bit.
- *
- * It would be nice if VirtIO gave us the L4 protocol or if FreeBSD
- * could accept the offsets and let the stack figure it out.
+ * Use the checksum offset in the VirtIO header to set the
+ * correct CSUM_* flags.
*/
static int
-vtnet_rx_csum(struct vtnet_softc *sc, struct mbuf *m,
- struct virtio_net_hdr *hdr)
+vtnet_rxq_csum_by_offset(struct vtnet_rxq *rxq, struct mbuf *m,
+ uint16_t eth_type, int ip_start, struct virtio_net_hdr *hdr)
{
- struct ether_header *eh;
- struct ether_vlan_header *evh;
- int offset;
- uint16_t eth_type;
-
- offset = hdr->csum_start + hdr->csum_offset;
-
- if (offset < sizeof(struct ether_header) + sizeof(struct ip))
- return (1);
- if (m->m_len < offset)
- return (1);
+ struct vtnet_softc *sc;
+#if defined(INET) || defined(INET6)
+ int offset = hdr->csum_start + hdr->csum_offset;
+#endif
- eh = mtod(m, struct ether_header *);
- eth_type = ntohs(eh->ether_type);
- if (eth_type == ETHERTYPE_VLAN) {
- evh = mtod(m, struct ether_vlan_header *);
- eth_type = ntohs(evh->evl_proto);
- }
+ sc = rxq->vtnrx_sc;
- if (eth_type != ETHERTYPE_IP && eth_type != ETHERTYPE_IPV6) {
+ /* Only do a basic sanity check on the offset. */
+ switch (eth_type) {
+#if defined(INET)
+ case ETHERTYPE_IP:
+ if (__predict_false(offset < ip_start + sizeof(struct ip)))
+ return (1);
+ break;
+#endif
+#if defined(INET6)
+ case ETHERTYPE_IPV6:
+ if (__predict_false(offset < ip_start + sizeof(struct ip6_hdr)))
+ return (1);
+ break;
+#endif
+ default:
sc->vtnet_stats.rx_csum_bad_ethtype++;
return (1);
}
@@ -1428,8 +1425,8 @@ vtnet_rx_csum(struct vtnet_softc *sc, st
/*
* Use the offset to determine the appropriate CSUM_* flags. This
* is a bit dirty, but we can get by with it since the checksum
- * offsets happen to be different. The implied assumption is that
- * the host does not do IPv4 header checksum offloading.
+ * offsets happen to be different. We assume the host does
+ * not do IPv4 header checksum offloading.
*/
switch (hdr->csum_offset) {
case offsetof(struct udphdr, uh_sum):
@@ -1437,21 +1434,116 @@ vtnet_rx_csum(struct vtnet_softc *sc, st
m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
m->m_pkthdr.csum_data = 0xFFFF;
break;
-
case offsetof(struct sctphdr, checksum):
m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID;
break;
-
default:
sc->vtnet_stats.rx_csum_bad_offset++;
return (1);
}
- sc->vtnet_stats.rx_csum_offloaded++;
+ return (0);
+}
+
+static int
+vtnet_rxq_csum_by_parse(struct vtnet_rxq *rxq, struct mbuf *m,
+ uint16_t eth_type, int ip_start, struct virtio_net_hdr *hdr)
+{
+ struct vtnet_softc *sc;
+ int offset, proto;
+
+ sc = rxq->vtnrx_sc;
+
+ switch (eth_type) {
+#if defined(INET)
+ case ETHERTYPE_IP: {
+ struct ip *ip;
+ if (__predict_false(m->m_len < ip_start + sizeof(struct ip)))
+ return (1);
+ ip = (struct ip *)(m->m_data + ip_start);
+ proto = ip->ip_p;
+ offset = ip_start + (ip->ip_hl << 2);
+ break;
+ }
+#endif
+#if defined(INET6)
+ case ETHERTYPE_IPV6:
+ if (__predict_false(m->m_len < ip_start +
+ sizeof(struct ip6_hdr)))
+ return (1);
+ offset = ip6_lasthdr(m, ip_start, IPPROTO_IPV6, &proto);
+ if (__predict_false(offset < 0))
+ return (1);
+ break;
+#endif
+ default:
+ sc->vtnet_stats.rx_csum_bad_ethtype++;
+ return (1);
+ }
+
+ switch (proto) {
+ case IPPROTO_TCP:
+ if (__predict_false(m->m_len < offset + sizeof(struct tcphdr)))
+ return (1);
+ m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
+ m->m_pkthdr.csum_data = 0xFFFF;
+ break;
+ case IPPROTO_UDP:
+ if (__predict_false(m->m_len < offset + sizeof(struct udphdr)))
+ return (1);
+ m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
+ m->m_pkthdr.csum_data = 0xFFFF;
+ break;
+ case IPPROTO_SCTP:
+ if (__predict_false(m->m_len < offset + sizeof(struct sctphdr)))
+ return (1);
+ m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID;
+ break;
+ default:
+ sc->vtnet_stats.rx_csum_bad_proto++;
+ return (1);
+ }
return (0);
}
+/*
+ * Set the appropriate CSUM_* flags. Unfortunately, the information
+ * provided is not directly useful to us. The VirtIO header gives the
+ * offset of the checksum, which is all Linux needs, but this is not
+ * how FreeBSD does things. We are forced to peek inside the packet
+ * a bit.
+ *
+ * It would be nice if VirtIO gave us the L4 protocol or if FreeBSD
+ * could accept the offsets and let the stack figure it out.
+ */
+static int
+vtnet_rxq_csum(struct vtnet_rxq *rxq, struct mbuf *m,
+ struct virtio_net_hdr *hdr)
+{
+ struct ether_header *eh;
+ struct ether_vlan_header *evh;
+ uint16_t eth_type;
+ int offset, error;
+
+ eh = mtod(m, struct ether_header *);
+ eth_type = ntohs(eh->ether_type);
+ if (eth_type == ETHERTYPE_VLAN) {
+ /* BMV: We should handle nested VLAN tags too. */
+ evh = mtod(m, struct ether_vlan_header *);
+ eth_type = ntohs(evh->evl_proto);
+ offset = sizeof(struct ether_vlan_header);
+ } else
+ offset = sizeof(struct ether_header);
+
+ if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)
+ error = vtnet_rxq_csum_by_offset(rxq, m, eth_type, offset, hdr);
+ else
+ error = vtnet_rxq_csum_by_parse(rxq, m, eth_type, offset, hdr);
+
+ return (error);
+}
+
static void
vtnet_rxq_discard_merged_bufs(struct vtnet_rxq *rxq, int nbufs)
{
@@ -1496,12 +1588,12 @@ vtnet_rxq_merged_eof(struct vtnet_rxq *r
while (--nbufs > 0) {
m = virtqueue_dequeue(vq, &len);
if (m == NULL) {
- ifp->if_ierrors++;
+ rxq->vtnrx_stats.vrxs_ierrors++;
goto fail;
}
if (vtnet_rxq_new_buf(rxq) != 0) {
- ifp->if_iqdrops++;
+ rxq->vtnrx_stats.vrxs_discarded++;
vtnet_rxq_discard_buf(rxq, m);
if (nbufs > 1)
vtnet_rxq_discard_merged_bufs(rxq, nbufs);
@@ -1555,14 +1647,25 @@ vtnet_rxq_input(struct vtnet_rxq *rxq, s
m->m_pkthdr.flowid = rxq->vtnrx_id;
m->m_flags |= M_FLOWID;
- if (ifp->if_capenable & IFCAP_RXCSUM &&
- hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
- if (vtnet_rx_csum(sc, m, hdr) != 0)
- sc->vtnet_stats.rx_csum_failed++;
+ /*
+ * BMV: FreeBSD does not have the UNNECESSARY and PARTIAL checksum
+ * distinction that Linux does. Need to reevaluate if performing
+ * offloading for the NEEDS_CSUM case is really appropriate.
+ */
+ if (hdr->flags & (VIRTIO_NET_HDR_F_NEEDS_CSUM |
+ VIRTIO_NET_HDR_F_DATA_VALID)) {
+ if (vtnet_rxq_csum(rxq, m, hdr) == 0)
+ rxq->vtnrx_stats.vrxs_csum++;
+ else
+ rxq->vtnrx_stats.vrxs_csum_failed++;
}
- ifp->if_ipackets++;
+ rxq->vtnrx_stats.vrxs_ipackets++;
+ rxq->vtnrx_stats.vrxs_ibytes += m->m_pkthdr.len;
+
+ /* VTNET_RXQ_UNLOCK(rxq); */
(*ifp->if_input)(ifp, m);
+ /* VTNET_RXQ_LOCK(rxq); */
}
static int
@@ -1587,10 +1690,6 @@ vtnet_rxq_eof(struct vtnet_rxq *rxq)
VTNET_RXQ_LOCK_ASSERT(rxq);
- /*
- * `count` limits how many leading descriptors we dequeue. The
- * actual number could be higher if there are merged buffers.
- */
while (count-- > 0) {
m = virtqueue_dequeue(vq, &len);
if (m == NULL)
@@ -1598,7 +1697,7 @@ vtnet_rxq_eof(struct vtnet_rxq *rxq)
deq++;
if (len < sc->vtnet_hdr_size + ETHER_HDR_LEN) {
- ifp->if_ierrors++;
+ rxq->vtnrx_stats.vrxs_discarded++;
vtnet_rxq_discard_buf(rxq, m);
continue;
}
@@ -1618,7 +1717,7 @@ vtnet_rxq_eof(struct vtnet_rxq *rxq)
}
if (vtnet_rxq_replace_buf(rxq, m, len) != 0) {
- ifp->if_iqdrops++;
+ rxq->vtnrx_stats.vrxs_discarded++;
vtnet_rxq_discard_buf(rxq, m);
if (nbufs > 1)
vtnet_rxq_discard_merged_bufs(rxq, nbufs);
@@ -1642,7 +1741,7 @@ vtnet_rxq_eof(struct vtnet_rxq *rxq)
* regular header.
*
* BMV: Is this memcpy() expensive? We know the mbuf data is
- * still valid after we adjust it.
+ * still valid even after the m_adj().
*/
memcpy(hdr, mtod(m, void *), sizeof(struct virtio_net_hdr));
m_adj(m, adjsz);
@@ -1662,18 +1761,19 @@ vtnet_rx_vq_intr(void *xrxq)
struct vtnet_softc *sc;
struct vtnet_rxq *rxq;
struct ifnet *ifp;
- int more;
+ int tries, more;
rxq = xrxq;
sc = rxq->vtnrx_sc;
ifp = sc->vtnet_ifp;
+ tries = 0;
if (__predict_false(rxq->vtnrx_id >= sc->vtnet_act_vq_pairs)) {
/*
- * Ignore this interrupt. Either the host generated a spurious
- * interrupt (probably unlikely) or we have multiqueue without
- * per-VQ MSIX so every queue needs to be polled (brain dead
- * configuration we could try harder to avoid).
+ * Ignore this interrupt. Either this is a spurious interrupt
+ * or multiqueue without per-VQ MSIX so every queue needs to
+ * be polled (a brain dead configuration we could try harder
+ * to avoid).
*/
vtnet_rxq_disable_intr(rxq);
return;
@@ -1683,7 +1783,6 @@ again:
VTNET_RXQ_LOCK(rxq);
if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
- vtnet_rxq_enable_intr(rxq);
VTNET_RXQ_UNLOCK(rxq);
return;
}
@@ -1692,21 +1791,47 @@ again:
if (more || vtnet_rxq_enable_intr(rxq) != 0) {
if (!more)
vtnet_rxq_disable_intr(rxq);
- sc->vtnet_stats.rx_task_rescheduled++;
+ /*
+ * This is an occasional condition or race (when !more),
+ * so retry a few times before scheduling the taskqueue.
+ */
+ rxq->vtnrx_stats.vrxs_rescheduled++;
+ VTNET_RXQ_UNLOCK(rxq);
+ if (tries++ < VTNET_INTR_DISABLE_RETRIES)
+ goto again;
+ taskqueue_enqueue(rxq->vtnrx_tq, &rxq->vtnrx_intrtask);
+ } else
VTNET_RXQ_UNLOCK(rxq);
- goto again;
- }
-
- VTNET_RXQ_UNLOCK(rxq);
}
static void
-vtnet_rxq_taskqueue(void *xrxq, int pending)
+vtnet_rxq_tq_intr(void *xrxq, int pending)
{
+ struct vtnet_softc *sc;
+ struct vtnet_rxq *rxq;
+ struct ifnet *ifp;
+ int more;
- /*
- * BMV: Do stuff here when we defer in vtnet_rx_vq_intr().
- */
+ rxq = xrxq;
+ sc = rxq->vtnrx_sc;
+ ifp = sc->vtnet_ifp;
+
+ VTNET_RXQ_LOCK(rxq);
+
+ if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
+ VTNET_RXQ_UNLOCK(rxq);
+ return;
+ }
+
+ more = vtnet_rxq_eof(rxq);
+ if (more || vtnet_rxq_enable_intr(rxq) != 0) {
+ if (!more)
+ vtnet_rxq_disable_intr(rxq);
+ rxq->vtnrx_stats.vrxs_rescheduled++;
+ taskqueue_enqueue(rxq->vtnrx_tq, &rxq->vtnrx_intrtask);
+ }
+
+ VTNET_RXQ_UNLOCK(rxq);
}
static void
@@ -1729,121 +1854,150 @@ vtnet_txq_free_mbufs(struct vtnet_txq *t
}
/*
- * BMV: Uggg ... rewrite this function.
+ * BMV: Much of this can go away once we finally have offsets in
+ * the mbuf packet header. Bug andre at .
*/
-static struct mbuf *
-vtnet_tx_offload(struct vtnet_softc *sc, struct mbuf *m,
- struct virtio_net_hdr *hdr)
+static int
+vtnet_txq_offload_ctx(struct vtnet_txq *txq, struct mbuf *m,
+ int *etype, int *proto, int *start)
{
- struct ifnet *ifp;
- struct ether_header *eh;
+ struct vtnet_softc *sc;
struct ether_vlan_header *evh;
- struct ip *ip;
- struct ip6_hdr *ip6;
- struct tcphdr *tcp;
- int ip_offset;
- uint16_t eth_type, csum_start;
- uint8_t ip_proto, gso_type;
+ int offset;
- ifp = sc->vtnet_ifp;
+ sc = txq->vtntx_sc;
- ip_offset = sizeof(struct ether_header);
- if (m->m_len < ip_offset) {
- if ((m = m_pullup(m, ip_offset)) == NULL)
- return (NULL);
+ evh = mtod(m, struct ether_vlan_header *);
+ if (evh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
+ /* BMV: We should handle nested VLAN tags too. */
+ *etype = ntohs(evh->evl_proto);
+ offset = sizeof(struct ether_vlan_header);
+ } else {
+ *etype = ntohs(evh->evl_encap_proto);
+ offset = sizeof(struct ether_header);
}
- eh = mtod(m, struct ether_header *);
- eth_type = ntohs(eh->ether_type);
- if (eth_type == ETHERTYPE_VLAN) {
- ip_offset = sizeof(struct ether_vlan_header);
- if (m->m_len < ip_offset) {
- if ((m = m_pullup(m, ip_offset)) == NULL)
- return (NULL);
- }
- evh = mtod(m, struct ether_vlan_header *);
- eth_type = ntohs(evh->evl_proto);
+ switch (*etype) {
+#if defined(INET)
+ case ETHERTYPE_IP: {
+ struct ip *ip, iphdr;
+ if (__predict_false(m->m_len < offset + sizeof(struct ip))) {
+ m_copydata(m, offset, sizeof(struct ip),
+ (caddr_t) &iphdr);
+ ip = &iphdr;
+ } else
+ ip = (struct ip *)(m->m_data + offset);
+ *proto = ip->ip_p;
+ *start = offset + (ip->ip_hl << 2);
+ break;
+ }
+#endif
+#if defined(INET6)
+ case ETHERTYPE_IPV6:
+ *proto = -1;
+ *start = ip6_lasthdr(m, offset, IPPROTO_IPV6, proto);
+ /* Assert the network stack sends us a valid packet. */
+ KASSERT(*start > offset,
+ ("%s: mbuf %p start %d offset %d proto %d", __func__, m,
+ *start, offset, *proto));
+ break;
+#endif
+ default:
+ sc->vtnet_stats.tx_csum_bad_ethtype++;
+ return (EINVAL);
}
- switch (eth_type) {
- case ETHERTYPE_IP:
- if (m->m_len < ip_offset + sizeof(struct ip)) {
- m = m_pullup(m, ip_offset + sizeof(struct ip));
- if (m == NULL)
- return (NULL);
- }
+ return (0);
+}
- ip = (struct ip *)(mtod(m, uint8_t *) + ip_offset);
- ip_proto = ip->ip_p;
- csum_start = ip_offset + (ip->ip_hl << 2);
- gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
- break;
+static int
+vtnet_txq_offload_tso(struct vtnet_txq *txq, struct mbuf *m, int eth_type,
+ int offset, struct virtio_net_hdr *hdr)
+{
+ static struct timeval lastecn;
+ static int curecn;
+ struct vtnet_softc *sc;
+ struct tcphdr *tcp, tcphdr;
- case ETHERTYPE_IPV6:
- if (m->m_len < ip_offset + sizeof(struct ip6_hdr)) {
- m = m_pullup(m, ip_offset + sizeof(struct ip6_hdr));
- if (m == NULL)
- return (NULL);
- }
+ sc = txq->vtntx_sc;
+
+ if (__predict_false(m->m_len < offset + sizeof(struct tcphdr))) {
+ m_copydata(m, offset, sizeof(struct tcphdr), (caddr_t) &tcphdr);
+ tcp = &tcphdr;
+ } else
+ tcp = (struct tcphdr *)(m->m_data + offset);
+
+ hdr->hdr_len = offset + (tcp->th_off << 2);
+ hdr->gso_size = m->m_pkthdr.tso_segsz;
+ hdr->gso_type = eth_type == ETHERTYPE_IP ? VIRTIO_NET_HDR_GSO_TCPV4 :
+ VIRTIO_NET_HDR_GSO_TCPV6;
- ip6 = (struct ip6_hdr *)(mtod(m, uint8_t *) + ip_offset);
+ if (tcp->th_flags & TH_CWR) {
/*
- * XXX Assume no extension headers are present. Presently,
- * this will always be true in the case of TSO, and FreeBSD
- * does not perform checksum offloading of IPv6 yet.
+ * Drop if VIRTIO_NET_F_HOST_ECN was not negotiated. In FreeBSD
+ * ECN support is not on a per-interface basis, but globally via
+ * the net.inet.tcp.ecn.enable sysctl knob. The default is off.
*/
- ip_proto = ip6->ip6_nxt;
- csum_start = ip_offset + sizeof(struct ip6_hdr);
- gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
- break;
-
- default:
- return (m);
+ if ((sc->vtnet_flags & VTNET_FLAG_TSO_ECN) == 0) {
+ if (ppsratecheck(&lastecn, &curecn, 1))
+ if_printf(sc->vtnet_ifp,
+ "TSO with ECN not negotiated with host\n");
+ return (ENOTSUP);
+ }
+ hdr->gso_type |= VIRTIO_NET_HDR_GSO_ECN;
}
- if (m->m_pkthdr.csum_flags & VTNET_CSUM_OFFLOAD) {
- hdr->flags |= VIRTIO_NET_HDR_F_NEEDS_CSUM;
- hdr->csum_start = csum_start;
- hdr->csum_offset = m->m_pkthdr.csum_data;
+ txq->vtntx_stats.vtxs_tso++;
- sc->vtnet_stats.tx_csum_offloaded++;
- }
+ return (0);
+}
- if (m->m_pkthdr.csum_flags & CSUM_TSO) {
- if (ip_proto != IPPROTO_TCP)
- return (m);
+static struct mbuf *
+vtnet_txq_offload(struct vtnet_txq *txq, struct mbuf *m,
+ struct virtio_net_hdr *hdr)
+{
+ struct vtnet_softc *sc;
+ int flags, etype, csum_start, proto, error;
- if (m->m_len < csum_start + sizeof(struct tcphdr)) {
- m = m_pullup(m, csum_start + sizeof(struct tcphdr));
- if (m == NULL)
- return (NULL);
- }
+ sc = txq->vtntx_sc;
+ flags = m->m_pkthdr.csum_flags;
- tcp = (struct tcphdr *)(mtod(m, uint8_t *) + csum_start);
- hdr->gso_type = gso_type;
- hdr->hdr_len = csum_start + (tcp->th_off << 2);
- hdr->gso_size = m->m_pkthdr.tso_segsz;
+ error = vtnet_txq_offload_ctx(txq, m, &etype, &csum_start, &proto);
+ if (error)
+ goto drop;
- if (tcp->th_flags & TH_CWR) {
- /*
- * Drop if we did not negotiate VIRTIO_NET_F_HOST_ECN.
- * ECN support is only configurable globally with the
- * net.inet.tcp.ecn.enable sysctl knob.
- */
- if ((sc->vtnet_flags & VTNET_FLAG_TSO_ECN) == 0) {
- if_printf(ifp, "TSO with ECN not supported "
- "by host\n");
- m_freem(m);
- return (NULL);
- }
+ if ((etype == ETHERTYPE_IP && flags & VTNET_CSUM_OFFLOAD) ||
+ (etype == ETHERTYPE_IPV6 && flags & VTNET_CSUM_OFFLOAD_IPV6)) {
+ /*
+ * We could compare the IP protocol vs the CSUM_ flag too,
+ * but that really should not be necessary.
+ */
+ hdr->flags |= VIRTIO_NET_HDR_F_NEEDS_CSUM;
+ hdr->csum_start = csum_start;
+ hdr->csum_offset = m->m_pkthdr.csum_data;
+ txq->vtntx_stats.vtxs_csum++;
+ }
- hdr->flags |= VIRTIO_NET_HDR_GSO_ECN;
+ if (flags & CSUM_TSO) {
+ if (__predict_false(proto != IPPROTO_TCP)) {
+ /* Likely failed to correctly parse the mbuf. */
+ sc->vtnet_stats.tx_tso_not_tcp++;
+ goto drop;
}
- sc->vtnet_stats.tx_tso_offloaded++;
+ KASSERT(hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM,
+ ("%s: mbuf %p TSO without checksum offload", __func__, m));
+
+ error = vtnet_txq_offload_tso(txq, m, etype, csum_start, hdr);
+ if (error)
+ goto drop;
}
return (m);
+
+drop:
+ m_freem(m);
+ return (NULL);
}
static int
@@ -1879,6 +2033,7 @@ again:
*m_head = m;
collapsed = 1;
+ txq->vtntx_stats.vtxs_collapsed++;
goto again;
}
@@ -1920,29 +2075,26 @@ vtnet_txq_encap(struct vtnet_txq *txq, s
* The vtnet_hdr_size is used to enqueue the correct header size.
*/
hdr = &txhdr->vth_uhdr.hdr;
-
+ error = ENOBUFS;
if (m->m_flags & M_VLANTAG) {
m = ether_vlanencap(m, m->m_pkthdr.ether_vtag);
- if ((*m_head = m) == NULL) {
- error = ENOBUFS;
+ if ((*m_head = m) == NULL)
goto fail;
- }
m->m_flags &= ~M_VLANTAG;
}
- if (m->m_pkthdr.csum_flags != 0) {
- m = vtnet_tx_offload(sc, m, hdr);
- if ((*m_head = m) == NULL) {
- error = ENOBUFS;
+ if (m->m_pkthdr.csum_flags & VTNET_CSUM_ALL_OFFLOAD) {
+ m = vtnet_txq_offload(txq, m, hdr);
+ if ((*m_head = m) == NULL)
goto fail;
- }
}
error = vtnet_txq_enqueue_buf(txq, m_head, txhdr);
+ if (error == 0)
+ return (0);
fail:
- if (error)
- uma_zfree(vtnet_tx_header_zone, txhdr);
+ uma_zfree(vtnet_tx_header_zone, txhdr);
return (error);
}
@@ -2071,7 +2223,6 @@ vtnet_txq_mq_start(struct ifnet *ifp, st
sc = ifp->if_softc;
npairs = sc->vtnet_act_vq_pairs;
- /* BMV: Is this the best way to determine which queue? */
if (m->m_flags & M_FLOWID)
i = m->m_pkthdr.flowid % npairs;
else
@@ -2084,14 +2235,14 @@ vtnet_txq_mq_start(struct ifnet *ifp, st
VTNET_TXQ_UNLOCK(txq);
} else {
error = drbr_enqueue(ifp, txq->vtntx_br, m);
- vtnet_txq_tq_start(txq);
+ taskqueue_enqueue(txq->vtntx_tq, &txq->vtntx_defrtask);
}
return (error);
}
static void
-vtnet_txq_taskqueue(void *xtxq, int pending)
+vtnet_txq_tq_deferred(void *xtxq, int pending)
{
struct vtnet_softc *sc;
struct vtnet_txq *txq;
@@ -2105,36 +2256,64 @@ vtnet_txq_taskqueue(void *xtxq, int pend
*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
More information about the svn-src-user
mailing list