PERFORCE change 194710 for review
Takuya ASADA
syuu at FreeBSD.org
Mon Jun 13 22:55:13 UTC 2011
http://p4web.freebsd.org/@@194710?ac=10
Change 194710 by syuu at x200 on 2011/06/13 22:54:53
Reverted changes for if_tap.
RSS ported from the rwatson/tcp branch; SOFTRSS implemented (a refactored version of RPS, part of Kazuya GODA's GSoC project).
Implemented bpf multiqueue emulation on SOFTRSS.
Affected files ...
.. //depot/projects/soc2011/mq_bpf/src/sys/amd64/conf/RSS#1 add
.. //depot/projects/soc2011/mq_bpf/src/sys/amd64/conf/SOFTRSS#1 add
.. //depot/projects/soc2011/mq_bpf/src/sys/conf/files#2 edit
.. //depot/projects/soc2011/mq_bpf/src/sys/conf/options#2 edit
.. //depot/projects/soc2011/mq_bpf/src/sys/dev/e1000/if_em.c#3 edit
.. //depot/projects/soc2011/mq_bpf/src/sys/dev/e1000/if_igb.c#5 edit
.. //depot/projects/soc2011/mq_bpf/src/sys/net/bpf.c#7 edit
.. //depot/projects/soc2011/mq_bpf/src/sys/net/bpf.h#4 edit
.. //depot/projects/soc2011/mq_bpf/src/sys/net/bpfdesc.h#3 edit
.. //depot/projects/soc2011/mq_bpf/src/sys/net/if.h#2 edit
.. //depot/projects/soc2011/mq_bpf/src/sys/net/if_ethersubr.c#2 edit
.. //depot/projects/soc2011/mq_bpf/src/sys/net/if_tap.c#4 edit
.. //depot/projects/soc2011/mq_bpf/src/sys/net/if_var.h#5 edit
.. //depot/projects/soc2011/mq_bpf/src/sys/net/netisr.c#2 edit
.. //depot/projects/soc2011/mq_bpf/src/sys/net/netisr.h#2 edit
.. //depot/projects/soc2011/mq_bpf/src/sys/net/netisr_internal.h#2 edit
.. //depot/projects/soc2011/mq_bpf/src/sys/netinet/in_rss.c#1 add
.. //depot/projects/soc2011/mq_bpf/src/sys/netinet/in_rss.h#1 add
.. //depot/projects/soc2011/mq_bpf/src/sys/netinet/toeplitz.c#1 add
.. //depot/projects/soc2011/mq_bpf/src/sys/netinet/toeplitz.h#1 add
.. //depot/projects/soc2011/mq_bpf/tests/packet_trace.d#1 add
.. //depot/projects/soc2011/mq_bpf/tests/queue_affinity.c#2 edit
Differences ...
==== //depot/projects/soc2011/mq_bpf/src/sys/conf/files#2 (text+ko) ====
@@ -2713,6 +2713,7 @@
netinet/in_proto.c optional inet | inet6 \
compile-with "${NORMAL_C} -I$S/contrib/pf"
netinet/in_rmx.c optional inet
+netinet/in_rss.c optional inet rss | inet6 rss | inet softrss | inet6 softrss
netinet/ip_divert.c optional inet ipdivert ipfirewall
netinet/ipfw/dn_heap.c optional inet dummynet
netinet/ipfw/dn_sched_fifo.c optional inet dummynet
@@ -2772,6 +2773,7 @@
netinet/tcp_timewait.c optional inet | inet6
netinet/tcp_usrreq.c optional inet | inet6
netinet/udp_usrreq.c optional inet | inet6
+netinet/toeplitz.c optional inet rss | inet6 rss | inet softrss | inet6 softrss
netinet/libalias/alias.c optional libalias inet | netgraph_nat inet
netinet/libalias/alias_db.c optional libalias inet | netgraph_nat inet
netinet/libalias/alias_mod.c optional libalias | netgraph_nat
==== //depot/projects/soc2011/mq_bpf/src/sys/conf/options#2 (text+ko) ====
@@ -421,6 +421,8 @@
NFSLOCKD
RADIX_MPATH opt_mpath.h
ROUTETABLES opt_route.h
+RSS opt_rss.h
+SOFTRSS opt_rss.h
SLIP_IFF_OPTS opt_slip.h
TCPDEBUG
TCP_OFFLOAD_DISABLE opt_inet.h #Disable code to dispatch tcp offloading
@@ -895,4 +897,3 @@
# that "lies" about the amount of RAM it has. Until a cleaner method is
# defined, this option will suffice in overriding what Redboot says.
AR71XX_REALMEM opt_global.h
-
==== //depot/projects/soc2011/mq_bpf/src/sys/dev/e1000/if_em.c#3 (text+ko) ====
==== //depot/projects/soc2011/mq_bpf/src/sys/dev/e1000/if_igb.c#5 (text+ko) ====
@@ -2779,6 +2779,7 @@
ifp->if_capabilities = IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
ifp->if_capabilities |= IFCAP_TSO4;
ifp->if_capabilities |= IFCAP_JUMBO_MTU;
+ ifp->if_capabilities |= IFCAP_MULTIQUEUE;
ifp->if_capenable = ifp->if_capabilities;
/* Don't enable LRO by default */
==== //depot/projects/soc2011/mq_bpf/src/sys/net/bpf.c#7 (text+ko) ====
@@ -40,6 +40,8 @@
#include "opt_bpf.h"
#include "opt_compat.h"
#include "opt_netgraph.h"
+#include "opt_kdtrace.h"
+#include "opt_rss.h"
#include <sys/types.h>
#include <sys/param.h>
@@ -65,7 +67,6 @@
#include <sys/proc.h>
#include <sys/socket.h>
-#include <sys/syslog.h>
#include <net/if.h>
#include <net/bpf.h>
@@ -76,11 +77,14 @@
#include <net/bpf_zerocopy.h>
#include <net/bpfdesc.h>
#include <net/vnet.h>
-
+#ifdef SOFTRSS
+#include <net/netisr.h>
+#endif
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
+#include <sys/sdt.h>
#include <net80211/ieee80211_freebsd.h>
@@ -199,6 +203,29 @@
.f_event = filt_bpfread,
};
+SDT_PROVIDER_DECLARE(bpf);
+SDT_PROVIDER_DEFINE(bpf);
+SDT_PROBE_DEFINE2(bpf, functions, bpf_tap, entry, entry, "void*", "boolean_t");
+SDT_PROBE_DEFINE3(bpf, functions, bpf_mtap_rx, entry, entry, "void *", "uint32_t", "uint32_t");
+SDT_PROBE_DEFINE3(bpf, functions, bpf_mtap_tx, entry, entry, "void *", "uint32_t", "uint32_t");
+SDT_PROBE_DEFINE3(bpf, functions, bpf_mtap2_rx, entry, entry, "void *", "uint32_t", "uint32_t");
+SDT_PROBE_DEFINE3(bpf, functions, bpf_mtap2_tx, entry, entry, "void *", "uint32_t", "uint32_t");
+SDT_PROBE_DEFINE1(bpf, functions, bpfioctl_biocrxqlen, entry, entry, "int");
+SDT_PROBE_DEFINE1(bpf, functions, bpfioctl_bioctxqlen, entry, entry, "int");
+SDT_PROBE_DEFINE2(bpf, functions, bpfioctl_biocrxqaffinity, entry, entry, "int", "int");
+SDT_PROBE_DEFINE2(bpf, functions, bpfioctl_bioctxqaffinity, entry, entry, "int", "int");
+SDT_PROBE_DEFINE1(bpf, functions, bpfioctl_biocenaqmask, entry, entry, "int");
+SDT_PROBE_DEFINE1(bpf, functions, bpfioctl_biocdisqmask, entry, entry, "int");
+SDT_PROBE_DEFINE1(bpf, functions, bpfioctl_biocstrxqmask, entry, entry, "int");
+SDT_PROBE_DEFINE1(bpf, functions, bpfioctl_bioccrrxqmask, entry, entry, "int");
+SDT_PROBE_DEFINE2(bpf, functions, bpfioctl_biocgtrxqmask, entry, entry, "int", "int");
+SDT_PROBE_DEFINE1(bpf, functions, bpfioctl_biocsttxqmask, entry, entry, "int");
+SDT_PROBE_DEFINE1(bpf, functions, bpfioctl_bioccrtxqmask, entry, entry, "int");
+SDT_PROBE_DEFINE2(bpf, functions, bpfioctl_biocgttxqmask, entry, entry, "int", "int");
+SDT_PROBE_DEFINE1(bpf, functions, bpfioctl_biocstothermask, entry, entry, "int");
+SDT_PROBE_DEFINE1(bpf, functions, bpfioctl_bioccrothermask, entry, entry, "int");
+SDT_PROBE_DEFINE1(bpf, functions, bpfioctl_biocgtothermask, entry, entry, "int");
+
/*
* Wrapper functions for various buffering methods. If the set of buffer
* modes expands, we will probably want to introduce a switch data structure
@@ -1519,318 +1546,388 @@
case BIOCRXQLEN:
{
- log(LOG_DEBUG, "BIOCRXQLEN\n");
- struct ifnet *const ifp = d->bd_bif->bif_ifp;
+ struct ifnet *ifp;
+
+ if (d->bd_bif == NULL) {
+ /*
+ * No interface attached yet.
+ */
+ error = EINVAL;
+ SDT_PROBE1(bpf, functions, bpfioctl_biocrxqlen, entry, -1);
+ break;
+ }
+ ifp = d->bd_bif->bif_ifp;
*(int *)addr = ifp->if_rxq_num;
+ SDT_PROBE1(bpf, functions, bpfioctl_biocrxqlen, entry, ifp->if_rxq_num);
break;
}
case BIOCTXQLEN:
{
- log(LOG_DEBUG, "BIOCTXQLEN\n");
- struct ifnet *const ifp = d->bd_bif->bif_ifp;
+ struct ifnet *ifp;
+
+ if (d->bd_bif == NULL) {
+ /*
+ * No interface attached yet.
+ */
+ error = EINVAL;
+ SDT_PROBE1(bpf, functions, bpfioctl_bioctxqlen, entry, -1);
+ break;
+ }
+ ifp = d->bd_bif->bif_ifp;
*(int *)addr = ifp->if_txq_num;
+ SDT_PROBE1(bpf, functions, bpfioctl_bioctxqlen, entry, ifp->if_txq_num);
break;
}
case BIOCRXQAFFINITY:
{
u_long index;
- log(LOG_DEBUG, "BIOCRXQAFFINITY\n");
+ struct ifnet *ifp;
+
if (d->bd_bif == NULL) {
- log(LOG_DEBUG, "d->bd_bif == NULL\n");
/*
* No interface attached yet.
*/
error = EINVAL;
+ SDT_PROBE2(bpf, functions, bpfioctl_biocrxqaffinity, entry, -1, -1);
break;
}
- struct ifnet *const ifp = d->bd_bif->bif_ifp;
+ ifp = d->bd_bif->bif_ifp;
index = *(u_long *)addr;
if (index > ifp->if_rxq_num) {
- log(LOG_DEBUG, "index too large\n");
+ log(LOG_ERR, "BIOCRXQAFFINITY: index too large index:%lx rxq_num:%d\n", index, ifp->if_rxq_num);
error = EINVAL;
+ SDT_PROBE2(bpf, functions, bpfioctl_biocrxqaffinity, entry, -1, -1);
break;
}
if (!ifp->if_rxq_affinity) {
- log(LOG_DEBUG, "!ifp->if_rxq_affinity\n");
+ log(LOG_ERR, "!ifp->if_rxq_affinity\n");
error = EINVAL;
+ SDT_PROBE2(bpf, functions, bpfioctl_biocrxqaffinity, entry, -1, -1);
break;
}
*(u_long *)addr = ifp->if_rxq_affinity[index];
- log(LOG_DEBUG, "index:%lu result:%lu\n", index, *(u_long *)addr);
+ SDT_PROBE2(bpf, functions, bpfioctl_biocrxqaffinity, entry, index, ifp->if_rxq_affinity[index]);
break;
}
case BIOCTXQAFFINITY:
{
u_long index;
- log(LOG_DEBUG, "BIOCTXQAFFINITY\n");
if (d->bd_bif == NULL) {
- log(LOG_DEBUG, "d->bd_bif == NULL\n");
+ log(LOG_ERR, "d->bd_bif == NULL\n");
/*
* No interface attached yet.
*/
error = EINVAL;
+ SDT_PROBE2(bpf, functions, bpfioctl_bioctxqaffinity, entry, -1, -1);
break;
}
struct ifnet *const ifp = d->bd_bif->bif_ifp;
index = *(u_long *)addr;
if (index > ifp->if_txq_num) {
- log(LOG_DEBUG, "index too large\n");
+ log(LOG_ERR, "BIOCTXQAFFINITY: index too large index:%lx txq_num:%x\n", index, ifp->if_txq_num);
error = EINVAL;
+ SDT_PROBE2(bpf, functions, bpfioctl_bioctxqaffinity, entry, -1, -1);
break;
}
if (!ifp->if_txq_affinity) {
- log(LOG_DEBUG, "!ifp->if_txq_affinity\n");
+ log(LOG_ERR, "!ifp->if_txq_affinity\n");
error = EINVAL;
+ SDT_PROBE2(bpf, functions, bpfioctl_bioctxqaffinity, entry, -1, -1);
break;
}
*(u_long *)addr = ifp->if_txq_affinity[index];
- log(LOG_DEBUG, "index:%lu result:%lu\n", index, *(u_long *)addr);
+ SDT_PROBE2(bpf, functions, bpfioctl_bioctxqaffinity, entry, index, ifp->if_txq_affinity[index]);
break;
}
case BIOCENAQMASK:
{
- log(LOG_DEBUG, "BIOCENAQMASK\n");
+ struct ifnet *ifp;
+
if (d->bd_bif == NULL) {
- log(LOG_DEBUG, "d->bd_bif == NULL\n");
+ log(LOG_ERR, "d->bd_bif == NULL\n");
/*
* No interface attached yet.
*/
error = EINVAL;
+ SDT_PROBE1(bpf, functions, bpfioctl_biocenaqmask, entry, -1);
break;
}
if (d->bd_qmask.qm_enabled) {
- log(LOG_DEBUG, "d->bd_qmask.qm_enabled\n");
+ log(LOG_ERR, "d->bd_qmask.qm_enabled\n");
+ error = EINVAL;
+ SDT_PROBE1(bpf, functions, bpfioctl_biocenaqmask, entry, -1);
+ break;
+ }
+ ifp = d->bd_bif->bif_ifp;
+#ifdef SOFTRSS
+ if (!(ifp->if_capenable & IFCAP_MULTIQUEUE)) {
+ ifp->if_rxq_num = netisr_get_cpucount();
+ ifp->if_capabilities |= IFCAP_MULTIQUEUE;
+ ifp->if_capenable |= IFCAP_MULTIQUEUE;
+ }
+#else
+ if (!(ifp->if_capenable & IFCAP_MULTIQUEUE)) {
+ log(LOG_ERR, "if doesn't support multiqueue\n");
error = EINVAL;
+ SDT_PROBE1(bpf, functions, bpfioctl_biocenaqmask, entry, -1);
break;
}
- struct ifnet *const ifp = d->bd_bif->bif_ifp;
+#endif
+ log(LOG_DEBUG, "if_rxq_num:%d\n", ifp->if_rxq_num);
+ log(LOG_DEBUG, "if_txq_num:%d\n", ifp->if_txq_num);
d->bd_qmask.qm_enabled = TRUE;
- log(LOG_DEBUG, "ifp->if_rxq_num:%d\n", ifp->if_rxq_num);
d->bd_qmask.qm_rxq_mask =
malloc(ifp->if_rxq_num * sizeof(boolean_t), M_BPF,
M_WAITOK | M_ZERO);
- log(LOG_DEBUG, "ifp->if_txq_num:%d\n", ifp->if_txq_num);
d->bd_qmask.qm_txq_mask =
malloc(ifp->if_txq_num * sizeof(boolean_t), M_BPF,
M_WAITOK | M_ZERO);
d->bd_qmask.qm_other_mask = FALSE;
+ SDT_PROBE1(bpf, functions, bpfioctl_biocenaqmask, entry, ifp->if_rxq_num);
break;
}
case BIOCDISQMASK:
{
- log(LOG_DEBUG, "BIOCDISQMASK\n");
if (d->bd_bif == NULL) {
- log(LOG_DEBUG, "d->bd_bif == NULL\n");
+ log(LOG_ERR, "d->bd_bif == NULL\n");
/*
* No interface attached yet.
*/
error = EINVAL;
+ SDT_PROBE1(bpf, functions, bpfioctl_biocdisqmask, entry, -1);
break;
}
if (!d->bd_qmask.qm_enabled) {
- log(LOG_DEBUG, "!d->bd_qmask.qm_enabled\n");
+ log(LOG_ERR, "!d->bd_qmask.qm_enabled\n");
error = EINVAL;
+ SDT_PROBE1(bpf, functions, bpfioctl_biocdisqmask, entry, -1);
break;
}
d->bd_qmask.qm_enabled = FALSE;
free(d->bd_qmask.qm_rxq_mask, M_BPF);
free(d->bd_qmask.qm_txq_mask, M_BPF);
+ SDT_PROBE1(bpf, functions, bpfioctl_biocdisqmask, entry, 0);
break;
}
case BIOCSTRXQMASK:
{
+ struct ifnet *ifp;
int index;
- log(LOG_DEBUG, "BIOCSTRXQMASK\n");
+
if (d->bd_bif == NULL) {
- log(LOG_DEBUG, "d->bd_bif == NULL\n");
+ log(LOG_ERR, "d->bd_bif == NULL\n");
/*
* No interface attached yet.
*/
- error = EINVAL;
+ error = EINVAL;
+ SDT_PROBE1(bpf, functions, bpfioctl_biocstrxqmask, entry, -1);
break;
}
if (!d->bd_qmask.qm_enabled) {
- log(LOG_DEBUG, "!d->bd_qmask.qm_enabled\n");
+ log(LOG_ERR, "!d->bd_qmask.qm_enabled\n");
error = EINVAL;
+ SDT_PROBE1(bpf, functions, bpfioctl_biocstrxqmask, entry, -1);
break;
}
- struct ifnet *const ifp = d->bd_bif->bif_ifp;
+ ifp = d->bd_bif->bif_ifp;
index = *(uint32_t *)addr;
if (index > ifp->if_rxq_num) {
- log(LOG_DEBUG, "index too large\n");
+ log(LOG_ERR, "BIOCSTRXQMASK: index too large index:%x rxq_num:%x\n", index, ifp->if_rxq_num);
error = EINVAL;
+ SDT_PROBE1(bpf, functions, bpfioctl_biocstrxqmask, entry, -1);
break;
}
- log(LOG_DEBUG, "index:%d\n", index);
d->bd_qmask.qm_rxq_mask[index] = TRUE;
+ SDT_PROBE1(bpf, functions, bpfioctl_biocstrxqmask, entry, index);
break;
}
case BIOCCRRXQMASK:
{
int index;
- log(LOG_DEBUG, "BIOCCRRXQMASK\n");
+ struct ifnet *ifp;
+
if (d->bd_bif == NULL) {
- log(LOG_DEBUG, "d->bd_bif == NULL\n");
+ log(LOG_ERR, "d->bd_bif == NULL\n");
/*
* No interface attached yet.
*/
error = EINVAL;
+ SDT_PROBE1(bpf, functions, bpfioctl_bioccrrxqmask, entry, -1);
break;
}
if (!d->bd_qmask.qm_enabled) {
- log(LOG_DEBUG, "!d->bd_qmask.qm_enabled\n");
+ log(LOG_ERR, "!d->bd_qmask.qm_enabled\n");
error = EINVAL;
+ SDT_PROBE1(bpf, functions, bpfioctl_bioccrrxqmask, entry, -1);
break;
}
- struct ifnet *const ifp = d->bd_bif->bif_ifp;
+ ifp = d->bd_bif->bif_ifp;
index = *(uint32_t *)addr;
if (index > ifp->if_rxq_num) {
- log(LOG_DEBUG, "index too large\n");
+ log(LOG_ERR, "BIOCCRRXQMASK: index too large index:%x rxq_num:%x\n", index, ifp->if_rxq_num);
error = EINVAL;
+ SDT_PROBE1(bpf, functions, bpfioctl_bioccrrxqmask, entry, -1);
break;
}
- log(LOG_DEBUG, "index:%d\n", index);
d->bd_qmask.qm_rxq_mask[index] = FALSE;
+ SDT_PROBE1(bpf, functions, bpfioctl_bioccrrxqmask, entry, index);
break;
}
case BIOCGTRXQMASK:
{
int index;
- log(LOG_DEBUG, "BIOCGTRXQMASK\n");
+ struct ifnet *ifp;
+
if (d->bd_bif == NULL) {
- log(LOG_DEBUG, "d->bd_bif == NULL\n");
+ log(LOG_ERR, "d->bd_bif == NULL\n");
/*
* No interface attached yet.
*/
error = EINVAL;
+ SDT_PROBE2(bpf, functions, bpfioctl_biocgtrxqmask, entry, -1, -1);
break;
}
if (!d->bd_qmask.qm_enabled) {
- log(LOG_DEBUG, "!d->bd_qmask.qm_enabled\n");
+ log(LOG_ERR, "!d->bd_qmask.qm_enabled\n");
error = EINVAL;
+ SDT_PROBE2(bpf, functions, bpfioctl_biocgtrxqmask, entry, -1, -1);
break;
}
- struct ifnet *const ifp = d->bd_bif->bif_ifp;
+ ifp = d->bd_bif->bif_ifp;
index = *(uint32_t *)addr;
if (index > ifp->if_rxq_num) {
- log(LOG_DEBUG, "index too large\n");
+ log(LOG_ERR, "BIOCGTRXQMASK: index too large index:%x rxq_num:%x\n", index, ifp->if_rxq_num);
error = EINVAL;
+ SDT_PROBE1(bpf, functions, bpfioctl_biocgtrxqmask, entry, -1);
break;
}
- log(LOG_DEBUG, "index:%d\n", index);
*(uint32_t *)addr = d->bd_qmask.qm_rxq_mask[index];
+ SDT_PROBE2(bpf, functions, bpfioctl_biocgtrxqmask, entry, index, d->bd_qmask.qm_rxq_mask[index]);
break;
}
case BIOCSTTXQMASK:
{
+ struct ifnet *ifp;
int index;
- log(LOG_DEBUG, "BIOCSTTXQMASK\n");
+
if (d->bd_bif == NULL) {
- log(LOG_DEBUG, "d->bd_bif == NULL\n");
+ log(LOG_ERR, "d->bd_bif == NULL\n");
/*
* No interface attached yet.
*/
error = EINVAL;
+ SDT_PROBE1(bpf, functions, bpfioctl_biocsttxqmask, entry, -1);
break;
}
if (!d->bd_qmask.qm_enabled) {
- log(LOG_DEBUG, "!d->bd_qmask.qm_enabled\n");
+ log(LOG_ERR, "!d->bd_qmask.qm_enabled\n");
error = EINVAL;
+ SDT_PROBE1(bpf, functions, bpfioctl_biocsttxqmask, entry, -1);
break;
}
- struct ifnet *const ifp = d->bd_bif->bif_ifp;
+
+ ifp = d->bd_bif->bif_ifp;
index = *(uint32_t *)addr;
if (index > ifp->if_txq_num) {
- log(LOG_DEBUG, "index too large\n");
+ log(LOG_ERR, "BIOCSTTXQMASK: index too large index:%x txq_num:%x\n", index, ifp->if_txq_num);
error = EINVAL;
+ SDT_PROBE1(bpf, functions, bpfioctl_biocsttxqmask, entry, -1);
break;
}
- log(LOG_DEBUG, "index:%d\n", index);
d->bd_qmask.qm_txq_mask[index] = TRUE;
+ SDT_PROBE1(bpf, functions, bpfioctl_biocsttxqmask, entry, index);
break;
}
case BIOCCRTXQMASK:
{
+ struct ifnet *ifp;
int index;
- log(LOG_DEBUG, "BIOCCRTXQMASK\n");
+
if (d->bd_bif == NULL) {
- log(LOG_DEBUG, "d->bd_bif == NULL\n");
+ log(LOG_ERR, "d->bd_bif == NULL\n");
/*
* No interface attached yet.
*/
error = EINVAL;
+ SDT_PROBE1(bpf, functions, bpfioctl_bioccrtxqmask, entry, -1);
break;
}
if (!d->bd_qmask.qm_enabled) {
- log(LOG_DEBUG, "!d->bd_qmask.qm_enabled\n");
+ log(LOG_ERR, "!d->bd_qmask.qm_enabled\n");
error = EINVAL;
+ SDT_PROBE1(bpf, functions, bpfioctl_bioccrtxqmask, entry, -1);
break;
}
- struct ifnet *const ifp = d->bd_bif->bif_ifp;
+
+ ifp = d->bd_bif->bif_ifp;
index = *(uint32_t *)addr;
if (index > ifp->if_txq_num) {
- log(LOG_DEBUG, "index too large\n");
+ log(LOG_ERR, "BIOCCRTXQMASK: index too large index:%x txq_num:%x\n", index, ifp->if_txq_num);
error = EINVAL;
+ SDT_PROBE1(bpf, functions, bpfioctl_bioccrtxqmask, entry, -1);
break;
}
- log(LOG_DEBUG, "index:%d\n", index);
d->bd_qmask.qm_txq_mask[index] = FALSE;
+ SDT_PROBE1(bpf, functions, bpfioctl_bioccrtxqmask, entry, index);
break;
}
case BIOCGTTXQMASK:
{
int index;
- log(LOG_DEBUG, "BIOCGTTXQMASK\n");
+ struct ifnet *ifp;
+
if (d->bd_bif == NULL) {
- log(LOG_DEBUG, "d->bd_bif == NULL\n");
+ log(LOG_ERR, "d->bd_bif == NULL\n");
/*
* No interface attached yet.
*/
error = EINVAL;
+ SDT_PROBE2(bpf, functions, bpfioctl_biocgttxqmask, entry, -1, -1);
break;
}
if (!d->bd_qmask.qm_enabled) {
- log(LOG_DEBUG, "!d->bd_qmask.qm_enabled\n");
+ log(LOG_ERR, "!d->bd_qmask.qm_enabled\n");
error = EINVAL;
+ SDT_PROBE2(bpf, functions, bpfioctl_biocgttxqmask, entry, -1, -1);
break;
}
- struct ifnet *const ifp = d->bd_bif->bif_ifp;
+ ifp = d->bd_bif->bif_ifp;
index = *(uint32_t *)addr;
if (index > ifp->if_txq_num) {
- log(LOG_DEBUG, "index too large\n");
+ log(LOG_ERR, "BIOCGTTXQMASK: index too large index:%x txq_num:%x\n", index, ifp->if_txq_num);
error = EINVAL;
+ SDT_PROBE2(bpf, functions, bpfioctl_biocgttxqmask, entry, -1, -1);
break;
}
- log(LOG_DEBUG, "index:%d\n", index);
*(uint32_t *)addr = d->bd_qmask.qm_txq_mask[index];
+ SDT_PROBE2(bpf, functions, bpfioctl_biocgttxqmask, entry, index, d->bd_qmask.qm_txq_mask[index]);
break;
}
case BIOCSTOTHERMASK:
- log(LOG_DEBUG, "BIOSTOTHERMASK\n");
d->bd_qmask.qm_other_mask = TRUE;
+ SDT_PROBE1(bpf, functions, bpfioctl_biocstothermask, entry, 1);
break;
case BIOCCROTHERMASK:
- log(LOG_DEBUG, "BIOCCROTHERMASK\n");
d->bd_qmask.qm_other_mask = FALSE;
+ SDT_PROBE1(bpf, functions, bpfioctl_bioccrothermask, entry, 0);
break;
case BIOCGTOTHERMASK:
- log(LOG_DEBUG, "BIOCGTOTHERMASK\n");
- log(LOG_DEBUG, "mask:%d\n", d->bd_qmask.qm_other_mask);
*(uint32_t *)addr = (uint32_t)d->bd_qmask.qm_other_mask;
+ SDT_PROBE1(bpf, functions, bpfioctl_biocgtothermask, entry, d->bd_qmask.qm_other_mask);
break;
}
CURVNET_RESTORE();
@@ -2144,8 +2241,7 @@
BPFIF_LOCK(bp);
LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
if (d->bd_qmask.qm_enabled) {
- log(LOG_DEBUG, "bpf_tap other_mask:%d\n",
- d->bd_qmask.qm_other_mask);
+ SDT_PROBE2(bpf, functions, bpf_tap, entry, d, d->bd_qmask.qm_other_mask);
if (!d->bd_qmask.qm_other_mask)
continue;
}
@@ -2195,13 +2291,6 @@
u_int pktlen, slen;
int gottime;
-#if 0
- if (m->m_pkthdr.txqid != (uint32_t)-1 && m->m_pkthdr.txqid != PCPU_GET(cpuid))
- log(LOG_DEBUG, "txqid:%d cpuid:%d\n", m->m_pkthdr.txqid, PCPU_GET(cpuid));
-#endif
- if (m->m_pkthdr.rxqid != (uint32_t)-1 && m->m_pkthdr.rxqid != PCPU_GET(cpuid))
- log(LOG_DEBUG, "rxqid:%d cpuid:%d\n", m->m_pkthdr.rxqid, PCPU_GET(cpuid));
-
/* Skip outgoing duplicate packets. */
if ((m->m_flags & M_PROMISC) != 0 && m->m_pkthdr.rcvif == NULL) {
m->m_flags &= ~M_PROMISC;
@@ -2214,18 +2303,30 @@
BPFIF_LOCK(bp);
LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
if (d->bd_qmask.qm_enabled) {
-/*
- log(LOG_DEBUG, "bpf_mtap rxqid:%x txqid:%x rxqmask:%x txqmask:%x\n",
- m->m_pkthdr.rxqid, m->m_pkthdr.txqid,
- d->bd_qmask.qm_rxq_mask[m->m_pkthdr.rxqid],
- d->bd_qmask.qm_txq_mask[m->m_pkthdr.txqid]);
-*/
- if (m->m_pkthdr.rxqid != (uint32_t)-1 &&
- !d->bd_qmask.qm_rxq_mask[m->m_pkthdr.rxqid])
- continue;
- if (m->m_pkthdr.txqid != (uint32_t)-1 &&
- !d->bd_qmask.qm_txq_mask[m->m_pkthdr.txqid])
- continue;
+ if (!(m->m_flags & M_FLOWID)) {
+ log(LOG_DEBUG, "m:%p ifp:%p !(m->m_flags & M_FLOWID)\n",
+ m, m->m_pkthdr.rcvif);
+ if (!d->bd_qmask.qm_other_mask)
+ continue;
+ } else {
+ if (m->m_pkthdr.rxqid != (uint32_t)-1)
+ KASSERT(m->m_pkthdr.rxqid < bp->bif_ifp->if_rxq_num,
+ ("rxqid is not valid rxqid:%x rxq_num:%x",
+ m->m_pkthdr.rxqid, bp->bif_ifp->if_rxq_num));
+ if (m->m_pkthdr.txqid != (uint32_t)-1)
+ KASSERT(m->m_pkthdr.txqid < bp->bif_ifp->if_txq_num,
+ ("txqid is not valid txqid:%x txq_num:%x",
+ m->m_pkthdr.txqid, bp->bif_ifp->if_txq_num));
+
+ SDT_PROBE3(bpf, functions, bpf_mtap_rx, entry, d, m->m_pkthdr.rxqid, bp->bif_ifp->if_rxq_num);
+ SDT_PROBE3(bpf, functions, bpf_mtap_tx, entry, d, m->m_pkthdr.txqid, bp->bif_ifp->if_txq_num);
+ if (m->m_pkthdr.rxqid != (uint32_t)-1 &&
+ !d->bd_qmask.qm_rxq_mask[m->m_pkthdr.rxqid])
+ continue;
+ if (m->m_pkthdr.txqid != (uint32_t)-1 &&
+ !d->bd_qmask.qm_txq_mask[m->m_pkthdr.txqid])
+ continue;
+ }
}
if (BPF_CHECK_DIRECTION(d, m->m_pkthdr.rcvif, bp->bif_ifp))
continue;
@@ -2267,13 +2368,6 @@
u_int pktlen, slen;
int gottime;
-#if 0
- if (m->m_pkthdr.txqid != (uint32_t)-1 && m->m_pkthdr.txqid != PCPU_GET(cpuid))
- log(LOG_DEBUG, "txqid:%d cpuid:%d\n", m->m_pkthdr.txqid, PCPU_GET(cpuid));
-#endif
- if (m->m_pkthdr.rxqid != (uint32_t)-1 && m->m_pkthdr.rxqid != PCPU_GET(cpuid))
- log(LOG_DEBUG, "rxqid:%d cpuid:%d\n", m->m_pkthdr.rxqid, PCPU_GET(cpuid));
-
/* Skip outgoing duplicate packets. */
if ((m->m_flags & M_PROMISC) != 0 && m->m_pkthdr.rcvif == NULL) {
m->m_flags &= ~M_PROMISC;
@@ -2295,10 +2389,8 @@
BPFIF_LOCK(bp);
LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
if (d->bd_qmask.qm_enabled) {
- log(LOG_DEBUG, "bpf_mtap2 rxqid:%x txqid:%x rxqmask:%x txqmask:%x\n",
- m->m_pkthdr.rxqid, m->m_pkthdr.txqid,
- d->bd_qmask.qm_rxq_mask[m->m_pkthdr.rxqid],
- d->bd_qmask.qm_txq_mask[m->m_pkthdr.txqid]);
+ SDT_PROBE3(bpf, functions, bpf_mtap2_rx, entry, d, m->m_pkthdr.rxqid, bp->bif_ifp->if_rxq_num);
+ SDT_PROBE3(bpf, functions, bpf_mtap2_tx, entry, d, m->m_pkthdr.txqid, bp->bif_ifp->if_txq_num);
if (m->m_pkthdr.rxqid != (uint32_t)-1 &&
!d->bd_qmask.qm_rxq_mask[m->m_pkthdr.rxqid])
==== //depot/projects/soc2011/mq_bpf/src/sys/net/bpf.h#4 (text+ko) ====
==== //depot/projects/soc2011/mq_bpf/src/sys/net/bpfdesc.h#3 (text+ko) ====
==== //depot/projects/soc2011/mq_bpf/src/sys/net/if.h#2 (text+ko) ====
@@ -220,6 +220,7 @@
#define IFCAP_POLLING_NOCOUNT 0x20000 /* polling ticks cannot be fragmented */
#define IFCAP_VLAN_HWTSO 0x40000 /* can do IFCAP_TSO on VLANs */
#define IFCAP_LINKSTATE 0x80000 /* the runtime link state is dynamic */
+#define IFCAP_MULTIQUEUE 0x100000
#define IFCAP_HWCSUM (IFCAP_RXCSUM | IFCAP_TXCSUM)
#define IFCAP_TSO (IFCAP_TSO4 | IFCAP_TSO6)
==== //depot/projects/soc2011/mq_bpf/src/sys/net/if_ethersubr.c#2 (text+ko) ====
@@ -36,6 +36,7 @@
#include "opt_ipx.h"
#include "opt_netgraph.h"
#include "opt_mbuf_profiling.h"
+#include "opt_rss.h"
#include <sys/param.h>
#include <sys/systm.h>
@@ -69,6 +70,7 @@
#include <netinet/in.h>
#include <netinet/in_var.h>
#include <netinet/if_ether.h>
+#include <netinet/in_rss.h>
#include <netinet/ip_carp.h>
#include <netinet/ip_var.h>
#include <netinet/ip_fw.h>
@@ -106,6 +108,9 @@
CTASSERT(sizeof (struct ether_addr) == ETHER_ADDR_LEN);
#endif
+SYSCTL_DECL(_net_link);
+SYSCTL_NODE(_net_link, IFT_ETHER, ether, CTLFLAG_RW, 0, "Ethernet");
+
/* netgraph node hooks for ng_ether(4) */
void (*ng_ether_input_p)(struct ifnet *ifp, struct mbuf **mp);
void (*ng_ether_input_orphan_p)(struct ifnet *ifp, struct mbuf *m);
@@ -561,7 +566,7 @@
* mbuf chain m with the ethernet header at the front.
*/
static void
-ether_input(struct ifnet *ifp, struct mbuf *m)
+ether_input_internal(struct ifnet *ifp, struct mbuf *m)
{
struct ether_header *eh;
u_short etype;
@@ -754,6 +759,77 @@
CURVNET_RESTORE();
}
+#if defined(RSS) || defined(SOFTRSS)
+/*
+ * Ethernet input dispatch; by default, direct dispatch here regardless of
+ * global configuration. However, if RSS is enabled, hook up RSS affinity
+ * so that when deferred or hybrid dispatch is enabled, we can redistribute
+ * load based on RSS.
+ *
+ * XXXRW: Would be nice if the ifnet passed up a flag indicating whether or
+ * not it had already done work distribution via multi-queue. Then we could
+ * direct dispatch in the event load balancing was already complete and
+ * handle the case of interfaces with different capabilities better.
+ *
+ * XXXRW: Sort of want an M_DISTRIBUTED flag to avoid multiple distributions
+ * at multiple layers?
+ *
+ * XXXRW: For now, enable all this only if RSS is compiled in, although it
+ * works fine without RSS. Need to characterise the performance overhead
+ * of the detour through the netisr code in the event the result is always
+ * direct dispatch.
+ */
+static void
+ether_nh_input(struct mbuf *m)
+{
+
+ ether_input_internal(m->m_pkthdr.rcvif, m);
+}
+
+static struct netisr_handler ether_nh = {
+ .nh_name = "ether",
+ .nh_handler = ether_nh_input,
+ .nh_proto = NETISR_ETHER,
+#if defined(RSS) || defined(SOFTRSS)
+ .nh_policy = NETISR_POLICY_CPU,
+#if defined(RSS)
+ .nh_dispatch = NETISR_DISPATCH_DIRECT,
+#else
+ .nh_dispatch = NETISR_DISPATCH_HYBRID,
+#endif
+ .nh_m2cpuid = rss_m2cpuid,
+#else
+ .nh_policy = NETISR_POLICY_SOURCE,
+ .nh_dispatch = NETISR_DISPATCH_DIRECT,
+#endif
+};
+
+static void
+ether_init(__unused void *arg)
+{
+
+ netisr_register(ðer_nh);
+}
+SYSINIT(ether, SI_SUB_INIT_IF, SI_ORDER_ANY, ether_init, NULL);
+#endif /* RSS || SOFTRSS */
+#include <sys/syslog.h>
+static void
+ether_input(struct ifnet *ifp, struct mbuf *m)
+{
+ log(LOG_DEBUG, "%s ifp:%p m:%p\n", __func__, ifp, m);
+#if defined(RSS) || defined(SOFTRSS)
+ /*
+ * We will rely on rcvif being set properly in the deferred context,
+ * so assert it is correct here.
+ */
+ KASSERT(m->m_pkthdr.rcvif == ifp, ("%s: ifnet mismatch", __func__));
+
+ netisr_dispatch(NETISR_ETHER, m);
+#else
+ ether_input_internal(ifp, m);
+#endif
+}
+
/*
* Upper layer processing for a received Ethernet packet.
*/
@@ -1008,8 +1084,6 @@
}
#endif
-SYSCTL_DECL(_net_link);
-SYSCTL_NODE(_net_link, IFT_ETHER, ether, CTLFLAG_RW, 0, "Ethernet");
#if defined(INET) || defined(INET6)
SYSCTL_VNET_INT(_net_link_ether, OID_AUTO, ipfw, CTLFLAG_RW,
&VNET_NAME(ether_ipfw), 0, "Pass ether pkts through firewall");
==== //depot/projects/soc2011/mq_bpf/src/sys/net/if_tap.c#4 (text+ko) ====
@@ -54,7 +54,6 @@
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
-#include <sys/syslog.h>
#include <sys/systm.h>
#include <sys/ttycom.h>
#include <sys/uio.h>
@@ -870,10 +869,6 @@
} while (m == NULL);
mtx_unlock(&tp->tap_mtx);
- m->m_pkthdr.rxqid = (uint32_t)-1;
- m->m_pkthdr.txqid = PCPU_GET(cpuid);
- log(LOG_DEBUG, "%s rxqid:%x txqid:%x\n", __func__, m->m_pkthdr.rxqid, m->m_pkthdr.txqid);
-
/* feed packet to bpf */
BPF_MTAP(ifp, m);
@@ -948,10 +943,6 @@
return (0);
}
- m->m_pkthdr.rxqid = PCPU_GET(cpuid);
- m->m_pkthdr.txqid = (uint32_t)-1;
- log(LOG_DEBUG, "%s rxqid:%x txqid:%x\n", __func__, m->m_pkthdr.rxqid, m->m_pkthdr.txqid);
-
/* Pass packet up to parent. */
(*ifp->if_input)(ifp, m);
ifp->if_ipackets ++; /* ibytes are counted in parent */
@@ -1090,3 +1081,4 @@
knlist_remove(&tp->tap_rsel.si_note, kn, 0);
} /* tapkqdetach */
+
==== //depot/projects/soc2011/mq_bpf/src/sys/net/if_var.h#5 (text+ko) ====
==== //depot/projects/soc2011/mq_bpf/src/sys/net/netisr.c#2 (text+ko) ====
@@ -1,6 +1,6 @@
/*-
* Copyright (c) 2007-2009 Robert N. M. Watson
- * Copyright (c) 2010 Juniper Networks, Inc.
+ * Copyright (c) 2010-2011 Juniper Networks, Inc.
* All rights reserved.
*
* This software was developed by Robert N. M. Watson under contract
@@ -65,6 +65,7 @@
#include "opt_ddb.h"
#include "opt_device_polling.h"
+#include "opt_kdtrace.h"
#include <sys/param.h>
#include <sys/bus.h>
@@ -82,6 +83,8 @@
#include <sys/socket.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
+#include <sys/sdt.h>
+#include <sys/syslog.h>
#ifdef DDB
#include <ddb/ddb.h>
@@ -94,6 +97,31 @@
#include <net/netisr_internal.h>
#include <net/vnet.h>
+/*
+ * Locking strategy: three types of locks protect netisr processing:
+ *
+ * netisr configuration lock - serializes "rethreading" events, in which the
+ * number of worker threads is changed.
+ *
+ * netisr_rmlock - stabilizes the netisr system for network processing,
+ * almost always acquired as a read lock (except during configuration
+ * changes).
+ *
+ * nws_mtx - per-workstream lock that serializes access to queues.
+ */
+
+/*
+ * netisr configuration lock: serialize rethread events, in which the thread
+ * count may be increased and decreased, to avoid interlacing of these
+ * events, which might expose incompletely started or stopped threads, etc.
+ * This is a sleep lock so that it can be held over ithread start/stop.
+ */
+static struct sx netisr_config_sx;
+#define NETISR_CONFIG_LOCK_INIT() sx_init(&netisr_config_sx, \
+ "netisr_config_sx")
+#define NETISR_CONFIG_LOCK() sx_xlock(&netisr_config_sx)
+#define NETISR_CONFIG_UNLOCK() sx_xunlock(&netisr_config_sx)
+
/*-
* Synchronize use and modification of the registered netisr data structures;
* acquire a read lock while modifying the set of registered protocols to
@@ -114,51 +142,74 @@
*
* XXXRW: rmlocks don't support assertions.
*/
+#define NETISR_RMLOCKING
+
+#ifdef NETISR_RMLOCKING
static struct rmlock netisr_rmlock;
#define NETISR_LOCK_INIT() rm_init_flags(&netisr_rmlock, "netisr", \
- RM_NOWITNESS)
+ RM_NOWITNESS | RM_RECURSE)
#define NETISR_LOCK_ASSERT()
#define NETISR_RLOCK(tracker) rm_rlock(&netisr_rmlock, (tracker))
#define NETISR_RUNLOCK(tracker) rm_runlock(&netisr_rmlock, (tracker))
#define NETISR_WLOCK() rm_wlock(&netisr_rmlock)
#define NETISR_WUNLOCK() rm_wunlock(&netisr_rmlock)
-/* #define NETISR_LOCKING */
+#else
+#define NETISR_LOCK_INIT()
+#define NETISR_LOCK_ASSERT()
+#define NETISR_RLOCK(x)
+#define NETISR_RUNLOCK(x)
+#define NETISR_WLOCK()
+#define NETISR_WUNLOCK()
+#endif
SYSCTL_NODE(_net, OID_AUTO, isr, CTLFLAG_RW, 0, "netisr");
/*-
- * Three direct dispatch policies are supported:
+ * Three global direct dispatch policies are supported:
*
- * - Always defer: all work is scheduled for a netisr, regardless of context.
- * (!direct)
+ * NETISR_DISPATCH_QUEUED: All work is deferred for a netisr, regardless of
+ * context (may be overriden by protocols).
*
- * - Hybrid: if the executing context allows direct dispatch, and we're
- * running on the CPU the work would be done on, then direct dispatch if it
- * wouldn't violate ordering constraints on the workstream.
- * (direct && !direct_force)
+ * NETISR_DISPATCH_HYBRID: If the executing context allows direct dispatch,
+ * and we're running on the CPU the work would be performed on, then direct
+ * dispatch it if it wouldn't violate ordering constraints on the workstream.
*
- * - Always direct: if the executing context allows direct dispatch, always
- * direct dispatch. (direct && direct_force)
+ * NETISR_DISPATCH_DIRECT: If the executing context allows direct dispatch,
+ * always direct dispatch. (The default.)
*
* Notice that changing the global policy could lead to short periods of
* misordered processing, but this is considered acceptable as compared to
- * the complexity of enforcing ordering during policy changes.
+ * the complexity of enforcing ordering during policy changes. Protocols can
+ * override the global policy (when they're not doing that, they select
+ * NETISR_DISPATCH_DEFAULT).
+ */
+#define NETISR_DISPATCH_POLICY_DEFAULT NETISR_DISPATCH_DIRECT
+#define NETISR_DISPATCH_POLICY_MAXSTR 20 /* Used for temporary buffers. */
+static u_int netisr_dispatch_policy = NETISR_DISPATCH_POLICY_DEFAULT;
+static int sysctl_netisr_dispatch_policy(SYSCTL_HANDLER_ARGS);
>>> TRUNCATED FOR MAIL (1000 lines) <<<
More information about the p4-projects
mailing list