PERFORCE change 194710 for review

Takuya ASADA syuu at FreeBSD.org
Mon Jun 13 22:55:13 UTC 2011


http://p4web.freebsd.org/@@194710?ac=10

Change 194710 by syuu at x200 on 2011/06/13 22:54:53

	Reverted changes for if_tap.
	RSS ported from rwatson/tcp branch, SOFTRSS implemented (refactored version of RPS, part of Kazuya GODA's GSoC project).
	Implemented bpf multiqueue emulation on SOFTRSS.

Affected files ...

.. //depot/projects/soc2011/mq_bpf/src/sys/amd64/conf/RSS#1 add
.. //depot/projects/soc2011/mq_bpf/src/sys/amd64/conf/SOFTRSS#1 add
.. //depot/projects/soc2011/mq_bpf/src/sys/conf/files#2 edit
.. //depot/projects/soc2011/mq_bpf/src/sys/conf/options#2 edit
.. //depot/projects/soc2011/mq_bpf/src/sys/dev/e1000/if_em.c#3 edit
.. //depot/projects/soc2011/mq_bpf/src/sys/dev/e1000/if_igb.c#5 edit
.. //depot/projects/soc2011/mq_bpf/src/sys/net/bpf.c#7 edit
.. //depot/projects/soc2011/mq_bpf/src/sys/net/bpf.h#4 edit
.. //depot/projects/soc2011/mq_bpf/src/sys/net/bpfdesc.h#3 edit
.. //depot/projects/soc2011/mq_bpf/src/sys/net/if.h#2 edit
.. //depot/projects/soc2011/mq_bpf/src/sys/net/if_ethersubr.c#2 edit
.. //depot/projects/soc2011/mq_bpf/src/sys/net/if_tap.c#4 edit
.. //depot/projects/soc2011/mq_bpf/src/sys/net/if_var.h#5 edit
.. //depot/projects/soc2011/mq_bpf/src/sys/net/netisr.c#2 edit
.. //depot/projects/soc2011/mq_bpf/src/sys/net/netisr.h#2 edit
.. //depot/projects/soc2011/mq_bpf/src/sys/net/netisr_internal.h#2 edit
.. //depot/projects/soc2011/mq_bpf/src/sys/netinet/in_rss.c#1 add
.. //depot/projects/soc2011/mq_bpf/src/sys/netinet/in_rss.h#1 add
.. //depot/projects/soc2011/mq_bpf/src/sys/netinet/toeplitz.c#1 add
.. //depot/projects/soc2011/mq_bpf/src/sys/netinet/toeplitz.h#1 add
.. //depot/projects/soc2011/mq_bpf/tests/packet_trace.d#1 add
.. //depot/projects/soc2011/mq_bpf/tests/queue_affinity.c#2 edit

Differences ...

==== //depot/projects/soc2011/mq_bpf/src/sys/conf/files#2 (text+ko) ====

@@ -2713,6 +2713,7 @@
 netinet/in_proto.c		optional inet | inet6 \
 	compile-with "${NORMAL_C} -I$S/contrib/pf"
 netinet/in_rmx.c		optional inet
+netinet/in_rss.c		optional inet rss | inet6 rss | inet softrss | inet6 softrss
 netinet/ip_divert.c		optional inet ipdivert ipfirewall
 netinet/ipfw/dn_heap.c		optional inet dummynet
 netinet/ipfw/dn_sched_fifo.c	optional inet dummynet
@@ -2772,6 +2773,7 @@
 netinet/tcp_timewait.c		optional inet | inet6
 netinet/tcp_usrreq.c		optional inet | inet6
 netinet/udp_usrreq.c		optional inet | inet6
+netinet/toeplitz.c		optional inet rss | inet6 rss | inet softrss | inet6 softrss
 netinet/libalias/alias.c	optional libalias inet | netgraph_nat inet
 netinet/libalias/alias_db.c	optional libalias inet | netgraph_nat inet
 netinet/libalias/alias_mod.c	optional libalias | netgraph_nat

==== //depot/projects/soc2011/mq_bpf/src/sys/conf/options#2 (text+ko) ====

@@ -421,6 +421,8 @@
 NFSLOCKD
 RADIX_MPATH		opt_mpath.h
 ROUTETABLES		opt_route.h
+RSS			opt_rss.h
+SOFTRSS			opt_rss.h
 SLIP_IFF_OPTS		opt_slip.h
 TCPDEBUG
 TCP_OFFLOAD_DISABLE	opt_inet.h #Disable code to dispatch tcp offloading
@@ -895,4 +897,3 @@
 # that "lies" about the amount of RAM it has. Until a cleaner method is
 # defined, this option will suffice in overriding what Redboot says.
 AR71XX_REALMEM    opt_global.h
-

==== //depot/projects/soc2011/mq_bpf/src/sys/dev/e1000/if_em.c#3 (text+ko) ====


==== //depot/projects/soc2011/mq_bpf/src/sys/dev/e1000/if_igb.c#5 (text+ko) ====

@@ -2779,6 +2779,7 @@
 	ifp->if_capabilities = IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
 	ifp->if_capabilities |= IFCAP_TSO4;
 	ifp->if_capabilities |= IFCAP_JUMBO_MTU;
+	ifp->if_capabilities |= IFCAP_MULTIQUEUE;
 	ifp->if_capenable = ifp->if_capabilities;
 
 	/* Don't enable LRO by default */

==== //depot/projects/soc2011/mq_bpf/src/sys/net/bpf.c#7 (text+ko) ====

@@ -40,6 +40,8 @@
 #include "opt_bpf.h"
 #include "opt_compat.h"
 #include "opt_netgraph.h"
+#include "opt_kdtrace.h"
+#include "opt_rss.h"
 
 #include <sys/types.h>
 #include <sys/param.h>
@@ -65,7 +67,6 @@
 #include <sys/proc.h>
 
 #include <sys/socket.h>
-#include <sys/syslog.h>
 
 #include <net/if.h>
 #include <net/bpf.h>
@@ -76,11 +77,14 @@
 #include <net/bpf_zerocopy.h>
 #include <net/bpfdesc.h>
 #include <net/vnet.h>
-
+#ifdef SOFTRSS
+#include <net/netisr.h>
+#endif
 #include <netinet/in.h>
 #include <netinet/if_ether.h>
 #include <sys/kernel.h>
 #include <sys/sysctl.h>
+#include <sys/sdt.h>
 
 #include <net80211/ieee80211_freebsd.h>
 
@@ -199,6 +203,29 @@
 	.f_event = filt_bpfread,
 };
 
+SDT_PROVIDER_DECLARE(bpf);
+SDT_PROVIDER_DEFINE(bpf);
+SDT_PROBE_DEFINE2(bpf, functions, bpf_tap, entry, entry, "void*", "boolean_t");
+SDT_PROBE_DEFINE3(bpf, functions, bpf_mtap_rx, entry, entry, "void *", "uint32_t", "uint32_t");
+SDT_PROBE_DEFINE3(bpf, functions, bpf_mtap_tx, entry, entry, "void *", "uint32_t", "uint32_t");
+SDT_PROBE_DEFINE3(bpf, functions, bpf_mtap2_rx, entry, entry, "void *", "uint32_t", "uint32_t");
+SDT_PROBE_DEFINE3(bpf, functions, bpf_mtap2_tx, entry, entry, "void *", "uint32_t", "uint32_t");
+SDT_PROBE_DEFINE1(bpf, functions, bpfioctl_biocrxqlen, entry, entry, "int");
+SDT_PROBE_DEFINE1(bpf, functions, bpfioctl_bioctxqlen, entry, entry, "int");
+SDT_PROBE_DEFINE2(bpf, functions, bpfioctl_biocrxqaffinity, entry, entry, "int", "int");
+SDT_PROBE_DEFINE2(bpf, functions, bpfioctl_bioctxqaffinity, entry, entry, "int", "int");
+SDT_PROBE_DEFINE1(bpf, functions, bpfioctl_biocenaqmask, entry, entry, "int");
+SDT_PROBE_DEFINE1(bpf, functions, bpfioctl_biocdisqmask, entry, entry, "int");
+SDT_PROBE_DEFINE1(bpf, functions, bpfioctl_biocstrxqmask, entry, entry, "int");
+SDT_PROBE_DEFINE1(bpf, functions, bpfioctl_bioccrrxqmask, entry, entry, "int");
+SDT_PROBE_DEFINE2(bpf, functions, bpfioctl_biocgtrxqmask, entry, entry, "int", "int");
+SDT_PROBE_DEFINE1(bpf, functions, bpfioctl_biocsttxqmask, entry, entry, "int");
+SDT_PROBE_DEFINE1(bpf, functions, bpfioctl_bioccrtxqmask, entry, entry, "int");
+SDT_PROBE_DEFINE2(bpf, functions, bpfioctl_biocgttxqmask, entry, entry, "int", "int");
+SDT_PROBE_DEFINE1(bpf, functions, bpfioctl_biocstothermask, entry, entry, "int");
+SDT_PROBE_DEFINE1(bpf, functions, bpfioctl_bioccrothermask, entry, entry, "int");
+SDT_PROBE_DEFINE1(bpf, functions, bpfioctl_biocgtothermask, entry, entry, "int");
+
 /*
  * Wrapper functions for various buffering methods.  If the set of buffer
  * modes expands, we will probably want to introduce a switch data structure
@@ -1519,318 +1546,388 @@
 
 	case BIOCRXQLEN:
 		{
-			log(LOG_DEBUG, "BIOCRXQLEN\n");
-			struct ifnet *const ifp = d->bd_bif->bif_ifp;
+			struct ifnet *ifp;
+
+			if (d->bd_bif == NULL) {
+				/*
+				 * No interface attached yet.
+				 */
+				error = EINVAL;
+				SDT_PROBE1(bpf, functions, bpfioctl_biocrxqlen, entry, -1);
+				break;
+			}
+			ifp = d->bd_bif->bif_ifp;
 			*(int *)addr = ifp->if_rxq_num;
+			SDT_PROBE1(bpf, functions, bpfioctl_biocrxqlen, entry, ifp->if_rxq_num);
 			break;
 		}
 
 	case BIOCTXQLEN:
 		{
-			log(LOG_DEBUG, "BIOCTXQLEN\n");
-			struct ifnet *const ifp = d->bd_bif->bif_ifp;
+			struct ifnet *ifp;
+
+			if (d->bd_bif == NULL) {
+				/*
+				 * No interface attached yet.
+				 */
+				error = EINVAL;
+				SDT_PROBE1(bpf, functions, bpfioctl_bioctxqlen, entry, -1);
+				break;
+			}
+			ifp = d->bd_bif->bif_ifp;
 			*(int *)addr = ifp->if_txq_num;
+			SDT_PROBE1(bpf, functions, bpfioctl_bioctxqlen, entry, ifp->if_txq_num);
 			break;
 		}
 
 	case BIOCRXQAFFINITY:
 		{
 			u_long index;
-			log(LOG_DEBUG, "BIOCRXQAFFINITY\n");
+			struct ifnet *ifp;
+
 			if (d->bd_bif == NULL) {
-				log(LOG_DEBUG, "d->bd_bif == NULL\n");
 				/*
 				 * No interface attached yet.
 				 */
 				error = EINVAL;
+				SDT_PROBE2(bpf, functions, bpfioctl_biocrxqaffinity, entry, -1, -1);
 				break;
 			}
-			struct ifnet *const ifp = d->bd_bif->bif_ifp;
+			ifp = d->bd_bif->bif_ifp;
 			index = *(u_long *)addr;
 			if (index > ifp->if_rxq_num) {
-				log(LOG_DEBUG, "index too large\n");
+				log(LOG_ERR, "BIOCRXQAFFINITY: index too large index:%lx rxq_num:%d\n", index, ifp->if_rxq_num);
 				error = EINVAL;
+				SDT_PROBE2(bpf, functions, bpfioctl_biocrxqaffinity, entry, -1, -1);
 				break;
 			}
 			if (!ifp->if_rxq_affinity) {
-				log(LOG_DEBUG, "!ifp->if_rxq_affinity\n");
+				log(LOG_ERR, "!ifp->if_rxq_affinity\n");
 				error = EINVAL;
+				SDT_PROBE2(bpf, functions, bpfioctl_biocrxqaffinity, entry, -1, -1);
 				break;
 			}
 			*(u_long *)addr = ifp->if_rxq_affinity[index];
-			log(LOG_DEBUG, "index:%lu result:%lu\n", index, *(u_long *)addr);
+			SDT_PROBE2(bpf, functions, bpfioctl_biocrxqaffinity, entry, index, ifp->if_rxq_affinity[index]);
 			break;
 		}
 
 	case BIOCTXQAFFINITY:
 		{
 			u_long index;
-			log(LOG_DEBUG, "BIOCTXQAFFINITY\n");
 			if (d->bd_bif == NULL) {
-				log(LOG_DEBUG, "d->bd_bif == NULL\n");
+				log(LOG_ERR, "d->bd_bif == NULL\n");
 				/*
 				 * No interface attached yet.
 				 */
 				error = EINVAL;
+				SDT_PROBE2(bpf, functions, bpfioctl_bioctxqaffinity, entry, -1, -1);
 				break;
 			}
 			struct ifnet *const ifp = d->bd_bif->bif_ifp;
 			index = *(u_long *)addr;
 			if (index > ifp->if_txq_num) {
-				log(LOG_DEBUG, "index too large\n");
+				log(LOG_ERR, "BIOCTXQAFFINITY: index too large index:%lx txq_num:%x\n", index, ifp->if_txq_num);
 				error = EINVAL;
+				SDT_PROBE2(bpf, functions, bpfioctl_bioctxqaffinity, entry, -1, -1);
 				break;
 			}
 			if (!ifp->if_txq_affinity) {
-				log(LOG_DEBUG, "!ifp->if_txq_affinity\n");
+				log(LOG_ERR, "!ifp->if_txq_affinity\n");
 				error = EINVAL;
+				SDT_PROBE2(bpf, functions, bpfioctl_bioctxqaffinity, entry, -1, -1);
 				break;
 			}
 			*(u_long *)addr = ifp->if_txq_affinity[index];
-			log(LOG_DEBUG, "index:%lu result:%lu\n", index, *(u_long *)addr);
+			SDT_PROBE2(bpf, functions, bpfioctl_bioctxqaffinity, entry, index, ifp->if_txq_affinity[index]);
 			break;
 		}
 
 	case BIOCENAQMASK:
 		{
-			log(LOG_DEBUG, "BIOCENAQMASK\n");
+			struct ifnet *ifp;
+
 			if (d->bd_bif == NULL) {
-				log(LOG_DEBUG, "d->bd_bif == NULL\n");
+				log(LOG_ERR, "d->bd_bif == NULL\n");
 				/*
 				 * No interface attached yet.
 				 */
 				error = EINVAL;
+				SDT_PROBE1(bpf, functions, bpfioctl_biocenaqmask, entry, -1);
 				break;
 			}
 			if (d->bd_qmask.qm_enabled) {
-				log(LOG_DEBUG, "d->bd_qmask.qm_enabled\n");
+				log(LOG_ERR, "d->bd_qmask.qm_enabled\n");
+				error = EINVAL;
+				SDT_PROBE1(bpf, functions, bpfioctl_biocenaqmask, entry, -1);
+				break;
+			}
+			ifp = d->bd_bif->bif_ifp;
+#ifdef SOFTRSS
+			if (!(ifp->if_capenable & IFCAP_MULTIQUEUE)) {
+				ifp->if_rxq_num = netisr_get_cpucount();
+				ifp->if_capabilities |= IFCAP_MULTIQUEUE;
+				ifp->if_capenable |= IFCAP_MULTIQUEUE;
+			}
+#else
+			if (!(ifp->if_capenable & IFCAP_MULTIQUEUE)) {
+				log(LOG_ERR, "if doesn't support multiqueue");
 				error = EINVAL;
+				SDT_PROBE1(bpf, functions, bpfioctl_biocenaqmask, entry, -1);
 				break;
 			}
-			struct ifnet *const ifp = d->bd_bif->bif_ifp;
+#endif
+			log(LOG_DEBUG, "if_rxq_num:%d\n", ifp->if_rxq_num);
+			log(LOG_DEBUG, "if_txq_num:%d\n", ifp->if_txq_num);
 			d->bd_qmask.qm_enabled = TRUE;
-			log(LOG_DEBUG, "ifp->if_rxq_num:%d\n", ifp->if_rxq_num);
 			d->bd_qmask.qm_rxq_mask =
 				malloc(ifp->if_rxq_num * sizeof(boolean_t), M_BPF, 
 					M_WAITOK | M_ZERO);
-			log(LOG_DEBUG, "ifp->if_txq_num:%d\n", ifp->if_txq_num);
 			d->bd_qmask.qm_txq_mask =
 				malloc(ifp->if_txq_num * sizeof(boolean_t), M_BPF, 
 					M_WAITOK | M_ZERO);
 			d->bd_qmask.qm_other_mask = FALSE;
+			SDT_PROBE1(bpf, functions, bpfioctl_biocenaqmask, entry, ifp->if_rxq_num);
 			break;
 		}
 
 	case BIOCDISQMASK:
 		{
-			log(LOG_DEBUG, "BIOCDISQMASK\n");
 			if (d->bd_bif == NULL) {
-				log(LOG_DEBUG, "d->bd_bif == NULL\n");
+				log(LOG_ERR, "d->bd_bif == NULL\n");
 				/*
 				 * No interface attached yet.
 				 */
 				error = EINVAL;
+				SDT_PROBE1(bpf, functions, bpfioctl_biocdisqmask, entry, -1);
 				break;
 			}
 			if (!d->bd_qmask.qm_enabled) {
-				log(LOG_DEBUG, "!d->bd_qmask.qm_enabled\n");
+				log(LOG_ERR, "!d->bd_qmask.qm_enabled\n");
 				error = EINVAL;
+				SDT_PROBE1(bpf, functions, bpfioctl_biocdisqmask, entry, -1);
 				break;
 			}
 			d->bd_qmask.qm_enabled = FALSE;
 			free(d->bd_qmask.qm_rxq_mask, M_BPF);
 			free(d->bd_qmask.qm_txq_mask, M_BPF);
+			SDT_PROBE1(bpf, functions, bpfioctl_biocdisqmask, entry, 0);
 			break;
 		}
 
 	case BIOCSTRXQMASK:
 		{
+			struct ifnet *ifp;
 			int index;
-			log(LOG_DEBUG, "BIOCSTRXQMASK\n");
+
 			if (d->bd_bif == NULL) {
-				log(LOG_DEBUG, "d->bd_bif == NULL\n");
+				log(LOG_ERR, "d->bd_bif == NULL\n");
 				/*
 				 * No interface attached yet.
 				 */
-				error = EINVAL;
+				error = EINVAL;	
+				SDT_PROBE1(bpf, functions, bpfioctl_biocstrxqmask, entry, -1);
 				break;
 			}
 			if (!d->bd_qmask.qm_enabled) {
-				log(LOG_DEBUG, "!d->bd_qmask.qm_enabled\n");
+				log(LOG_ERR, "!d->bd_qmask.qm_enabled\n");
 				error = EINVAL;
+				SDT_PROBE1(bpf, functions, bpfioctl_biocstrxqmask, entry, -1);
 				break;
 			}
-			struct ifnet *const ifp = d->bd_bif->bif_ifp;
+			ifp = d->bd_bif->bif_ifp;
 			index = *(uint32_t *)addr;
 			if (index > ifp->if_rxq_num) {
-				log(LOG_DEBUG, "index too large\n");
+				log(LOG_ERR, "BIOCSTRXQMASK: index too large index:%x rxq_num:%x\n", index, ifp->if_rxq_num);
 				error = EINVAL;
+				SDT_PROBE1(bpf, functions, bpfioctl_biocstrxqmask, entry, -1);
 				break;
 			}
-			log(LOG_DEBUG, "index:%d\n", index);
 			d->bd_qmask.qm_rxq_mask[index] = TRUE;
+			SDT_PROBE1(bpf, functions, bpfioctl_biocstrxqmask, entry, index);
 			break;
 		}
 
 	case BIOCCRRXQMASK:
 		{
 			int index;
-			log(LOG_DEBUG, "BIOCCRRXQMASK\n");
+			struct ifnet *ifp;
+
 			if (d->bd_bif == NULL) {
-				log(LOG_DEBUG, "d->bd_bif == NULL\n");
+				log(LOG_ERR, "d->bd_bif == NULL\n");
 				/*
 				 * No interface attached yet.
 				 */
 				error = EINVAL;
+				SDT_PROBE1(bpf, functions, bpfioctl_bioccrrxqmask, entry, -1);
 				break;
 			}
 			if (!d->bd_qmask.qm_enabled) {
-				log(LOG_DEBUG, "!d->bd_qmask.qm_enabled\n");
+				log(LOG_ERR, "!d->bd_qmask.qm_enabled\n");
 				error = EINVAL;
+				SDT_PROBE1(bpf, functions, bpfioctl_bioccrrxqmask, entry, -1);
 				break;
 			}
-			struct ifnet *const ifp = d->bd_bif->bif_ifp;
+			ifp = d->bd_bif->bif_ifp;
 			index = *(uint32_t *)addr;
 			if (index > ifp->if_rxq_num) {
-				log(LOG_DEBUG, "index too large\n");
+				log(LOG_ERR, "BIOCCRRXQMASK: index too large index:%x rxq_num:%x\n", index, ifp->if_rxq_num);
 				error = EINVAL;
+				SDT_PROBE1(bpf, functions, bpfioctl_bioccrrxqmask, entry, -1);
 				break;
 			}
-			log(LOG_DEBUG, "index:%d\n", index);
 			d->bd_qmask.qm_rxq_mask[index] = FALSE;
+			SDT_PROBE1(bpf, functions, bpfioctl_bioccrrxqmask, entry, index);
 			break;
 		}
 
 	case BIOCGTRXQMASK:
 		{
 			int index;
-			log(LOG_DEBUG, "BIOCGTRXQMASK\n");
+			struct ifnet *ifp;
+
 			if (d->bd_bif == NULL) {
-				log(LOG_DEBUG, "d->bd_bif == NULL\n");
+				log(LOG_ERR, "d->bd_bif == NULL\n");
 				/*
 				 * No interface attached yet.
 				 */
 				error = EINVAL;
+				SDT_PROBE2(bpf, functions, bpfioctl_biocgtrxqmask, entry, -1, -1);
 				break;
 			}
 			if (!d->bd_qmask.qm_enabled) {
-				log(LOG_DEBUG, "!d->bd_qmask.qm_enabled\n");
+				log(LOG_ERR, "!d->bd_qmask.qm_enabled\n");
 				error = EINVAL;
+				SDT_PROBE2(bpf, functions, bpfioctl_biocgtrxqmask, entry, -1, -1);
 				break;
 			}
-			struct ifnet *const ifp = d->bd_bif->bif_ifp;
+			ifp = d->bd_bif->bif_ifp;
 			index = *(uint32_t *)addr;
 			if (index > ifp->if_rxq_num) {
-				log(LOG_DEBUG, "index too large\n");
+				log(LOG_ERR, "BIOCGTRXQMASK: index too large index:%x rxq_num:%x\n", index, ifp->if_rxq_num);
 				error = EINVAL;
+				SDT_PROBE1(bpf, functions, bpfioctl_biocgtrxqmask, entry, -1);
 				break;
 			}
-			log(LOG_DEBUG, "index:%d\n", index);
 			*(uint32_t *)addr = d->bd_qmask.qm_rxq_mask[index];
+			SDT_PROBE2(bpf, functions, bpfioctl_biocgtrxqmask, entry, index, d->bd_qmask.qm_rxq_mask[index]);
 			break;
 		}
 
 	case BIOCSTTXQMASK:
 		{
+			struct ifnet *ifp;
 			int index;
-			log(LOG_DEBUG, "BIOCSTTXQMASK\n");
+
 			if (d->bd_bif == NULL) {
-				log(LOG_DEBUG, "d->bd_bif == NULL\n");
+				log(LOG_ERR, "d->bd_bif == NULL\n");
 				/*
 				 * No interface attached yet.
 				 */
 				error = EINVAL;
+				SDT_PROBE1(bpf, functions, bpfioctl_biocsttxqmask, entry, -1);
 				break;
 			}
 			if (!d->bd_qmask.qm_enabled) {
-				log(LOG_DEBUG, "!d->bd_qmask.qm_enabled\n");
+				log(LOG_ERR, "!d->bd_qmask.qm_enabled\n");
 				error = EINVAL;
+				SDT_PROBE1(bpf, functions, bpfioctl_biocsttxqmask, entry, -1);
 				break;
 			}
-			struct ifnet *const ifp = d->bd_bif->bif_ifp;
+
+			ifp = d->bd_bif->bif_ifp;
 			index = *(uint32_t *)addr;
 			if (index > ifp->if_txq_num) {
-				log(LOG_DEBUG, "index too large\n");
+				log(LOG_ERR, "BIOCSTTXQMASK: index too large index:%x txq_num:%x\n", index, ifp->if_txq_num);
 				error = EINVAL;
+				SDT_PROBE1(bpf, functions, bpfioctl_biocsttxqmask, entry, -1);
 				break;
 			}
-			log(LOG_DEBUG, "index:%d\n", index);
 			d->bd_qmask.qm_txq_mask[index] = TRUE;
+			SDT_PROBE1(bpf, functions, bpfioctl_biocsttxqmask, entry, index);
 			break;
 		}
 
 	case BIOCCRTXQMASK:
 		{
+			struct ifnet *ifp;
 			int index;
-			log(LOG_DEBUG, "BIOCCRTXQMASK\n");
+
 			if (d->bd_bif == NULL) {
-				log(LOG_DEBUG, "d->bd_bif == NULL\n");
+				log(LOG_ERR, "d->bd_bif == NULL\n");
 				/*
 				 * No interface attached yet.
 				 */
 				error = EINVAL;
+				SDT_PROBE1(bpf, functions, bpfioctl_bioccrtxqmask, entry, -1);
 				break;
 			}
 			if (!d->bd_qmask.qm_enabled) {
-				log(LOG_DEBUG, "!d->bd_qmask.qm_enabled\n");
+				log(LOG_ERR, "!d->bd_qmask.qm_enabled\n");
 				error = EINVAL;
+				SDT_PROBE1(bpf, functions, bpfioctl_bioccrtxqmask, entry, -1);
 				break;
 			}
-			struct ifnet *const ifp = d->bd_bif->bif_ifp;
+
+			ifp = d->bd_bif->bif_ifp;
 			index = *(uint32_t *)addr;
 			if (index > ifp->if_txq_num) {
-				log(LOG_DEBUG, "index too large\n");
+				log(LOG_ERR, "BIOCCRTXQMASK: index too large index:%x txq_num:%x\n", index, ifp->if_txq_num);
 				error = EINVAL;
+				SDT_PROBE1(bpf, functions, bpfioctl_bioccrtxqmask, entry, -1);
 				break;
 			}
-			log(LOG_DEBUG, "index:%d\n", index);
 			d->bd_qmask.qm_txq_mask[index] = FALSE;
+			SDT_PROBE1(bpf, functions, bpfioctl_bioccrtxqmask, entry, index);
 			break;
 		}
 
 	case BIOCGTTXQMASK:
 		{
 			int index;
-			log(LOG_DEBUG, "BIOCGTTXQMASK\n");
+			struct ifnet *ifp;
+
 			if (d->bd_bif == NULL) {
-				log(LOG_DEBUG, "d->bd_bif == NULL\n");
+				log(LOG_ERR, "d->bd_bif == NULL\n");
 				/*
 				 * No interface attached yet.
 				 */
 				error = EINVAL;
+				SDT_PROBE2(bpf, functions, bpfioctl_biocgttxqmask, entry, -1, -1);
 				break;
 			}
 			if (!d->bd_qmask.qm_enabled) {
-				log(LOG_DEBUG, "!d->bd_qmask.qm_enabled\n");
+				log(LOG_ERR, "!d->bd_qmask.qm_enabled\n");
 				error = EINVAL;
+				SDT_PROBE2(bpf, functions, bpfioctl_biocgttxqmask, entry, -1, -1);
 				break;
 			}
-			struct ifnet *const ifp = d->bd_bif->bif_ifp;
+			ifp = d->bd_bif->bif_ifp;
 			index = *(uint32_t *)addr;
 			if (index > ifp->if_txq_num) {
-				log(LOG_DEBUG, "index too large\n");
+				log(LOG_ERR, "BIOCGTTXQMASK: index too large index:%x txq_num:%x\n", index, ifp->if_txq_num);
 				error = EINVAL;
+				SDT_PROBE2(bpf, functions, bpfioctl_biocgttxqmask, entry, -1, -1);
 				break;
 			}
-			log(LOG_DEBUG, "index:%d\n", index);
 			*(uint32_t *)addr = d->bd_qmask.qm_txq_mask[index];
+			SDT_PROBE2(bpf, functions, bpfioctl_biocgttxqmask, entry, index, d->bd_qmask.qm_txq_mask[index]);
 			break;
 		}
 
 	case BIOCSTOTHERMASK:
-		log(LOG_DEBUG, "BIOSTOTHERMASK\n");
 		d->bd_qmask.qm_other_mask = TRUE;
+		SDT_PROBE1(bpf, functions, bpfioctl_biocstothermask, entry, 1);
 		break;
 
 	case BIOCCROTHERMASK:
-		log(LOG_DEBUG, "BIOCCROTHERMASK\n");
 		d->bd_qmask.qm_other_mask = FALSE;
+		SDT_PROBE1(bpf, functions, bpfioctl_bioccrothermask, entry, 0);
 		break;
 
 	case BIOCGTOTHERMASK:
-		log(LOG_DEBUG, "BIOCGTOTHERMASK\n");
-		log(LOG_DEBUG, "mask:%d\n", d->bd_qmask.qm_other_mask);
 		*(uint32_t *)addr = (uint32_t)d->bd_qmask.qm_other_mask;
+		SDT_PROBE1(bpf, functions, bpfioctl_biocgtothermask, entry, d->bd_qmask.qm_other_mask);
 		break;
 	}
 	CURVNET_RESTORE();
@@ -2144,8 +2241,7 @@
 	BPFIF_LOCK(bp);
 	LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
 		if (d->bd_qmask.qm_enabled) {
-			log(LOG_DEBUG, "bpf_tap other_mask:%d\n", 
-				d->bd_qmask.qm_other_mask);
+			SDT_PROBE2(bpf, functions, bpf_tap, entry, d, d->bd_qmask.qm_other_mask);
 			if (!d->bd_qmask.qm_other_mask)
 				continue;
 		}
@@ -2195,13 +2291,6 @@
 	u_int pktlen, slen;
 	int gottime;
 
-#if 0
-	if (m->m_pkthdr.txqid != (uint32_t)-1 && m->m_pkthdr.txqid != PCPU_GET(cpuid))
-		log(LOG_DEBUG, "txqid:%d cpuid:%d\n", m->m_pkthdr.txqid, PCPU_GET(cpuid));
-#endif
-	if (m->m_pkthdr.rxqid != (uint32_t)-1 && m->m_pkthdr.rxqid != PCPU_GET(cpuid))
-		log(LOG_DEBUG, "rxqid:%d cpuid:%d\n", m->m_pkthdr.rxqid, PCPU_GET(cpuid));
-
 	/* Skip outgoing duplicate packets. */
 	if ((m->m_flags & M_PROMISC) != 0 && m->m_pkthdr.rcvif == NULL) {
 		m->m_flags &= ~M_PROMISC;
@@ -2214,18 +2303,30 @@
 	BPFIF_LOCK(bp);
 	LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
 		if (d->bd_qmask.qm_enabled) {
-/*
-			log(LOG_DEBUG, "bpf_mtap rxqid:%x txqid:%x rxqmask:%x txqmask:%x\n",
-				m->m_pkthdr.rxqid, m->m_pkthdr.txqid,
-				d->bd_qmask.qm_rxq_mask[m->m_pkthdr.rxqid],
-				d->bd_qmask.qm_txq_mask[m->m_pkthdr.txqid]);
-*/
-			if (m->m_pkthdr.rxqid != (uint32_t)-1 &&
-				!d->bd_qmask.qm_rxq_mask[m->m_pkthdr.rxqid])
-				continue;
-			if (m->m_pkthdr.txqid != (uint32_t)-1 &&
-				!d->bd_qmask.qm_txq_mask[m->m_pkthdr.txqid])
-				continue;
+			if (!(m->m_flags & M_FLOWID)) {
+				log(LOG_DEBUG, "m:%p ifp:%p !(m->flags & M_FLOWID)\n",
+					m, m->m_pkthdr.rcvif);
+				if (!d->bd_qmask.qm_other_mask)
+					continue;
+			} else {
+				if (m->m_pkthdr.rxqid != (uint32_t)-1)
+					KASSERT(m->m_pkthdr.rxqid < bp->bif_ifp->if_rxq_num,
+						("rxqid is not vaild rxqid:%x rxq_num:%x",
+						m->m_pkthdr.rxqid, bp->bif_ifp->if_rxq_num));
+				if (m->m_pkthdr.txqid != (uint32_t)-1)
+					KASSERT(m->m_pkthdr.txqid < bp->bif_ifp->if_txq_num,
+						("txqid is not vaild txqid:%x txq_num:%x",
+						m->m_pkthdr.txqid, bp->bif_ifp->if_txq_num));
+
+				SDT_PROBE3(bpf, functions, bpf_mtap_rx, entry, d, m->m_pkthdr.rxqid, bp->bif_ifp->if_rxq_num);
+				SDT_PROBE3(bpf, functions, bpf_mtap_tx, entry, d, m->m_pkthdr.txqid, bp->bif_ifp->if_txq_num);
+				if (m->m_pkthdr.rxqid != (uint32_t)-1 &&
+					!d->bd_qmask.qm_rxq_mask[m->m_pkthdr.rxqid])
+					continue;
+				if (m->m_pkthdr.txqid != (uint32_t)-1 &&
+					!d->bd_qmask.qm_txq_mask[m->m_pkthdr.txqid])
+					continue;
+			}
 		}
 		if (BPF_CHECK_DIRECTION(d, m->m_pkthdr.rcvif, bp->bif_ifp))
 			continue;
@@ -2267,13 +2368,6 @@
 	u_int pktlen, slen;
 	int gottime;
 
-#if 0
-	if (m->m_pkthdr.txqid != (uint32_t)-1 && m->m_pkthdr.txqid != PCPU_GET(cpuid))
-		log(LOG_DEBUG, "txqid:%d cpuid:%d\n", m->m_pkthdr.txqid, PCPU_GET(cpuid));
-#endif
-	if (m->m_pkthdr.rxqid != (uint32_t)-1 && m->m_pkthdr.rxqid != PCPU_GET(cpuid))
-		log(LOG_DEBUG, "rxqid:%d cpuid:%d\n", m->m_pkthdr.rxqid, PCPU_GET(cpuid));
-
 	/* Skip outgoing duplicate packets. */
 	if ((m->m_flags & M_PROMISC) != 0 && m->m_pkthdr.rcvif == NULL) {
 		m->m_flags &= ~M_PROMISC;
@@ -2295,10 +2389,8 @@
 	BPFIF_LOCK(bp);
 	LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
 		if (d->bd_qmask.qm_enabled) {
-			log(LOG_DEBUG, "bpf_mtap2 rxqid:%x txqid:%x rxqmask:%x txqmask:%x\n",
-				m->m_pkthdr.rxqid, m->m_pkthdr.txqid,
-				d->bd_qmask.qm_rxq_mask[m->m_pkthdr.rxqid],
-				d->bd_qmask.qm_txq_mask[m->m_pkthdr.txqid]);
+			SDT_PROBE3(bpf, functions, bpf_mtap2_rx, entry, d, m->m_pkthdr.rxqid, bp->bif_ifp->if_rxq_num);
+			SDT_PROBE3(bpf, functions, bpf_mtap2_tx, entry, d, m->m_pkthdr.txqid, bp->bif_ifp->if_txq_num);
 
 			if (m->m_pkthdr.rxqid != (uint32_t)-1 &&
 				!d->bd_qmask.qm_rxq_mask[m->m_pkthdr.rxqid])

==== //depot/projects/soc2011/mq_bpf/src/sys/net/bpf.h#4 (text+ko) ====


==== //depot/projects/soc2011/mq_bpf/src/sys/net/bpfdesc.h#3 (text+ko) ====


==== //depot/projects/soc2011/mq_bpf/src/sys/net/if.h#2 (text+ko) ====

@@ -220,6 +220,7 @@
 #define	IFCAP_POLLING_NOCOUNT	0x20000 /* polling ticks cannot be fragmented */
 #define	IFCAP_VLAN_HWTSO	0x40000 /* can do IFCAP_TSO on VLANs */
 #define	IFCAP_LINKSTATE		0x80000 /* the runtime link state is dynamic */
+#define IFCAP_MULTIQUEUE	0x100000
 
 #define IFCAP_HWCSUM	(IFCAP_RXCSUM | IFCAP_TXCSUM)
 #define	IFCAP_TSO	(IFCAP_TSO4 | IFCAP_TSO6)

==== //depot/projects/soc2011/mq_bpf/src/sys/net/if_ethersubr.c#2 (text+ko) ====

@@ -36,6 +36,7 @@
 #include "opt_ipx.h"
 #include "opt_netgraph.h"
 #include "opt_mbuf_profiling.h"
+#include "opt_rss.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -69,6 +70,7 @@
 #include <netinet/in.h>
 #include <netinet/in_var.h>
 #include <netinet/if_ether.h>
+#include <netinet/in_rss.h>
 #include <netinet/ip_carp.h>
 #include <netinet/ip_var.h>
 #include <netinet/ip_fw.h>
@@ -106,6 +108,9 @@
 CTASSERT(sizeof (struct ether_addr) == ETHER_ADDR_LEN);
 #endif
 
+SYSCTL_DECL(_net_link);
+SYSCTL_NODE(_net_link, IFT_ETHER, ether, CTLFLAG_RW, 0, "Ethernet");
+
 /* netgraph node hooks for ng_ether(4) */
 void	(*ng_ether_input_p)(struct ifnet *ifp, struct mbuf **mp);
 void	(*ng_ether_input_orphan_p)(struct ifnet *ifp, struct mbuf *m);
@@ -561,7 +566,7 @@
  * mbuf chain m with the ethernet header at the front.
  */
 static void
-ether_input(struct ifnet *ifp, struct mbuf *m)
+ether_input_internal(struct ifnet *ifp, struct mbuf *m)
 {
 	struct ether_header *eh;
 	u_short etype;
@@ -754,6 +759,77 @@
 	CURVNET_RESTORE();
 }
 
+#if defined(RSS) || defined(SOFTRSS)
+/*
+ * Ethernet input dispatch; by default, direct dispatch here regardless of
+ * global configuration.  However, if RSS is enabled, hook up RSS affinity
+ * so that when deferred or hybrid dispatch is enabled, we can redistribute
+ * load based on RSS.
+ *
+ * XXXRW: Would be nice if the ifnet passed up a flag indicating whether or
+ * not it had already done work distribution via multi-queue.  Then we could
+ * direct dispatch in the event load balancing was already complete and
+ * handle the case of interfaces with different capabilities better.
+ *
+ * XXXRW: Sort of want an M_DISTRIBUTED flag to avoid multiple distributions
+ * at multiple layers?
+ *
+ * XXXRW: For now, enable all this only if RSS is compiled in, although it
+ * works fine without RSS.  Need to characterise the performance overhead
+ * of the detour through the netisr code in the event the result is always
+ * direct dispatch.
+ */
+static void
+ether_nh_input(struct mbuf *m)
+{
+
+	ether_input_internal(m->m_pkthdr.rcvif, m);
+}
+
+static struct netisr_handler	ether_nh = {
+	.nh_name = "ether",
+	.nh_handler = ether_nh_input,
+	.nh_proto = NETISR_ETHER,
+#if defined(RSS) || defined(SOFTRSS)
+	.nh_policy = NETISR_POLICY_CPU,
+#if defined(RSS)
+	.nh_dispatch = NETISR_DISPATCH_DIRECT,
+#else
+	.nh_dispatch = NETISR_DISPATCH_HYBRID,
+#endif
+	.nh_m2cpuid = rss_m2cpuid,
+#else
+	.nh_policy = NETISR_POLICY_SOURCE,
+	.nh_dispatch = NETISR_DISPATCH_DIRECT,
+#endif
+};
+
+static void
+ether_init(__unused void *arg)
+{
+
+	netisr_register(&ether_nh);
+}
+SYSINIT(ether, SI_SUB_INIT_IF, SI_ORDER_ANY, ether_init, NULL);
+#endif /* RSS || SOFTRSS */
+#include <sys/syslog.h>
+static void
+ether_input(struct ifnet *ifp, struct mbuf *m)
+{
+	log(LOG_DEBUG, "%s ifp:%p m:%p\n", __func__, ifp, m);
+#if defined(RSS) || defined(SOFTRSS)
+	/*
+	 * We will rely on rcvif being set properly in the deferred context,
+	 * so assert it is correct here.
+	 */
+	KASSERT(m->m_pkthdr.rcvif == ifp, ("%s: ifnet mismatch", __func__));
+
+	netisr_dispatch(NETISR_ETHER, m);
+#else
+	ether_input_internal(ifp, m);
+#endif
+}
+
 /*
  * Upper layer processing for a received Ethernet packet.
  */
@@ -1008,8 +1084,6 @@
 }
 #endif
 
-SYSCTL_DECL(_net_link);
-SYSCTL_NODE(_net_link, IFT_ETHER, ether, CTLFLAG_RW, 0, "Ethernet");
 #if defined(INET) || defined(INET6)
 SYSCTL_VNET_INT(_net_link_ether, OID_AUTO, ipfw, CTLFLAG_RW,
 	     &VNET_NAME(ether_ipfw), 0, "Pass ether pkts through firewall");

==== //depot/projects/soc2011/mq_bpf/src/sys/net/if_tap.c#4 (text+ko) ====

@@ -54,7 +54,6 @@
 #include <sys/socket.h>
 #include <sys/sockio.h>
 #include <sys/sysctl.h>
-#include <sys/syslog.h>
 #include <sys/systm.h>
 #include <sys/ttycom.h>
 #include <sys/uio.h>
@@ -870,10 +869,6 @@
 	} while (m == NULL);
 	mtx_unlock(&tp->tap_mtx);
 
-	m->m_pkthdr.rxqid = (uint32_t)-1;
-	m->m_pkthdr.txqid = PCPU_GET(cpuid);
-	log(LOG_DEBUG, "%s rxqid:%x txqid:%x\n", __func__, m->m_pkthdr.rxqid, m->m_pkthdr.txqid);
-
 	/* feed packet to bpf */
 	BPF_MTAP(ifp, m);
 
@@ -948,10 +943,6 @@
 		return (0);
 	}
 
-	m->m_pkthdr.rxqid = PCPU_GET(cpuid);
-	m->m_pkthdr.txqid = (uint32_t)-1;
-	log(LOG_DEBUG, "%s rxqid:%x txqid:%x\n", __func__, m->m_pkthdr.rxqid, m->m_pkthdr.txqid);
-
 	/* Pass packet up to parent. */
 	(*ifp->if_input)(ifp, m);
 	ifp->if_ipackets ++; /* ibytes are counted in parent */
@@ -1090,3 +1081,4 @@
 
 	knlist_remove(&tp->tap_rsel.si_note, kn, 0);
 } /* tapkqdetach */
+

==== //depot/projects/soc2011/mq_bpf/src/sys/net/if_var.h#5 (text+ko) ====


==== //depot/projects/soc2011/mq_bpf/src/sys/net/netisr.c#2 (text+ko) ====

@@ -1,6 +1,6 @@
 /*-
  * Copyright (c) 2007-2009 Robert N. M. Watson
- * Copyright (c) 2010 Juniper Networks, Inc.
+ * Copyright (c) 2010-2011 Juniper Networks, Inc.
  * All rights reserved.
  *
  * This software was developed by Robert N. M. Watson under contract
@@ -65,6 +65,7 @@
 
 #include "opt_ddb.h"
 #include "opt_device_polling.h"
+#include "opt_kdtrace.h"
 
 #include <sys/param.h>
 #include <sys/bus.h>
@@ -82,6 +83,8 @@
 #include <sys/socket.h>
 #include <sys/sysctl.h>
 #include <sys/systm.h>
+#include <sys/sdt.h>
+#include <sys/syslog.h>
 
 #ifdef DDB
 #include <ddb/ddb.h>
@@ -94,6 +97,31 @@
 #include <net/netisr_internal.h>
 #include <net/vnet.h>
 
+/*
+ * Locking strategy: three types of locks protect netisr processing:
+ *
+ * netisr configuration lock - serializes "rethreading" events, in which the
+ * number of worker threads is changed.
+ *
+ * netisr_rmlock - stabilizes the netisr system for network processing,
+ * almost always acquired as a read lock (except during configuration
+ * changes).
+ *
+ * nws_mtx - per-workstream lock that serializes access to queues.
+ */
+
+/*
+ * netisr configuration lock: serialize rethread events, in which the thread
+ * count may be increased and decreased, to avoid interlacing of these
+ * events, which might expose incompletely started or stopped threads, etc.
+ * This is a sleep lock so that it can be held over ithread start/stop.
+ */
+static struct sx netisr_config_sx;
+#define	NETISR_CONFIG_LOCK_INIT()	sx_init(&netisr_config_sx,	\
+					    "netisr_config_sx")
+#define	NETISR_CONFIG_LOCK()		sx_xlock(&netisr_config_sx)
+#define	NETISR_CONFIG_UNLOCK()		sx_xunlock(&netisr_config_sx)
+
 /*-
  * Synchronize use and modification of the registered netisr data structures;
  * acquire a read lock while modifying the set of registered protocols to
@@ -114,51 +142,74 @@
  *
  * XXXRW: rmlocks don't support assertions.
  */
+#define	NETISR_RMLOCKING
+
+#ifdef NETISR_RMLOCKING
 static struct rmlock	netisr_rmlock;
 #define	NETISR_LOCK_INIT()	rm_init_flags(&netisr_rmlock, "netisr", \
-				    RM_NOWITNESS)
+				    RM_NOWITNESS | RM_RECURSE)
 #define	NETISR_LOCK_ASSERT()
 #define	NETISR_RLOCK(tracker)	rm_rlock(&netisr_rmlock, (tracker))
 #define	NETISR_RUNLOCK(tracker)	rm_runlock(&netisr_rmlock, (tracker))
 #define	NETISR_WLOCK()		rm_wlock(&netisr_rmlock)
 #define	NETISR_WUNLOCK()	rm_wunlock(&netisr_rmlock)
-/* #define	NETISR_LOCKING */
+#else
+#define	NETISR_LOCK_INIT()
+#define	NETISR_LOCK_ASSERT()
+#define	NETISR_RLOCK(x)
+#define	NETISR_RUNLOCK(x)
+#define	NETISR_WLOCK()
+#define	NETISR_WUNLOCK()
+#endif
 
 SYSCTL_NODE(_net, OID_AUTO, isr, CTLFLAG_RW, 0, "netisr");
 
 /*-
- * Three direct dispatch policies are supported:
+ * Three global direct dispatch policies are supported:
  *
- * - Always defer: all work is scheduled for a netisr, regardless of context.
- *   (!direct)
+ * NETISR_DISPATCH_QUEUED: All work is deferred for a netisr, regardless of
+ * context (may be overriden by protocols).
  *
- * - Hybrid: if the executing context allows direct dispatch, and we're
- *   running on the CPU the work would be done on, then direct dispatch if it
- *   wouldn't violate ordering constraints on the workstream.
- *   (direct && !direct_force)
+ * NETISR_DISPATCH_HYBRID: If the executing context allows direct dispatch,
+ * and we're running on the CPU the work would be performed on, then direct
+ * dispatch it if it wouldn't violate ordering constraints on the workstream.
  *
- * - Always direct: if the executing context allows direct dispatch, always
- *   direct dispatch.  (direct && direct_force)
+ * NETISR_DISPATCH_DIRECT: If the executing context allows direct dispatch,
+ * always direct dispatch.  (The default.)
  *
  * Notice that changing the global policy could lead to short periods of
  * misordered processing, but this is considered acceptable as compared to
- * the complexity of enforcing ordering during policy changes.
+ * the complexity of enforcing ordering during policy changes.  Protocols can
+ * override the global policy (when they're not doing that, they select
+ * NETISR_DISPATCH_DEFAULT).
+ */
+#define	NETISR_DISPATCH_POLICY_DEFAULT	NETISR_DISPATCH_DIRECT
+#define	NETISR_DISPATCH_POLICY_MAXSTR	20 /* Used for temporary buffers. */
+static u_int	netisr_dispatch_policy = NETISR_DISPATCH_POLICY_DEFAULT;
+static int	sysctl_netisr_dispatch_policy(SYSCTL_HANDLER_ARGS);

>>> TRUNCATED FOR MAIL (1000 lines) <<<


More information about the p4-projects mailing list