PERFORCE change 197267 for review

Kazuya Goda goda at FreeBSD.org
Sat Aug 6 08:45:37 UTC 2011


http://p4web.freebsd.org/@@197267?ac=10

Change 197267 by goda at kaffierlime on 2011/08/06 08:44:31

	implement SOFT RSS

Affected files ...

.. //depot/projects/soc2011/kgoda_rpsrfs/src/sys/conf/options#4 edit
.. //depot/projects/soc2011/kgoda_rpsrfs/src/sys/dev/e1000/if_em.h#2 edit
.. //depot/projects/soc2011/kgoda_rpsrfs/src/sys/kern/uipc_socket.c#3 edit
.. //depot/projects/soc2011/kgoda_rpsrfs/src/sys/net/netisr.c#4 edit
.. //depot/projects/soc2011/kgoda_rpsrfs/src/sys/netinet/in_pcb.h#3 edit
.. //depot/projects/soc2011/kgoda_rpsrfs/src/sys/netinet/in_pcbgroup.c#3 edit
.. //depot/projects/soc2011/kgoda_rpsrfs/src/sys/netinet/in_rss.c#3 edit
.. //depot/projects/soc2011/kgoda_rpsrfs/src/sys/netinet/in_rss.h#2 edit
.. //depot/projects/soc2011/kgoda_rpsrfs/src/sys/netinet/ip_input.c#3 edit
.. //depot/projects/soc2011/kgoda_rpsrfs/src/sys/netinet/tcp_input.c#3 edit
.. //depot/projects/soc2011/kgoda_rpsrfs/src/sys/sys/sockbuf.h#2 edit

Differences ...

==== //depot/projects/soc2011/kgoda_rpsrfs/src/sys/conf/options#4 (text+ko) ====

@@ -425,6 +425,7 @@
 ROUTETABLES		opt_route.h
 RSS			opt_rss.h
 RPS			opt_rps.h
+SOFT_RSS		opt_soft_rss.h
 SLIP_IFF_OPTS		opt_slip.h
 TCPDEBUG
 TCP_OFFLOAD_DISABLE	opt_inet.h #Disable code to dispatch tcp offloading

==== //depot/projects/soc2011/kgoda_rpsrfs/src/sys/dev/e1000/if_em.h#2 (text+ko) ====


==== //depot/projects/soc2011/kgoda_rpsrfs/src/sys/kern/uipc_socket.c#3 (text+ko) ====

@@ -107,6 +107,7 @@
 #include "opt_inet6.h"
 #include "opt_zero.h"
 #include "opt_compat.h"
+#include "opt_soft_rss.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -142,6 +143,10 @@
 
 #include <vm/uma.h>
 
+#ifdef SOFT_RSS
+#include <netinet/in_rss.h>
+#endif
+
 #ifdef COMPAT_FREEBSD32
 #include <sys/mount.h>
 #include <sys/sysent.h>
@@ -2302,6 +2307,9 @@
 {
 	int error;
 
+#ifdef SOFT_RSS
+	soft_rss_record_curcpu(&so->so_rcv);
+#endif
 	CURVNET_SET(so->so_vnet);
 	error = (so->so_proto->pr_usrreqs->pru_soreceive(so, psa, uio, mp0,
 	    controlp, flagsp));

==== //depot/projects/soc2011/kgoda_rpsrfs/src/sys/net/netisr.c#4 (text+ko) ====

@@ -65,6 +65,8 @@
 
 #include "opt_ddb.h"
 #include "opt_device_polling.h"
+#include "opt_rps.h"
+#include "opt_soft_rss.h"
 
 #include <sys/param.h>
 #include <sys/bus.h>
@@ -94,6 +96,10 @@
 #include <net/netisr_internal.h>
 #include <net/vnet.h>
 
+#ifdef SOFT_RSS
+#include <netinet/in_rss.h>
+#endif
+
 /*
  * Locking strategy: three types of locks protect netisr processing:
  *
@@ -860,6 +866,10 @@
 		VNET_ASSERT(m->m_pkthdr.rcvif != NULL,
 		    ("%s:%d rcvif == NULL: m=%p", __func__, __LINE__, m));
 		CURVNET_SET(m->m_pkthdr.rcvif->if_vnet);
+#ifdef SOFT_RSS
+		if (m->m_pkthdr.flowid)
+			soft_rss_dec_flow_qlen(m->m_pkthdr.flowid);
+#endif
 		netisr_proto[proto].np_handler(m);
 		CURVNET_RESTORE();
 	}
@@ -1109,6 +1119,10 @@
 	 */
 	nwsp->nws_flags |= NWS_DISPATCHING;
 	NWS_UNLOCK(nwsp);
+#ifdef SOFT_RSS
+		if (m->m_pkthdr.flowid)
+			soft_rss_dec_flow_qlen(m->m_pkthdr.flowid);
+#endif
 	netisr_proto[proto].np_handler(m);
 	NWS_LOCK(nwsp);
 	nwsp->nws_flags &= ~NWS_DISPATCHING;
@@ -1302,6 +1316,12 @@
 	}
 #endif
 
+#ifdef RPS
+	netisr_defaultthreads = mp_ncpus;
+	netisr_maxthreads = mp_ncpus;
+	netisr_bindthreads = 1;
+#endif
+
 	if (TUNABLE_STR_FETCH("net.isr.dispatch", tmp, sizeof(tmp))) {
 		error = netisr_dispatch_policy_from_str(tmp,
 		    &dispatch_policy);

==== //depot/projects/soc2011/kgoda_rpsrfs/src/sys/netinet/in_pcb.h#3 (text+ko) ====

@@ -45,6 +45,7 @@
 #include <sys/_rwlock.h>
 
 #ifdef _KERNEL
+#include <sys/lock.h>
 #include <sys/rwlock.h>
 #include <net/vnet.h>
 #include <vm/uma.h>

==== //depot/projects/soc2011/kgoda_rpsrfs/src/sys/netinet/in_pcbgroup.c#3 (text+ko) ====


==== //depot/projects/soc2011/kgoda_rpsrfs/src/sys/netinet/in_rss.c#3 (text+ko) ====

@@ -33,6 +33,7 @@
 
 #include "opt_inet6.h"
 #include "opt_pcbgroup.h"
+#include "opt_soft_rss.h"
 
 #ifndef PCBGROUP
 #error "options RSS depends on options PCBGROUP"
@@ -171,6 +172,16 @@
 };
 static struct rss_table_entry	rss_table[RSS_TABLE_MAXLEN];
 
+#ifdef SOFT_RSS
+struct netisr_flow{
+	uint16_t cpu;		/* CPU that packets of this slot are steered to */
+	unsigned qlen;		/* packets steered but not yet given to a handler */
+};
+
+static struct netisr_flow *netisr_flow_table;	/* slot = flowid % softrss_maxflows */
+static unsigned *socket_flow_table;	/* CPU set by soft_rss_record_curcpu() */
+#endif
+
 static void
 rss_init(__unused void *arg)
 {
@@ -502,3 +513,137 @@
 SYSCTL_PROC(_net_inet_rss, OID_AUTO, key,
     CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0, sysctl_rss_key,
     "", "RSS keying material");
+
+#ifdef SOFT_RSS
+MALLOC_DEFINE(M_SOFTRSS_FLOWS, "softrss", "soft rss flow entrys");
+
+SYSCTL_NODE(_net_inet, OID_AUTO, softrss, CTLFLAG_RW, 0, 
+	    "Software emulate receive-side steering");
+
+/*
+ * Entries in each soft RSS flow table; a flow uses slot flowid % maxflows.
+ */
+static int	softrss_maxflows = 128;
+TUNABLE_INT("net.inet.softrss.maxflows", &softrss_maxflows);
+SYSCTL_INT(_net_inet_softrss, OID_AUTO, maxflows, CTLFLAG_RDTUN,
+    &softrss_maxflows, 0, "Flow entrys using soft RSS.");
+
+
+/* Allocate the table of last-reader CPUs; every slot starts as NO_CURR_CPU. */
+static void
+socket_flow_table_init(__unused void *arg)
+{
+	int i;
+
+	/* Slots are picked with "flowid % softrss_maxflows": never allow < 1. */
+	if (softrss_maxflows < 1)
+		softrss_maxflows = 128;
+	socket_flow_table = malloc(sizeof(*socket_flow_table) *
+	    softrss_maxflows, M_SOFTRSS_FLOWS, M_WAITOK);
+	for (i = 0; i < softrss_maxflows; i++)
+		socket_flow_table[i] = NO_CURR_CPU;
+}
+SYSINIT(socket_flow_table_init, SI_SUB_CLOCKS, SI_ORDER_MIDDLE,
+	socket_flow_table_init, NULL);
+
+/*
+ * Allocate the per-slot steering table (target CPU plus pending-packet
+ * count); M_ZERO leaves every entry at cpu 0 / qlen 0.
+ */
+static void
+netisr_flow_table_init(__unused void *arg)
+{
+
+	/*
+	 * Flow IDs are reduced "% softrss_maxflows"; a tunable below 1 would
+	 * turn that into a division by zero, so fall back to the default.
+	 */
+	if (softrss_maxflows < 1)
+		softrss_maxflows = 128;
+	netisr_flow_table = malloc(sizeof(*netisr_flow_table) *
+	    softrss_maxflows, M_SOFTRSS_FLOWS, M_WAITOK | M_ZERO);
+}
+SYSINIT(netisr_flow_table_init, SI_SUB_CLOCKS, SI_ORDER_MIDDLE,
+	netisr_flow_table_init, NULL);
+
+void
+soft_rss_record_curcpu(struct sockbuf *sb)
+{
+	uint32_t id = sb->flowid;	/* flow whose slot gets curcpu; 0 is skipped */
+	if (id != 0)
+		atomic_store_rel_int(&socket_flow_table[id % softrss_maxflows], curcpu);
+}
+
+static inline int
+soft_rss_get_curcpu(int index)	/* index: slot in socket_flow_table */
+{
+	return(atomic_load_acq_int(&socket_flow_table[index]));	/* may be NO_CURR_CPU */
+}
+
+static inline void
+soft_rss_record_dstcpu(int index, uint16_t cpu)	/* set the slot's target CPU */
+{
+	atomic_store_rel_16(&netisr_flow_table[index].cpu, cpu);
+}
+
+static inline uint16_t
+soft_rss_get_dstcpu(int index)	/* read the slot's current target CPU */
+{
+	return(atomic_load_acq_16(&netisr_flow_table[index].cpu));
+}
+
+
+static inline void
+soft_rss_inc_flow_qlen(int index)	/* count one more packet on this slot */
+{
+	atomic_add_acq_int(&netisr_flow_table[index].qlen, 1);
+}
+
+void
+soft_rss_dec_flow_qlen(unsigned flowid)	/* takes a raw flow ID, not a slot index */
+{
+	atomic_subtract_acq_int(&netisr_flow_table[flowid % softrss_maxflows].qlen, 1);
+}	
+
+static inline int
+soft_rss_get_flow_qlen(int index)	/* read the slot's packet count */
+{
+	return(atomic_load_acq_int(&netisr_flow_table[index].qlen));
+}
+
+/*
+ * Choose the CPU for this flow: follow the CPU recorded by the socket reader,
+ * but retarget only once the flow's pending-packet count has drained to 0.
+ */
+static u_int
+soft_rss_getcpu(u_int flowid)
+{
+	u_int cur, dst, index;
+
+	index = flowid % softrss_maxflows;
+	cur = soft_rss_get_curcpu(index);
+	dst = soft_rss_get_dstcpu(index);
+	if (cur == NO_CURR_CPU)		/* no reader recorded: default mapping */
+		cur = netisr_default_flow2cpu(flowid);
+	else if (cur != dst && soft_rss_get_flow_qlen(index) != 0)
+		cur = dst;		/* packets pending on dst: stay put */
+	if (cur != dst)
+		soft_rss_record_dstcpu(index, (uint16_t)cur);
+
+	/* The dequeue side decrements only when flowid != 0; mirror that. */
+	if (flowid != 0)
+		soft_rss_inc_flow_qlen(index);
+	return (cur);
+}
+
+/* netisr nh_m2cpuid hook: choose the CPU from m's flow ID; source is unused. */
+struct mbuf *
+soft_rss_m2cpuid(struct mbuf *m, uintptr_t source, u_int *cpuid)
+{
+	u_int flowid = m->m_pkthdr.flowid;
+
+	*cpuid = soft_rss_getcpu(flowid);
+	return (m);
+}
+
+#endif /* SOFT_RSS */

==== //depot/projects/soc2011/kgoda_rpsrfs/src/sys/netinet/in_rss.h#2 (text+ko) ====

@@ -31,6 +31,9 @@
 #define	_NETINET_IN_RSS_H_
 
 #include <netinet/in.h>		/* in_addr_t */
+#ifdef SOFT_RSS
+#include <sys/sockbuf.h>        /* struct sockbuf */
+#endif
 
 /*
  * Supported RSS hash functions.
@@ -89,4 +92,16 @@
  */
 struct mbuf	*rss_m2cpuid(struct mbuf *m, uintptr_t source, u_int *cpuid);
 
+#ifdef SOFT_RSS
+#define NO_CURR_CPU 0xffffffff	/* socket flow slot with no CPU recorded yet */
+
+#ifdef MALLOC_DECLARE
+MALLOC_DECLARE(M_SOFTRSS_FLOWS);
+#endif
+
+void soft_rss_record_curcpu(struct sockbuf *sb);	/* note curcpu for sb's flow */
+void soft_rss_dec_flow_qlen(u_int flowid);	/* raw flow ID, reduced internally */
+struct mbuf *soft_rss_m2cpuid(struct mbuf *m, uintptr_t source, u_int *cpuid);
+#endif /* SOFT_RSS */
+
 #endif /* !_NETINET_IN_RSS_H_ */

==== //depot/projects/soc2011/kgoda_rpsrfs/src/sys/netinet/ip_input.c#3 (text+ko) ====

@@ -38,6 +38,7 @@
 #include "opt_ipsec.h"
 #include "opt_route.h"
 #include "opt_rps.h"
+#include "opt_soft_rss.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -77,6 +78,9 @@
 #ifdef IPSEC
 #include <netinet/ip_ipsec.h>
 #endif /* IPSEC */
+#ifdef SOFT_RSS
+#include <netinet/in_rss.h>
+#endif
 
 #include <sys/socketvar.h>
 
@@ -145,8 +149,14 @@
 	.nh_handler = ip_input,
 	.nh_proto = NETISR_IP,
 	.nh_policy = NETISR_POLICY_FLOW,
-#ifdef RPS
+#if defined SOFT_RSS
+	.nh_policy = NETISR_POLICY_CPU,
 	.nh_dispatch = NETISR_DISPATCH_HYBRID,
+	.nh_m2cpuid = soft_rss_m2cpuid,
+#elif defined RPS
+	.nh_dispatch = NETISR_DISPATCH_DEFERRED,
+#else
+	.nh_dispatch = NETISR_DISPATCH_DIRECT,
 #endif
 };
 

==== //depot/projects/soc2011/kgoda_rpsrfs/src/sys/netinet/tcp_input.c#3 (text+ko) ====

@@ -55,6 +55,7 @@
 #include "opt_inet6.h"
 #include "opt_ipsec.h"
 #include "opt_tcpdebug.h"
+#include "opt_soft_rss.h"
 
 #include <sys/param.h>
 #include <sys/kernel.h>
@@ -857,6 +858,9 @@
 		rstreason = BANDLIM_RST_CLOSEDPORT;
 		goto dropwithreset;
 	}
+#ifdef SOFT_RSS
+	inp->inp_socket->so_rcv.flowid = m->m_pkthdr.flowid;
+#endif
 	INP_WLOCK_ASSERT(inp);
 	if (!(inp->inp_flags & INP_HW_FLOWID)
 	    && (m->m_flags & M_FLOWID)

==== //depot/projects/soc2011/kgoda_rpsrfs/src/sys/sys/sockbuf.h#2 (text+ko) ====

@@ -32,6 +32,11 @@
  */
 #ifndef _SYS_SOCKBUF_H_
 #define _SYS_SOCKBUF_H_
+
+#ifdef HAVE_KERNEL_OPTION_HEADERS
+#include "opt_soft_rss.h"
+#endif
+
 #include <sys/selinfo.h>		/* for struct selinfo */
 #include <sys/_lock.h>
 #include <sys/_mutex.h>
@@ -101,6 +106,9 @@
 	short	sb_flags;	/* (c/d) flags, see below */
 	int	(*sb_upcall)(struct socket *, void *, int); /* (c/d) */
 	void	*sb_upcallarg;	/* (c/d) */
+#ifdef SOFT_RSS
+	uint32_t flowid;
+#endif
 };
 
 #ifdef _KERNEL


More information about the p4-projects mailing list