svn commit: r347430 - in head/sys: kern netinet sys

Andrew Gallatin gallatin at FreeBSD.org
Fri May 10 13:41:21 UTC 2019


Author: gallatin
Date: Fri May 10 13:41:19 2019
New Revision: 347430
URL: https://svnweb.freebsd.org/changeset/base/347430

Log:
  Bind TCP HPTS (pacer) threads to NUMA domains
  
  Bind the TCP pacer threads to NUMA domains and build per-domain
  pacer-thread lookup tables. These tables allow us to use the
  inpcb's NUMA domain information to match an inpcb with a pacer
  thread on the same domain.
  
  The motivation for this is to keep the TCP connection local to a
  NUMA domain as much as possible.
  
  Thanks to jhb for pre-reviewing an earlier version of the patch.
  
  Reviewed by:	rrs
  Sponsored by:	Netflix
  Differential Revision:	https://reviews.freebsd.org/D20134

Modified:
  head/sys/kern/kern_intr.c
  head/sys/netinet/tcp_hpts.c
  head/sys/sys/interrupt.h

Modified: head/sys/kern/kern_intr.c
==============================================================================
--- head/sys/kern/kern_intr.c	Fri May 10 13:18:22 2019	(r347429)
+++ head/sys/kern/kern_intr.c	Fri May 10 13:41:19 2019	(r347430)
@@ -380,6 +380,25 @@ intr_event_bind_ithread(struct intr_event *ie, int cpu
 	return (_intr_event_bind(ie, cpu, false, true));
 }
 
+/*
+ * Bind an interrupt event's ithread to the specified cpuset.
+ */
+int
+intr_event_bind_ithread_cpuset(struct intr_event *ie, cpuset_t *cs)
+{
+	lwpid_t id;
+
+	mtx_lock(&ie->ie_lock);
+	if (ie->ie_thread != NULL) {
+		id = ie->ie_thread->it_thread->td_tid;
+		mtx_unlock(&ie->ie_lock);
+		return (cpuset_setthread(id, cs));
+	} else {
+		mtx_unlock(&ie->ie_lock);
+	}
+	return (ENODEV);
+}
+
 static struct intr_event *
 intr_lookup(int irq)
 {

Modified: head/sys/netinet/tcp_hpts.c
==============================================================================
--- head/sys/netinet/tcp_hpts.c	Fri May 10 13:18:22 2019	(r347429)
+++ head/sys/netinet/tcp_hpts.c	Fri May 10 13:41:19 2019	(r347430)
@@ -131,6 +131,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/kern_prefetch.h>
 
 #include <vm/uma.h>
+#include <vm/vm.h>
 
 #include <net/route.h>
 #include <net/vnet.h>
@@ -171,7 +172,7 @@ MALLOC_DEFINE(M_TCPHPTS, "tcp_hpts", "TCP hpts");
 #include <net/rss_config.h>
 static int tcp_bind_threads = 1;
 #else
-static int tcp_bind_threads = 0;
+static int tcp_bind_threads = 2;
 #endif
 TUNABLE_INT("net.inet.tcp.bind_hptss", &tcp_bind_threads);
 
@@ -207,6 +208,13 @@ static int32_t logging_on = 0;
 static int32_t hpts_sleep_max = (NUM_OF_HPTSI_SLOTS - 2);
 static int32_t tcp_hpts_precision = 120;
 
+struct hpts_domain_info {
+	int count;
+	int cpu[MAXCPU];
+};
+
+struct hpts_domain_info hpts_domains[MAXMEMDOM];
+
 SYSCTL_INT(_net_inet_tcp_hpts, OID_AUTO, precision, CTLFLAG_RW,
     &tcp_hpts_precision, 120,
     "Value for PRE() precision of callout");
@@ -1079,8 +1087,10 @@ hpts_random_cpu(struct inpcb *inp){
 static uint16_t
 hpts_cpuid(struct inpcb *inp){
 	u_int cpuid;
+#ifdef NUMA
+	struct hpts_domain_info *di;
+#endif
 
-
 	/*
 	 * If one has been set use it i.e. we want both in and out on the
 	 * same hpts.
@@ -1103,11 +1113,21 @@ hpts_cpuid(struct inpcb *inp){
 	 * unknown cpuids to curcpu.  Not the best, but apparently better
 	 * than defaulting to swi 0.
 	 */
-	if (inp->inp_flowtype != M_HASHTYPE_NONE) {
+	
+	if (inp->inp_flowtype == M_HASHTYPE_NONE)
+		return (hpts_random_cpu(inp));
+	/*
+	 * Hash to a thread based on the flowid.  If we are using numa,
+	 * then restrict the hash to the numa domain where the inp lives.
+	 */
+#ifdef NUMA
+	if (tcp_bind_threads == 2 && inp->inp_numa_domain != M_NODOM) {
+		di = &hpts_domains[inp->inp_numa_domain];
+		cpuid = di->cpu[inp->inp_flowid % di->count];
+	} else
+#endif
 		cpuid = inp->inp_flowid % mp_ncpus;
-		return (cpuid);
-	}
-	cpuid = hpts_random_cpu(inp);
+
 	return (cpuid);
 #endif
 }
@@ -1781,8 +1801,11 @@ tcp_init_hptsi(void *st)
 	struct timeval tv;
 	sbintime_t sb;
 	struct tcp_hpts_entry *hpts;
+	struct pcpu *pc;
+	cpuset_t cs;
 	char unit[16];
 	uint32_t ncpus = mp_ncpus ? mp_ncpus : MAXCPU;
+	int count, domain;
 
 	tcp_pace.rp_proc = NULL;
 	tcp_pace.rp_num_hptss = ncpus;
@@ -1861,6 +1884,11 @@ tcp_init_hptsi(void *st)
 		}
 		callout_init(&hpts->co, 1);
 	}
+
+	/* Don't try to bind to NUMA domains if we don't have any */
+	if (vm_ndomains == 1 && tcp_bind_threads == 2)
+		tcp_bind_threads = 0;
+
 	/*
 	 * Now lets start ithreads to handle the hptss.
 	 */
@@ -1875,9 +1903,20 @@ tcp_init_hptsi(void *st)
 			    hpts, i, error);
 		}
 		created++;
-		if (tcp_bind_threads) {
+		if (tcp_bind_threads == 1) {
 			if (intr_event_bind(hpts->ie, i) == 0)
 				bound++;
+		} else if (tcp_bind_threads == 2) {
+			pc = pcpu_find(i);
+			domain = pc->pc_domain;
+			CPU_COPY(&cpuset_domain[domain], &cs);
+			if (intr_event_bind_ithread_cpuset(hpts->ie, &cs)
+			    == 0) {
+				bound++;
+				count = hpts_domains[domain].count;
+				hpts_domains[domain].cpu[count] = i;
+				hpts_domains[domain].count++;
+			}
 		}
 		tv.tv_sec = 0;
 		tv.tv_usec = hpts->p_hpts_sleep_time * HPTS_TICKS_PER_USEC;
@@ -1893,9 +1932,20 @@ tcp_init_hptsi(void *st)
 			    C_PREL(tcp_hpts_precision));
 		}
 	}
-	printf("TCP Hpts created %d swi interrupt thread and bound %d\n",
-	    created, bound);
-	return;
+	/*
+	 * If we somehow have an empty domain, fall back to choosing
+	 * among all hpts threads.
+	 */
+	for (i = 0; i < vm_ndomains; i++) {
+		if (hpts_domains[i].count == 0) {
+			tcp_bind_threads = 0;
+			break;
+		}
+	}
+
+	printf("TCP Hpts created %d swi interrupt threads and bound %d to %s\n",
+	    created, bound,
+	    tcp_bind_threads == 2 ? "NUMA domains" : "cpus");
 }
 
 SYSINIT(tcphptsi, SI_SUB_KTHREAD_IDLE, SI_ORDER_ANY, tcp_init_hptsi, NULL);

Modified: head/sys/sys/interrupt.h
==============================================================================
--- head/sys/sys/interrupt.h	Fri May 10 13:18:22 2019	(r347429)
+++ head/sys/sys/interrupt.h	Fri May 10 13:41:19 2019	(r347430)
@@ -176,6 +176,8 @@ int	intr_event_add_handler(struct intr_event *ie, cons
 int	intr_event_bind(struct intr_event *ie, int cpu);
 int	intr_event_bind_irqonly(struct intr_event *ie, int cpu);
 int	intr_event_bind_ithread(struct intr_event *ie, int cpu);
+int	intr_event_bind_ithread_cpuset(struct intr_event *ie,
+	    cpuset_t *mask);
 int	intr_event_create(struct intr_event **event, void *source,
 	    int flags, int irq, void (*pre_ithread)(void *),
 	    void (*post_ithread)(void *), void (*post_filter)(void *),


More information about the svn-src-all mailing list