svn commit: r332967 - in head: cddl/lib/libdtrace sys/kern sys/netinet sys/netinet6 sys/sys

Sean Bruno sbruno at FreeBSD.org
Tue Apr 24 19:55:14 UTC 2018


Author: sbruno
Date: Tue Apr 24 19:55:12 2018
New Revision: 332967
URL: https://svnweb.freebsd.org/changeset/base/332967

Log:
  Revert r332894 at the request of the submitter.
  
  Submitted by:	Johannes Lundberg <johalun0_gmail.com>
  Sponsored by:	Limelight Networks

Modified:
  head/cddl/lib/libdtrace/tcp.d
  head/sys/kern/uipc_debug.c
  head/sys/kern/uipc_socket.c
  head/sys/netinet/in_pcb.c
  head/sys/netinet/in_pcb.h
  head/sys/netinet/ip_output.c
  head/sys/netinet/tcp_subr.c
  head/sys/netinet/udp_usrreq.c
  head/sys/netinet6/in6_pcb.c
  head/sys/netinet6/in6_src.c
  head/sys/netinet6/ip6_output.c
  head/sys/netinet6/udp6_usrreq.c
  head/sys/sys/socket.h
  head/sys/sys/socketvar.h

Modified: head/cddl/lib/libdtrace/tcp.d
==============================================================================
--- head/cddl/lib/libdtrace/tcp.d	Tue Apr 24 19:51:05 2018	(r332966)
+++ head/cddl/lib/libdtrace/tcp.d	Tue Apr 24 19:55:12 2018	(r332967)
@@ -192,12 +192,12 @@ translator tcpsinfo_t < struct tcpcb *p > {
 	tcps_rport =		p == NULL ? 0 : ntohs(p->t_inpcb->inp_inc.inc_ie.ie_fport);
 	tcps_laddr =		p == NULL ? 0 :
 	    p->t_inpcb->inp_vflag == INP_IPV4 ?
-	    inet_ntoa(&p->t_inpcb->inp_inc.inc_ie.ie_dependladdr.id46_addr.ia46_addr4.s_addr) :
-	    inet_ntoa6(&p->t_inpcb->inp_inc.inc_ie.ie_dependladdr.id6_addr);
+	    inet_ntoa(&p->t_inpcb->inp_inc.inc_ie.ie_dependladdr.ie46_local.ia46_addr4.s_addr) :
+	    inet_ntoa6(&p->t_inpcb->inp_inc.inc_ie.ie_dependladdr.ie6_local);
 	tcps_raddr =		p == NULL ? 0 :
 	    p->t_inpcb->inp_vflag == INP_IPV4 ?
-	    inet_ntoa(&p->t_inpcb->inp_inc.inc_ie.ie_dependfaddr.id46_addr.ia46_addr4.s_addr) :
-	    inet_ntoa6(&p->t_inpcb->inp_inc.inc_ie.ie_dependfaddr.id6_addr);
+	    inet_ntoa(&p->t_inpcb->inp_inc.inc_ie.ie_dependfaddr.ie46_foreign.ia46_addr4.s_addr) :
+	    inet_ntoa6(&p->t_inpcb->inp_inc.inc_ie.ie_dependfaddr.ie6_foreign);
 	tcps_state =		p == NULL ? -1 : p->t_state;
 	tcps_iss =		p == NULL ? 0  : p->iss;
 	tcps_irs =		p == NULL ? 0  : p->irs;

Modified: head/sys/kern/uipc_debug.c
==============================================================================
--- head/sys/kern/uipc_debug.c	Tue Apr 24 19:51:05 2018	(r332966)
+++ head/sys/kern/uipc_debug.c	Tue Apr 24 19:55:12 2018	(r332967)
@@ -77,7 +77,7 @@ db_print_sotype(short so_type)
 }
 
 static void
-db_print_sooptions(int so_options)
+db_print_sooptions(short so_options)
 {
 	int comma;
 
@@ -120,10 +120,6 @@ db_print_sooptions(int so_options)
 	}
 	if (so_options & SO_REUSEPORT) {
 		db_printf("%sSO_REUSEPORT", comma ? ", " : "");
-		comma = 1;
-	}
-	if (so_options & SO_REUSEPORT_LB) {
-		db_printf("%sSO_REUSEPORT_LB", comma ? ", " : "");
 		comma = 1;
 	}
 	if (so_options & SO_TIMESTAMP) {

Modified: head/sys/kern/uipc_socket.c
==============================================================================
--- head/sys/kern/uipc_socket.c	Tue Apr 24 19:51:05 2018	(r332966)
+++ head/sys/kern/uipc_socket.c	Tue Apr 24 19:55:12 2018	(r332967)
@@ -1057,100 +1057,6 @@ sofree(struct socket *so)
 }
 
 /*
- * Let socket in same load balance group (same port and address)
- * inherit pending sockets of the closing socket.
- *
- * "so_inh" will inherit sockets from "so"
- */
-void
-soinherit(struct socket *so, struct socket *so_inh)
-{
-	TAILQ_HEAD(, socket) comp, incomp;
-	struct socket *sp, *head, *head_inh;
-	int qlen, incqlen;
-
-	KASSERT(so->so_options & SO_ACCEPTCONN,
-	    ("so does not accept connection"));
-	KASSERT(so_inh->so_options & SO_ACCEPTCONN,
-	    ("so_inh does not accept connection"));
-
-
-restart:
-	SOCK_LOCK(so);
-	if ((head = so->so_listen) != NULL &&
-	    __predict_false(SOLISTEN_TRYLOCK(head) == 0)) {
-		SOCK_UNLOCK(so);
-		goto restart;
-	}
-
-restart_inh:
-	SOCK_LOCK(so_inh);
-	if ((head_inh = so_inh->so_listen) != NULL &&
-	    __predict_false(SOLISTEN_TRYLOCK(head_inh) == 0)) {
-		SOCK_UNLOCK(so_inh);
-		goto restart_inh;
-	}
-
-	TAILQ_INIT(&comp);
-	TAILQ_INIT(&incomp);
-
-	/*
-	 * Save completed queue and incompleted queue
-	 */
-	TAILQ_CONCAT(&comp, &so->sol_comp, so_list);
-	qlen = so->sol_qlen;
-	so->sol_qlen = 0;
-
-	TAILQ_CONCAT(&incomp, &so->sol_incomp, so_list);
-	incqlen = so->sol_incqlen;
-	so->sol_incqlen = 0;
-
-	/*
-	 * Append the saved completed queue and incompleted
-	 * queue to the socket inherits them.
-	 *
-	 * XXX
-	 * This may temporarily break the inheriting socket's
-	 * so_qlimit.
-	 */
-	TAILQ_FOREACH(sp, &comp, so_list) {
-		refcount_acquire(&so_inh->so_count);
-		sp->so_listen = so_inh;
-		crfree(sp->so_cred);
-		sp->so_cred = crhold(so_inh->so_cred);
-	}
-
-	TAILQ_FOREACH(sp, &incomp, so_list) {
-		refcount_acquire(&so_inh->so_count);
-		sp->so_listen = so_inh;
-		crfree(sp->so_cred);
-		sp->so_cred = crhold(so_inh->so_cred);
-	}
-
-	TAILQ_CONCAT(&so_inh->sol_comp, &comp, so_list);
-	so_inh->sol_qlen += qlen;
-
-	TAILQ_CONCAT(&so_inh->sol_incomp, &incomp, so_list);
-	so_inh->sol_incqlen += incqlen;
-
-	SOCK_UNLOCK(so);
-	if(head != NULL)
-		SOLISTEN_UNLOCK(head);
-
-	SOCK_UNLOCK(so_inh);
-	if(head_inh != NULL) {
-		if(qlen > 0) {
-			/*
-			 * "New" connections have arrived
-			 */
-			solisten_wakeup(head_inh);
-		} else {
-			SOLISTEN_UNLOCK(head_inh);
-		}
-	}
-}
-
-/*
  * Close a socket on last file table reference removal.  Initiate disconnect
  * if connected.  Free socket when disconnect complete.
  *
@@ -2870,7 +2776,6 @@ sosetopt(struct socket *so, struct sockopt *sopt)
 		case SO_BROADCAST:
 		case SO_REUSEADDR:
 		case SO_REUSEPORT:
-		case SO_REUSEPORT_LB:
 		case SO_OOBINLINE:
 		case SO_TIMESTAMP:
 		case SO_BINTIME:
@@ -3089,7 +2994,6 @@ sogetopt(struct socket *so, struct sockopt *sopt)
 		case SO_KEEPALIVE:
 		case SO_REUSEADDR:
 		case SO_REUSEPORT:
-		case SO_REUSEPORT_LB:
 		case SO_BROADCAST:
 		case SO_OOBINLINE:
 		case SO_ACCEPTCONN:

Modified: head/sys/netinet/in_pcb.c
==============================================================================
--- head/sys/netinet/in_pcb.c	Tue Apr 24 19:51:05 2018	(r332966)
+++ head/sys/netinet/in_pcb.c	Tue Apr 24 19:55:12 2018	(r332967)
@@ -108,9 +108,6 @@ __FBSDID("$FreeBSD$");
 
 #include <security/mac/mac_framework.h>
 
-#define INPCBLBGROUP_SIZMIN	8
-#define INPCBLBGROUP_SIZMAX	256
-
 static struct callout	ipport_tick_callout;
 
 /*
@@ -220,186 +217,7 @@ SYSCTL_INT(_net_inet_ip_portrange, OID_AUTO, randomtim
  * functions often modify hash chains or addresses in pcbs.
  */
 
-static struct inpcblbgroup *
-in_pcblbgroup_alloc(struct inpcblbgrouphead *hdr, u_char vflag,
-    uint16_t port, const union in_dependaddr *addr, int size)
-{
-	struct inpcblbgroup *grp;
-
-	size_t bytes = __offsetof(struct inpcblbgroup, il_inp[size]);
-	grp = malloc(bytes, M_PCB, M_ZERO | M_NOWAIT);
-	if(!grp)
-		return NULL;
-	grp->il_vflag = vflag;
-	grp->il_lport = port;
-	grp->il_dependladdr = *addr;
-	grp->il_inpsiz = size;
-	LIST_INSERT_HEAD(hdr, grp, il_list);
-
-	return grp;
-}
-
-static void
-in_pcblbgroup_free(struct inpcblbgroup *grp)
-{
-	LIST_REMOVE(grp, il_list);
-	free(grp, M_TEMP);
-}
-
-static struct inpcblbgroup *
-in_pcblbgroup_resize(struct inpcblbgrouphead *hdr,
-    struct inpcblbgroup *old_grp, int size)
-{
-	struct inpcblbgroup *grp;
-	int i;
-
-	grp = in_pcblbgroup_alloc(hdr, old_grp->il_vflag,
-	    old_grp->il_lport, &old_grp->il_dependladdr, size);
-	if(!grp)
-		return NULL;
-
-	KASSERT(old_grp->il_inpcnt < grp->il_inpsiz,
-	    ("invalid new local group size %d and old local group count %d",
-	     grp->il_inpsiz, old_grp->il_inpcnt));
-	for (i = 0; i < old_grp->il_inpcnt; ++i)
-		grp->il_inp[i] = old_grp->il_inp[i];
-	grp->il_inpcnt = old_grp->il_inpcnt;
-
-	in_pcblbgroup_free(old_grp);
-
-	return grp;
-}
-
 /*
- * Add PCB to lb group (load balance used by SO_REUSEPORT_LB)
- */
-static int
-in_pcbinslbgrouphash(struct inpcb *inp, struct inpcbinfo *pcbinfo)
-{
-	struct inpcblbgrouphead *hdr;
-	struct inpcblbgroup *grp;
-
-	uint16_t hashmask = pcbinfo->ipi_lbgrouphashmask;
-	uint16_t lport = inp->inp_lport;
-	uint32_t group_index = INP_PCBLBGROUP_PORTHASH(lport, hashmask);
-
-	hdr = &pcbinfo->ipi_lbgrouphashbase[group_index];
-
-	struct ucred *cred;
-
-	if (pcbinfo->ipi_lbgrouphashbase == NULL)
-		return 0;
-
-	/*
-	 * don't allow jailed socket to join local group
-	 */
-	if (inp->inp_socket != NULL)
-		cred = inp->inp_socket->so_cred;
-	else
-		cred = NULL;
-	if (cred != NULL && jailed(cred))
-		return 0;
-
-#ifdef INET6
-	/*
-	 * don't allow IPv4 mapped INET6 wild socket
-	 */
-	if ((inp->inp_vflag & INP_IPV4) &&
-	    inp->inp_laddr.s_addr == INADDR_ANY &&
-	    INP_CHECK_SOCKAF(inp->inp_socket, AF_INET6)) {
-		return 0;
-	}
-#endif
-
-	hdr = &pcbinfo->ipi_lbgrouphashbase[
-	    INP_PCBLBGROUP_PORTHASH(inp->inp_lport, pcbinfo->ipi_lbgrouphashmask)];
-
-	LIST_FOREACH(grp, hdr, il_list) {
-		if (grp->il_vflag == inp->inp_vflag &&
-		    grp->il_lport == inp->inp_lport &&
-		    memcmp(&grp->il_dependladdr,
-		        &inp->inp_inc.inc_ie.ie_dependladdr,
-		        sizeof(grp->il_dependladdr)) == 0) {
-			break;
-		}
-	}
-	if (grp == NULL) {
-		/* Create new load balance group */
-		grp = in_pcblbgroup_alloc(hdr, inp->inp_vflag,
-		    inp->inp_lport, &inp->inp_inc.inc_ie.ie_dependladdr,
-		    INPCBLBGROUP_SIZMIN);
-		if(!grp)
-			return (ENOBUFS);
-	} else if (grp->il_inpcnt == grp->il_inpsiz) {
-		if (grp->il_inpsiz >= INPCBLBGROUP_SIZMAX) {
-			static int limit_logged = 0;
-
-			if (!limit_logged) {
-				limit_logged = 1;
-				printf("lb group port %d, "
-					   "limit reached\n", ntohs(grp->il_lport));
-			}
-			return 0;
-		}
-
-		/* Expand this local group */
-		grp = in_pcblbgroup_resize(hdr, grp, grp->il_inpsiz * 2);
-		if(!grp)
-			return (ENOBUFS);
-	}
-
-	KASSERT(grp->il_inpcnt < grp->il_inpsiz,
-			("invalid local group size %d and count %d",
-			 grp->il_inpsiz, grp->il_inpcnt));
-
-	grp->il_inp[grp->il_inpcnt] = inp;
-	grp->il_inpcnt++;
-	return 0;
-}
-
-static void
-in_pcbremlbgrouphash(struct inpcb *inp, struct inpcbinfo *pcbinfo)
-{
-	struct inpcblbgrouphead *hdr;
-	struct inpcblbgroup *grp;
-
-	if (pcbinfo->ipi_lbgrouphashbase == NULL)
-		return;
-
-	hdr = &pcbinfo->ipi_lbgrouphashbase[
-	    INP_PCBLBGROUP_PORTHASH(inp->inp_lport, pcbinfo->ipi_lbgrouphashmask)];
-
-	LIST_FOREACH(grp, hdr, il_list) {
-		int i;
-
-		for (i = 0; i < grp->il_inpcnt; ++i) {
-			if (grp->il_inp[i] != inp)
-				continue;
-
-			if (grp->il_inpcnt == 1) {
-				/* Free this local group */
-				in_pcblbgroup_free(grp);
-			} else {
-				/* Pull up inpcbs */
-				for (; i + 1 < grp->il_inpcnt; ++i)
-					grp->il_inp[i] = grp->il_inp[i + 1];
-				grp->il_inpcnt--;
-
-				if (grp->il_inpsiz > INPCBLBGROUP_SIZMIN &&
-				    grp->il_inpcnt <= (grp->il_inpsiz / 4)) {
-					/* Shrink this local group */
-					struct inpcblbgroup *new_grp =
-						in_pcblbgroup_resize(hdr, grp, grp->il_inpsiz / 2);
-					if(new_grp)
-						grp = new_grp;
-				}
-			}
-			return;
-		}
-	}
-}
-
-/*
  * Different protocols initialize their inpcbs differently - giving
  * different name to the lock.  But they all are disposed the same.
  */
@@ -434,8 +252,6 @@ in_pcbinfo_init(struct inpcbinfo *pcbinfo, const char 
 	    &pcbinfo->ipi_hashmask);
 	pcbinfo->ipi_porthashbase = hashinit(porthash_nelements, M_PCB,
 	    &pcbinfo->ipi_porthashmask);
-	pcbinfo->ipi_lbgrouphashbase = hashinit(hash_nelements, M_PCB,
-	    &pcbinfo->ipi_lbgrouphashmask);
 #ifdef PCBGROUP
 	in_pcbgroup_init(pcbinfo, hashfields, hash_nelements);
 #endif
@@ -459,8 +275,6 @@ in_pcbinfo_destroy(struct inpcbinfo *pcbinfo)
 	hashdestroy(pcbinfo->ipi_hashbase, M_PCB, pcbinfo->ipi_hashmask);
 	hashdestroy(pcbinfo->ipi_porthashbase, M_PCB,
 	    pcbinfo->ipi_porthashmask);
-	hashdestroy(pcbinfo->ipi_lbgrouphashbase, M_PCB,
-	    pcbinfo->ipi_lbgrouphashmask);
 #ifdef PCBGROUP
 	in_pcbgroup_destroy(pcbinfo);
 #endif
@@ -699,20 +513,18 @@ in_pcb_lport(struct inpcb *inp, struct in_addr *laddrp
 /*
  * Return cached socket options.
  */
-int
+short
 inp_so_options(const struct inpcb *inp)
 {
-	int so_options;
+   short so_options;
 
-	so_options = 0;
+   so_options = 0;
 
-	if ((inp->inp_flags2 & INP_REUSEPORT_LB) != 0)
-		so_options |= SO_REUSEPORT_LB;
-	if ((inp->inp_flags2 & INP_REUSEPORT) != 0)
-		so_options |= SO_REUSEPORT;
-	if ((inp->inp_flags2 & INP_REUSEADDR) != 0)
-		so_options |= SO_REUSEADDR;
-	return (so_options);
+   if ((inp->inp_flags2 & INP_REUSEPORT) != 0)
+	   so_options |= SO_REUSEPORT;
+   if ((inp->inp_flags2 & INP_REUSEADDR) != 0)
+	   so_options |= SO_REUSEADDR;
+   return (so_options);
 }
 #endif /* INET || INET6 */
 
@@ -769,12 +581,6 @@ in_pcbbind_setup(struct inpcb *inp, struct sockaddr *n
 	int error;
 
 	/*
-	 * XXX Maybe we could let SO_REUSEPORT_LB set SO_REUSEPORT bit here
-	 * so that we don't have to add to the (already messy) code below
-	 */
-	int reuseport_lb = (so->so_options & SO_REUSEPORT_LB);
-
-	/*
 	 * No state changes, so read locks are sufficient here.
 	 */
 	INP_LOCK_ASSERT(inp);
@@ -785,7 +591,7 @@ in_pcbbind_setup(struct inpcb *inp, struct sockaddr *n
 	laddr.s_addr = *laddrp;
 	if (nam != NULL && laddr.s_addr != INADDR_ANY)
 		return (EINVAL);
-	if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT|SO_REUSEPORT_LB)) == 0)
+	if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0)
 		lookupflags = INPLOOKUP_WILDCARD;
 	if (nam == NULL) {
 		if ((error = prison_local_ip4(cred, &laddr)) != 0)
@@ -822,20 +628,16 @@ in_pcbbind_setup(struct inpcb *inp, struct sockaddr *n
 			 */
 			if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) != 0)
 				reuseport = SO_REUSEADDR|SO_REUSEPORT;
-			// XXX: How to deal with SO_REUSEPORT_LB here?
-			// Added equivalent treatment as SO_REUSEPORT here for now
-			if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT_LB)) != 0)
-				reuseport_lb = SO_REUSEADDR|SO_REUSEPORT_LB;
 		} else if (sin->sin_addr.s_addr != INADDR_ANY) {
 			sin->sin_port = 0;		/* yech... */
 			bzero(&sin->sin_zero, sizeof(sin->sin_zero));
 			/*
-			 * Is the address a local IP address?
+			 * Is the address a local IP address? 
 			 * If INP_BINDANY is set, then the socket may be bound
 			 * to any endpoint address, local or not.
 			 */
 			if ((inp->inp_flags & INP_BINDANY) == 0 &&
-			    ifa_ifwithaddr_check((struct sockaddr *)sin) == 0)
+			    ifa_ifwithaddr_check((struct sockaddr *)sin) == 0) 
 				return (EADDRNOTAVAIL);
 		}
 		laddr = sin->sin_addr;
@@ -865,8 +667,7 @@ in_pcbbind_setup(struct inpcb *inp, struct sockaddr *n
 				     ntohl(t->inp_faddr.s_addr) == INADDR_ANY) &&
 				    (ntohl(sin->sin_addr.s_addr) != INADDR_ANY ||
 				     ntohl(t->inp_laddr.s_addr) != INADDR_ANY ||
-				     (t->inp_flags2 & INP_REUSEPORT) ||
-				     (t->inp_flags2 & INP_REUSEPORT_LB) == 0) &&
+				     (t->inp_flags2 & INP_REUSEPORT) == 0) &&
 				    (inp->inp_cred->cr_uid !=
 				     t->inp_cred->cr_uid))
 					return (EADDRINUSE);
@@ -891,14 +692,11 @@ in_pcbbind_setup(struct inpcb *inp, struct sockaddr *n
 				 */
 				tw = intotw(t);
 				if (tw == NULL ||
-				    ((reuseport & tw->tw_so_options) == 0 &&
-					(reuseport_lb & tw->tw_so_options) == 0)) {
+				    (reuseport & tw->tw_so_options) == 0)
 					return (EADDRINUSE);
-				}
 			} else if (t &&
-				   ((inp->inp_flags2 & INP_BINDMULTI) == 0) &&
-				   (reuseport & inp_so_options(t)) == 0 &&
-				   (reuseport_lb & inp_so_options(t)) == 0) {
+			    ((inp->inp_flags2 & INP_BINDMULTI) == 0) &&
+			    (reuseport & inp_so_options(t)) == 0) {
 #ifdef INET6
 				if (ntohl(sin->sin_addr.s_addr) !=
 				    INADDR_ANY ||
@@ -907,7 +705,7 @@ in_pcbbind_setup(struct inpcb *inp, struct sockaddr *n
 				    (inp->inp_vflag & INP_IPV6PROTO) == 0 ||
 				    (t->inp_vflag & INP_IPV6PROTO) == 0)
 #endif
-						return (EADDRINUSE);
+				return (EADDRINUSE);
 				if (t && (! in_pcbbind_check_bindmulti(inp, t)))
 					return (EADDRINUSE);
 			}
@@ -1611,7 +1409,6 @@ in_pcbdrop(struct inpcb *inp)
 		struct inpcbport *phd = inp->inp_phd;
 
 		INP_HASH_WLOCK(inp->inp_pcbinfo);
-		in_pcbremlbgrouphash(inp, inp->inp_pcbinfo);
 		LIST_REMOVE(inp, inp_hash);
 		LIST_REMOVE(inp, inp_portlist);
 		if (LIST_FIRST(&phd->phd_pcblist) == NULL) {
@@ -1872,98 +1669,6 @@ in_pcblookup_local(struct inpcbinfo *pcbinfo, struct i
 }
 #undef INP_LOOKUP_MAPPED_PCB_COST
 
-struct inpcb *
-in_pcblookup_lbgroup_last(const struct inpcb *inp)
-{
-	const struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
-	const struct inpcblbgrouphead *hdr;
-	const struct inpcblbgroup *grp;
-	int i;
-
-	if (pcbinfo->ipi_lbgrouphashbase == NULL)
-		return NULL;
-
-	hdr = &pcbinfo->ipi_lbgrouphashbase[
-	    INP_PCBLBGROUP_PORTHASH(inp->inp_lport, pcbinfo->ipi_lbgrouphashmask)];
-
-	LIST_FOREACH(grp, hdr, il_list) {
-		if (grp->il_vflag == inp->inp_vflag &&
-		    grp->il_lport == inp->inp_lport &&
-		    memcmp(&grp->il_dependladdr,
-			&inp->inp_inc.inc_ie.ie_dependladdr,
-			sizeof(grp->il_dependladdr)) == 0) {
-			break;
-		}
-	}
-	if (grp == NULL || grp->il_inpcnt == 1)
-		return NULL;
-
-	KASSERT(grp->il_inpcnt >= 2,
-	    ("invalid lbgroup inp count %d", grp->il_inpcnt));
-	for (i = 0; i < grp->il_inpcnt; ++i) {
-		if (grp->il_inp[i] == inp) {
-			int last = grp->il_inpcnt - 1;
-
-			if (i == last)
-				last = grp->il_inpcnt - 2;
-			return grp->il_inp[last];
-		}
-	}
-	return NULL;
-}
-
-static struct inpcb *
-in_pcblookup_lbgroup(const struct inpcbinfo *pcbinfo,
-  const struct in_addr *laddr, uint16_t lport, const struct in_addr *faddr,
-  uint16_t fport, int lookupflags)
-{
-	struct inpcb *local_wild = NULL;
-	const struct inpcblbgrouphead *hdr;
-	struct inpcblbgroup *grp;
-	struct inpcblbgroup *grp_local_wild;
-
-	hdr = &pcbinfo->ipi_lbgrouphashbase[
-		  INP_PCBLBGROUP_PORTHASH(lport, pcbinfo->ipi_lbgrouphashmask)];
-
-	/*
-	 * Order of socket selection:
-	 * 1. non-wild.
-	 * 2. wild (if lookupflags contains INPLOOKUP_WILDCARD).
-	 *
-	 * NOTE:
-	 * - Load balanced group does not contain jailed sockets
-	 * - Load balanced group does not contain IPv4 mapped INET6 wild sockets
-	 */
-	LIST_FOREACH(grp, hdr, il_list) {
-#ifdef INET6
-		if (!(grp->il_vflag & INP_IPV4))
-			continue;
-#endif
-
-		if (grp->il_lport == lport) {
-
-			uint32_t idx = 0;
-			int pkt_hash = INP_PCBLBGROUP_PKTHASH(faddr->s_addr, lport, fport);
-
-			idx = pkt_hash % grp->il_inpcnt;
-
-			if (grp->il_laddr.s_addr == laddr->s_addr) {
-				return grp->il_inp[idx];
-			} else {
-				if (grp->il_laddr.s_addr == INADDR_ANY &&
-					(lookupflags & INPLOOKUP_WILDCARD)) {
-					local_wild = grp->il_inp[idx];
-					grp_local_wild = grp;
-				}
-			}
-		}
-	}
-	if (local_wild != NULL) {
-		return local_wild;
-	}
-	return NULL;
-}
-
 #ifdef PCBGROUP
 /*
  * Lookup PCB in hash list, using pcbgroup tables.
@@ -2243,18 +1948,6 @@ in_pcblookup_hash_locked(struct inpcbinfo *pcbinfo, st
 		return (tmpinp);
 
 	/*
-	 * Then look in lb group (for wildcard match)
-	 */
-	if (pcbinfo->ipi_lbgrouphashbase != NULL &&
-		(lookupflags & INPLOOKUP_WILDCARD)) {
-		inp = in_pcblookup_lbgroup(pcbinfo, &laddr, lport, &faddr, fport,
-								   lookupflags);
-		if (inp != NULL) {
-			return inp;
-		}
-	}
-
-	/*
 	 * Then look for a wildcard match, if requested.
 	 */
 	if ((lookupflags & INPLOOKUP_WILDCARD) != 0) {
@@ -2471,7 +2164,6 @@ in_pcbinshash_internal(struct inpcb *inp, int do_pcbgr
 	struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
 	struct inpcbport *phd;
 	u_int32_t hashkey_faddr;
-	int so_options;
 
 	INP_WLOCK_ASSERT(inp);
 	INP_HASH_WLOCK_ASSERT(pcbinfo);
@@ -2492,21 +2184,7 @@ in_pcbinshash_internal(struct inpcb *inp, int do_pcbgr
 	pcbporthash = &pcbinfo->ipi_porthashbase[
 	    INP_PCBPORTHASH(inp->inp_lport, pcbinfo->ipi_porthashmask)];
 
-
 	/*
-	 * Add entry in lb group
-	 * Only do this if SO_REUSEPORT_LB is set
-	 */
-	so_options = inp_so_options(inp);
-	if(so_options & SO_REUSEPORT_LB) {
-		int ret = in_pcbinslbgrouphash(inp, pcbinfo);
-		if(ret) {
-			// pcb lb group malloc fail (ret=ENOBUFS)
-			return ret;
-		}
-	}
-
-	/*
 	 * Go through port list and look for a head for this lport.
 	 */
 	LIST_FOREACH(phd, pcbporthash, phd_hash) {
@@ -2632,10 +2310,6 @@ in_pcbremlists(struct inpcb *inp)
 		struct inpcbport *phd = inp->inp_phd;
 
 		INP_HASH_WLOCK(pcbinfo);
-
-		// XXX Only do if SO_REUSEPORT_LB set?
-		in_pcbremlbgrouphash(inp, pcbinfo);
-
 		LIST_REMOVE(inp, inp_hash);
 		LIST_REMOVE(inp, inp_portlist);
 		if (LIST_FIRST(&phd->phd_pcblist) == NULL) {

Modified: head/sys/netinet/in_pcb.h
==============================================================================
--- head/sys/netinet/in_pcb.h	Tue Apr 24 19:51:05 2018	(r332966)
+++ head/sys/netinet/in_pcb.h	Tue Apr 24 19:55:12 2018	(r332967)
@@ -78,11 +78,6 @@ struct in_addr_4in6 {
 	struct	in_addr	ia46_addr4;
 };
 
-union in_dependaddr {
-	struct in_addr_4in6 id46_addr;
-	struct in6_addr	id6_addr;
-};
-
 /*
  * NOTE: ipv6 addrs should be 64-bit aligned, per RFC 2553.  in_conninfo has
  * some extra padding to accomplish this.
@@ -93,14 +88,22 @@ struct in_endpoints {
 	u_int16_t	ie_fport;		/* foreign port */
 	u_int16_t	ie_lport;		/* local port */
 	/* protocol dependent part, local and foreign addr */
-	union in_dependaddr ie_dependfaddr;	/* foreign host table entry */
-	union in_dependaddr ie_dependladdr;	/* local host table entry */
-#define	ie_faddr	ie_dependfaddr.id46_addr.ia46_addr4
-#define	ie_laddr	ie_dependladdr.id46_addr.ia46_addr4
-#define	ie6_faddr	ie_dependfaddr.id6_addr
-#define	ie6_laddr	ie_dependladdr.id6_addr
+	union {
+		/* foreign host table entry */
+		struct	in_addr_4in6 ie46_foreign;
+		struct	in6_addr ie6_foreign;
+	} ie_dependfaddr;
+	union {
+		/* local host table entry */
+		struct	in_addr_4in6 ie46_local;
+		struct	in6_addr ie6_local;
+	} ie_dependladdr;
 	u_int32_t	ie6_zoneid;		/* scope zone id */
 };
+#define	ie_faddr	ie_dependfaddr.ie46_foreign.ia46_addr4
+#define	ie_laddr	ie_dependladdr.ie46_local.ia46_addr4
+#define	ie6_faddr	ie_dependfaddr.ie6_foreign
+#define	ie6_laddr	ie_dependladdr.ie6_local
 
 /*
  * XXX The defines for inc_* are hacks and should be changed to direct
@@ -404,21 +407,6 @@ struct inpcbport {
 	u_short phd_port;
 };
 
-struct inpcblbgroup {
-	LIST_ENTRY(inpcblbgroup) il_list;
-	uint16_t	il_lport;
-	u_char		il_vflag;
-	u_char		il_pad;
-	uint32_t	il_pad2;
-	union in_dependaddr il_dependladdr;
-#define il_laddr	il_dependladdr.id46_addr.ia46_addr4
-#define il6_laddr	il_dependladdr.id6_addr
-	uint32_t	il_inpsiz; /* size of il_inp[] */
-	uint32_t	il_inpcnt; /* # of elem in il_inp[] */
-	struct inpcb	*il_inp[];
-};
-LIST_HEAD(inpcblbgrouphead, inpcblbgroup);
-
 /*-
  * Global data structure for each high-level protocol (UDP, TCP, ...) in both
  * IPv4 and IPv6.  Holds inpcb lists and information for managing them.
@@ -512,13 +500,6 @@ struct inpcbinfo {
 	u_long			 ipi_wildmask;		/* (p) */
 
 	/*
-	 * Load balanced group used by the SO_REUSEPORT_LB option,
-	 * hashed by local address and local port.
-	 */
-	struct	inpcblbgrouphead *ipi_lbgrouphashbase;
-	u_long	ipi_lbgrouphashmask;
-
-	/*
 	 * Pointer to network stack instance
 	 */
 	struct vnet		*ipi_vnet;		/* (c) */
@@ -604,7 +585,7 @@ struct tcpcb *
 	inp_inpcbtotcpcb(struct inpcb *inp);
 void 	inp_4tuple_get(struct inpcb *inp, uint32_t *laddr, uint16_t *lp,
 		uint32_t *faddr, uint16_t *fp);
-int		inp_so_options(const struct inpcb *inp);
+short	inp_so_options(const struct inpcb *inp);
 
 #endif /* _KERNEL */
 
@@ -667,10 +648,6 @@ int		inp_so_options(const struct inpcb *inp);
 	(((faddr) ^ ((faddr) >> 16) ^ ntohs((lport) ^ (fport))) & (mask))
 #define INP_PCBPORTHASH(lport, mask) \
 	(ntohs((lport)) & (mask))
-#define INP_PCBLBGROUP_PORTHASH(lport, mask) \
-	(ntohs((lport)) & (mask))
-#define INP_PCBLBGROUP_PKTHASH(faddr, lport, fport) \
-	((faddr) ^ ((faddr) >> 16) ^ ntohs((lport) ^ (fport)))
 #define	INP6_PCBHASHKEY(faddr)	((faddr)->s6_addr32[3])
 
 /*
@@ -739,7 +716,6 @@ int		inp_so_options(const struct inpcb *inp);
 #define	INP_RATE_LIMIT_CHANGED	0x00000400 /* rate limit needs attention */
 #define	INP_ORIGDSTADDR		0x00000800 /* receive IP dst address/port */
 #define INP_CANNOT_DO_ECN	0x00001000 /* The stack does not do ECN */
-#define	INP_REUSEPORT_LB	0x00002000 /* SO_REUSEPORT_LB option is set */
 
 /*
  * Flags passed to in_pcblookup*() functions.
@@ -842,8 +818,6 @@ struct inpcb *
 struct inpcb *
 	in_pcblookup(struct inpcbinfo *, struct in_addr, u_int,
 	    struct in_addr, u_int, int, struct ifnet *);
-struct inpcb *
-	in_pcblookup_lbgroup_last(const struct inpcb *inp);
 struct inpcb *
 	in_pcblookup_mbuf(struct inpcbinfo *, struct in_addr, u_int,
 	    struct in_addr, u_int, int, struct ifnet *, struct mbuf *);

Modified: head/sys/netinet/ip_output.c
==============================================================================
--- head/sys/netinet/ip_output.c	Tue Apr 24 19:51:05 2018	(r332966)
+++ head/sys/netinet/ip_output.c	Tue Apr 24 19:55:12 2018	(r332967)
@@ -986,15 +986,6 @@ ip_ctloutput(struct socket *so, struct sockopt *sopt)
 				INP_WUNLOCK(inp);
 				error = 0;
 				break;
-			case SO_REUSEPORT_LB:
-				INP_WLOCK(inp);
-				if ((so->so_options & SO_REUSEPORT_LB) != 0)
-					inp->inp_flags2 |= INP_REUSEPORT_LB;
-				else
-					inp->inp_flags2 &= ~INP_REUSEPORT_LB;
-				INP_WUNLOCK(inp);
-				error = 0;
-				break;
 			case SO_SETFIB:
 				INP_WLOCK(inp);
 				inp->inp_inc.inc_fibnum = so->so_fibnum;

Modified: head/sys/netinet/tcp_subr.c
==============================================================================
--- head/sys/netinet/tcp_subr.c	Tue Apr 24 19:51:05 2018	(r332966)
+++ head/sys/netinet/tcp_subr.c	Tue Apr 24 19:55:12 2018	(r332967)
@@ -1956,28 +1956,10 @@ tcp_close(struct tcpcb *tp)
 {
 	struct inpcb *inp = tp->t_inpcb;
 	struct socket *so;
-	struct inpcb *inp_inh = NULL;
-	int listen = tp->t_state & TCPS_LISTEN;
 
 	INP_INFO_LOCK_ASSERT(&V_tcbinfo);
 	INP_WLOCK_ASSERT(inp);
 
-	if (listen) {
-		/*
-		 * Pending socket/syncache inheritance
-		 *
-		 * If this is a listen(2) socket, find another listen(2)
-		 * socket in the same local group, which could inherit
-		 * the syncache and sockets pending on the completion
-		 * and incompletion queues.
-		 *
-		 * NOTE:
-		 * Currently the inheritance could only happen on the
-		 * listen(2) sockets with SO_REUSEPORT_LB set.
-		 */
-		inp_inh = in_pcblookup_lbgroup_last(inp);
-	}
-
 #ifdef TCP_OFFLOAD
 	if (tp->t_state == TCPS_LISTEN)
 		tcp_offload_listen_stop(tp);
@@ -1997,16 +1979,7 @@ tcp_close(struct tcpcb *tp)
 		tcp_state_change(tp, TCPS_CLOSED);
 	KASSERT(inp->inp_socket != NULL, ("tcp_close: inp_socket NULL"));
 	so = inp->inp_socket;
-
 	soisdisconnected(so);
-
-	if(listen)
-	{
-		if(inp_inh != NULL && inp_inh->inp_socket != NULL) {
-			soinherit(so, inp_inh->inp_socket);
-		}
-	}
-
 	if (inp->inp_flags & INP_SOCKREF) {
 		KASSERT(so->so_state & SS_PROTOREF,
 		    ("tcp_close: !SS_PROTOREF"));

Modified: head/sys/netinet/udp_usrreq.c
==============================================================================
--- head/sys/netinet/udp_usrreq.c	Tue Apr 24 19:51:05 2018	(r332966)
+++ head/sys/netinet/udp_usrreq.c	Tue Apr 24 19:55:12 2018	(r332967)
@@ -612,7 +612,7 @@ udp_input(struct mbuf **mp, int *offp, int proto)
 			 * will never clear these options after setting them.
 			 */
 			if ((last->inp_socket->so_options &
-			    (SO_REUSEPORT|SO_REUSEPORT_LB|SO_REUSEADDR)) == 0)
+			    (SO_REUSEPORT|SO_REUSEADDR)) == 0)
 				break;
 		}
 

Modified: head/sys/netinet6/in6_pcb.c
==============================================================================
--- head/sys/netinet6/in6_pcb.c	Tue Apr 24 19:51:05 2018	(r332966)
+++ head/sys/netinet6/in6_pcb.c	Tue Apr 24 19:55:12 2018	(r332967)
@@ -125,12 +125,6 @@ in6_pcbbind(struct inpcb *inp, struct sockaddr *nam,
 	int error, lookupflags = 0;
 	int reuseport = (so->so_options & SO_REUSEPORT);
 
-	/*
-	 * XXX Maybe we could let SO_REUSEPORT_LB set SO_REUSEPORT bit here
-	 * so that we don't have to add to the (already messy) code below
-	 */
-	int reuseport_lb = (so->so_options & SO_REUSEPORT_LB);
-
 	INP_WLOCK_ASSERT(inp);
 	INP_HASH_WLOCK_ASSERT(pcbinfo);
 
@@ -138,7 +132,7 @@ in6_pcbbind(struct inpcb *inp, struct sockaddr *nam,
 		return (EADDRNOTAVAIL);
 	if (inp->inp_lport || !IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr))
 		return (EINVAL);
-	if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT|SO_REUSEPORT_LB)) == 0)
+	if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0)
 		lookupflags = INPLOOKUP_WILDCARD;
 	if (nam == NULL) {
 		if ((error = prison_local_ip6(cred, &inp->in6p_laddr,
@@ -172,10 +166,6 @@ in6_pcbbind(struct inpcb *inp, struct sockaddr *nam,
 			 */
 			if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) != 0)
 				reuseport = SO_REUSEADDR|SO_REUSEPORT;
-			// XXX: How to deal with SO_REUSEPORT_LB here?
-			// Added equivalent treatment as SO_REUSEPORT here for now
-			if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT_LB)) != 0)
-				reuseport_lb = SO_REUSEADDR|SO_REUSEPORT_LB;
 		} else if (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
 			struct ifaddr *ifa;
 
@@ -224,8 +214,7 @@ in6_pcbbind(struct inpcb *inp, struct sockaddr *nam,
 				     IN6_IS_ADDR_UNSPECIFIED(&t->in6p_faddr)) &&
 				    (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr) ||
 				     !IN6_IS_ADDR_UNSPECIFIED(&t->in6p_laddr) ||
-				     (t->inp_flags2 & INP_REUSEPORT) ||
-				     (t->inp_flags2 & INP_REUSEPORT_LB) == 0) &&
+				     (t->inp_flags2 & INP_REUSEPORT) == 0) &&
 				    (inp->inp_cred->cr_uid !=
 				     t->inp_cred->cr_uid))
 					return (EADDRINUSE);
@@ -275,39 +264,34 @@ in6_pcbbind(struct inpcb *inp, struct sockaddr *nam,
 				 */
 				tw = intotw(t);
 				if (tw == NULL ||
-				    ((reuseport & tw->tw_so_options) == 0 &&
-					 (reuseport_lb & tw->tw_so_options) == 0))
+				    (reuseport & tw->tw_so_options) == 0)
 					return (EADDRINUSE);
-			} else if (t && (reuseport & inp_so_options(t)) == 0 &&
-					   (reuseport_lb & inp_so_options(t)) == 0) {
+			} else if (t && (reuseport & inp_so_options(t)) == 0) {
 				return (EADDRINUSE);
 			}
 #ifdef INET
 			if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0 &&
-				IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
+			    IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
 				struct sockaddr_in sin;
 
 				in6_sin6_2_sin(&sin, sin6);
 				t = in_pcblookup_local(pcbinfo, sin.sin_addr,
-									   lport, lookupflags, cred);
+				    lport, lookupflags, cred);
 				if (t && t->inp_flags & INP_TIMEWAIT) {
 					tw = intotw(t);
 					if (tw == NULL)
 						return (EADDRINUSE);
 					if ((reuseport & tw->tw_so_options) == 0
-						&& (reuseport_lb & tw->tw_so_options) == 0
-						&& (ntohl(t->inp_laddr.s_addr) !=
-							INADDR_ANY || ((inp->inp_vflag &
-											INP_IPV6PROTO) ==
-										   (t->inp_vflag & INP_IPV6PROTO))))
+					    && (ntohl(t->inp_laddr.s_addr) !=
+					     INADDR_ANY || ((inp->inp_vflag &
+					     INP_IPV6PROTO) ==
+					     (t->inp_vflag & INP_IPV6PROTO))))
 						return (EADDRINUSE);
 				} else if (t &&
-						   (reuseport & inp_so_options(t)) == 0 &&
-						   (reuseport_lb & inp_so_options(t)) == 0 &&
-						   (ntohl(t->inp_laddr.s_addr) != INADDR_ANY ||
-							(t->inp_vflag & INP_IPV6PROTO) != 0)) {
+				    (reuseport & inp_so_options(t)) == 0 &&
+				    (ntohl(t->inp_laddr.s_addr) != INADDR_ANY ||
+				    (t->inp_vflag & INP_IPV6PROTO) != 0))
 					return (EADDRINUSE);
-				}
 			}
 #endif
 		}
@@ -872,54 +856,6 @@ in6_rtchange(struct inpcb *inp, int errno)
 	return inp;
 }
 
-static struct inpcb *
-in6_pcblookup_lbgroup(const struct inpcbinfo *pcbinfo,
-  const struct in6_addr *laddr, uint16_t lport, const struct in6_addr *faddr,
-  uint16_t fport, int lookupflags)
-{
-	struct inpcb *local_wild = NULL;
-	const struct inpcblbgrouphead *hdr;
-	struct inpcblbgroup *grp;
-	struct inpcblbgroup *grp_local_wild;
-
-	hdr = &pcbinfo->ipi_lbgrouphashbase[
-		  INP_PCBLBGROUP_PORTHASH(lport, pcbinfo->ipi_lbgrouphashmask)];
-
-	/*
-	 * Order of socket selection:
-	 * 1. non-wild.
-	 * 2. wild (if lookupflags contains INPLOOKUP_WILDCARD).
-	 *
-	 * NOTE:
-	 * - Load balanced group does not contain jailed sockets
-	 * - Load balanced does not contain IPv4 mapped INET6 wild sockets
-	 */
-	LIST_FOREACH(grp, hdr, il_list) {
-
-		if (grp->il_lport == lport) {
-			uint32_t idx = 0;
-			int pkt_hash = INP_PCBLBGROUP_PKTHASH(
-						       INP6_PCBHASHKEY(faddr), lport, fport);
-
-			idx = pkt_hash % grp->il_inpcnt;
-
-			if (IN6_ARE_ADDR_EQUAL(&grp->il6_laddr, laddr)) {
-				return grp->il_inp[idx];
-			} else {
-				if (IN6_IS_ADDR_UNSPECIFIED(&grp->il6_laddr) &&
-					(lookupflags & INPLOOKUP_WILDCARD)) {
-					local_wild = grp->il_inp[idx];
-					grp_local_wild = grp;
-				}
-			}
-		}
-	}
-	if (local_wild != NULL) {
-		return local_wild;
-	}
-	return NULL;
-}
-
 #ifdef PCBGROUP
 /*
  * Lookup PCB in hash list, using pcbgroup tables.
@@ -1121,8 +1057,6 @@ found:

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***


More information about the svn-src-head mailing list