svn commit: r332967 - in head: cddl/lib/libdtrace sys/kern sys/netinet sys/netinet6 sys/sys
Sean Bruno
sbruno at FreeBSD.org
Tue Apr 24 19:55:14 UTC 2018
Author: sbruno
Date: Tue Apr 24 19:55:12 2018
New Revision: 332967
URL: https://svnweb.freebsd.org/changeset/base/332967
Log:
Revert r332894 at the request of the submitter.
Submitted by: Johannes Lundberg <johalun0_gmail.com>
Sponsored by: Limelight Networks
Modified:
head/cddl/lib/libdtrace/tcp.d
head/sys/kern/uipc_debug.c
head/sys/kern/uipc_socket.c
head/sys/netinet/in_pcb.c
head/sys/netinet/in_pcb.h
head/sys/netinet/ip_output.c
head/sys/netinet/tcp_subr.c
head/sys/netinet/udp_usrreq.c
head/sys/netinet6/in6_pcb.c
head/sys/netinet6/in6_src.c
head/sys/netinet6/ip6_output.c
head/sys/netinet6/udp6_usrreq.c
head/sys/sys/socket.h
head/sys/sys/socketvar.h
Modified: head/cddl/lib/libdtrace/tcp.d
==============================================================================
--- head/cddl/lib/libdtrace/tcp.d Tue Apr 24 19:51:05 2018 (r332966)
+++ head/cddl/lib/libdtrace/tcp.d Tue Apr 24 19:55:12 2018 (r332967)
@@ -192,12 +192,12 @@ translator tcpsinfo_t < struct tcpcb *p > {
tcps_rport = p == NULL ? 0 : ntohs(p->t_inpcb->inp_inc.inc_ie.ie_fport);
tcps_laddr = p == NULL ? 0 :
p->t_inpcb->inp_vflag == INP_IPV4 ?
- inet_ntoa(&p->t_inpcb->inp_inc.inc_ie.ie_dependladdr.id46_addr.ia46_addr4.s_addr) :
- inet_ntoa6(&p->t_inpcb->inp_inc.inc_ie.ie_dependladdr.id6_addr);
+ inet_ntoa(&p->t_inpcb->inp_inc.inc_ie.ie_dependladdr.ie46_local.ia46_addr4.s_addr) :
+ inet_ntoa6(&p->t_inpcb->inp_inc.inc_ie.ie_dependladdr.ie6_local);
tcps_raddr = p == NULL ? 0 :
p->t_inpcb->inp_vflag == INP_IPV4 ?
- inet_ntoa(&p->t_inpcb->inp_inc.inc_ie.ie_dependfaddr.id46_addr.ia46_addr4.s_addr) :
- inet_ntoa6(&p->t_inpcb->inp_inc.inc_ie.ie_dependfaddr.id6_addr);
+ inet_ntoa(&p->t_inpcb->inp_inc.inc_ie.ie_dependfaddr.ie46_foreign.ia46_addr4.s_addr) :
+ inet_ntoa6(&p->t_inpcb->inp_inc.inc_ie.ie_dependfaddr.ie6_foreign);
tcps_state = p == NULL ? -1 : p->t_state;
tcps_iss = p == NULL ? 0 : p->iss;
tcps_irs = p == NULL ? 0 : p->irs;
Modified: head/sys/kern/uipc_debug.c
==============================================================================
--- head/sys/kern/uipc_debug.c Tue Apr 24 19:51:05 2018 (r332966)
+++ head/sys/kern/uipc_debug.c Tue Apr 24 19:55:12 2018 (r332967)
@@ -77,7 +77,7 @@ db_print_sotype(short so_type)
}
static void
-db_print_sooptions(int so_options)
+db_print_sooptions(short so_options)
{
int comma;
@@ -120,10 +120,6 @@ db_print_sooptions(int so_options)
}
if (so_options & SO_REUSEPORT) {
db_printf("%sSO_REUSEPORT", comma ? ", " : "");
- comma = 1;
- }
- if (so_options & SO_REUSEPORT_LB) {
- db_printf("%sSO_REUSEPORT_LB", comma ? ", " : "");
comma = 1;
}
if (so_options & SO_TIMESTAMP) {
Modified: head/sys/kern/uipc_socket.c
==============================================================================
--- head/sys/kern/uipc_socket.c Tue Apr 24 19:51:05 2018 (r332966)
+++ head/sys/kern/uipc_socket.c Tue Apr 24 19:55:12 2018 (r332967)
@@ -1057,100 +1057,6 @@ sofree(struct socket *so)
}
/*
- * Let socket in same load balance group (same port and address)
- * inherit pending sockets of the closing socket.
- *
- * "so_inh" will inherit sockets from "so"
- */
-void
-soinherit(struct socket *so, struct socket *so_inh)
-{
- TAILQ_HEAD(, socket) comp, incomp;
- struct socket *sp, *head, *head_inh;
- int qlen, incqlen;
-
- KASSERT(so->so_options & SO_ACCEPTCONN,
- ("so does not accept connection"));
- KASSERT(so_inh->so_options & SO_ACCEPTCONN,
- ("so_inh does not accept connection"));
-
-
-restart:
- SOCK_LOCK(so);
- if ((head = so->so_listen) != NULL &&
- __predict_false(SOLISTEN_TRYLOCK(head) == 0)) {
- SOCK_UNLOCK(so);
- goto restart;
- }
-
-restart_inh:
- SOCK_LOCK(so_inh);
- if ((head_inh = so_inh->so_listen) != NULL &&
- __predict_false(SOLISTEN_TRYLOCK(head_inh) == 0)) {
- SOCK_UNLOCK(so_inh);
- goto restart_inh;
- }
-
- TAILQ_INIT(&comp);
- TAILQ_INIT(&incomp);
-
- /*
- * Save completed queue and incompleted queue
- */
- TAILQ_CONCAT(&comp, &so->sol_comp, so_list);
- qlen = so->sol_qlen;
- so->sol_qlen = 0;
-
- TAILQ_CONCAT(&incomp, &so->sol_incomp, so_list);
- incqlen = so->sol_incqlen;
- so->sol_incqlen = 0;
-
- /*
- * Append the saved completed queue and incompleted
- * queue to the socket inherits them.
- *
- * XXX
- * This may temporarily break the inheriting socket's
- * so_qlimit.
- */
- TAILQ_FOREACH(sp, &comp, so_list) {
- refcount_acquire(&so_inh->so_count);
- sp->so_listen = so_inh;
- crfree(sp->so_cred);
- sp->so_cred = crhold(so_inh->so_cred);
- }
-
- TAILQ_FOREACH(sp, &incomp, so_list) {
- refcount_acquire(&so_inh->so_count);
- sp->so_listen = so_inh;
- crfree(sp->so_cred);
- sp->so_cred = crhold(so_inh->so_cred);
- }
-
- TAILQ_CONCAT(&so_inh->sol_comp, &comp, so_list);
- so_inh->sol_qlen += qlen;
-
- TAILQ_CONCAT(&so_inh->sol_incomp, &incomp, so_list);
- so_inh->sol_incqlen += incqlen;
-
- SOCK_UNLOCK(so);
- if(head != NULL)
- SOLISTEN_UNLOCK(head);
-
- SOCK_UNLOCK(so_inh);
- if(head_inh != NULL) {
- if(qlen > 0) {
- /*
- * "New" connections have arrived
- */
- solisten_wakeup(head_inh);
- } else {
- SOLISTEN_UNLOCK(head_inh);
- }
- }
-}
-
-/*
* Close a socket on last file table reference removal. Initiate disconnect
* if connected. Free socket when disconnect complete.
*
@@ -2870,7 +2776,6 @@ sosetopt(struct socket *so, struct sockopt *sopt)
case SO_BROADCAST:
case SO_REUSEADDR:
case SO_REUSEPORT:
- case SO_REUSEPORT_LB:
case SO_OOBINLINE:
case SO_TIMESTAMP:
case SO_BINTIME:
@@ -3089,7 +2994,6 @@ sogetopt(struct socket *so, struct sockopt *sopt)
case SO_KEEPALIVE:
case SO_REUSEADDR:
case SO_REUSEPORT:
- case SO_REUSEPORT_LB:
case SO_BROADCAST:
case SO_OOBINLINE:
case SO_ACCEPTCONN:
Modified: head/sys/netinet/in_pcb.c
==============================================================================
--- head/sys/netinet/in_pcb.c Tue Apr 24 19:51:05 2018 (r332966)
+++ head/sys/netinet/in_pcb.c Tue Apr 24 19:55:12 2018 (r332967)
@@ -108,9 +108,6 @@ __FBSDID("$FreeBSD$");
#include <security/mac/mac_framework.h>
-#define INPCBLBGROUP_SIZMIN 8
-#define INPCBLBGROUP_SIZMAX 256
-
static struct callout ipport_tick_callout;
/*
@@ -220,186 +217,7 @@ SYSCTL_INT(_net_inet_ip_portrange, OID_AUTO, randomtim
* functions often modify hash chains or addresses in pcbs.
*/
-static struct inpcblbgroup *
-in_pcblbgroup_alloc(struct inpcblbgrouphead *hdr, u_char vflag,
- uint16_t port, const union in_dependaddr *addr, int size)
-{
- struct inpcblbgroup *grp;
-
- size_t bytes = __offsetof(struct inpcblbgroup, il_inp[size]);
- grp = malloc(bytes, M_PCB, M_ZERO | M_NOWAIT);
- if(!grp)
- return NULL;
- grp->il_vflag = vflag;
- grp->il_lport = port;
- grp->il_dependladdr = *addr;
- grp->il_inpsiz = size;
- LIST_INSERT_HEAD(hdr, grp, il_list);
-
- return grp;
-}
-
-static void
-in_pcblbgroup_free(struct inpcblbgroup *grp)
-{
- LIST_REMOVE(grp, il_list);
- free(grp, M_TEMP);
-}
-
-static struct inpcblbgroup *
-in_pcblbgroup_resize(struct inpcblbgrouphead *hdr,
- struct inpcblbgroup *old_grp, int size)
-{
- struct inpcblbgroup *grp;
- int i;
-
- grp = in_pcblbgroup_alloc(hdr, old_grp->il_vflag,
- old_grp->il_lport, &old_grp->il_dependladdr, size);
- if(!grp)
- return NULL;
-
- KASSERT(old_grp->il_inpcnt < grp->il_inpsiz,
- ("invalid new local group size %d and old local group count %d",
- grp->il_inpsiz, old_grp->il_inpcnt));
- for (i = 0; i < old_grp->il_inpcnt; ++i)
- grp->il_inp[i] = old_grp->il_inp[i];
- grp->il_inpcnt = old_grp->il_inpcnt;
-
- in_pcblbgroup_free(old_grp);
-
- return grp;
-}
-
/*
- * Add PCB to lb group (load balance used by SO_REUSEPORT_LB)
- */
-static int
-in_pcbinslbgrouphash(struct inpcb *inp, struct inpcbinfo *pcbinfo)
-{
- struct inpcblbgrouphead *hdr;
- struct inpcblbgroup *grp;
-
- uint16_t hashmask = pcbinfo->ipi_lbgrouphashmask;
- uint16_t lport = inp->inp_lport;
- uint32_t group_index = INP_PCBLBGROUP_PORTHASH(lport, hashmask);
-
- hdr = &pcbinfo->ipi_lbgrouphashbase[group_index];
-
- struct ucred *cred;
-
- if (pcbinfo->ipi_lbgrouphashbase == NULL)
- return 0;
-
- /*
- * don't allow jailed socket to join local group
- */
- if (inp->inp_socket != NULL)
- cred = inp->inp_socket->so_cred;
- else
- cred = NULL;
- if (cred != NULL && jailed(cred))
- return 0;
-
-#ifdef INET6
- /*
- * don't allow IPv4 mapped INET6 wild socket
- */
- if ((inp->inp_vflag & INP_IPV4) &&
- inp->inp_laddr.s_addr == INADDR_ANY &&
- INP_CHECK_SOCKAF(inp->inp_socket, AF_INET6)) {
- return 0;
- }
-#endif
-
- hdr = &pcbinfo->ipi_lbgrouphashbase[
- INP_PCBLBGROUP_PORTHASH(inp->inp_lport, pcbinfo->ipi_lbgrouphashmask)];
-
- LIST_FOREACH(grp, hdr, il_list) {
- if (grp->il_vflag == inp->inp_vflag &&
- grp->il_lport == inp->inp_lport &&
- memcmp(&grp->il_dependladdr,
- &inp->inp_inc.inc_ie.ie_dependladdr,
- sizeof(grp->il_dependladdr)) == 0) {
- break;
- }
- }
- if (grp == NULL) {
- /* Create new load balance group */
- grp = in_pcblbgroup_alloc(hdr, inp->inp_vflag,
- inp->inp_lport, &inp->inp_inc.inc_ie.ie_dependladdr,
- INPCBLBGROUP_SIZMIN);
- if(!grp)
- return (ENOBUFS);
- } else if (grp->il_inpcnt == grp->il_inpsiz) {
- if (grp->il_inpsiz >= INPCBLBGROUP_SIZMAX) {
- static int limit_logged = 0;
-
- if (!limit_logged) {
- limit_logged = 1;
- printf("lb group port %d, "
- "limit reached\n", ntohs(grp->il_lport));
- }
- return 0;
- }
-
- /* Expand this local group */
- grp = in_pcblbgroup_resize(hdr, grp, grp->il_inpsiz * 2);
- if(!grp)
- return (ENOBUFS);
- }
-
- KASSERT(grp->il_inpcnt < grp->il_inpsiz,
- ("invalid local group size %d and count %d",
- grp->il_inpsiz, grp->il_inpcnt));
-
- grp->il_inp[grp->il_inpcnt] = inp;
- grp->il_inpcnt++;
- return 0;
-}
-
-static void
-in_pcbremlbgrouphash(struct inpcb *inp, struct inpcbinfo *pcbinfo)
-{
- struct inpcblbgrouphead *hdr;
- struct inpcblbgroup *grp;
-
- if (pcbinfo->ipi_lbgrouphashbase == NULL)
- return;
-
- hdr = &pcbinfo->ipi_lbgrouphashbase[
- INP_PCBLBGROUP_PORTHASH(inp->inp_lport, pcbinfo->ipi_lbgrouphashmask)];
-
- LIST_FOREACH(grp, hdr, il_list) {
- int i;
-
- for (i = 0; i < grp->il_inpcnt; ++i) {
- if (grp->il_inp[i] != inp)
- continue;
-
- if (grp->il_inpcnt == 1) {
- /* Free this local group */
- in_pcblbgroup_free(grp);
- } else {
- /* Pull up inpcbs */
- for (; i + 1 < grp->il_inpcnt; ++i)
- grp->il_inp[i] = grp->il_inp[i + 1];
- grp->il_inpcnt--;
-
- if (grp->il_inpsiz > INPCBLBGROUP_SIZMIN &&
- grp->il_inpcnt <= (grp->il_inpsiz / 4)) {
- /* Shrink this local group */
- struct inpcblbgroup *new_grp =
- in_pcblbgroup_resize(hdr, grp, grp->il_inpsiz / 2);
- if(new_grp)
- grp = new_grp;
- }
- }
- return;
- }
- }
-}
-
-/*
* Different protocols initialize their inpcbs differently - giving
* different name to the lock. But they all are disposed the same.
*/
@@ -434,8 +252,6 @@ in_pcbinfo_init(struct inpcbinfo *pcbinfo, const char
&pcbinfo->ipi_hashmask);
pcbinfo->ipi_porthashbase = hashinit(porthash_nelements, M_PCB,
&pcbinfo->ipi_porthashmask);
- pcbinfo->ipi_lbgrouphashbase = hashinit(hash_nelements, M_PCB,
- &pcbinfo->ipi_lbgrouphashmask);
#ifdef PCBGROUP
in_pcbgroup_init(pcbinfo, hashfields, hash_nelements);
#endif
@@ -459,8 +275,6 @@ in_pcbinfo_destroy(struct inpcbinfo *pcbinfo)
hashdestroy(pcbinfo->ipi_hashbase, M_PCB, pcbinfo->ipi_hashmask);
hashdestroy(pcbinfo->ipi_porthashbase, M_PCB,
pcbinfo->ipi_porthashmask);
- hashdestroy(pcbinfo->ipi_lbgrouphashbase, M_PCB,
- pcbinfo->ipi_lbgrouphashmask);
#ifdef PCBGROUP
in_pcbgroup_destroy(pcbinfo);
#endif
@@ -699,20 +513,18 @@ in_pcb_lport(struct inpcb *inp, struct in_addr *laddrp
/*
* Return cached socket options.
*/
-int
+short
inp_so_options(const struct inpcb *inp)
{
- int so_options;
+ short so_options;
- so_options = 0;
+ so_options = 0;
- if ((inp->inp_flags2 & INP_REUSEPORT_LB) != 0)
- so_options |= SO_REUSEPORT_LB;
- if ((inp->inp_flags2 & INP_REUSEPORT) != 0)
- so_options |= SO_REUSEPORT;
- if ((inp->inp_flags2 & INP_REUSEADDR) != 0)
- so_options |= SO_REUSEADDR;
- return (so_options);
+ if ((inp->inp_flags2 & INP_REUSEPORT) != 0)
+ so_options |= SO_REUSEPORT;
+ if ((inp->inp_flags2 & INP_REUSEADDR) != 0)
+ so_options |= SO_REUSEADDR;
+ return (so_options);
}
#endif /* INET || INET6 */
@@ -769,12 +581,6 @@ in_pcbbind_setup(struct inpcb *inp, struct sockaddr *n
int error;
/*
- * XXX Maybe we could let SO_REUSEPORT_LB set SO_REUSEPORT bit here
- * so that we don't have to add to the (already messy) code below
- */
- int reuseport_lb = (so->so_options & SO_REUSEPORT_LB);
-
- /*
* No state changes, so read locks are sufficient here.
*/
INP_LOCK_ASSERT(inp);
@@ -785,7 +591,7 @@ in_pcbbind_setup(struct inpcb *inp, struct sockaddr *n
laddr.s_addr = *laddrp;
if (nam != NULL && laddr.s_addr != INADDR_ANY)
return (EINVAL);
- if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT|SO_REUSEPORT_LB)) == 0)
+ if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0)
lookupflags = INPLOOKUP_WILDCARD;
if (nam == NULL) {
if ((error = prison_local_ip4(cred, &laddr)) != 0)
@@ -822,20 +628,16 @@ in_pcbbind_setup(struct inpcb *inp, struct sockaddr *n
*/
if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) != 0)
reuseport = SO_REUSEADDR|SO_REUSEPORT;
- // XXX: How to deal with SO_REUSEPORT_LB here?
- // Added equivalent treatment as SO_REUSEPORT here for now
- if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT_LB)) != 0)
- reuseport_lb = SO_REUSEADDR|SO_REUSEPORT_LB;
} else if (sin->sin_addr.s_addr != INADDR_ANY) {
sin->sin_port = 0; /* yech... */
bzero(&sin->sin_zero, sizeof(sin->sin_zero));
/*
- * Is the address a local IP address?
+ * Is the address a local IP address?
* If INP_BINDANY is set, then the socket may be bound
* to any endpoint address, local or not.
*/
if ((inp->inp_flags & INP_BINDANY) == 0 &&
- ifa_ifwithaddr_check((struct sockaddr *)sin) == 0)
+ ifa_ifwithaddr_check((struct sockaddr *)sin) == 0)
return (EADDRNOTAVAIL);
}
laddr = sin->sin_addr;
@@ -865,8 +667,7 @@ in_pcbbind_setup(struct inpcb *inp, struct sockaddr *n
ntohl(t->inp_faddr.s_addr) == INADDR_ANY) &&
(ntohl(sin->sin_addr.s_addr) != INADDR_ANY ||
ntohl(t->inp_laddr.s_addr) != INADDR_ANY ||
- (t->inp_flags2 & INP_REUSEPORT) ||
- (t->inp_flags2 & INP_REUSEPORT_LB) == 0) &&
+ (t->inp_flags2 & INP_REUSEPORT) == 0) &&
(inp->inp_cred->cr_uid !=
t->inp_cred->cr_uid))
return (EADDRINUSE);
@@ -891,14 +692,11 @@ in_pcbbind_setup(struct inpcb *inp, struct sockaddr *n
*/
tw = intotw(t);
if (tw == NULL ||
- ((reuseport & tw->tw_so_options) == 0 &&
- (reuseport_lb & tw->tw_so_options) == 0)) {
+ (reuseport & tw->tw_so_options) == 0)
return (EADDRINUSE);
- }
} else if (t &&
- ((inp->inp_flags2 & INP_BINDMULTI) == 0) &&
- (reuseport & inp_so_options(t)) == 0 &&
- (reuseport_lb & inp_so_options(t)) == 0) {
+ ((inp->inp_flags2 & INP_BINDMULTI) == 0) &&
+ (reuseport & inp_so_options(t)) == 0) {
#ifdef INET6
if (ntohl(sin->sin_addr.s_addr) !=
INADDR_ANY ||
@@ -907,7 +705,7 @@ in_pcbbind_setup(struct inpcb *inp, struct sockaddr *n
(inp->inp_vflag & INP_IPV6PROTO) == 0 ||
(t->inp_vflag & INP_IPV6PROTO) == 0)
#endif
- return (EADDRINUSE);
+ return (EADDRINUSE);
if (t && (! in_pcbbind_check_bindmulti(inp, t)))
return (EADDRINUSE);
}
@@ -1611,7 +1409,6 @@ in_pcbdrop(struct inpcb *inp)
struct inpcbport *phd = inp->inp_phd;
INP_HASH_WLOCK(inp->inp_pcbinfo);
- in_pcbremlbgrouphash(inp, inp->inp_pcbinfo);
LIST_REMOVE(inp, inp_hash);
LIST_REMOVE(inp, inp_portlist);
if (LIST_FIRST(&phd->phd_pcblist) == NULL) {
@@ -1872,98 +1669,6 @@ in_pcblookup_local(struct inpcbinfo *pcbinfo, struct i
}
#undef INP_LOOKUP_MAPPED_PCB_COST
-struct inpcb *
-in_pcblookup_lbgroup_last(const struct inpcb *inp)
-{
- const struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
- const struct inpcblbgrouphead *hdr;
- const struct inpcblbgroup *grp;
- int i;
-
- if (pcbinfo->ipi_lbgrouphashbase == NULL)
- return NULL;
-
- hdr = &pcbinfo->ipi_lbgrouphashbase[
- INP_PCBLBGROUP_PORTHASH(inp->inp_lport, pcbinfo->ipi_lbgrouphashmask)];
-
- LIST_FOREACH(grp, hdr, il_list) {
- if (grp->il_vflag == inp->inp_vflag &&
- grp->il_lport == inp->inp_lport &&
- memcmp(&grp->il_dependladdr,
- &inp->inp_inc.inc_ie.ie_dependladdr,
- sizeof(grp->il_dependladdr)) == 0) {
- break;
- }
- }
- if (grp == NULL || grp->il_inpcnt == 1)
- return NULL;
-
- KASSERT(grp->il_inpcnt >= 2,
- ("invalid lbgroup inp count %d", grp->il_inpcnt));
- for (i = 0; i < grp->il_inpcnt; ++i) {
- if (grp->il_inp[i] == inp) {
- int last = grp->il_inpcnt - 1;
-
- if (i == last)
- last = grp->il_inpcnt - 2;
- return grp->il_inp[last];
- }
- }
- return NULL;
-}
-
-static struct inpcb *
-in_pcblookup_lbgroup(const struct inpcbinfo *pcbinfo,
- const struct in_addr *laddr, uint16_t lport, const struct in_addr *faddr,
- uint16_t fport, int lookupflags)
-{
- struct inpcb *local_wild = NULL;
- const struct inpcblbgrouphead *hdr;
- struct inpcblbgroup *grp;
- struct inpcblbgroup *grp_local_wild;
-
- hdr = &pcbinfo->ipi_lbgrouphashbase[
- INP_PCBLBGROUP_PORTHASH(lport, pcbinfo->ipi_lbgrouphashmask)];
-
- /*
- * Order of socket selection:
- * 1. non-wild.
- * 2. wild (if lookupflags contains INPLOOKUP_WILDCARD).
- *
- * NOTE:
- * - Load balanced group does not contain jailed sockets
- * - Load balanced group does not contain IPv4 mapped INET6 wild sockets
- */
- LIST_FOREACH(grp, hdr, il_list) {
-#ifdef INET6
- if (!(grp->il_vflag & INP_IPV4))
- continue;
-#endif
-
- if (grp->il_lport == lport) {
-
- uint32_t idx = 0;
- int pkt_hash = INP_PCBLBGROUP_PKTHASH(faddr->s_addr, lport, fport);
-
- idx = pkt_hash % grp->il_inpcnt;
-
- if (grp->il_laddr.s_addr == laddr->s_addr) {
- return grp->il_inp[idx];
- } else {
- if (grp->il_laddr.s_addr == INADDR_ANY &&
- (lookupflags & INPLOOKUP_WILDCARD)) {
- local_wild = grp->il_inp[idx];
- grp_local_wild = grp;
- }
- }
- }
- }
- if (local_wild != NULL) {
- return local_wild;
- }
- return NULL;
-}
-
#ifdef PCBGROUP
/*
* Lookup PCB in hash list, using pcbgroup tables.
@@ -2243,18 +1948,6 @@ in_pcblookup_hash_locked(struct inpcbinfo *pcbinfo, st
return (tmpinp);
/*
- * Then look in lb group (for wildcard match)
- */
- if (pcbinfo->ipi_lbgrouphashbase != NULL &&
- (lookupflags & INPLOOKUP_WILDCARD)) {
- inp = in_pcblookup_lbgroup(pcbinfo, &laddr, lport, &faddr, fport,
- lookupflags);
- if (inp != NULL) {
- return inp;
- }
- }
-
- /*
* Then look for a wildcard match, if requested.
*/
if ((lookupflags & INPLOOKUP_WILDCARD) != 0) {
@@ -2471,7 +2164,6 @@ in_pcbinshash_internal(struct inpcb *inp, int do_pcbgr
struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
struct inpcbport *phd;
u_int32_t hashkey_faddr;
- int so_options;
INP_WLOCK_ASSERT(inp);
INP_HASH_WLOCK_ASSERT(pcbinfo);
@@ -2492,21 +2184,7 @@ in_pcbinshash_internal(struct inpcb *inp, int do_pcbgr
pcbporthash = &pcbinfo->ipi_porthashbase[
INP_PCBPORTHASH(inp->inp_lport, pcbinfo->ipi_porthashmask)];
-
/*
- * Add entry in lb group
- * Only do this if SO_REUSEPORT_LB is set
- */
- so_options = inp_so_options(inp);
- if(so_options & SO_REUSEPORT_LB) {
- int ret = in_pcbinslbgrouphash(inp, pcbinfo);
- if(ret) {
- // pcb lb group malloc fail (ret=ENOBUFS)
- return ret;
- }
- }
-
- /*
* Go through port list and look for a head for this lport.
*/
LIST_FOREACH(phd, pcbporthash, phd_hash) {
@@ -2632,10 +2310,6 @@ in_pcbremlists(struct inpcb *inp)
struct inpcbport *phd = inp->inp_phd;
INP_HASH_WLOCK(pcbinfo);
-
- // XXX Only do if SO_REUSEPORT_LB set?
- in_pcbremlbgrouphash(inp, pcbinfo);
-
LIST_REMOVE(inp, inp_hash);
LIST_REMOVE(inp, inp_portlist);
if (LIST_FIRST(&phd->phd_pcblist) == NULL) {
Modified: head/sys/netinet/in_pcb.h
==============================================================================
--- head/sys/netinet/in_pcb.h Tue Apr 24 19:51:05 2018 (r332966)
+++ head/sys/netinet/in_pcb.h Tue Apr 24 19:55:12 2018 (r332967)
@@ -78,11 +78,6 @@ struct in_addr_4in6 {
struct in_addr ia46_addr4;
};
-union in_dependaddr {
- struct in_addr_4in6 id46_addr;
- struct in6_addr id6_addr;
-};
-
/*
* NOTE: ipv6 addrs should be 64-bit aligned, per RFC 2553. in_conninfo has
* some extra padding to accomplish this.
@@ -93,14 +88,22 @@ struct in_endpoints {
u_int16_t ie_fport; /* foreign port */
u_int16_t ie_lport; /* local port */
/* protocol dependent part, local and foreign addr */
- union in_dependaddr ie_dependfaddr; /* foreign host table entry */
- union in_dependaddr ie_dependladdr; /* local host table entry */
-#define ie_faddr ie_dependfaddr.id46_addr.ia46_addr4
-#define ie_laddr ie_dependladdr.id46_addr.ia46_addr4
-#define ie6_faddr ie_dependfaddr.id6_addr
-#define ie6_laddr ie_dependladdr.id6_addr
+ union {
+ /* foreign host table entry */
+ struct in_addr_4in6 ie46_foreign;
+ struct in6_addr ie6_foreign;
+ } ie_dependfaddr;
+ union {
+ /* local host table entry */
+ struct in_addr_4in6 ie46_local;
+ struct in6_addr ie6_local;
+ } ie_dependladdr;
u_int32_t ie6_zoneid; /* scope zone id */
};
+#define ie_faddr ie_dependfaddr.ie46_foreign.ia46_addr4
+#define ie_laddr ie_dependladdr.ie46_local.ia46_addr4
+#define ie6_faddr ie_dependfaddr.ie6_foreign
+#define ie6_laddr ie_dependladdr.ie6_local
/*
* XXX The defines for inc_* are hacks and should be changed to direct
@@ -404,21 +407,6 @@ struct inpcbport {
u_short phd_port;
};
-struct inpcblbgroup {
- LIST_ENTRY(inpcblbgroup) il_list;
- uint16_t il_lport;
- u_char il_vflag;
- u_char il_pad;
- uint32_t il_pad2;
- union in_dependaddr il_dependladdr;
-#define il_laddr il_dependladdr.id46_addr.ia46_addr4
-#define il6_laddr il_dependladdr.id6_addr
- uint32_t il_inpsiz; /* size of il_inp[] */
- uint32_t il_inpcnt; /* # of elem in il_inp[] */
- struct inpcb *il_inp[];
-};
-LIST_HEAD(inpcblbgrouphead, inpcblbgroup);
-
/*-
* Global data structure for each high-level protocol (UDP, TCP, ...) in both
* IPv4 and IPv6. Holds inpcb lists and information for managing them.
@@ -512,13 +500,6 @@ struct inpcbinfo {
u_long ipi_wildmask; /* (p) */
/*
- * Load balanced group used by the SO_REUSEPORT_LB option,
- * hashed by local address and local port.
- */
- struct inpcblbgrouphead *ipi_lbgrouphashbase;
- u_long ipi_lbgrouphashmask;
-
- /*
* Pointer to network stack instance
*/
struct vnet *ipi_vnet; /* (c) */
@@ -604,7 +585,7 @@ struct tcpcb *
inp_inpcbtotcpcb(struct inpcb *inp);
void inp_4tuple_get(struct inpcb *inp, uint32_t *laddr, uint16_t *lp,
uint32_t *faddr, uint16_t *fp);
-int inp_so_options(const struct inpcb *inp);
+short inp_so_options(const struct inpcb *inp);
#endif /* _KERNEL */
@@ -667,10 +648,6 @@ int inp_so_options(const struct inpcb *inp);
(((faddr) ^ ((faddr) >> 16) ^ ntohs((lport) ^ (fport))) & (mask))
#define INP_PCBPORTHASH(lport, mask) \
(ntohs((lport)) & (mask))
-#define INP_PCBLBGROUP_PORTHASH(lport, mask) \
- (ntohs((lport)) & (mask))
-#define INP_PCBLBGROUP_PKTHASH(faddr, lport, fport) \
- ((faddr) ^ ((faddr) >> 16) ^ ntohs((lport) ^ (fport)))
#define INP6_PCBHASHKEY(faddr) ((faddr)->s6_addr32[3])
/*
@@ -739,7 +716,6 @@ int inp_so_options(const struct inpcb *inp);
#define INP_RATE_LIMIT_CHANGED 0x00000400 /* rate limit needs attention */
#define INP_ORIGDSTADDR 0x00000800 /* receive IP dst address/port */
#define INP_CANNOT_DO_ECN 0x00001000 /* The stack does not do ECN */
-#define INP_REUSEPORT_LB 0x00002000 /* SO_REUSEPORT_LB option is set */
/*
* Flags passed to in_pcblookup*() functions.
@@ -842,8 +818,6 @@ struct inpcb *
struct inpcb *
in_pcblookup(struct inpcbinfo *, struct in_addr, u_int,
struct in_addr, u_int, int, struct ifnet *);
-struct inpcb *
- in_pcblookup_lbgroup_last(const struct inpcb *inp);
struct inpcb *
in_pcblookup_mbuf(struct inpcbinfo *, struct in_addr, u_int,
struct in_addr, u_int, int, struct ifnet *, struct mbuf *);
Modified: head/sys/netinet/ip_output.c
==============================================================================
--- head/sys/netinet/ip_output.c Tue Apr 24 19:51:05 2018 (r332966)
+++ head/sys/netinet/ip_output.c Tue Apr 24 19:55:12 2018 (r332967)
@@ -986,15 +986,6 @@ ip_ctloutput(struct socket *so, struct sockopt *sopt)
INP_WUNLOCK(inp);
error = 0;
break;
- case SO_REUSEPORT_LB:
- INP_WLOCK(inp);
- if ((so->so_options & SO_REUSEPORT_LB) != 0)
- inp->inp_flags2 |= INP_REUSEPORT_LB;
- else
- inp->inp_flags2 &= ~INP_REUSEPORT_LB;
- INP_WUNLOCK(inp);
- error = 0;
- break;
case SO_SETFIB:
INP_WLOCK(inp);
inp->inp_inc.inc_fibnum = so->so_fibnum;
Modified: head/sys/netinet/tcp_subr.c
==============================================================================
--- head/sys/netinet/tcp_subr.c Tue Apr 24 19:51:05 2018 (r332966)
+++ head/sys/netinet/tcp_subr.c Tue Apr 24 19:55:12 2018 (r332967)
@@ -1956,28 +1956,10 @@ tcp_close(struct tcpcb *tp)
{
struct inpcb *inp = tp->t_inpcb;
struct socket *so;
- struct inpcb *inp_inh = NULL;
- int listen = tp->t_state & TCPS_LISTEN;
INP_INFO_LOCK_ASSERT(&V_tcbinfo);
INP_WLOCK_ASSERT(inp);
- if (listen) {
- /*
- * Pending socket/syncache inheritance
- *
- * If this is a listen(2) socket, find another listen(2)
- * socket in the same local group, which could inherit
- * the syncache and sockets pending on the completion
- * and incompletion queues.
- *
- * NOTE:
- * Currently the inheritance could only happen on the
- * listen(2) sockets with SO_REUSEPORT_LB set.
- */
- inp_inh = in_pcblookup_lbgroup_last(inp);
- }
-
#ifdef TCP_OFFLOAD
if (tp->t_state == TCPS_LISTEN)
tcp_offload_listen_stop(tp);
@@ -1997,16 +1979,7 @@ tcp_close(struct tcpcb *tp)
tcp_state_change(tp, TCPS_CLOSED);
KASSERT(inp->inp_socket != NULL, ("tcp_close: inp_socket NULL"));
so = inp->inp_socket;
-
soisdisconnected(so);
-
- if(listen)
- {
- if(inp_inh != NULL && inp_inh->inp_socket != NULL) {
- soinherit(so, inp_inh->inp_socket);
- }
- }
-
if (inp->inp_flags & INP_SOCKREF) {
KASSERT(so->so_state & SS_PROTOREF,
("tcp_close: !SS_PROTOREF"));
Modified: head/sys/netinet/udp_usrreq.c
==============================================================================
--- head/sys/netinet/udp_usrreq.c Tue Apr 24 19:51:05 2018 (r332966)
+++ head/sys/netinet/udp_usrreq.c Tue Apr 24 19:55:12 2018 (r332967)
@@ -612,7 +612,7 @@ udp_input(struct mbuf **mp, int *offp, int proto)
* will never clear these options after setting them.
*/
if ((last->inp_socket->so_options &
- (SO_REUSEPORT|SO_REUSEPORT_LB|SO_REUSEADDR)) == 0)
+ (SO_REUSEPORT|SO_REUSEADDR)) == 0)
break;
}
Modified: head/sys/netinet6/in6_pcb.c
==============================================================================
--- head/sys/netinet6/in6_pcb.c Tue Apr 24 19:51:05 2018 (r332966)
+++ head/sys/netinet6/in6_pcb.c Tue Apr 24 19:55:12 2018 (r332967)
@@ -125,12 +125,6 @@ in6_pcbbind(struct inpcb *inp, struct sockaddr *nam,
int error, lookupflags = 0;
int reuseport = (so->so_options & SO_REUSEPORT);
- /*
- * XXX Maybe we could let SO_REUSEPORT_LB set SO_REUSEPORT bit here
- * so that we don't have to add to the (already messy) code below
- */
- int reuseport_lb = (so->so_options & SO_REUSEPORT_LB);
-
INP_WLOCK_ASSERT(inp);
INP_HASH_WLOCK_ASSERT(pcbinfo);
@@ -138,7 +132,7 @@ in6_pcbbind(struct inpcb *inp, struct sockaddr *nam,
return (EADDRNOTAVAIL);
if (inp->inp_lport || !IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr))
return (EINVAL);
- if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT|SO_REUSEPORT_LB)) == 0)
+ if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0)
lookupflags = INPLOOKUP_WILDCARD;
if (nam == NULL) {
if ((error = prison_local_ip6(cred, &inp->in6p_laddr,
@@ -172,10 +166,6 @@ in6_pcbbind(struct inpcb *inp, struct sockaddr *nam,
*/
if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) != 0)
reuseport = SO_REUSEADDR|SO_REUSEPORT;
- // XXX: How to deal with SO_REUSEPORT_LB here?
- // Added equivalent treatment as SO_REUSEPORT here for now
- if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT_LB)) != 0)
- reuseport_lb = SO_REUSEADDR|SO_REUSEPORT_LB;
} else if (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
struct ifaddr *ifa;
@@ -224,8 +214,7 @@ in6_pcbbind(struct inpcb *inp, struct sockaddr *nam,
IN6_IS_ADDR_UNSPECIFIED(&t->in6p_faddr)) &&
(!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr) ||
!IN6_IS_ADDR_UNSPECIFIED(&t->in6p_laddr) ||
- (t->inp_flags2 & INP_REUSEPORT) ||
- (t->inp_flags2 & INP_REUSEPORT_LB) == 0) &&
+ (t->inp_flags2 & INP_REUSEPORT) == 0) &&
(inp->inp_cred->cr_uid !=
t->inp_cred->cr_uid))
return (EADDRINUSE);
@@ -275,39 +264,34 @@ in6_pcbbind(struct inpcb *inp, struct sockaddr *nam,
*/
tw = intotw(t);
if (tw == NULL ||
- ((reuseport & tw->tw_so_options) == 0 &&
- (reuseport_lb & tw->tw_so_options) == 0))
+ (reuseport & tw->tw_so_options) == 0)
return (EADDRINUSE);
- } else if (t && (reuseport & inp_so_options(t)) == 0 &&
- (reuseport_lb & inp_so_options(t)) == 0) {
+ } else if (t && (reuseport & inp_so_options(t)) == 0) {
return (EADDRINUSE);
}
#ifdef INET
if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0 &&
- IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
+ IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
struct sockaddr_in sin;
in6_sin6_2_sin(&sin, sin6);
t = in_pcblookup_local(pcbinfo, sin.sin_addr,
- lport, lookupflags, cred);
+ lport, lookupflags, cred);
if (t && t->inp_flags & INP_TIMEWAIT) {
tw = intotw(t);
if (tw == NULL)
return (EADDRINUSE);
if ((reuseport & tw->tw_so_options) == 0
- && (reuseport_lb & tw->tw_so_options) == 0
- && (ntohl(t->inp_laddr.s_addr) !=
- INADDR_ANY || ((inp->inp_vflag &
- INP_IPV6PROTO) ==
- (t->inp_vflag & INP_IPV6PROTO))))
+ && (ntohl(t->inp_laddr.s_addr) !=
+ INADDR_ANY || ((inp->inp_vflag &
+ INP_IPV6PROTO) ==
+ (t->inp_vflag & INP_IPV6PROTO))))
return (EADDRINUSE);
} else if (t &&
- (reuseport & inp_so_options(t)) == 0 &&
- (reuseport_lb & inp_so_options(t)) == 0 &&
- (ntohl(t->inp_laddr.s_addr) != INADDR_ANY ||
- (t->inp_vflag & INP_IPV6PROTO) != 0)) {
+ (reuseport & inp_so_options(t)) == 0 &&
+ (ntohl(t->inp_laddr.s_addr) != INADDR_ANY ||
+ (t->inp_vflag & INP_IPV6PROTO) != 0))
return (EADDRINUSE);
- }
}
#endif
}
@@ -872,54 +856,6 @@ in6_rtchange(struct inpcb *inp, int errno)
return inp;
}
-static struct inpcb *
-in6_pcblookup_lbgroup(const struct inpcbinfo *pcbinfo,
- const struct in6_addr *laddr, uint16_t lport, const struct in6_addr *faddr,
- uint16_t fport, int lookupflags)
-{
- struct inpcb *local_wild = NULL;
- const struct inpcblbgrouphead *hdr;
- struct inpcblbgroup *grp;
- struct inpcblbgroup *grp_local_wild;
-
- hdr = &pcbinfo->ipi_lbgrouphashbase[
- INP_PCBLBGROUP_PORTHASH(lport, pcbinfo->ipi_lbgrouphashmask)];
-
- /*
- * Order of socket selection:
- * 1. non-wild.
- * 2. wild (if lookupflags contains INPLOOKUP_WILDCARD).
- *
- * NOTE:
- * - Load balanced group does not contain jailed sockets
- * - Load balanced does not contain IPv4 mapped INET6 wild sockets
- */
- LIST_FOREACH(grp, hdr, il_list) {
-
- if (grp->il_lport == lport) {
- uint32_t idx = 0;
- int pkt_hash = INP_PCBLBGROUP_PKTHASH(
- INP6_PCBHASHKEY(faddr), lport, fport);
-
- idx = pkt_hash % grp->il_inpcnt;
-
- if (IN6_ARE_ADDR_EQUAL(&grp->il6_laddr, laddr)) {
- return grp->il_inp[idx];
- } else {
- if (IN6_IS_ADDR_UNSPECIFIED(&grp->il6_laddr) &&
- (lookupflags & INPLOOKUP_WILDCARD)) {
- local_wild = grp->il_inp[idx];
- grp_local_wild = grp;
- }
- }
- }
- }
- if (local_wild != NULL) {
- return local_wild;
- }
- return NULL;
-}
-
#ifdef PCBGROUP
/*
* Lookup PCB in hash list, using pcbgroup tables.
@@ -1121,8 +1057,6 @@ found:
*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
More information about the svn-src-head
mailing list