git: 61f7427f02a3 - main - protosw: cleanup protocols that existed merely to provide pr_input

From: Gleb Smirnoff <glebius_at_FreeBSD.org>
Date: Tue, 30 Aug 2022 22:49:25 UTC
The branch main has been updated by glebius:

URL: https://cgit.FreeBSD.org/src/commit/?id=61f7427f02a307d28af674a12c45dd546e3898e4

commit 61f7427f02a307d28af674a12c45dd546e3898e4
Author:     Gleb Smirnoff <glebius@FreeBSD.org>
AuthorDate: 2022-08-30 22:09:21 +0000
Commit:     Gleb Smirnoff <glebius@FreeBSD.org>
CommitDate: 2022-08-30 22:09:21 +0000

    protosw: cleanup protocols that existed merely to provide pr_input
    
    Since 4.4BSD the protosw was used to implement socket types created
    by the socket(2) syscall and at the same time to demultiplex incoming
    IPv4 datagrams (later copied to IPv6).  This story ended with
    78b1fc05b20.
    
    The entries in inetsw (e.g. IPPROTO_ICMP) that were added to catch
    packets in ip_input() would also be returned by pffindproto() if the
    user called socket(AF_INET, SOCK_RAW, IPPROTO_ICMP).  Thus, for raw
    sockets to work correctly, all the entries were pointing at raw_usrreq,
    differing only in the value of pr_protocol.
    
    With 78b1fc05b20 all these entries are no longer needed, as ip_protox
    is independent of protosw.  Any socket syscall requesting the SOCK_RAW
    type now ends up with rip_protosw, and this protosw has its pr_protocol
    set to 0, which allows the socket to be marked with any protocol.
    
    For IPv6 raw sockets the change required two small fixes:
    o Validate the user-provided protocol value.
    o Always use the protocol number that rip6_attach() stored in the inp,
      instead of the protosw value, which is now always 0.
    
    Differential revision:  https://reviews.freebsd.org/D36380
---
 sys/kern/uipc_domain.c   | 26 -------------
 sys/kern/uipc_socket.c   | 10 ++---
 sys/netinet/in_proto.c   | 26 +------------
 sys/netinet/raw_ip.c     | 80 ++++++++------------------------------
 sys/netinet6/in6_proto.c | 26 +------------
 sys/netinet6/raw_ip6.c   | 99 +++++++++++++-----------------------------------
 sys/sys/protosw.h        |  1 -
 7 files changed, 49 insertions(+), 219 deletions(-)

diff --git a/sys/kern/uipc_domain.c b/sys/kern/uipc_domain.c
index 5fb602226bae..832afca510fa 100644
--- a/sys/kern/uipc_domain.c
+++ b/sys/kern/uipc_domain.c
@@ -306,32 +306,6 @@ pffindtype(int family, int type)
 	return (NULL);
 }
 
-struct protosw *
-pffindproto(int family, int protocol, int type)
-{
-	struct domain *dp;
-	struct protosw *pr;
-	struct protosw *maybe;
-
-	dp = pffinddomain(family);
-	if (dp == NULL)
-		return (NULL);
-
-	maybe = NULL;
-	for (int i = 0; i < dp->dom_nprotosw; i++) {
-		if ((pr = dp->dom_protosw[i]) == NULL)
-			continue;
-		if ((pr->pr_protocol == protocol) && (pr->pr_type == type))
-			return (pr);
-
-		/* XXX: raw catches all. Why? */
-		if (type == SOCK_RAW && pr->pr_type == SOCK_RAW &&
-		    pr->pr_protocol == 0 && maybe == NULL)
-			maybe = pr;
-	}
-	return (maybe);
-}
-
 /*
  * The caller must make sure that the new protocol is fully set up and ready to
  * accept requests before it is registered.
diff --git a/sys/kern/uipc_socket.c b/sys/kern/uipc_socket.c
index 1bc172eacd89..a93256cd7f63 100644
--- a/sys/kern/uipc_socket.c
+++ b/sys/kern/uipc_socket.c
@@ -520,11 +520,7 @@ socreate(int dom, struct socket **aso, int type, int proto,
 		    td->td_proc->p_comm);
 	}
 
-	if (proto)
-		prp = pffindproto(dom, proto, type);
-	else
-		prp = pffindtype(dom, type);
-
+	prp = pffindtype(dom, type);
 	if (prp == NULL) {
 		/* No support for domain. */
 		if (pffinddomain(dom) == NULL)
@@ -534,6 +530,8 @@ socreate(int dom, struct socket **aso, int type, int proto,
 			return (EPROTOTYPE);
 		return (EPROTONOSUPPORT);
 	}
+	if (prp->pr_protocol != 0 && proto != 0 && prp->pr_protocol != proto)
+		return (EPROTONOSUPPORT);
 
 	MPASS(prp->pr_attach);
 
@@ -543,8 +541,6 @@ socreate(int dom, struct socket **aso, int type, int proto,
 	if (prison_check_af(cred, prp->pr_domain->dom_family) != 0)
 		return (EPROTONOSUPPORT);
 
-	if (prp->pr_type != type)
-		return (EPROTOTYPE);
 	so = soalloc(CRED_TO_VNET(cred));
 	if (so == NULL)
 		return (ENOBUFS);
diff --git a/sys/netinet/in_proto.c b/sys/netinet/in_proto.c
index 35b02d706e72..4eb037dbeed0 100644
--- a/sys/netinet/in_proto.c
+++ b/sys/netinet/in_proto.c
@@ -94,9 +94,7 @@ __FBSDID("$FreeBSD$");
 #endif
 
 /* netinet/raw_ip.c */
-extern struct protosw rip_protosw, rsvp_protosw, rawipv4_protosw,
-    rawipv6_protosw, mobile_protosw, etherip_protosw, icmp_protosw,
-    igmp_protosw, gre_protosw, pim_protosw, ripwild_protosw;
+extern struct protosw rip_protosw;
 /* netinet/udp_usrreq.c */
 extern struct protosw udp_protosw, udplite_protosw;
 
@@ -111,7 +109,7 @@ struct domain inetdomain = {
 #endif
 	.dom_ifattach =		in_domifattach,
 	.dom_ifdetach =		in_domifdetach,
-	.dom_nprotosw =		24,
+	.dom_nprotosw =		14,
 	.dom_protosw = {
 		&tcp_protosw,
 		&udp_protosw,
@@ -123,28 +121,8 @@ struct domain inetdomain = {
 #endif
 		&udplite_protosw,
 		&rip_protosw,
-		/*
-		 * XXXGL: it is entirely possible that all below raw-based
-		 * protosw definitions are not needed.  They could have existed
-		 * just to define pr_input, pr_drain, pr_*timo or PR_LASTHDR
-		 * flag, and were never supposed to create a special socket.
-		 */
-		&icmp_protosw,
-		&igmp_protosw,
-		&rsvp_protosw,
-		&rawipv4_protosw,
-		&mobile_protosw,
-		&etherip_protosw,
-		&gre_protosw,
-#ifdef INET6
-		&rawipv6_protosw,
-#else
-		NULL,
-#endif
-		&pim_protosw,
 		/* Spacer 8 times for loadable protocols. XXXGL: why 8? */
 		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-		&ripwild_protosw,
 	},
 };
 
diff --git a/sys/netinet/raw_ip.c b/sys/netinet/raw_ip.c
index 0e7e75b1fe99..3e7c81341c08 100644
--- a/sys/netinet/raw_ip.c
+++ b/sys/netinet/raw_ip.c
@@ -1068,70 +1068,22 @@ SYSCTL_PROC(_net_inet_raw, OID_AUTO/*XXX*/, pcblist,
     "List of active raw IP sockets");
 
 #ifdef INET
-/*
- * See comment in in_proto.c containing "protosw definitions are not needed".
- */
-#define	RAW_PROTOSW							\
-	.pr_type =		SOCK_RAW,				\
-	.pr_flags =		PR_ATOMIC|PR_ADDR,			\
-	.pr_ctloutput =		rip_ctloutput,				\
-	.pr_abort =		rip_abort,				\
-	.pr_attach =		rip_attach,				\
-	.pr_bind =		rip_bind,				\
-	.pr_connect =		rip_connect,				\
-	.pr_control =		in_control,				\
-	.pr_detach =		rip_detach,				\
-	.pr_disconnect =	rip_disconnect,				\
-	.pr_peeraddr =		in_getpeeraddr,				\
-	.pr_send =		rip_send,				\
-	.pr_shutdown =		rip_shutdown,				\
-	.pr_sockaddr =		in_getsockaddr,				\
-	.pr_sosetlabel =	in_pcbsosetlabel,			\
-	.pr_close =		rip_close
-
 struct protosw rip_protosw = {
-	.pr_protocol = IPPROTO_RAW,
-	RAW_PROTOSW
-};
-struct protosw icmp_protosw = {
-	.pr_protocol =	IPPROTO_ICMP,
-	RAW_PROTOSW
-};
-struct protosw igmp_protosw = {
-	.pr_protocol =	IPPROTO_IGMP,
-	RAW_PROTOSW
-};
-struct protosw rsvp_protosw = {
-	.pr_protocol =	IPPROTO_RSVP,
-	RAW_PROTOSW
-};
-struct protosw rawipv4_protosw = {
-	.pr_protocol =	IPPROTO_IPV4,
-	RAW_PROTOSW
-};
-struct protosw mobile_protosw = {
-	.pr_protocol =	IPPROTO_MOBILE,
-	RAW_PROTOSW
-};
-struct protosw etherip_protosw = {
-	.pr_protocol =	IPPROTO_ETHERIP,
-	RAW_PROTOSW
-};
-struct protosw gre_protosw = {
-	.pr_protocol =	IPPROTO_GRE,
-	RAW_PROTOSW
-};
-#ifdef INET6
-struct protosw rawipv6_protosw = {
-	.pr_protocol =	IPPROTO_IPV6,
-	RAW_PROTOSW
-};
-#endif
-struct protosw pim_protosw = {
-	.pr_protocol =	IPPROTO_PIM,
-	RAW_PROTOSW
-};
-struct protosw ripwild_protosw = {
-	RAW_PROTOSW
+	.pr_type =		SOCK_RAW,
+	.pr_flags =		PR_ATOMIC|PR_ADDR,
+	.pr_ctloutput =		rip_ctloutput,
+	.pr_abort =		rip_abort,
+	.pr_attach =		rip_attach,
+	.pr_bind =		rip_bind,
+	.pr_connect =		rip_connect,
+	.pr_control =		in_control,
+	.pr_detach =		rip_detach,
+	.pr_disconnect =	rip_disconnect,
+	.pr_peeraddr =		in_getpeeraddr,
+	.pr_send =		rip_send,
+	.pr_shutdown =		rip_shutdown,
+	.pr_sockaddr =		in_getsockaddr,
+	.pr_sosetlabel =	in_pcbsosetlabel,
+	.pr_close =		rip_close
 };
 #endif /* INET */
diff --git a/sys/netinet6/in6_proto.c b/sys/netinet6/in6_proto.c
index 72b84c915641..688702e17e34 100644
--- a/sys/netinet6/in6_proto.c
+++ b/sys/netinet6/in6_proto.c
@@ -122,9 +122,7 @@ __FBSDID("$FreeBSD$");
 #include <netinet6/ip6protosw.h>
 
 /* netinet6/raw_ip6.c */
-extern struct protosw rip6_protosw, icmp6_protosw, dstopts6_protosw,
-    routing6_protosw, frag6_protosw, rawipv4in6_protosw, rawipv6in6_protosw,
-    etherip6_protosw, gre6_protosw, pim6_protosw, rip6wild_protosw;
+extern struct protosw rip6_protosw;
 /* netinet6/udp6_usrreq.c */
 extern struct protosw udp6_protosw, udplite6_protosw;
 
@@ -143,7 +141,7 @@ struct domain inet6domain = {
 	.dom_ifattach =		in6_domifattach,
 	.dom_ifdetach =		in6_domifdetach,
 	.dom_ifmtu    =		in6_domifmtu,
-	.dom_nprotosw =		24,
+	.dom_nprotosw =		14,
 	.dom_protosw = {
 		&tcp6_protosw,
 		&udp6_protosw,
@@ -155,28 +153,8 @@ struct domain inet6domain = {
 #endif
 		&udplite6_protosw,
 		&rip6_protosw,
-		/*
-		 * XXXGL: it is entirely possible that all below raw-based
-		 * protosw definitions are not needed.  They could have existed
-		 * just to define pr_input, pr_drain, pr_*timo or PR_LASTHDR
-		 * flag, and were never supposed to create a special socket.
-		 */
-		&icmp6_protosw,
-		&dstopts6_protosw,
-		&routing6_protosw,
-		&frag6_protosw,
-#ifdef INET
-		&rawipv4in6_protosw,
-#else
-		NULL,
-#endif
-		&rawipv6in6_protosw,
-		&etherip6_protosw,
-		&gre6_protosw,
-		&pim6_protosw,
 		/* Spacer 8 times for loadable protocols. XXXGL: why 8? */
 		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-		&rip6wild_protosw,
 	},
 };
 
diff --git a/sys/netinet6/raw_ip6.c b/sys/netinet6/raw_ip6.c
index 091e90b0eaed..acc6787d59a7 100644
--- a/sys/netinet6/raw_ip6.c
+++ b/sys/netinet6/raw_ip6.c
@@ -424,9 +424,8 @@ rip6_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
 
 	if (control != NULL) {
 		NET_EPOCH_ENTER(et);
-		error = ip6_setpktopts(control, &opt,
-		    inp->in6p_outputopts, so->so_cred,
-		    so->so_proto->pr_protocol);
+		error = ip6_setpktopts(control, &opt, inp->in6p_outputopts,
+		    so->so_cred, inp->inp_ip_p);
 		NET_EPOCH_EXIT(et);
 
 		if (error != 0) {
@@ -455,7 +454,7 @@ rip6_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
 	 * For an ICMPv6 packet, we should know its type and code to update
 	 * statistics.
 	 */
-	if (so->so_proto->pr_protocol == IPPROTO_ICMPV6) {
+	if (inp->inp_ip_p == IPPROTO_ICMPV6) {
 		struct icmp6_hdr *icmp6;
 		if (m->m_len < sizeof(struct icmp6_hdr) &&
 		    (m = m_pullup(m, sizeof(struct icmp6_hdr))) == NULL) {
@@ -479,8 +478,7 @@ rip6_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
 		uint32_t hash_type, hash_val;
 
 		hash_val = fib6_calc_software_hash(&inp->in6p_laddr,
-		    &dstsock->sin6_addr, 0, 0, so->so_proto->pr_protocol,
-		    &hash_type);
+		    &dstsock->sin6_addr, 0, 0, inp->inp_ip_p, &hash_type);
 		inp->inp_flowid = hash_val;
 		inp->inp_flowtype = hash_type;
 	}
@@ -516,14 +514,13 @@ rip6_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
 	ip6->ip6_nxt = inp->inp_ip_p;
 	ip6->ip6_hlim = hlim;
 
-	if (so->so_proto->pr_protocol == IPPROTO_ICMPV6 ||
-	    inp->in6p_cksum != -1) {
+	if (inp->inp_ip_p == IPPROTO_ICMPV6 || inp->in6p_cksum != -1) {
 		struct mbuf *n;
 		int off;
 		u_int16_t *p;
 
 		/* Compute checksum. */
-		if (so->so_proto->pr_protocol == IPPROTO_ICMPV6)
+		if (inp->inp_ip_p == IPPROTO_ICMPV6)
 			off = offsetof(struct icmp6_hdr, icmp6_cksum);
 		else
 			off = inp->in6p_cksum;
@@ -550,7 +547,7 @@ rip6_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
 	 * them to rtadvd/rtsol.
 	 */
 	if ((send_sendso_input_hook != NULL) &&
-	    so->so_proto->pr_protocol == IPPROTO_ICMPV6) {
+	    inp->inp_ip_p == IPPROTO_ICMPV6) {
 		switch (type) {
 		case ND_ROUTER_ADVERT:
 		case ND_ROUTER_SOLICIT:
@@ -565,7 +562,7 @@ rip6_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
 	NET_EPOCH_ENTER(et);
 	error = ip6_output(m, optp, NULL, 0, inp->in6p_moptions, &oifp, inp);
 	NET_EPOCH_EXIT(et);
-	if (so->so_proto->pr_protocol == IPPROTO_ICMPV6) {
+	if (inp->inp_ip_p == IPPROTO_ICMPV6) {
 		if (oifp)
 			icmp6_ifoutstat_inc(oifp, type, code);
 		ICMP6STAT_INC(icp6s_outhist[type]);
@@ -682,6 +679,8 @@ rip6_attach(struct socket *so, int proto, struct thread *td)
 	error = priv_check(td, PRIV_NETINET_RAW);
 	if (error)
 		return (error);
+	if (proto >= IPPROTO_MAX || proto < 0)
+		return (EPROTONOSUPPORT);
 	error = soreserve(so, rip_sendspace, rip_recvspace);
 	if (error)
 		return (error);
@@ -694,7 +693,7 @@ rip6_attach(struct socket *so, int proto, struct thread *td)
 		return (error);
 	}
 	inp = (struct inpcb *)so->so_pcb;
-	inp->inp_ip_p = (long)proto;
+	inp->inp_ip_p = proto;
 	inp->in6p_cksum = -1;
 	inp->in6p_icmp6filt = filter;
 	ICMP6_FILTER_SETPASSALL(inp->in6p_icmp6filt);
@@ -869,66 +868,20 @@ rip6_shutdown(struct socket *so)
 	return (0);
 }
 
-/*
- * See comment in in6_proto.c containing "protosw definitions are not needed".
- */
-#define	RAW6_PROTOSW						\
-	.pr_type =		SOCK_RAW,			\
-	.pr_flags =		PR_ATOMIC|PR_ADDR,		\
-	.pr_ctloutput =		rip6_ctloutput,			\
-	.pr_abort =		rip6_abort,			\
-	.pr_attach =		rip6_attach,			\
-	.pr_bind =		rip6_bind,			\
-	.pr_connect =		rip6_connect,			\
-	.pr_control =		in6_control,			\
-	.pr_detach =		rip6_detach,			\
-	.pr_disconnect =	rip6_disconnect,		\
-	.pr_peeraddr =		in6_getpeeraddr,		\
-	.pr_send =		rip6_send,			\
-	.pr_shutdown =		rip6_shutdown,			\
-	.pr_sockaddr =		in6_getsockaddr,		\
-	.pr_close =		rip6_close
-
 struct protosw rip6_protosw = {
-	.pr_protocol =	IPPROTO_RAW,
-	RAW6_PROTOSW
-};
-struct protosw icmp6_protosw = {
-	.pr_protocol =	IPPROTO_ICMPV6,
-	RAW6_PROTOSW
-};
-struct protosw dstopts6_protosw = {
-	.pr_protocol =	IPPROTO_DSTOPTS,
-	RAW6_PROTOSW
-};
-struct protosw routing6_protosw = {
-	.pr_protocol =	IPPROTO_ROUTING,
-	RAW6_PROTOSW
-};
-struct protosw frag6_protosw = {
-	.pr_protocol =	IPPROTO_FRAGMENT,
-	RAW6_PROTOSW
-};
-struct protosw rawipv4in6_protosw = {
-	.pr_protocol =	IPPROTO_IPV4,
-	RAW6_PROTOSW
-};
-struct protosw rawipv6in6_protosw = {
-	.pr_protocol =	IPPROTO_IPV6,
-	RAW6_PROTOSW
-};
-struct protosw etherip6_protosw = {
-	.pr_protocol =	IPPROTO_ETHERIP,
-	RAW6_PROTOSW
-};
-struct protosw gre6_protosw = {
-	.pr_protocol =	IPPROTO_GRE,
-	RAW6_PROTOSW
-};
-struct protosw pim6_protosw = {
-	.pr_protocol =	IPPROTO_PIM,
-	RAW6_PROTOSW
-};
-struct protosw rip6wild_protosw = {
-	RAW6_PROTOSW
+	.pr_type =		SOCK_RAW,
+	.pr_flags =		PR_ATOMIC|PR_ADDR,
+	.pr_ctloutput =		rip6_ctloutput,
+	.pr_abort =		rip6_abort,
+	.pr_attach =		rip6_attach,
+	.pr_bind =		rip6_bind,
+	.pr_connect =		rip6_connect,
+	.pr_control =		in6_control,
+	.pr_detach =		rip6_detach,
+	.pr_disconnect =	rip6_disconnect,
+	.pr_peeraddr =		in6_getpeeraddr,
+	.pr_send =		rip6_send,
+	.pr_shutdown =		rip6_shutdown,
+	.pr_sockaddr =		in6_getsockaddr,
+	.pr_close =		rip6_close
 };
diff --git a/sys/sys/protosw.h b/sys/sys/protosw.h
index f6907505178e..f05aff962420 100644
--- a/sys/sys/protosw.h
+++ b/sys/sys/protosw.h
@@ -236,7 +236,6 @@ char	*prcorequests[] = {
 
 #ifdef _KERNEL
 struct domain *pffinddomain(int family);
-struct protosw *pffindproto(int family, int protocol, int type);
 struct protosw *pffindtype(int family, int type);
 int protosw_register(struct domain *, struct protosw *);
 int protosw_unregister(struct protosw *);