git: 78b1fc05b205 - main - protosw: separate pr_input and pr_ctlinput out of protosw

From: Gleb Smirnoff <glebius_at_FreeBSD.org>
Date: Wed, 17 Aug 2022 18:52:13 UTC
The branch main has been updated by glebius:

URL: https://cgit.FreeBSD.org/src/commit/?id=78b1fc05b20504ed13aeeb4a5b47443246cabaeb

commit 78b1fc05b20504ed13aeeb4a5b47443246cabaeb
Author:     Gleb Smirnoff <glebius@FreeBSD.org>
AuthorDate: 2022-08-17 18:50:31 +0000
Commit:     Gleb Smirnoff <glebius@FreeBSD.org>
CommitDate: 2022-08-17 18:50:31 +0000

    protosw: separate pr_input and pr_ctlinput out of protosw
    
    The protosw KPI has historically implemented two quite orthogonal
    things: protocols that implement a certain kind of socket, and
    protocols that are IPv4/IPv6 protocols.  These two things do not
    have a one-to-one correspondence.  The pr_input and pr_ctlinput
    methods were utilized only in IP protocols.  This strange duality
    required IP protocols that don't have a socket to declare a protosw,
    e.g. carp(4).  On the other hand, developers of socket protocols
    thought that they always needed to define pr_input/pr_ctlinput,
    which led to strange dead code, e.g. div_input() or sdp_ctlinput().
    
    With this change pr_input and pr_ctlinput disappear from protosw,
    and IPv4 and IPv6 each get a private single-level protocol switch
    table, ip_protox[] and ip6_protox[] respectively, which is an array
    of pointers to ipproto_input_t functions.  The pr_ctlinput method,
    which was used for control input coming from the network (ICMP,
    ICMPv6), is now represented by ip_ctlprotox[] and ip6_ctlprotox[].
    
    ipproto_register() becomes the only official way to register in the
    table.  Those protocols that were always static, and that nobody is
    likely to be interested in making loadable, are now registered by
    ip_init() and ip6_init().  An IP protocol that considers itself
    unloadable shall register itself within its own private SYSINIT().
    
    Reviewed by:            tuexen, melifaro
    Differential revision:  https://reviews.freebsd.org/D36157
---
 sys/kern/uipc_debug.c      |   2 -
 sys/kern/uipc_domain.c     |   2 -
 sys/netinet/in.h           |  17 ++++++++
 sys/netinet/in_proto.c     |  23 ----------
 sys/netinet/ip_carp.c      |  66 +++-------------------------
 sys/netinet/ip_carp.h      |   2 -
 sys/netinet/ip_divert.c    |  20 ---------
 sys/netinet/ip_icmp.c      |  12 +++---
 sys/netinet/ip_input.c     | 105 ++++++++++++++++++---------------------------
 sys/netinet/ip_var.h       |   4 +-
 sys/netinet/raw_ip.c       |   5 ++-
 sys/netinet/sctp_module.c  |  12 +-----
 sys/netinet/sctp_var.h     |   2 -
 sys/netinet/tcp_subr.c     |  10 +++--
 sys/netinet/udp_usrreq.c   |  21 ++++++---
 sys/netinet/udp_var.h      |   3 --
 sys/netinet6/icmp6.c       |  10 ++---
 sys/netinet6/in6.h         |   1 -
 sys/netinet6/in6_proto.c   |  23 ----------
 sys/netinet6/ip6_input.c   | 105 +++++++++++++++++----------------------------
 sys/netinet6/ip6_var.h     |   4 --
 sys/netinet6/tcp6_var.h    |   1 -
 sys/netinet6/udp6_usrreq.c |   9 ++++
 sys/netipsec/ipsec_input.c |   4 +-
 sys/netpfil/pf/if_pfsync.c |  22 +---------
 sys/sys/protosw.h          |   4 --
 26 files changed, 155 insertions(+), 334 deletions(-)

diff --git a/sys/kern/uipc_debug.c b/sys/kern/uipc_debug.c
index c553ee1047b6..b47ae71c84d9 100644
--- a/sys/kern/uipc_debug.c
+++ b/sys/kern/uipc_debug.c
@@ -314,8 +314,6 @@ db_print_protosw(struct protosw *pr, const char *prname, int indent)
 	db_printf(")\n");
 
 	db_print_indent(indent);
-	db_printf("pr_input: %p   ", pr->pr_input);
-	db_printf("pr_ctlinput: %p\n", pr->pr_ctlinput);
 	db_printf("pr_ctloutput: %p   ", pr->pr_ctloutput);
 
 	db_print_indent(indent);
diff --git a/sys/kern/uipc_domain.c b/sys/kern/uipc_domain.c
index 2cae08be089a..2edd79657d03 100644
--- a/sys/kern/uipc_domain.c
+++ b/sys/kern/uipc_domain.c
@@ -473,8 +473,6 @@ pf_proto_unregister(int family, int protocol, int type)
 	dpr->pr_domain = dp;
 	dpr->pr_protocol = PROTO_SPACER;
 	dpr->pr_flags = 0;
-	dpr->pr_input = NULL;
-	dpr->pr_ctlinput = NULL;
 	dpr->pr_ctloutput = NULL;
 	dpr->pr_fasttimo = NULL;
 	dpr->pr_slowtimo = NULL;
diff --git a/sys/netinet/in.h b/sys/netinet/in.h
index 44d64190ed01..43d26b9f7804 100644
--- a/sys/netinet/in.h
+++ b/sys/netinet/in.h
@@ -696,6 +696,23 @@ void	 in_ifdetach(struct ifnet *);
 #define	satosin(sa)	((struct sockaddr_in *)(sa))
 #define	sintosa(sin)	((struct sockaddr *)(sin))
 #define	ifatoia(ifa)	((struct in_ifaddr *)(ifa))
+
+typedef int	ipproto_input_t(struct mbuf **, int *, int);
+typedef void	ipproto_ctlinput_t(int, struct sockaddr *, void *);
+int	ipproto_register(uint8_t, ipproto_input_t, ipproto_ctlinput_t);
+int	ipproto_unregister(uint8_t);
+int	ip6proto_register(uint8_t, ipproto_input_t, ipproto_ctlinput_t);
+int	ip6proto_unregister(uint8_t);
+#define	IPPROTO_REGISTER(prot, input, ctl)	do {			\
+	int error __diagused;						\
+	error = ipproto_register(prot, input, ctl);			\
+	MPASS(error == 0);						\
+} while (0)
+#define	IP6PROTO_REGISTER(prot, input, ctl)	do {			\
+	int error __diagused;						\
+	error = ip6proto_register(prot, input, ctl);			\
+	MPASS(error == 0);						\
+} while (0)
 #endif /* _KERNEL */
 
 /* INET6 stuff */
diff --git a/sys/netinet/in_proto.c b/sys/netinet/in_proto.c
index 81c078e2f306..b589441b20d3 100644
--- a/sys/netinet/in_proto.c
+++ b/sys/netinet/in_proto.c
@@ -122,8 +122,6 @@ struct protosw inetsw[] = {
 	.pr_domain =		&inetdomain,
 	.pr_protocol =		IPPROTO_UDP,
 	.pr_flags =		PR_ATOMIC|PR_ADDR|PR_CAPATTACH,
-	.pr_input =		udp_input,
-	.pr_ctlinput =		udp_ctlinput,
 	.pr_ctloutput =		udp_ctloutput,
 	.pr_usrreqs =		&udp_usrreqs
 },
@@ -133,8 +131,6 @@ struct protosw inetsw[] = {
 	.pr_protocol =		IPPROTO_TCP,
 	.pr_flags =		PR_CONNREQUIRED|PR_IMPLOPCL|PR_WANTRCVD|
 				    PR_CAPATTACH,
-	.pr_input =		tcp_input,
-	.pr_ctlinput =		tcp_ctlinput,
 	.pr_ctloutput =		tcp_ctloutput,
 	.pr_slowtimo =		tcp_slowtimo,
 	.pr_drain =		tcp_drain,
@@ -146,8 +142,6 @@ struct protosw inetsw[] = {
 	.pr_domain =		&inetdomain,
 	.pr_protocol =		IPPROTO_SCTP,
 	.pr_flags =		PR_WANTRCVD,
-	.pr_input =		sctp_input,
-	.pr_ctlinput =		sctp_ctlinput,
 	.pr_ctloutput =		sctp_ctloutput,
 	.pr_drain =		sctp_drain,
 	.pr_usrreqs =		&sctp_usrreqs
@@ -157,8 +151,6 @@ struct protosw inetsw[] = {
 	.pr_domain =		&inetdomain,
 	.pr_protocol =		IPPROTO_SCTP,
 	.pr_flags =		PR_CONNREQUIRED|PR_WANTRCVD,
-	.pr_input =		sctp_input,
-	.pr_ctlinput =		sctp_ctlinput,
 	.pr_ctloutput =		sctp_ctloutput,
 	.pr_drain =		NULL, /* Covered by the SOCK_SEQPACKET entry. */
 	.pr_usrreqs =		&sctp_usrreqs
@@ -169,8 +161,6 @@ struct protosw inetsw[] = {
 	.pr_domain =		&inetdomain,
 	.pr_protocol =		IPPROTO_UDPLITE,
 	.pr_flags =		PR_ATOMIC|PR_ADDR|PR_CAPATTACH,
-	.pr_input =		udp_input,
-	.pr_ctlinput =		udplite_ctlinput,
 	.pr_ctloutput =		udp_ctloutput,
 	.pr_usrreqs =		&udp_usrreqs
 },
@@ -179,8 +169,6 @@ struct protosw inetsw[] = {
 	.pr_domain =		&inetdomain,
 	.pr_protocol =		IPPROTO_RAW,
 	.pr_flags =		PR_ATOMIC|PR_ADDR,
-	.pr_input =		rip_input,
-	.pr_ctlinput =		rip_ctlinput,
 	.pr_ctloutput =		rip_ctloutput,
 	.pr_usrreqs =		&rip_usrreqs
 },
@@ -189,7 +177,6 @@ struct protosw inetsw[] = {
 	.pr_domain =		&inetdomain,
 	.pr_protocol =		IPPROTO_ICMP,
 	.pr_flags =		PR_ATOMIC|PR_ADDR,
-	.pr_input =		icmp_input,
 	.pr_ctloutput =		rip_ctloutput,
 	.pr_usrreqs =		&rip_usrreqs
 },
@@ -198,7 +185,6 @@ struct protosw inetsw[] = {
 	.pr_domain =		&inetdomain,
 	.pr_protocol =		IPPROTO_IGMP,
 	.pr_flags =		PR_ATOMIC|PR_ADDR,
-	.pr_input =		igmp_input,
 	.pr_ctloutput =		rip_ctloutput,
 	.pr_fasttimo =		igmp_fasttimo,
 	.pr_slowtimo =		igmp_slowtimo,
@@ -209,7 +195,6 @@ struct protosw inetsw[] = {
 	.pr_domain =		&inetdomain,
 	.pr_protocol =		IPPROTO_RSVP,
 	.pr_flags =		PR_ATOMIC|PR_ADDR,
-	.pr_input =		rsvp_input,
 	.pr_ctloutput =		rip_ctloutput,
 	.pr_usrreqs =		&rip_usrreqs
 },
@@ -218,7 +203,6 @@ struct protosw inetsw[] = {
 	.pr_domain =		&inetdomain,
 	.pr_protocol =		IPPROTO_IPV4,
 	.pr_flags =		PR_ATOMIC|PR_ADDR,
-	.pr_input =		encap4_input,
 	.pr_ctloutput =		rip_ctloutput,
 	.pr_usrreqs =		&rip_usrreqs
 },
@@ -227,7 +211,6 @@ struct protosw inetsw[] = {
 	.pr_domain =		&inetdomain,
 	.pr_protocol =		IPPROTO_MOBILE,
 	.pr_flags =		PR_ATOMIC|PR_ADDR,
-	.pr_input =		encap4_input,
 	.pr_ctloutput =		rip_ctloutput,
 	.pr_usrreqs =		&rip_usrreqs
 },
@@ -236,7 +219,6 @@ struct protosw inetsw[] = {
 	.pr_domain =		&inetdomain,
 	.pr_protocol =		IPPROTO_ETHERIP,
 	.pr_flags =		PR_ATOMIC|PR_ADDR,
-	.pr_input =		encap4_input,
 	.pr_ctloutput =		rip_ctloutput,
 	.pr_usrreqs =		&rip_usrreqs
 },
@@ -245,7 +227,6 @@ struct protosw inetsw[] = {
 	.pr_domain =		&inetdomain,
 	.pr_protocol =		IPPROTO_GRE,
 	.pr_flags =		PR_ATOMIC|PR_ADDR,
-	.pr_input =		encap4_input,
 	.pr_ctloutput =		rip_ctloutput,
 	.pr_usrreqs =		&rip_usrreqs
 },
@@ -255,7 +236,6 @@ struct protosw inetsw[] = {
 	.pr_domain =		&inetdomain,
 	.pr_protocol =		IPPROTO_IPV6,
 	.pr_flags =		PR_ATOMIC|PR_ADDR,
-	.pr_input =		encap4_input,
 	.pr_ctloutput =		rip_ctloutput,
 	.pr_usrreqs =		&rip_usrreqs
 },
@@ -265,7 +245,6 @@ struct protosw inetsw[] = {
 	.pr_domain =		&inetdomain,
 	.pr_protocol =		IPPROTO_PIM,
 	.pr_flags =		PR_ATOMIC|PR_ADDR,
-	.pr_input =		encap4_input,
 	.pr_ctloutput =		rip_ctloutput,
 	.pr_usrreqs =		&rip_usrreqs
 },
@@ -283,8 +262,6 @@ IPPROTOSPACER,
 	.pr_type =		SOCK_RAW,
 	.pr_domain =		&inetdomain,
 	.pr_flags =		PR_ATOMIC|PR_ADDR,
-	.pr_input =		rip_input,
-	.pr_ctloutput =		rip_ctloutput,
 	.pr_usrreqs =		&rip_usrreqs
 },
 };
diff --git a/sys/netinet/ip_carp.c b/sys/netinet/ip_carp.c
index 013e4ea5c68b..7c0318c82e75 100644
--- a/sys/netinet/ip_carp.c
+++ b/sys/netinet/ip_carp.c
@@ -46,7 +46,6 @@ __FBSDID("$FreeBSD$");
 #include <sys/module.h>
 #include <sys/priv.h>
 #include <sys/proc.h>
-#include <sys/protosw.h>
 #include <sys/socket.h>
 #include <sys/sockio.h>
 #include <sys/sysctl.h>
@@ -148,10 +147,6 @@ struct carp_if {
 #define	CIF_PROMISC	0x00000001
 };
 
-#define	CARP_INET	0
-#define	CARP_INET6	1
-static int proto_reg[] = {-1, -1};
-
 /*
  * Brief design of carp(4).
  *
@@ -450,7 +445,7 @@ carp_hmac_verify(struct carp_softc *sc, uint32_t counter[2],
  * but it seems more efficient this way or not possible otherwise.
  */
 #ifdef INET
-int
+static int
 carp_input(struct mbuf **mp, int *offp, int proto)
 {
 	struct mbuf *m = *mp;
@@ -537,7 +532,7 @@ carp_input(struct mbuf **mp, int *offp, int proto)
 #endif
 
 #ifdef INET6
-int
+static int
 carp6_input(struct mbuf **mp, int *offp, int proto)
 {
 	struct mbuf *m = *mp;
@@ -2174,50 +2169,16 @@ carp_demote_adj_sysctl(SYSCTL_HANDLER_ARGS)
 	return (0);
 }
 
-#ifdef INET
-extern  struct domain inetdomain;
-static struct protosw in_carp_protosw = {
-	.pr_type =		SOCK_RAW,
-	.pr_domain =		&inetdomain,
-	.pr_protocol =		IPPROTO_CARP,
-	.pr_flags =		PR_ATOMIC|PR_ADDR,
-	.pr_input =		carp_input,
-	.pr_ctloutput =		rip_ctloutput,
-	.pr_usrreqs =		&rip_usrreqs
-};
-#endif
-
-#ifdef INET6
-extern	struct domain inet6domain;
-static struct protosw in6_carp_protosw = {
-	.pr_type =		SOCK_RAW,
-	.pr_domain =		&inet6domain,
-	.pr_protocol =		IPPROTO_CARP,
-	.pr_flags =		PR_ATOMIC|PR_ADDR,
-	.pr_input =		carp6_input,
-	.pr_ctloutput =		rip6_ctloutput,
-	.pr_usrreqs =		&rip6_usrreqs
-};
-#endif
-
 static void
 carp_mod_cleanup(void)
 {
 
 #ifdef INET
-	if (proto_reg[CARP_INET] == 0) {
-		(void)ipproto_unregister(IPPROTO_CARP);
-		pf_proto_unregister(PF_INET, IPPROTO_CARP, SOCK_RAW);
-		proto_reg[CARP_INET] = -1;
-	}
+	(void)ipproto_unregister(IPPROTO_CARP);
 	carp_iamatch_p = NULL;
 #endif
 #ifdef INET6
-	if (proto_reg[CARP_INET6] == 0) {
-		(void)ip6proto_unregister(IPPROTO_CARP);
-		pf_proto_unregister(PF_INET6, IPPROTO_CARP, SOCK_RAW);
-		proto_reg[CARP_INET6] = -1;
-	}
+	(void)ip6proto_unregister(IPPROTO_CARP);
 	carp_iamatch6_p = NULL;
 	carp_macmatch6_p = NULL;
 #endif
@@ -2256,15 +2217,7 @@ carp_mod_load(void)
 #ifdef INET6
 	carp_iamatch6_p = carp_iamatch6;
 	carp_macmatch6_p = carp_macmatch6;
-	proto_reg[CARP_INET6] = pf_proto_register(PF_INET6,
-	    (struct protosw *)&in6_carp_protosw);
-	if (proto_reg[CARP_INET6]) {
-		printf("carp: error %d attaching to PF_INET6\n",
-		    proto_reg[CARP_INET6]);
-		carp_mod_cleanup();
-		return (proto_reg[CARP_INET6]);
-	}
-	err = ip6proto_register(IPPROTO_CARP);
+	err = ip6proto_register(IPPROTO_CARP, carp6_input, NULL);
 	if (err) {
 		printf("carp: error %d registering with INET6\n", err);
 		carp_mod_cleanup();
@@ -2273,14 +2226,7 @@ carp_mod_load(void)
 #endif
 #ifdef INET
 	carp_iamatch_p = carp_iamatch;
-	proto_reg[CARP_INET] = pf_proto_register(PF_INET, &in_carp_protosw);
-	if (proto_reg[CARP_INET]) {
-		printf("carp: error %d attaching to PF_INET\n",
-		    proto_reg[CARP_INET]);
-		carp_mod_cleanup();
-		return (proto_reg[CARP_INET]);
-	}
-	err = ipproto_register(IPPROTO_CARP);
+	err = ipproto_register(IPPROTO_CARP, carp_input, NULL);
 	if (err) {
 		printf("carp: error %d registering with INET\n", err);
 		carp_mod_cleanup();
diff --git a/sys/netinet/ip_carp.h b/sys/netinet/ip_carp.h
index f8ee38ddd82f..f60085af9d61 100644
--- a/sys/netinet/ip_carp.h
+++ b/sys/netinet/ip_carp.h
@@ -142,8 +142,6 @@ int		carp_ioctl(struct ifreq *, u_long, struct thread *);
 int		carp_attach(struct ifaddr *, int);
 void		carp_detach(struct ifaddr *, bool);
 void		carp_carpdev_state(struct ifnet *);
-int		carp_input(struct mbuf **, int *, int);
-int		carp6_input (struct mbuf **, int *, int);
 int		carp_output (struct ifnet *, struct mbuf *,
 		    const struct sockaddr *);
 int		carp_master(struct ifaddr *);
diff --git a/sys/netinet/ip_divert.c b/sys/netinet/ip_divert.c
index c149a2a2c416..c8a2ad7b4c94 100644
--- a/sys/netinet/ip_divert.c
+++ b/sys/netinet/ip_divert.c
@@ -147,20 +147,6 @@ div_destroy(void *unused __unused)
 }
 VNET_SYSUNINIT(divert, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, div_destroy, NULL);
 
-/*
- * IPPROTO_DIVERT is not in the real IP protocol number space; this
- * function should never be called.  Just in case, drop any packets.
- */
-static int
-div_input(struct mbuf **mp, int *offp, int proto)
-{
-	struct mbuf *m = *mp;
-
-	KMOD_IPSTAT_INC(ips_noproto);
-	m_freem(m);
-	return (IPPROTO_DONE);
-}
-
 static bool
 div_port_match(const struct inpcb *inp, void *v)
 {
@@ -171,9 +157,6 @@ div_port_match(const struct inpcb *inp, void *v)
 
 /*
  * Divert a packet by passing it up to the divert socket at port 'port'.
- *
- * Setup generic address and protocol structures for div_input routine,
- * then pass them along with mbuf chain.
  */
 static void
 divert_packet(struct mbuf *m, bool incoming)
@@ -759,7 +742,6 @@ struct protosw div_protosw = {
 	.pr_type =		SOCK_RAW,
 	.pr_protocol =		IPPROTO_DIVERT,
 	.pr_flags =		PR_ATOMIC|PR_ADDR,
-	.pr_input =		div_input,
 	.pr_usrreqs =		&div_usrreqs
 };
 
@@ -772,8 +754,6 @@ div_modevent(module_t mod, int type, void *unused)
 	case MOD_LOAD:
 		/*
 		 * Protocol will be initialized by pf_proto_register().
-		 * We don't have to register ip_protox because we are not
-		 * a true IP protocol that goes over the wire.
 		 */
 		err = pf_proto_register(PF_INET, &div_protosw);
 		if (err != 0)
diff --git a/sys/netinet/ip_icmp.c b/sys/netinet/ip_icmp.c
index 2e548a539bf7..1cc8d7c09391 100644
--- a/sys/netinet/ip_icmp.c
+++ b/sys/netinet/ip_icmp.c
@@ -77,6 +77,8 @@ __FBSDID("$FreeBSD$");
 #include <security/mac/mac_framework.h>
 #endif /* INET */
 
+extern ipproto_ctlinput_t	*ip_ctlprotox[];
+
 /*
  * ICMP routines: error generation, receive packet processing, and
  * routines to turnaround packets back to the originator, and
@@ -187,8 +189,6 @@ static void	icmp_send(struct mbuf *, struct mbuf *);
 static int	icmp_verify_redirect_gateway(struct sockaddr_in *,
     struct sockaddr_in *, struct sockaddr_in *, u_int);
 
-extern	struct protosw inetsw[];
-
 /*
  * Kernel module interface for updating icmpstat.  The argument is an index
  * into icmpstat treated as an array of u_long.  While this encodes the
@@ -417,7 +417,6 @@ icmp_input(struct mbuf **mp, int *offp, int proto)
 	int hlen = *offp;
 	int icmplen = ntohs(ip->ip_len) - *offp;
 	int i, code;
-	void (*ctlfunc)(int, struct sockaddr *, void *);
 	int fibnum;
 
 	NET_EPOCH_ASSERT();
@@ -573,10 +572,9 @@ icmp_input(struct mbuf **mp, int *offp, int proto)
 		 *   n is at least 8, but might be larger based on
 		 *   ICMP_ADVLENPREF. See its definition in ip_icmp.h.
 		 */
-		ctlfunc = inetsw[ip_protox[icp->icmp_ip.ip_p]].pr_ctlinput;
-		if (ctlfunc)
-			(*ctlfunc)(code, (struct sockaddr *)&icmpsrc,
-				   (void *)&icp->icmp_ip);
+		if (ip_ctlprotox[icp->icmp_ip.ip_p] != NULL)
+			ip_ctlprotox[icp->icmp_ip.ip_p](code,
+			    (struct sockaddr *)&icmpsrc, &icp->icmp_ip);
 		break;
 
 	badcode:
diff --git a/sys/netinet/ip_input.c b/sys/netinet/ip_input.c
index e1029920c95f..7fdabf24b2a7 100644
--- a/sys/netinet/ip_input.c
+++ b/sys/netinet/ip_input.c
@@ -39,6 +39,7 @@ __FBSDID("$FreeBSD$");
 #include "opt_ipsec.h"
 #include "opt_route.h"
 #include "opt_rss.h"
+#include "opt_sctp.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -76,12 +77,17 @@ __FBSDID("$FreeBSD$");
 #include <netinet/in_fib.h>
 #include <netinet/in_pcb.h>
 #include <netinet/ip_var.h>
+#include <netinet/ip_encap.h>
 #include <netinet/ip_fw.h>
 #include <netinet/ip_icmp.h>
+#include <netinet/igmp_var.h>
 #include <netinet/ip_options.h>
 #include <machine/in_cksum.h>
 #include <netinet/ip_carp.h>
 #include <netinet/in_rss.h>
+#ifdef SCTP
+#include <netinet/sctp_var.h>
+#endif
 
 #include <netipsec/ipsec_support.h>
 
@@ -162,9 +168,11 @@ static struct netisr_handler ip_direct_nh = {
 };
 #endif
 
-extern	struct domain inetdomain;
-extern	struct protosw inetsw[];
-u_char	ip_protox[IPPROTO_MAX];
+ipproto_input_t		*ip_protox[IPPROTO_MAX] = {
+			    [0 ... IPPROTO_MAX - 1] = rip_input };
+ipproto_ctlinput_t	*ip_ctlprotox[IPPROTO_MAX] = {
+			    [0 ... IPPROTO_MAX - 1] = rip_ctlinput };
+
 VNET_DEFINE(struct in_ifaddrhead, in_ifaddrhead);  /* first inet address */
 VNET_DEFINE(struct in_ifaddrhashhead *, in_ifaddrhashtbl); /* inet addr hash table  */
 VNET_DEFINE(u_long, in_ifaddrhmask);		/* mask for hash table */
@@ -339,30 +347,26 @@ ip_vnet_init(void *arg __unused)
 VNET_SYSINIT(ip_vnet_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_FOURTH,
     ip_vnet_init, NULL);
 
-
 static void
 ip_init(const void *unused __unused)
 {
-	struct protosw *pr;
-
-	pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW);
-	KASSERT(pr, ("%s: PF_INET not found", __func__));
 
-	/* Initialize the entire ip_protox[] array to IPPROTO_RAW. */
-	for (int i = 0; i < IPPROTO_MAX; i++)
-		ip_protox[i] = pr - inetsw;
 	/*
-	 * Cycle through IP protocols and put them into the appropriate place
-	 * in ip_protox[].
+	 * Register statically compiled protocols, that are unlikely to
+	 * ever become dynamic.
 	 */
-	for (pr = inetdomain.dom_protosw;
-	    pr < inetdomain.dom_protoswNPROTOSW; pr++)
-		if (pr->pr_domain->dom_family == PF_INET &&
-		    pr->pr_protocol && pr->pr_protocol != IPPROTO_RAW) {
-			/* Be careful to only index valid IP protocols. */
-			if (pr->pr_protocol < IPPROTO_MAX)
-				ip_protox[pr->pr_protocol] = pr - inetsw;
-		}
+	IPPROTO_REGISTER(IPPROTO_ICMP, icmp_input, NULL);
+	IPPROTO_REGISTER(IPPROTO_IGMP, igmp_input, NULL);
+	IPPROTO_REGISTER(IPPROTO_RSVP, rsvp_input, NULL);
+	IPPROTO_REGISTER(IPPROTO_IPV4, encap4_input, NULL);
+	IPPROTO_REGISTER(IPPROTO_MOBILE, encap4_input, NULL);
+	IPPROTO_REGISTER(IPPROTO_ETHERIP, encap4_input, NULL);
+	IPPROTO_REGISTER(IPPROTO_GRE, encap4_input, NULL);
+	IPPROTO_REGISTER(IPPROTO_IPV6, encap4_input, NULL);
+	IPPROTO_REGISTER(IPPROTO_PIM, encap4_input, NULL);
+#ifdef SCTP	/* XXX: has a loadable & static version */
+	IPPROTO_REGISTER(IPPROTO_SCTP, sctp_input, sctp_ctlinput);
+#endif
 
 	netisr_register(&ip_nh);
 #ifdef	RSS
@@ -435,8 +439,7 @@ ip_direct_input(struct mbuf *m)
 	}
 #endif /* IPSEC */
 	IPSTAT_INC(ips_delivered);
-	(*inetsw[ip_protox[ip->ip_p]].pr_input)(&m, &hlen, ip->ip_p);
-	return;
+	ip_protox[ip->ip_p](&m, &hlen, ip->ip_p);
 }
 #endif
 
@@ -837,7 +840,7 @@ ours:
 	 */
 	IPSTAT_INC(ips_delivered);
 
-	(*inetsw[ip_protox[ip->ip_p]].pr_input)(&m, &hlen, ip->ip_p);
+	ip_protox[ip->ip_p](&m, &hlen, ip->ip_p);
 	return;
 bad:
 	m_freem(m);
@@ -876,60 +879,36 @@ ip_drain(void)
 	VNET_LIST_RUNLOCK_NOSLEEP();
 }
 
-/*
- * The protocol to be inserted into ip_protox[] must be already registered
- * in inetsw[], either statically or through pf_proto_register().
- */
 int
-ipproto_register(short ipproto)
+ipproto_register(uint8_t proto, ipproto_input_t input, ipproto_ctlinput_t ctl)
 {
-	struct protosw *pr;
 
-	/* Sanity checks. */
-	if (ipproto <= 0 || ipproto >= IPPROTO_MAX)
-		return (EPROTONOSUPPORT);
+	MPASS(proto > 0);
 
 	/*
 	 * The protocol slot must not be occupied by another protocol
-	 * already.  An index pointing to IPPROTO_RAW is unused.
+	 * already.  An index pointing to rip_input() is unused.
 	 */
-	pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW);
-	if (pr == NULL)
-		return (EPFNOSUPPORT);
-	if (ip_protox[ipproto] != pr - inetsw)	/* IPPROTO_RAW */
+	if (ip_protox[proto] == rip_input) {
+		ip_protox[proto] = input;
+		ip_ctlprotox[proto] = ctl;
+		return (0);
+	} else
 		return (EEXIST);
-
-	/* Find the protocol position in inetsw[] and set the index. */
-	for (pr = inetdomain.dom_protosw;
-	     pr < inetdomain.dom_protoswNPROTOSW; pr++) {
-		if (pr->pr_domain->dom_family == PF_INET &&
-		    pr->pr_protocol && pr->pr_protocol == ipproto) {
-			ip_protox[pr->pr_protocol] = pr - inetsw;
-			return (0);
-		}
-	}
-	return (EPROTONOSUPPORT);
 }
 
 int
-ipproto_unregister(short ipproto)
+ipproto_unregister(uint8_t proto)
 {
-	struct protosw *pr;
 
-	/* Sanity checks. */
-	if (ipproto <= 0 || ipproto >= IPPROTO_MAX)
-		return (EPROTONOSUPPORT);
+	MPASS(proto > 0);
 
-	/* Check if the protocol was indeed registered. */
-	pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW);
-	if (pr == NULL)
-		return (EPFNOSUPPORT);
-	if (ip_protox[ipproto] == pr - inetsw)  /* IPPROTO_RAW */
+	if (ip_protox[proto] != rip_input) {
+		ip_protox[proto] = rip_input;
+		ip_ctlprotox[proto] = rip_ctlinput;
+		return (0);
+	} else
 		return (ENOENT);
-
-	/* Reset the protocol slot to IPPROTO_RAW. */
-	ip_protox[ipproto] = pr - inetsw;
-	return (0);
 }
 
 u_char inetctlerrmap[PRC_NCMDS] = {
diff --git a/sys/netinet/ip_var.h b/sys/netinet/ip_var.h
index 92447c519cc3..ce0efcfead1b 100644
--- a/sys/netinet/ip_var.h
+++ b/sys/netinet/ip_var.h
@@ -189,7 +189,6 @@ VNET_DECLARE(int, ipsendredirects);
 #ifdef IPSTEALTH
 VNET_DECLARE(int, ipstealth);			/* stealth forwarding */
 #endif
-extern u_char	ip_protox[];
 VNET_DECLARE(struct socket *, ip_rsvpd);	/* reservation protocol daemon*/
 VNET_DECLARE(struct socket *, ip_mrouter);	/* multicast routing daemon */
 extern int	(*legal_vif_num)(int);
@@ -225,8 +224,6 @@ extern int
 int	ip_output(struct mbuf *,
 	    struct mbuf *, struct route *, int, struct ip_moptions *,
 	    struct inpcb *);
-int	ipproto_register(short);
-int	ipproto_unregister(short);
 struct mbuf *
 	ip_reass(struct mbuf *);
 void	ip_savecontrol(struct inpcb *, struct mbuf **, struct ip *,
@@ -238,6 +235,7 @@ void	rip_ctlinput(int, struct sockaddr *, void *);
 int	rip_input(struct mbuf **, int *, int);
 int	ipip_input(struct mbuf **, int *, int);
 int	rsvp_input(struct mbuf **, int *, int);
+
 int	ip_rsvp_init(struct socket *);
 int	ip_rsvp_done(void);
 extern int	(*ip_rsvp_vif)(struct socket *, struct sockopt *);
diff --git a/sys/netinet/raw_ip.c b/sys/netinet/raw_ip.c
index bda0138107ae..4da408080794 100644
--- a/sys/netinet/raw_ip.c
+++ b/sys/netinet/raw_ip.c
@@ -82,6 +82,8 @@ __FBSDID("$FreeBSD$");
 #include <machine/stdarg.h>
 #include <security/mac/mac_framework.h>
 
+extern ipproto_input_t *ip_protox[];
+
 VNET_DEFINE(int, ip_defttl) = IPDEFTTL;
 SYSCTL_INT(_net_inet_ip, IPCTL_DEFTTL, ttl, CTLFLAG_VNET | CTLFLAG_RW,
     &VNET_NAME(ip_defttl), 0,
@@ -392,8 +394,7 @@ rip_input(struct mbuf **mp, int *offp, int proto)
 		}
 		appended += rip_append(inp, ctx.ip, m, &ripsrc);
 	}
-	if (appended == 0 &&
-	    inetsw[ip_protox[ctx.ip->ip_p]].pr_input == rip_input) {
+	if (appended == 0 && ip_protox[ctx.ip->ip_p] == rip_input) {
 		IPSTAT_INC(ips_noproto);
 		IPSTAT_DEC(ips_delivered);
 		icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_PROTOCOL, 0, 0);
diff --git a/sys/netinet/sctp_module.c b/sys/netinet/sctp_module.c
index faa7fca49d28..ea49b74343e3 100644
--- a/sys/netinet/sctp_module.c
+++ b/sys/netinet/sctp_module.c
@@ -60,8 +60,6 @@ struct protosw sctp_stream_protosw = {
 	.pr_domain =		&inetdomain,
 	.pr_protocol =		IPPROTO_SCTP,
 	.pr_flags =		PR_CONNREQUIRED|PR_WANTRCVD,
-	.pr_input =		sctp_input,
-	.pr_ctlinput =		sctp_ctlinput,
 	.pr_ctloutput =		sctp_ctloutput,
 	.pr_drain =		sctp_drain,
 	.pr_usrreqs =		&sctp_usrreqs,
@@ -72,8 +70,6 @@ struct protosw sctp_seqpacket_protosw = {
 	.pr_domain =		&inetdomain,
 	.pr_protocol =		IPPROTO_SCTP,
 	.pr_flags =		PR_WANTRCVD,
-	.pr_input =		sctp_input,
-	.pr_ctlinput =		sctp_ctlinput,
 	.pr_ctloutput =		sctp_ctloutput,
 	.pr_drain =		sctp_drain,
 	.pr_usrreqs =		&sctp_usrreqs,
@@ -88,8 +84,6 @@ struct protosw sctp6_stream_protosw = {
 	.pr_domain =		&inet6domain,
 	.pr_protocol =		IPPROTO_SCTP,
 	.pr_flags =		PR_CONNREQUIRED|PR_WANTRCVD,
-	.pr_input =		sctp6_input,
-	.pr_ctlinput =		sctp6_ctlinput,
 	.pr_ctloutput =		sctp_ctloutput,
 	.pr_drain =		sctp_drain,
 	.pr_usrreqs =		&sctp6_usrreqs,
@@ -100,8 +94,6 @@ struct protosw sctp6_seqpacket_protosw = {
 	.pr_domain =		&inet6domain,
 	.pr_protocol =		IPPROTO_SCTP,
 	.pr_flags =		PR_WANTRCVD,
-	.pr_input =		sctp6_input,
-	.pr_ctlinput =		sctp6_ctlinput,
 	.pr_ctloutput =		sctp_ctloutput,
 #ifndef INET	/* Do not call initialization and drain routines twice. */
 	.pr_drain =		sctp_drain,
@@ -122,7 +114,7 @@ sctp_module_load(void)
 	error = pf_proto_register(PF_INET, &sctp_seqpacket_protosw);
 	if (error != 0)
 		return (error);
-	error = ipproto_register(IPPROTO_SCTP);
+	error = ipproto_register(IPPROTO_SCTP, sctp_input, sctp_ctlinput);
 	if (error != 0)
 		return (error);
 #endif
@@ -133,7 +125,7 @@ sctp_module_load(void)
 	error = pf_proto_register(PF_INET6, &sctp6_seqpacket_protosw);
 	if (error != 0)
 		return (error);
-	error = ip6proto_register(IPPROTO_SCTP);
+	error = ip6proto_register(IPPROTO_SCTP, sctp6_input, sctp6_ctlinput);
 	if (error != 0)
 		return (error);
 #endif
diff --git a/sys/netinet/sctp_var.h b/sys/netinet/sctp_var.h
index 0f2d3d9c61d6..16beaa7f8b12 100644
--- a/sys/netinet/sctp_var.h
+++ b/sys/netinet/sctp_var.h
@@ -324,10 +324,8 @@ void sctp_close(struct socket *so);
 int sctp_disconnect(struct socket *so);
 void sctp_ctlinput(int, struct sockaddr *, void *);
 int sctp_ctloutput(struct socket *, struct sockopt *);
-#ifdef INET
 void sctp_input_with_port(struct mbuf *, int, uint16_t);
 int sctp_input(struct mbuf **, int *, int);
-#endif
 void sctp_pathmtu_adjustment(struct sctp_tcb *, uint32_t, bool);
 void sctp_drain(void);
 void
diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c
index 8a1c862f9249..70d1d2fb942a 100644
--- a/sys/netinet/tcp_subr.c
+++ b/sys/netinet/tcp_subr.c
@@ -62,9 +62,6 @@ __FBSDID("$FreeBSD$");
 #include <sys/malloc.h>
 #include <sys/refcount.h>
 #include <sys/mbuf.h>
-#ifdef INET6
-#include <sys/domain.h>
-#endif
 #include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/sdt.h>
@@ -1564,6 +1561,13 @@ tcp_init(void *arg __unused)
 		    hashsize);
 	}
 	tcp_tcbhashsize = hashsize;
+
+#ifdef INET
+	IPPROTO_REGISTER(IPPROTO_TCP, tcp_input, tcp_ctlinput);
+#endif
+#ifdef INET6
+	IP6PROTO_REGISTER(IPPROTO_TCP, tcp6_input, tcp6_ctlinput);
+#endif
 }
 SYSINIT(tcp_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, tcp_init, NULL);
 
diff --git a/sys/netinet/udp_usrreq.c b/sys/netinet/udp_usrreq.c
index bff82b9718e1..13fe863ecb75 100644
--- a/sys/netinet/udp_usrreq.c
+++ b/sys/netinet/udp_usrreq.c
@@ -175,7 +175,7 @@ INPCBSTORAGE_DEFINE(udplitecbstor, "udpliteinp", "udplite_inpcb", "udplite",
     "udplitehash");
 
 static void
-udp_init(void *arg __unused)
+udp_vnet_init(void *arg __unused)
 {
 
 	/*
@@ -195,7 +195,8 @@ udp_init(void *arg __unused)
 	in_pcbinfo_init(&V_ulitecbinfo, &udplitecbstor, UDBHASHSIZE,
 	    UDBHASHSIZE);
 }
-VNET_SYSINIT(udp_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, udp_init, NULL);
+VNET_SYSINIT(udp_vnet_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_FOURTH,
+    udp_vnet_init, NULL);
 
 /*
  * Kernel module interface for updating udpstat.  The argument is an index
@@ -482,7 +483,7 @@ udp_multi_input(struct mbuf *m, int proto, struct sockaddr_in *udp_in)
 	return (IPPROTO_DONE);
 }
 
-int
+static int
 udp_input(struct mbuf **mp, int *offp, int proto)
 {
 	struct ip *ip;
@@ -798,14 +799,15 @@ udp_common_ctlinput(int cmd, struct sockaddr *sa, void *vip,
 		in_pcbnotifyall(pcbinfo, faddr, inetctlerrmap[cmd],
 		    udp_notify);
 }
-void
+
+static void
 udp_ctlinput(int cmd, struct sockaddr *sa, void *vip)
 {
 
 	return (udp_common_ctlinput(cmd, sa, vip, &V_udbinfo));
 }
 
-void
+static void
 udplite_ctlinput(int cmd, struct sockaddr *sa, void *vip)
 {
 
@@ -1768,4 +1770,13 @@ struct pr_usrreqs udp_usrreqs = {
 	.pru_sosetlabel =	in_pcbsosetlabel,
 	.pru_close =		udp_close,
 };
+
+static void
+udp_init(void *arg __unused)
+{
+
+	IPPROTO_REGISTER(IPPROTO_UDP, udp_input, udp_ctlinput);
+	IPPROTO_REGISTER(IPPROTO_UDPLITE, udp_input, udplite_ctlinput);
+}
+SYSINIT(udp_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, udp_init, NULL);
 #endif /* INET */
diff --git a/sys/netinet/udp_var.h b/sys/netinet/udp_var.h
index 9db5494ab82b..8da2592054af 100644
--- a/sys/netinet/udp_var.h
+++ b/sys/netinet/udp_var.h
@@ -170,10 +170,7 @@ udp_get_inpcbinfo(int protocol)
 int		udp_newudpcb(struct inpcb *);
 void		udp_discardcb(struct udpcb *);
 
-void		udp_ctlinput(int, struct sockaddr *, void *);
-void		udplite_ctlinput(int, struct sockaddr *, void *);
 int		udp_ctloutput(struct socket *, struct sockopt *);
-int		udp_input(struct mbuf **, int *, int);
 void		udplite_input(struct mbuf *, int);
 struct inpcb	*udp_notify(struct inpcb *inp, int errno);
 int		udp_shutdown(struct socket *so);
diff --git a/sys/netinet6/icmp6.c b/sys/netinet6/icmp6.c
index 132b62260ab7..691943fe5d56 100644
--- a/sys/netinet6/icmp6.c
+++ b/sys/netinet6/icmp6.c
@@ -114,7 +114,7 @@ __FBSDID("$FreeBSD$");
 #include <netinet6/nd6.h>
 #include <netinet6/send.h>
 
-extern struct domain inet6domain;
+extern ipproto_ctlinput_t	*ip6_ctlprotox[];
 
 VNET_PCPUSTAT_DEFINE(struct icmp6stat, icmp6stat);
 VNET_PCPUSTAT_SYSINIT(icmp6stat);
@@ -922,7 +922,6 @@ icmp6_notify_error(struct mbuf **mp, int off, int icmp6len, int code)
 
 	/* Detect the upper level protocol */
 	{
-		void (*ctlfunc)(int, struct sockaddr *, void *);
 		u_int8_t nxt = eip6->ip6_nxt;
 		int eoff = off + sizeof(struct icmp6_hdr) +
 		    sizeof(struct ip6_hdr);
@@ -1087,12 +1086,9 @@ icmp6_notify_error(struct mbuf **mp, int off, int icmp6len, int code)
 			icmp6_mtudisc_update(&ip6cp, 1);	/*XXX*/
 		}
 
-		ctlfunc = (void (*)(int, struct sockaddr *, void *))
-		    (inet6sw[ip6_protox[nxt]].pr_ctlinput);
-		if (ctlfunc) {
-			(void) (*ctlfunc)(code, (struct sockaddr *)&icmp6dst,
+		if (ip6_ctlprotox[nxt] != NULL)
+			ip6_ctlprotox[nxt](code, (struct sockaddr *)&icmp6dst,
 			    &ip6cp);
-		}
 	}
 	*mp = m;
 	return (0);
diff --git a/sys/netinet6/in6.h b/sys/netinet6/in6.h
index 34682da04898..4c20532256b2 100644
--- a/sys/netinet6/in6.h
+++ b/sys/netinet6/in6.h
@@ -681,7 +681,6 @@ char	*ip6_sprintf(char *, const struct in6_addr *);
 struct	in6_ifaddr *in6_ifawithifp(struct ifnet *, struct in6_addr *);
 extern void in6_if_up(struct ifnet *);
 struct sockaddr;
-extern	u_char	ip6_protox[];
 
 void	in6_sin6_2_sin(struct sockaddr_in *sin,
 			    struct sockaddr_in6 *sin6);
diff --git a/sys/netinet6/in6_proto.c b/sys/netinet6/in6_proto.c
index a34b7aa9cc7f..2b70e38fe193 100644
--- a/sys/netinet6/in6_proto.c
+++ b/sys/netinet6/in6_proto.c
@@ -155,8 +155,6 @@ struct protosw inet6sw[] = {
 	.pr_domain =		&inet6domain,
 	.pr_protocol =		IPPROTO_UDP,
 	.pr_flags =		PR_ATOMIC|PR_ADDR|PR_CAPATTACH,
-	.pr_input =		udp6_input,
-	.pr_ctlinput =		udp6_ctlinput,
 	.pr_ctloutput =		ip6_ctloutput,
 	.pr_usrreqs =		&udp6_usrreqs,
 },
@@ -166,8 +164,6 @@ struct protosw inet6sw[] = {
 	.pr_protocol =		IPPROTO_TCP,
 	.pr_flags =		PR_CONNREQUIRED|PR_IMPLOPCL|PR_WANTRCVD|
 				    PR_LISTEN|PR_CAPATTACH,
-	.pr_input =		tcp6_input,
-	.pr_ctlinput =		tcp6_ctlinput,
 	.pr_ctloutput =		tcp_ctloutput,
 #ifndef INET	/* don't call initialization, timeout, and drain routines twice */
 	.pr_slowtimo =		tcp_slowtimo,
@@ -181,8 +177,6 @@ struct protosw inet6sw[] = {
 	.pr_domain =		&inet6domain,
 	.pr_protocol =		IPPROTO_SCTP,
 	.pr_flags =		PR_WANTRCVD,
-	.pr_input =		sctp6_input,
*** 435 LINES SKIPPED ***