git: ea7be1293b48 - main - keysock: do not use raw socket code

From: Gleb Smirnoff <glebius_at_FreeBSD.org>
Date: Thu, 11 Aug 2022 16:20:13 UTC
The branch main has been updated by glebius:

URL: https://cgit.FreeBSD.org/src/commit/?id=ea7be1293b48385f27b97c5f112e4cad93cbd33b

commit ea7be1293b48385f27b97c5f112e4cad93cbd33b
Author:     Gleb Smirnoff <glebius@FreeBSD.org>
AuthorDate: 2022-08-11 16:19:36 +0000
Commit:     Gleb Smirnoff <glebius@FreeBSD.org>
CommitDate: 2022-08-11 16:19:36 +0000

    keysock: do not use raw socket code
    
    This makes the key socket implementation self-contained and removes one
    of the last dependencies on the raw socket code and the pr_output method.
    
    There are very subtle API-visible changes:
    - a key socket now returns EOPNOTSUPP instead of EINVAL on syscalls
      that are not supposed to be called on a key socket.
    - key socket buffer sizes are now controlled by net.key sysctls instead
      of net.raw.  The latter were not documented anywhere, and even an Internet
      search doesn't find any references or discussions related to them.
    
    Reviewed by:            melifaro
    Differential revision:  https://reviews.freebsd.org/D36123
---
 sys/netipsec/key.c     |   9 +-
 sys/netipsec/keysock.c | 218 +++++++++++++------------------------------------
 sys/netipsec/keysock.h |   5 +-
 3 files changed, 63 insertions(+), 169 deletions(-)

diff --git a/sys/netipsec/key.c b/sys/netipsec/key.c
index b1a99cfbb261..a7d691f09bb1 100644
--- a/sys/netipsec/key.c
+++ b/sys/netipsec/key.c
@@ -66,7 +66,6 @@
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/vnet.h>
-#include <net/raw_cb.h>
 
 #include <netinet/in.h>
 #include <netinet/in_systm.h>
@@ -468,7 +467,6 @@ SYSCTL_INT(_net_inet6_ipsec6, IPSECCTL_DEBUG, debug,
     "Enable IPsec debugging output when set.");
 #endif
 
-SYSCTL_DECL(_net_key);
 SYSCTL_INT(_net_key, KEYCTL_DEBUG_LEVEL,	debug,
 	CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(key_debug_level), 0, "");
 
@@ -516,8 +514,7 @@ SYSCTL_INT(_net_key, KEYCTL_AH_KEYMIN, ah_keymin,
 SYSCTL_INT(_net_key, KEYCTL_PREFERED_OLDSA, preferred_oldsa,
 	CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(key_preferred_oldsa), 0, "");
 
-static SYSCTL_NODE(_net_key, OID_AUTO, spdcache,
-    CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
+SYSCTL_NODE(_net_key, OID_AUTO, spdcache, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
     "SPD cache");
 
 SYSCTL_UINT(_net_key_spdcache, OID_AUTO, maxentries,
@@ -7157,7 +7154,7 @@ key_register(struct socket *so, struct mbuf *m, const struct sadb_msghdr *mhp)
 	}
 
 	newreg->so = so;
-	((struct keycb *)sotorawcb(so))->kp_registered++;
+	((struct keycb *)(so->so_pcb))->kp_registered++;
 
 	/* add regnode to regtree. */
 	LIST_INSERT_HEAD(&V_regtree[mhp->msg->sadb_msg_satype], newreg, chain);
@@ -7717,7 +7714,7 @@ key_promisc(struct socket *so, struct mbuf *m, const struct sadb_msghdr *mhp)
 		/* enable/disable promisc mode */
 		struct keycb *kp;
 
-		if ((kp = (struct keycb *)sotorawcb(so)) == NULL)
+		if ((kp = so->so_pcb) == NULL)
 			return key_senderror(so, m, EINVAL);
 		mhp->msg->sadb_msg_errno = 0;
 		switch (mhp->msg->sadb_msg_satype) {
diff --git a/sys/netipsec/keysock.c b/sys/netipsec/keysock.c
index 1ee1f619b860..148a67a1775e 100644
--- a/sys/netipsec/keysock.c
+++ b/sys/netipsec/keysock.c
@@ -55,7 +55,6 @@
 
 #include <net/if.h>
 #include <net/vnet.h>
-#include <net/raw_cb.h>
 
 #include <netinet/in.h>
 
@@ -67,16 +66,18 @@
 
 #include <machine/stdarg.h>
 
-struct key_cb {
-	int key_count;
-	int any_count;
-};
-VNET_DEFINE_STATIC(struct key_cb, key_cb) = {};
-#define	V_key_cb		VNET(key_cb)
+static struct mtx keysock_mtx;
+MTX_SYSINIT(keysock, &keysock_mtx, "key socket pcb list", MTX_DEF);
+
+#define	KEYSOCK_LOCK()		mtx_lock(&keysock_mtx)
+#define	KEYSOCK_UNLOCK()	mtx_unlock(&keysock_mtx)
+
+VNET_DEFINE_STATIC(LIST_HEAD(, keycb), keycb_list);
+#define	V_keycb_list		VNET(keycb_list)
 
 static struct sockaddr key_src = { 2, PF_KEY, };
 
-static int key_sendup0(struct rawcb *, struct mbuf *, int);
+static int key_sendup0(struct keycb *, struct mbuf *, int);
 
 VNET_PCPUSTAT_DEFINE(struct pfkeystat, pfkeystat);
 VNET_PCPUSTAT_SYSINIT(pfkeystat);
@@ -85,17 +86,19 @@ VNET_PCPUSTAT_SYSINIT(pfkeystat);
 VNET_PCPUSTAT_SYSUNINIT(pfkeystat);
 #endif /* VIMAGE */
 
-/*
- * key_output()
- */
-int
-key_output(struct mbuf *m, struct socket *so, ...)
+static int
+key_send(struct socket *so, int flags, struct mbuf *m,
+    struct sockaddr *nam, struct mbuf *control, struct thread *td)
 {
 	struct sadb_msg *msg;
 	int len, error = 0;
 
-	if (m == NULL)
-		panic("%s: NULL pointer was passed.\n", __func__);
+	if ((flags & PRUS_OOB) || control != NULL) {
+		m_freem(m);
+		if (control != NULL)
+			m_freem(control);
+		return (EOPNOTSUPP);
+	}
 
 	PFKEYSTAT_INC(out_total);
 	PFKEYSTAT_ADD(out_bytes, m->m_pkthdr.len);
@@ -139,7 +142,7 @@ end:
  * send message to the socket.
  */
 static int
-key_sendup0(struct rawcb *rp, struct mbuf *m, int promisc)
+key_sendup0(struct keycb *kp, struct mbuf *m, int promisc)
 {
 
 	if (promisc) {
@@ -160,15 +163,14 @@ key_sendup0(struct rawcb *rp, struct mbuf *m, int promisc)
 		PFKEYSTAT_INC(in_msgtype[pmsg->sadb_msg_type]);
 	}
 
-	if (!sbappendaddr(&rp->rcb_socket->so_rcv, (struct sockaddr *)&key_src,
-	    m, NULL)) {
+	if (!sbappendaddr(&kp->kp_socket->so_rcv, &key_src, m, NULL)) {
 		PFKEYSTAT_INC(in_nomem);
 		m_freem(m);
-		soroverflow(rp->rcb_socket);
+		soroverflow(kp->kp_socket);
 		return ENOBUFS;
 	}
 
-	sorwakeup(rp->rcb_socket);
+	sorwakeup(kp->kp_socket);
 	return 0;
 }
 
@@ -178,7 +180,6 @@ key_sendup_mbuf(struct socket *so, struct mbuf *m, int target)
 {
 	struct mbuf *n;
 	struct keycb *kp;
-	struct rawcb *rp;
 	int error = 0;
 
 	KASSERT(m != NULL, ("NULL mbuf pointer was passed."));
@@ -201,37 +202,23 @@ key_sendup_mbuf(struct socket *so, struct mbuf *m, int target)
 		msg = mtod(m, struct sadb_msg *);
 		PFKEYSTAT_INC(in_msgtype[msg->sadb_msg_type]);
 	}
-	mtx_lock(&rawcb_mtx);
-	if (V_key_cb.any_count == 0) {
-		mtx_unlock(&rawcb_mtx);
-		m_freem(m);
-		return (0);
-	}
-	LIST_FOREACH(rp, &V_rawcb_list, list)
-	{
-		if (rp->rcb_proto.sp_family != PF_KEY)
-			continue;
-		if (rp->rcb_proto.sp_protocol
-		 && rp->rcb_proto.sp_protocol != PF_KEY_V2) {
-			continue;
-		}
-
+	KEYSOCK_LOCK();
+	LIST_FOREACH(kp, &V_keycb_list, kp_next) {
 		/*
 		 * If you are in promiscuous mode, and when you get broadcasted
 		 * reply, you'll get two PF_KEY messages.
 		 * (based on pf_key@inner.net message on 14 Oct 1998)
 		 */
-		kp = (struct keycb *)rp;
 		if (kp->kp_promisc) {
 			n = m_copym(m, 0, M_COPYALL, M_NOWAIT);
 			if (n != NULL)
-				key_sendup0(rp, n, 1);
+				key_sendup0(kp, n, 1);
 			else
 				PFKEYSTAT_INC(in_nomem);
 		}
 
 		/* the exact target will be processed later */
-		if (so && sotorawcb(so) == rp)
+		if (so != NULL && so->so_pcb == kp)
 			continue;
 
 		if (target == KEY_SENDUP_ONE || (
@@ -246,36 +233,29 @@ key_sendup_mbuf(struct socket *so, struct mbuf *m, int target)
 			continue;
 		}
 
-		if (key_sendup0(rp, n, 0) == 0)
+		if (key_sendup0(kp, n, 0) == 0)
 			PFKEYSTAT_INC(in_msgtarget[target]);
 	}
 
 	if (so)	{ /* KEY_SENDUP_ONE */
-		error = key_sendup0(sotorawcb(so), m, 0);
+		error = key_sendup0(so->so_pcb, m, 0);
 		if (error == 0)
 			PFKEYSTAT_INC(in_msgtarget[KEY_SENDUP_ONE]);
 	} else {
 		error = 0;
 		m_freem(m);
 	}
-	mtx_unlock(&rawcb_mtx);
+	KEYSOCK_UNLOCK();
 	return (error);
 }
 
-/*
- * key_abort()
- * derived from net/rtsock.c:rts_abort()
- */
-static void
-key_abort(struct socket *so)
-{
-	raw_usrreqs.pru_abort(so);
-}
+static u_long key_sendspace = 8192;
+SYSCTL_ULONG(_net_key, OID_AUTO, sendspace, CTLFLAG_RW, &key_sendspace, 0,
+    "Default key socket send space");
+static u_long key_recvspace = 8192;
+SYSCTL_ULONG(_net_key, OID_AUTO, recvspace, CTLFLAG_RW, &key_recvspace, 0,
+    "Default key socket receive space");
 
-/*
- * key_attach()
- * derived from net/rtsock.c:rts_attach()
- */
 static int
 key_attach(struct socket *so, int proto, struct thread *td)
 {
@@ -290,143 +270,59 @@ key_attach(struct socket *so, int proto, struct thread *td)
 			return error;
 	}
 
-	/* XXX */
-	kp = malloc(sizeof *kp, M_PCB, M_WAITOK | M_ZERO); 
-	if (kp == NULL)
-		return ENOBUFS;
-
-	so->so_pcb = (caddr_t)kp;
-	error = raw_attach(so, proto);
-	kp = (struct keycb *)sotorawcb(so);
-	if (error) {
-		free(kp, M_PCB);
-		so->so_pcb = (caddr_t) 0;
-		return error;
-	}
+	error = soreserve(so, key_sendspace, key_recvspace);
+	if (error)
+		return (error);
 
+	kp = malloc(sizeof(*kp), M_PCB, M_WAITOK);
+	kp->kp_socket = so;
 	kp->kp_promisc = kp->kp_registered = 0;
 
-	if (kp->kp_raw.rcb_proto.sp_protocol == PF_KEY) /* XXX: AF_KEY */
-		V_key_cb.key_count++;
-	V_key_cb.any_count++;
-	soisconnected(so);
+	so->so_pcb = kp;
 	so->so_options |= SO_USELOOPBACK;
 
-	return 0;
-}
+	KEYSOCK_LOCK();
+	LIST_INSERT_HEAD(&V_keycb_list, kp, kp_next);
+	KEYSOCK_UNLOCK();
+	soisconnected(so);
 
-/*
- * key_bind()
- * derived from net/rtsock.c:rts_bind()
- */
-static int
-key_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
-{
-	return EINVAL;
+	return (0);
 }
 
-/*
- * key_close()
- * derived from net/rtsock.c:rts_close().
- */
 static void
 key_close(struct socket *so)
 {
 
-	raw_usrreqs.pru_close(so);
+	soisdisconnected(so);
 }
 
-/*
- * key_connect()
- * derived from net/rtsock.c:rts_connect()
- */
-static int
-key_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
-{
-	return EINVAL;
-}
-
-/*
- * key_detach()
- * derived from net/rtsock.c:rts_detach()
- */
 static void
 key_detach(struct socket *so)
 {
-	struct keycb *kp = (struct keycb *)sotorawcb(so);
-
-	KASSERT(kp != NULL, ("key_detach: kp == NULL"));
-	if (kp->kp_raw.rcb_proto.sp_protocol
-	    == PF_KEY) /* XXX: AF_KEY */
-		V_key_cb.key_count--;
-	V_key_cb.any_count--;
+	struct keycb *kp = so->so_pcb;
 
 	key_freereg(so);
-	raw_usrreqs.pru_detach(so);
-}
-
-/*
- * key_disconnect()
- * derived from net/rtsock.c:key_disconnect()
- */
-static int
-key_disconnect(struct socket *so)
-{
-	return(raw_usrreqs.pru_disconnect(so));
-}
-
-/*
- * key_peeraddr()
- * derived from net/rtsock.c:rts_peeraddr()
- */
-static int
-key_peeraddr(struct socket *so, struct sockaddr **nam)
-{
-	return(raw_usrreqs.pru_peeraddr(so, nam));
+	KEYSOCK_LOCK();
+	LIST_REMOVE(kp, kp_next);
+	KEYSOCK_UNLOCK();
+	free(kp, M_PCB);
+	so->so_pcb = NULL;
 }
 
-/*
- * key_send()
- * derived from net/rtsock.c:rts_send()
- */
-static int
-key_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
-	 struct mbuf *control, struct thread *td)
-{
-	return(raw_usrreqs.pru_send(so, flags, m, nam, control, td));
-}
-
-/*
- * key_shutdown()
- * derived from net/rtsock.c:rts_shutdown()
- */
 static int
 key_shutdown(struct socket *so)
 {
-	return(raw_usrreqs.pru_shutdown(so));
-}
 
-/*
- * key_sockaddr()
- * derived from net/rtsock.c:rts_sockaddr()
- */
-static int
-key_sockaddr(struct socket *so, struct sockaddr **nam)
-{
-	return(raw_usrreqs.pru_sockaddr(so, nam));
+	socantsendmore(so);
+	return (0);
 }
 
 struct pr_usrreqs key_usrreqs = {
-	.pru_abort =		key_abort,
+	.pru_abort =		key_close,
 	.pru_attach =		key_attach,
-	.pru_bind =		key_bind,
-	.pru_connect =		key_connect,
 	.pru_detach =		key_detach,
-	.pru_disconnect =	key_disconnect,
-	.pru_peeraddr =		key_peeraddr,
 	.pru_send =		key_send,
 	.pru_shutdown =		key_shutdown,
-	.pru_sockaddr =		key_sockaddr,
 	.pru_close =		key_close,
 };
 
@@ -446,8 +342,6 @@ struct protosw keysw[] = {
 	.pr_domain =		&keydomain,
 	.pr_protocol =		PF_KEY_V2,
 	.pr_flags =		PR_ATOMIC|PR_ADDR,
-	.pr_output =		key_output,
-	.pr_ctlinput =		raw_ctlinput,
 	.pr_usrreqs =		&key_usrreqs
 }
 };
diff --git a/sys/netipsec/keysock.h b/sys/netipsec/keysock.h
index 484c65361e5e..8f5f0148c595 100644
--- a/sys/netipsec/keysock.h
+++ b/sys/netipsec/keysock.h
@@ -67,8 +67,11 @@ struct pfkeystat {
 #ifdef _KERNEL
 #include <sys/counter.h>
 
+SYSCTL_DECL(_net_key);
+
 struct keycb {
-	struct rawcb kp_raw;	/* rawcb */
+	LIST_ENTRY(keycb) kp_next;
+	struct socket *kp_socket;
 	int kp_promisc;		/* promiscuous mode */
 	int kp_registered;	/* registered socket */
 };