git: bd4a39cc93d9 - main - socket: Properly interlock when transitioning to a listening socket

Mark Johnston markj at FreeBSD.org
Tue Sep 7 21:12:37 UTC 2021


The branch main has been updated by markj:

URL: https://cgit.FreeBSD.org/src/commit/?id=bd4a39cc93d9faf8b5c000855d5aa90df592dd49

commit bd4a39cc93d9faf8b5c000855d5aa90df592dd49
Author:     Mark Johnston <markj at FreeBSD.org>
AuthorDate: 2021-09-07 18:49:53 +0000
Commit:     Mark Johnston <markj at FreeBSD.org>
CommitDate: 2021-09-07 21:11:43 +0000

    socket: Properly interlock when transitioning to a listening socket
    
    Currently, most protocols implement pru_listen with something like the
    following:
    
            SOCK_LOCK(so);
            error = solisten_proto_check(so);
            if (error) {
                    SOCK_UNLOCK(so);
                    return (error);
            }
            solisten_proto(so);
            SOCK_UNLOCK(so);
    
    solisten_proto_check() fails if the socket is connected or connecting.
    However, the socket lock is not used during I/O, so this pattern is
    racy.
    
    The change modifies solisten_proto_check() to additionally acquire
    socket buffer locks, and the calling thread holds them until
    solisten_proto() or solisten_proto_abort() is called.  Now that the
    socket buffer locks are preserved across a listen(2), this change allows
    socket I/O paths to properly interlock with listen(2).
    
    This fixes a large number of syzbot reports, only one is listed below
    and the rest will be dup'ed to it.
    
    Reported by:    syzbot+9fece8a63c0e27273821 at syzkaller.appspotmail.com
    Reviewed by:    tuexen, gallatin
    MFC after:      1 month
    Sponsored by:   The FreeBSD Foundation
    Differential Revision:  https://reviews.freebsd.org/D31659
---
 sys/kern/uipc_socket.c                             | 58 ++++++++++++++++++----
 sys/kern/uipc_usrreq.c                             | 28 +++++++----
 sys/netgraph/bluetooth/socket/ng_btsocket_l2cap.c  |  3 ++
 sys/netgraph/bluetooth/socket/ng_btsocket_rfcomm.c |  5 +-
 sys/netinet/sctp_usrreq.c                          | 41 ++++++++-------
 sys/netinet/tcp_usrreq.c                           | 40 ++++++++++++---
 sys/sys/socketvar.h                                |  1 +
 7 files changed, 130 insertions(+), 46 deletions(-)

diff --git a/sys/kern/uipc_socket.c b/sys/kern/uipc_socket.c
index 5932c28aca2f..b26d0591cbdd 100644
--- a/sys/kern/uipc_socket.c
+++ b/sys/kern/uipc_socket.c
@@ -418,12 +418,14 @@ soalloc(struct vnet *vnet)
 	 * a feature to change class of an existing lock, so we use DUPOK.
 	 */
 	mtx_init(&so->so_lock, "socket", NULL, MTX_DEF | MTX_DUPOK);
+	so->so_snd.sb_mtx = &so->so_snd_mtx;
+	so->so_rcv.sb_mtx = &so->so_rcv_mtx;
 	SOCKBUF_LOCK_INIT(&so->so_snd, "so_snd");
 	SOCKBUF_LOCK_INIT(&so->so_rcv, "so_rcv");
 	so->so_rcv.sb_sel = &so->so_rdsel;
 	so->so_snd.sb_sel = &so->so_wrsel;
-	sx_init(&so->so_snd.sb_sx, "so_snd_sx");
-	sx_init(&so->so_rcv.sb_sx, "so_rcv_sx");
+	sx_init(&so->so_snd_sx, "so_snd_sx");
+	sx_init(&so->so_rcv_sx, "so_rcv_sx");
 	TAILQ_INIT(&so->so_snd.sb_aiojobq);
 	TAILQ_INIT(&so->so_rcv.sb_aiojobq);
 	TASK_INIT(&so->so_snd.sb_aiotask, 0, soaio_snd, so);
@@ -487,8 +489,8 @@ sodealloc(struct socket *so)
 		if (so->so_snd.sb_hiwat)
 			(void)chgsbsize(so->so_cred->cr_uidinfo,
 			    &so->so_snd.sb_hiwat, 0, RLIM_INFINITY);
-		sx_destroy(&so->so_snd.sb_sx);
-		sx_destroy(&so->so_rcv.sb_sx);
+		sx_destroy(&so->so_snd_sx);
+		sx_destroy(&so->so_rcv_sx);
 		SOCKBUF_LOCK_DESTROY(&so->so_snd);
 		SOCKBUF_LOCK_DESTROY(&so->so_rcv);
 	}
@@ -899,18 +901,48 @@ solisten(struct socket *so, int backlog, struct thread *td)
 	return (error);
 }
 
+/*
+ * Prepare for a call to solisten_proto().  Acquire all socket buffer locks in
+ * order to interlock with socket I/O.
+ */
 int
 solisten_proto_check(struct socket *so)
 {
-
 	SOCK_LOCK_ASSERT(so);
 
-	if (so->so_state & (SS_ISCONNECTED | SS_ISCONNECTING |
-	    SS_ISDISCONNECTING))
+	if ((so->so_state & (SS_ISCONNECTED | SS_ISCONNECTING |
+	    SS_ISDISCONNECTING)) != 0)
 		return (EINVAL);
+
+	/*
+	 * Sleeping is not permitted here, so simply fail if userspace is
+	 * attempting to transmit or receive on the socket.  This kind of
+	 * transient failure is not ideal, but it should occur only if userspace
+	 * is misusing the socket interfaces.
+	 */
+	if (!sx_try_xlock(&so->so_snd_sx))
+		return (EAGAIN);
+	if (!sx_try_xlock(&so->so_rcv_sx)) {
+		sx_xunlock(&so->so_snd_sx);
+		return (EAGAIN);
+	}
+	mtx_lock(&so->so_snd_mtx);
+	mtx_lock(&so->so_rcv_mtx);
 	return (0);
 }
 
+/*
+ * Undo the setup done by solisten_proto_check().
+ */
+void
+solisten_proto_abort(struct socket *so)
+{
+	mtx_unlock(&so->so_snd_mtx);
+	mtx_unlock(&so->so_rcv_mtx);
+	sx_xunlock(&so->so_snd_sx);
+	sx_xunlock(&so->so_rcv_sx);
+}
+
 void
 solisten_proto(struct socket *so, int backlog)
 {
@@ -920,6 +952,9 @@ solisten_proto(struct socket *so, int backlog)
 	sbintime_t sbrcv_timeo, sbsnd_timeo;
 
 	SOCK_LOCK_ASSERT(so);
+	KASSERT((so->so_state & (SS_ISCONNECTED | SS_ISCONNECTING |
+	    SS_ISDISCONNECTING)) == 0,
+	    ("%s: bad socket state %p", __func__, so));
 
 	if (SOLISTENING(so))
 		goto listening;
@@ -938,10 +973,6 @@ solisten_proto(struct socket *so, int backlog)
 
 	sbdestroy(&so->so_snd, so);
 	sbdestroy(&so->so_rcv, so);
-	sx_destroy(&so->so_snd.sb_sx);
-	sx_destroy(&so->so_rcv.sb_sx);
-	SOCKBUF_LOCK_DESTROY(&so->so_snd);
-	SOCKBUF_LOCK_DESTROY(&so->so_rcv);
 
 #ifdef INVARIANTS
 	bzero(&so->so_rcv,
@@ -974,6 +1005,11 @@ listening:
 	if (backlog < 0 || backlog > somaxconn)
 		backlog = somaxconn;
 	so->sol_qlimit = backlog;
+
+	mtx_unlock(&so->so_snd_mtx);
+	mtx_unlock(&so->so_rcv_mtx);
+	sx_xunlock(&so->so_snd_sx);
+	sx_xunlock(&so->so_rcv_sx);
 }
 
 /*
diff --git a/sys/kern/uipc_usrreq.c b/sys/kern/uipc_usrreq.c
index c736f35b5ee0..5add930bfa8e 100644
--- a/sys/kern/uipc_usrreq.c
+++ b/sys/kern/uipc_usrreq.c
@@ -890,13 +890,17 @@ uipc_listen(struct socket *so, int backlog, struct thread *td)
 	if (so->so_type != SOCK_STREAM && so->so_type != SOCK_SEQPACKET)
 		return (EOPNOTSUPP);
 
+	/*
+	 * Synchronize with concurrent connection attempts.
+	 */
+	error = 0;
 	unp = sotounpcb(so);
-	KASSERT(unp != NULL, ("uipc_listen: unp == NULL"));
-
 	UNP_PCB_LOCK(unp);
-	if (unp->unp_vnode == NULL) {
-		/* Already connected or not bound to an address. */
-		error = unp->unp_conn != NULL ? EINVAL : EDESTADDRREQ;
+	if (unp->unp_conn != NULL || (unp->unp_flags & UNP_CONNECTING) != 0)
+		error = EINVAL;
+	else if (unp->unp_vnode == NULL)
+		error = EDESTADDRREQ;
+	if (error != 0) {
 		UNP_PCB_UNLOCK(unp);
 		return (error);
 	}
@@ -1523,6 +1527,7 @@ unp_connectat(int fd, struct socket *so, struct sockaddr *nam,
 	bcopy(soun->sun_path, buf, len);
 	buf[len] = 0;
 
+	error = 0;
 	unp = sotounpcb(so);
 	UNP_PCB_LOCK(unp);
 	for (;;) {
@@ -1538,13 +1543,16 @@ unp_connectat(int fd, struct socket *so, struct sockaddr *nam,
 		 * lock the peer socket, to ensure that unp_conn cannot
 		 * transition between two valid sockets while locks are dropped.
 		 */
-		if (unp->unp_conn != NULL) {
-			UNP_PCB_UNLOCK(unp);
-			return (EISCONN);
+		if (SOLISTENING(so))
+			error = EOPNOTSUPP;
+		else if (unp->unp_conn != NULL)
+			error = EISCONN;
+		else if ((unp->unp_flags & UNP_CONNECTING) != 0) {
+			error = EALREADY;
 		}
-		if ((unp->unp_flags & UNP_CONNECTING) != 0) {
+		if (error != 0) {
 			UNP_PCB_UNLOCK(unp);
-			return (EALREADY);
+			return (error);
 		}
 		if (unp->unp_pairbusy > 0) {
 			unp->unp_flags |= UNP_WAITING;
diff --git a/sys/netgraph/bluetooth/socket/ng_btsocket_l2cap.c b/sys/netgraph/bluetooth/socket/ng_btsocket_l2cap.c
index cd620fe3aef9..18d7a89b7a2f 100644
--- a/sys/netgraph/bluetooth/socket/ng_btsocket_l2cap.c
+++ b/sys/netgraph/bluetooth/socket/ng_btsocket_l2cap.c
@@ -2506,14 +2506,17 @@ ng_btsocket_l2cap_listen(struct socket *so, int backlog, struct thread *td)
 	if (error != 0)
 		goto out;
 	if (pcb == NULL) {
+		solisten_proto_abort(so);
 		error = EINVAL;
 		goto out;
 	}
 	if (ng_btsocket_l2cap_node == NULL) {
+		solisten_proto_abort(so);
 		error = EINVAL;
 		goto out;
 	}
 	if (pcb->psm == 0) {
+		solisten_proto_abort(so);
 		error = EADDRNOTAVAIL;
 		goto out;
 	}
diff --git a/sys/netgraph/bluetooth/socket/ng_btsocket_rfcomm.c b/sys/netgraph/bluetooth/socket/ng_btsocket_rfcomm.c
index c0704bce55fa..5b7bbeb45407 100644
--- a/sys/netgraph/bluetooth/socket/ng_btsocket_rfcomm.c
+++ b/sys/netgraph/bluetooth/socket/ng_btsocket_rfcomm.c
@@ -894,6 +894,7 @@ ng_btsocket_rfcomm_listen(struct socket *so, int backlog, struct thread *td)
 		 * from socreate()
 		 */
 		if (l2so == NULL) {
+			solisten_proto_abort(so);
 			error = socreate_error;
 			goto out;
 		}
@@ -907,8 +908,10 @@ ng_btsocket_rfcomm_listen(struct socket *so, int backlog, struct thread *td)
 		 */
 		error = ng_btsocket_rfcomm_session_create(&s, l2so,
 					NG_HCI_BDADDR_ANY, NULL, td);
-		if (error != 0)
+		if (error != 0) {
+			solisten_proto_abort(so);
 			goto out;
+		}
 		l2so = NULL;
 	}
 	SOCK_LOCK(so);
diff --git a/sys/netinet/sctp_usrreq.c b/sys/netinet/sctp_usrreq.c
index 62d6996ab60d..76d73a11304a 100644
--- a/sys/netinet/sctp_usrreq.c
+++ b/sys/netinet/sctp_usrreq.c
@@ -7201,7 +7201,8 @@ sctp_listen(struct socket *so, int backlog, struct thread *p)
 			}
 		}
 	}
-	SCTP_INP_RLOCK(inp);
+	SCTP_INP_INFO_WLOCK();
+	SCTP_INP_WLOCK(inp);
 #ifdef SCTP_LOCK_LOGGING
 	if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LOCK_LOGGING_ENABLE) {
 		sctp_log_lock(inp, (struct sctp_tcb *)NULL, SCTP_LOG_LOCK_SOCK);
@@ -7209,10 +7210,9 @@ sctp_listen(struct socket *so, int backlog, struct thread *p)
 #endif
 	SOCK_LOCK(so);
 	error = solisten_proto_check(so);
-	SOCK_UNLOCK(so);
 	if (error) {
-		SCTP_INP_RUNLOCK(inp);
-		return (error);
+		SOCK_UNLOCK(so);
+		goto out;
 	}
 	if ((sctp_is_feature_on(inp, SCTP_PCB_FLAGS_PORTREUSE)) &&
 	    (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) {
@@ -7223,39 +7223,44 @@ sctp_listen(struct socket *so, int backlog, struct thread *p)
 		 * move the guy that was listener to the TCP Pool.
 		 */
 		if (sctp_swap_inpcb_for_listen(inp)) {
-			SCTP_INP_RUNLOCK(inp);
-			SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EADDRINUSE);
-			return (EADDRINUSE);
+			SOCK_UNLOCK(so);
+			solisten_proto_abort(so);
+			error = EADDRINUSE;
+			SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error);
+			goto out;
 		}
 	}
 
 	if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) &&
 	    (inp->sctp_flags & SCTP_PCB_FLAGS_CONNECTED)) {
-		/* We are already connected AND the TCP model */
-		SCTP_INP_RUNLOCK(inp);
-		SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EADDRINUSE);
-		return (EADDRINUSE);
+		SOCK_UNLOCK(so);
+		solisten_proto_abort(so);
+		error = EADDRINUSE;
+		SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error);
+		goto out;
 	}
-	SCTP_INP_RUNLOCK(inp);
 	if (inp->sctp_flags & SCTP_PCB_FLAGS_UNBOUND) {
-		/* We must do a bind. */
-		if ((error = sctp_inpcb_bind(so, NULL, NULL, p))) {
+		if ((error = sctp_inpcb_bind_locked(inp, NULL, NULL, p))) {
+			SOCK_UNLOCK(so);
+			solisten_proto_abort(so);
 			/* bind error, probably perm */
-			return (error);
+			goto out;
 		}
 	}
-	SCTP_INP_WLOCK(inp);
 	if ((inp->sctp_flags & SCTP_PCB_FLAGS_UDPTYPE) == 0) {
-		SOCK_LOCK(so);
 		solisten_proto(so, backlog);
-		SOCK_UNLOCK(so);
+	} else {
+		solisten_proto_abort(so);
 	}
+	SOCK_UNLOCK(so);
 	if (backlog > 0) {
 		inp->sctp_flags |= SCTP_PCB_FLAGS_ACCEPTING;
 	} else {
 		inp->sctp_flags &= ~SCTP_PCB_FLAGS_ACCEPTING;
 	}
+out:
 	SCTP_INP_WUNLOCK(inp);
+	SCTP_INP_INFO_WUNLOCK();
 	return (error);
 }
 
diff --git a/sys/netinet/tcp_usrreq.c b/sys/netinet/tcp_usrreq.c
index bcd7d18d9d62..3a1608cc106a 100644
--- a/sys/netinet/tcp_usrreq.c
+++ b/sys/netinet/tcp_usrreq.c
@@ -457,10 +457,15 @@ tcp_usr_listen(struct socket *so, int backlog, struct thread *td)
 	TCPDEBUG1();
 	SOCK_LOCK(so);
 	error = solisten_proto_check(so);
-	INP_HASH_WLOCK(&V_tcbinfo);
-	if (error == 0 && inp->inp_lport == 0)
-		error = in_pcbbind(inp, (struct sockaddr *)0, td->td_ucred);
-	INP_HASH_WUNLOCK(&V_tcbinfo);
+	if (error != 0) {
+		SOCK_UNLOCK(so);
+		goto out;
+	}
+	if (inp->inp_lport == 0) {
+		INP_HASH_WLOCK(&V_tcbinfo);
+		error = in_pcbbind(inp, NULL, td->td_ucred);
+		INP_HASH_WUNLOCK(&V_tcbinfo);
+	}
 	if (error == 0) {
 		tcp_state_change(tp, TCPS_LISTEN);
 		solisten_proto(so, backlog);
@@ -468,6 +473,8 @@ tcp_usr_listen(struct socket *so, int backlog, struct thread *td)
 		if ((so->so_options & SO_NO_OFFLOAD) == 0)
 			tcp_offload_listen_start(tp);
 #endif
+	} else {
+		solisten_proto_abort(so);
 	}
 	SOCK_UNLOCK(so);
 
@@ -504,12 +511,16 @@ tcp6_usr_listen(struct socket *so, int backlog, struct thread *td)
 	TCPDEBUG1();
 	SOCK_LOCK(so);
 	error = solisten_proto_check(so);
+	if (error != 0) {
+		SOCK_UNLOCK(so);
+		goto out;
+	}
 	INP_HASH_WLOCK(&V_tcbinfo);
-	if (error == 0 && inp->inp_lport == 0) {
+	if (inp->inp_lport == 0) {
 		inp->inp_vflag &= ~INP_IPV4;
 		if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0)
 			inp->inp_vflag |= INP_IPV4;
-		error = in6_pcbbind(inp, (struct sockaddr *)0, td->td_ucred);
+		error = in6_pcbbind(inp, NULL, td->td_ucred);
 	}
 	INP_HASH_WUNLOCK(&V_tcbinfo);
 	if (error == 0) {
@@ -519,6 +530,8 @@ tcp6_usr_listen(struct socket *so, int backlog, struct thread *td)
 		if ((so->so_options & SO_NO_OFFLOAD) == 0)
 			tcp_offload_listen_start(tp);
 #endif
+	} else {
+		solisten_proto_abort(so);
 	}
 	SOCK_UNLOCK(so);
 
@@ -581,6 +594,10 @@ tcp_usr_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
 		error = ECONNREFUSED;
 		goto out;
 	}
+	if (SOLISTENING(so)) {
+		error = EOPNOTSUPP;
+		goto out;
+	}
 	tp = intotcpcb(inp);
 	TCPDEBUG1();
 	NET_EPOCH_ENTER(et);
@@ -643,6 +660,10 @@ tcp6_usr_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
 		error = ECONNREFUSED;
 		goto out;
 	}
+	if (SOLISTENING(so)) {
+		error = EINVAL;
+		goto out;
+	}
 	tp = intotcpcb(inp);
 	TCPDEBUG1();
 #ifdef INET
@@ -1021,6 +1042,10 @@ tcp_usr_send(struct socket *so, int flags, struct mbuf *m,
 
 	TCPDEBUG1();
 	if (nam != NULL && tp->t_state < TCPS_SYN_SENT) {
+		if (tp->t_state == TCPS_LISTEN) {
+			error = EINVAL;
+			goto out;
+		}
 		switch (nam->sa_family) {
 #ifdef INET
 		case AF_INET:
@@ -1119,6 +1144,9 @@ tcp_usr_send(struct socket *so, int flags, struct mbuf *m,
 		sbappendstream(&so->so_snd, m, flags);
 		m = NULL;
 		if (nam && tp->t_state < TCPS_SYN_SENT) {
+			KASSERT(tp->t_state == TCPS_CLOSED,
+			    ("%s: tp %p is listening", __func__, tp));
+
 			/*
 			 * Do implied connect if not yet connected,
 			 * initialize window to default value, and
diff --git a/sys/sys/socketvar.h b/sys/sys/socketvar.h
index 69e182dfa9a5..69dd1706e366 100644
--- a/sys/sys/socketvar.h
+++ b/sys/sys/socketvar.h
@@ -453,6 +453,7 @@ void	sofree(struct socket *so);
 void	sohasoutofband(struct socket *so);
 int	solisten(struct socket *so, int backlog, struct thread *td);
 void	solisten_proto(struct socket *so, int backlog);
+void	solisten_proto_abort(struct socket *so);
 int	solisten_proto_check(struct socket *so);
 int	solisten_dequeue(struct socket *, struct socket **, int);
 struct socket *


More information about the dev-commits-src-all mailing list