git: ce69e37369aa - main - Revert "sockets: retire sorflush()"

From: Gleb Smirnoff <glebius_at_FreeBSD.org>
Date: Sat, 03 Feb 2024 21:10:54 UTC
The branch main has been updated by glebius:

URL: https://cgit.FreeBSD.org/src/commit/?id=ce69e37369aa44a96113387253aadc5e49246928

commit ce69e37369aa44a96113387253aadc5e49246928
Author:     Gleb Smirnoff <glebius@FreeBSD.org>
AuthorDate: 2024-02-03 21:08:41 +0000
Commit:     Gleb Smirnoff <glebius@FreeBSD.org>
CommitDate: 2024-02-03 21:08:41 +0000

    Revert "sockets: retire sorflush()"
    
    Provide a comment in sorflush() explaining why the socket I/O sx(9) lock
    is actually important.
    
    This reverts commit 507f87a799cf0811ce30f0ae7f10ba19b2fd3db3.
---
 sys/kern/uipc_socket.c    | 42 +++++++++++++++++++++++++++++++++++++++---
 sys/net/rtsock.c          |  6 ++----
 sys/netinet/raw_ip.c      |  6 ++----
 sys/netinet/sctp_usrreq.c |  6 +++++-
 sys/netinet/tcp_usrreq.c  |  6 ++----
 sys/netinet/udp_usrreq.c  |  6 ++----
 sys/netinet6/raw_ip6.c    |  6 ++----
 sys/sys/socketvar.h       |  1 +
 8 files changed, 55 insertions(+), 24 deletions(-)

diff --git a/sys/kern/uipc_socket.c b/sys/kern/uipc_socket.c
index 5e5e7d2cd058..b647766cf258 100644
--- a/sys/kern/uipc_socket.c
+++ b/sys/kern/uipc_socket.c
@@ -95,9 +95,9 @@
  *
  * NOTE: With regard to VNETs the general rule is that callers do not set
  * curvnet. Exceptions to this rule include soabort(), sodisconnect(),
- * sofree() (and with that sorele(), sotryfree()), as well as sonewconn(),
- * which are usually called from a pre-set VNET context. sopoll() currently
- * does not need a VNET context to be set.
+ * sofree(), sorele(), sonewconn() and sorflush(), which are usually called
+ * from a pre-set VNET context.  sopoll() currently does not need a VNET
+ * context to be set.
  */
 
 #include <sys/cdefs.h>
@@ -2964,6 +2964,42 @@ soshutdown(struct socket *so, enum shutdown_how how)
 	return (error);
 }
 
+/*
+ * Used by several pr_shutdown implementations that use generic socket buffers.
+ */
+void
+sorflush(struct socket *so)
+{
+	int error;
+
+	VNET_SO_ASSERT(so);
+
+	/*
+	 * Dislodge threads currently blocked in receive and wait to acquire
+	 * a lock against other simultaneous readers before clearing the
+	 * socket buffer.  Don't let our acquisition be interrupted by a signal
+	 * despite any existing socket disposition on interruptible waiting.
+	 *
+	 * The SOCK_IO_RECV_LOCK() is important here, as there are some
+	 * pr_soreceive methods that read the top of the socket buffer without
+	 * acquiring the socket buffer mutex, assuming that the top of the
+	 * buffer exclusively belongs to the read(2) syscall.  This is handy
+	 * when performing MSG_PEEK.
+	 */
+	socantrcvmore(so);
+
+	error = SOCK_IO_RECV_LOCK(so, SBL_WAIT | SBL_NOINTR);
+	if (error != 0) {
+		KASSERT(SOLISTENING(so),
+		    ("%s: soiolock(%p) failed", __func__, so));
+		return;
+	}
+
+	sbrelease(so, SO_RCV);
+	SOCK_IO_RECV_UNLOCK(so);
+
+}
+
 /*
  * Wrapper for Socket established helper hook.
  * Parameters: socket, context of the hook point, hook id.
diff --git a/sys/net/rtsock.c b/sys/net/rtsock.c
index e4183232700e..e0411ed0c1d0 100644
--- a/sys/net/rtsock.c
+++ b/sys/net/rtsock.c
@@ -457,12 +457,10 @@ rts_shutdown(struct socket *so, enum shutdown_how how)
 	 */
 	switch (how) {
 	case SHUT_RD:
-		socantrcvmore(so);
-		sbrelease(so, SO_RCV);
+		sorflush(so);
 		break;
 	case SHUT_RDWR:
-		socantrcvmore(so);
-		sbrelease(so, SO_RCV);
+		sorflush(so);
 		/* FALLTHROUGH */
 	case SHUT_WR:
 		socantsendmore(so);
diff --git a/sys/netinet/raw_ip.c b/sys/netinet/raw_ip.c
index 004aaea01bfa..a6bef1c7e275 100644
--- a/sys/netinet/raw_ip.c
+++ b/sys/netinet/raw_ip.c
@@ -994,12 +994,10 @@ rip_shutdown(struct socket *so, enum shutdown_how how)
 
 	switch (how) {
 	case SHUT_RD:
-		socantrcvmore(so);
-		sbrelease(so, SO_RCV);
+		sorflush(so);
 		break;
 	case SHUT_RDWR:
-		socantrcvmore(so);
-		sbrelease(so, SO_RCV);
+		sorflush(so);
 		/* FALLTHROUGH */
 	case SHUT_WR:
 		socantsendmore(so);
diff --git a/sys/netinet/sctp_usrreq.c b/sys/netinet/sctp_usrreq.c
index b847271a7bd3..70fe021be766 100644
--- a/sys/netinet/sctp_usrreq.c
+++ b/sys/netinet/sctp_usrreq.c
@@ -868,7 +868,11 @@ sctp_shutdown(struct socket *so, enum shutdown_how how)
 			SCTP_TCB_UNLOCK(stcb);
 		}
 		SCTP_INP_WUNLOCK(inp);
-		socantrcvmore(so);
+		/*
+		 * XXXGL: does SCTP need sorflush()? This is what old
+		 * soshutdown() used to do for all kinds of sockets.
+		 */
+		sorflush(so);
 		if (how == SHUT_RD)
 			break;
 		/* FALLTHROUGH */
diff --git a/sys/netinet/tcp_usrreq.c b/sys/netinet/tcp_usrreq.c
index 7c8e3d7e72db..ccd6a6149dae 100644
--- a/sys/netinet/tcp_usrreq.c
+++ b/sys/netinet/tcp_usrreq.c
@@ -824,12 +824,10 @@ tcp_usr_shutdown(struct socket *so, enum shutdown_how how)
 
 	switch (how) {
 	case SHUT_RD:
-		socantrcvmore(so);
-		sbrelease(so, SO_RCV);
+		sorflush(so);
 		break;
 	case SHUT_RDWR:
-		socantrcvmore(so);
-		sbrelease(so, SO_RCV);
+		sorflush(so);
 		/* FALLTHROUGH */
 	case SHUT_WR:
 		/*
diff --git a/sys/netinet/udp_usrreq.c b/sys/netinet/udp_usrreq.c
index 24bc3403b0f7..9dad79e95b04 100644
--- a/sys/netinet/udp_usrreq.c
+++ b/sys/netinet/udp_usrreq.c
@@ -1722,12 +1722,10 @@ udp_shutdown(struct socket *so, enum shutdown_how how)
 
 	switch (how) {
 	case SHUT_RD:
-		socantrcvmore(so);
-		sbrelease(so, SO_RCV);
+		sorflush(so);
 		break;
 	case SHUT_RDWR:
-		socantrcvmore(so);
-		sbrelease(so, SO_RCV);
+		sorflush(so);
 		/* FALLTHROUGH */
 	case SHUT_WR:
 		socantsendmore(so);
diff --git a/sys/netinet6/raw_ip6.c b/sys/netinet6/raw_ip6.c
index 5b31a84f31eb..3264de331817 100644
--- a/sys/netinet6/raw_ip6.c
+++ b/sys/netinet6/raw_ip6.c
@@ -839,12 +839,10 @@ rip6_shutdown(struct socket *so, enum shutdown_how how)
 
 	switch (how) {
 	case SHUT_RD:
-		socantrcvmore(so);
-		sbrelease(so, SO_RCV);
+		sorflush(so);
 		break;
 	case SHUT_RDWR:
-		socantrcvmore(so);
-		sbrelease(so, SO_RCV);
+		sorflush(so);
 		/* FALLTHROUGH */
 	case SHUT_WR:
 		socantsendmore(so);
diff --git a/sys/sys/socketvar.h b/sys/sys/socketvar.h
index 030087cbc17c..cba7dd655aa0 100644
--- a/sys/sys/socketvar.h
+++ b/sys/sys/socketvar.h
@@ -501,6 +501,7 @@ int	soreceive_generic(struct socket *so, struct sockaddr **paddr,
 void	sorele_locked(struct socket *so);
 void	sodealloc(struct socket *);
 int	soreserve(struct socket *so, u_long sndcc, u_long rcvcc);
+void	sorflush(struct socket *so);
 int	sosend(struct socket *so, struct sockaddr *addr, struct uio *uio,
 	    struct mbuf *top, struct mbuf *control, int flags,
 	    struct thread *td);