git: 7a2c93b86ef7 - main - sockets: provide sousrsend() that does socket specific error handling

From: Gleb Smirnoff <glebius_at_FreeBSD.org>
Date: Wed, 14 Dec 2022 18:03:09 UTC
The branch main has been updated by glebius:

URL: https://cgit.FreeBSD.org/src/commit/?id=7a2c93b86ef75390a60a4b4d6e3911b36221dfbe

commit 7a2c93b86ef75390a60a4b4d6e3911b36221dfbe
Author:     Gleb Smirnoff <glebius@FreeBSD.org>
AuthorDate: 2022-12-14 18:02:44 +0000
Commit:     Gleb Smirnoff <glebius@FreeBSD.org>
CommitDate: 2022-12-14 18:02:44 +0000

    sockets: provide sousrsend() that does socket specific error handling
    
    Sockets have special handling for EPIPE on a write, that was spread out
    into several places.  Treating transient errors is also special - if
    protocol is atomic, then we should ignore any changes to uio_resid, a
    transient error means the write had completely failed (see d2b3a0ed31e).
    
    - Provide sousrsend() that expects a valid uio, and leave sosend() for
      kernel consumers only.  Do all special error handling right here.
    - In dofilewrite() don't do special handling of error for DTYPE_SOCKET.
    - For send(2), write(2) and aio_write(2) call into sousrsend() and remove
      error handling for kern_sendit(), soo_write() and soaio_process_job().
    
    PR:                     265087
    Reported by:            rz-rpi03 at h-ka.de
    Reviewed by:            markj
    Differential revision:  https://reviews.freebsd.org/D35863
---
 sys/kern/sys_generic.c   | 10 +++++++---
 sys/kern/sys_socket.c    | 17 +++--------------
 sys/kern/uipc_socket.c   | 49 ++++++++++++++++++++++++++++++++++++++++++++++++
 sys/kern/uipc_syscalls.c | 16 +---------------
 sys/sys/socketvar.h      |  2 ++
 5 files changed, 62 insertions(+), 32 deletions(-)

diff --git a/sys/kern/sys_generic.c b/sys/kern/sys_generic.c
index b181a818e359..cf4084904b79 100644
--- a/sys/kern/sys_generic.c
+++ b/sys/kern/sys_generic.c
@@ -562,12 +562,16 @@ dofilewrite(struct thread *td, int fd, struct file *fp, struct uio *auio,
 		ktruio = cloneuio(auio);
 #endif
 	cnt = auio->uio_resid;
-	if ((error = fo_write(fp, auio, td->td_ucred, flags, td))) {
+	error = fo_write(fp, auio, td->td_ucred, flags, td);
+	/*
+	 * Socket layer is responsible for special error handling,
+	 * see sousrsend().
+	 */
+	if (error != 0 && fp->f_type != DTYPE_SOCKET) {
 		if (auio->uio_resid != cnt && (error == ERESTART ||
 		    error == EINTR || error == EWOULDBLOCK))
 			error = 0;
-		/* Socket layer is responsible for issuing SIGPIPE. */
-		if (fp->f_type != DTYPE_SOCKET && error == EPIPE) {
+		if (error == EPIPE) {
 			PROC_LOCK(td->td_proc);
 			tdsignal(td, SIGPIPE);
 			PROC_UNLOCK(td->td_proc);
diff --git a/sys/kern/sys_socket.c b/sys/kern/sys_socket.c
index 3455cfee564c..5cfb366c150b 100644
--- a/sys/kern/sys_socket.c
+++ b/sys/kern/sys_socket.c
@@ -145,13 +145,7 @@ soo_write(struct file *fp, struct uio *uio, struct ucred *active_cred,
 	if (error)
 		return (error);
 #endif
-	error = sosend(so, 0, uio, 0, 0, 0, uio->uio_td);
-	if (error == EPIPE && (so->so_options & SO_NOSIGPIPE) == 0) {
-		PROC_LOCK(uio->uio_td->td_proc);
-		tdsignal(uio->uio_td, SIGPIPE);
-		PROC_UNLOCK(uio->uio_td->td_proc);
-	}
-	return (error);
+	return (sousrsend(so, NULL, uio, NULL, 0, NULL));
 }
 
 static int
@@ -646,15 +640,10 @@ retry:
 		error = mac_socket_check_send(fp->f_cred, so);
 		if (error == 0)
 #endif
-			error = sosend(so, NULL, job->uiop, NULL, NULL, flags,
-			    td);
+			error = sousrsend(so, NULL, job->uiop, NULL, flags,
+			    job->userproc);
 		if (td->td_ru.ru_msgsnd != ru_before)
 			job->msgsnd = 1;
-		if (error == EPIPE && (so->so_options & SO_NOSIGPIPE) == 0) {
-			PROC_LOCK(job->userproc);
-			kern_psignal(job->userproc, SIGPIPE);
-			PROC_UNLOCK(job->userproc);
-		}
 	}
 
 	done += cnt - job->uiop->uio_resid;
diff --git a/sys/kern/uipc_socket.c b/sys/kern/uipc_socket.c
index 6c09b14c4f72..b7e43d496d1d 100644
--- a/sys/kern/uipc_socket.c
+++ b/sys/kern/uipc_socket.c
@@ -1822,6 +1822,14 @@ out:
 	return (error);
 }
 
+/*
+ * Send to a socket from a kernel thread.
+ *
+ * XXXGL: in almost all cases uio is NULL and the mbuf is supplied.
+ * Exception is nfs/bootp_subr.c.  It is arguable whether the VNET context needs
+ * to be set at all.  This function should just boil down to a static inline
+ * calling the protocol method.
+ */
 int
 sosend(struct socket *so, struct sockaddr *addr, struct uio *uio,
     struct mbuf *top, struct mbuf *control, int flags, struct thread *td)
@@ -1835,6 +1843,47 @@ sosend(struct socket *so, struct sockaddr *addr, struct uio *uio,
 	return (error);
 }
 
+/*
+ * send(2), write(2) or aio_write(2) on a socket.
+ */
+int
+sousrsend(struct socket *so, struct sockaddr *addr, struct uio *uio,
+    struct mbuf *control, int flags, struct proc *userproc)
+{
+	struct thread *td;
+	ssize_t len;
+	int error;
+
+	td = uio->uio_td;
+	len = uio->uio_resid;
+	CURVNET_SET(so->so_vnet);
+	error = so->so_proto->pr_sosend(so, addr, uio, NULL, control, flags,
+	    td);
+	CURVNET_RESTORE();
+	if (error != 0) {
+		if (uio->uio_resid != len &&
+		    (so->so_proto->pr_flags & PR_ATOMIC) == 0 &&
+		    (error == ERESTART || error == EINTR ||
+		    error == EWOULDBLOCK))
+			error = 0;
+		/* Generation of SIGPIPE can be controlled per socket. */
+		if (error == EPIPE && (so->so_options & SO_NOSIGPIPE) == 0 &&
+		    (flags & MSG_NOSIGNAL) == 0) {
+			if (userproc != NULL) {
+				/* aio(4) job */
+				PROC_LOCK(userproc);
+				kern_psignal(userproc, SIGPIPE);
+				PROC_UNLOCK(userproc);
+			} else {
+				PROC_LOCK(td->td_proc);
+				tdsignal(td, SIGPIPE);
+				PROC_UNLOCK(td->td_proc);
+			}
+		}
+	}
+	return (error);
+}
+
 /*
  * The part of soreceive() that implements reading non-inline out-of-band
  * data from a socket.  For more complete comments, see soreceive(), from
diff --git a/sys/kern/uipc_syscalls.c b/sys/kern/uipc_syscalls.c
index 9c5c9719c204..cbf45e53f96f 100644
--- a/sys/kern/uipc_syscalls.c
+++ b/sys/kern/uipc_syscalls.c
@@ -798,21 +798,7 @@ kern_sendit(struct thread *td, int s, struct msghdr *mp, int flags,
 		ktruio = cloneuio(&auio);
 #endif
 	len = auio.uio_resid;
-	error = sosend(so, mp->msg_name, &auio, 0, control, flags, td);
-	if (error != 0) {
-		if (auio.uio_resid != len &&
-		    (so->so_proto->pr_flags & PR_ATOMIC) == 0 &&
-		    (error == ERESTART || error == EINTR ||
-		    error == EWOULDBLOCK))
-			error = 0;
-		/* Generation of SIGPIPE can be controlled per socket */
-		if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE) &&
-		    !(flags & MSG_NOSIGNAL)) {
-			PROC_LOCK(td->td_proc);
-			tdsignal(td, SIGPIPE);
-			PROC_UNLOCK(td->td_proc);
-		}
-	}
+	error = sousrsend(so, mp->msg_name, &auio, control, flags, NULL);
 	if (error == 0)
 		td->td_retval[0] = len - auio.uio_resid;
 #ifdef KTRACE
diff --git a/sys/sys/socketvar.h b/sys/sys/socketvar.h
index e46eb0520e47..1e24a162b34b 100644
--- a/sys/sys/socketvar.h
+++ b/sys/sys/socketvar.h
@@ -506,6 +506,8 @@ void	sorflush(struct socket *so);
 int	sosend(struct socket *so, struct sockaddr *addr, struct uio *uio,
 	    struct mbuf *top, struct mbuf *control, int flags,
 	    struct thread *td);
+int	sousrsend(struct socket *so, struct sockaddr *addr, struct uio *uio,
+	    struct mbuf *control, int flags, struct proc *);
 int	sosend_dgram(struct socket *so, struct sockaddr *addr,
 	    struct uio *uio, struct mbuf *top, struct mbuf *control,
 	    int flags, struct thread *td);