git: 664077e69e8f - stable/12 - socket: Implement SO_RERROR

Kevin Bowling kbowling at FreeBSD.org
Wed Aug 11 02:34:25 UTC 2021


The branch stable/12 has been updated by kbowling (ports committer):

URL: https://cgit.FreeBSD.org/src/commit/?id=664077e69e8f300cd0218e3c700ee235df12fcff

commit 664077e69e8f300cd0218e3c700ee235df12fcff
Author:     Roy Marples <roy at marples.name>
AuthorDate: 2021-07-28 15:46:59 +0000
Commit:     Kevin Bowling <kbowling at FreeBSD.org>
CommitDate: 2021-08-11 01:59:36 +0000

    socket: Implement SO_RERROR
    
    SO_RERROR indicates that receive buffer overflows should be handled as
    errors. Historically receive buffer overflows have been ignored and
    programs could not tell if they missed messages or messages had been
    truncated because of overflows. Since programs historically do not
    expect to get receive overflow errors, this behavior is not the
    default.
    
    This is really really important for programs that use route(4) to keep
    in sync with the system. If we lose a message then we need to reload
    the full system state, otherwise the behaviour from that point is
    undefined and can lead to chasing bogus bug reports.
    
    Reviewed by:    philip (network), kbowling (transport), gbe (manpages)
    MFC after:      2 weeks
    Differential Revision:  https://reviews.freebsd.org/D26652
    
    (cherry picked from commit 7045b1603bdf054145dd958a4acc17b410fb62a0)
---
 lib/libc/sys/getsockopt.2                          | 10 +++++++-
 sbin/route/route.c                                 | 13 +++++++++-
 sys/kern/uipc_sockbuf.c                            | 24 +++++++++++++++++
 sys/kern/uipc_socket.c                             | 30 ++++++++++++++++------
 sys/kern/uipc_usrreq.c                             |  2 +-
 sys/net/raw_usrreq.c                               | 11 ++++----
 .../bluetooth/socket/ng_btsocket_hci_raw.c         |  1 +
 sys/netgraph/ng_socket.c                           |  2 +-
 sys/netinet/ip_divert.c                            |  2 +-
 sys/netinet/ip_mroute.c                            |  2 +-
 sys/netinet/raw_ip.c                               |  3 +--
 sys/netinet/udp_usrreq.c                           |  2 +-
 sys/netinet6/icmp6.c                               |  6 ++---
 sys/netinet6/ip6_input.c                           |  1 +
 sys/netinet6/ip6_mroute.c                          |  3 ++-
 sys/netinet6/raw_ip6.c                             |  2 ++
 sys/netinet6/send.c                                |  2 +-
 sys/netinet6/udp6_usrreq.c                         |  2 +-
 sys/netipsec/keysock.c                             | 10 ++++----
 sys/sys/socket.h                                   |  1 +
 sys/sys/socketvar.h                                |  6 ++++-
 21 files changed, 100 insertions(+), 35 deletions(-)

diff --git a/lib/libc/sys/getsockopt.2 b/lib/libc/sys/getsockopt.2
index dcd39e07efb8..b27f663f01bd 100644
--- a/lib/libc/sys/getsockopt.2
+++ b/lib/libc/sys/getsockopt.2
@@ -28,7 +28,7 @@
 .\"     @(#)getsockopt.2	8.4 (Berkeley) 5/2/95
 .\" $FreeBSD$
 .\"
-.Dd June 03, 2020
+.Dd February 8, 2021
 .Dt GETSOCKOPT 2
 .Os
 .Sh NAME
@@ -177,6 +177,7 @@ for the socket
 .It Dv SO_PROTOCOL Ta "get the protocol number for the socket (get only)"
 .It Dv SO_PROTOTYPE Ta "SunOS alias for the Linux SO_PROTOCOL (get only)"
 .It Dv SO_ERROR Ta "get and clear error on the socket (get only)"
+.It Dv SO_RERROR Ta "enables receive error reporting"
 .It Dv SO_SETFIB Ta "set the associated FIB (routing table) for the socket (set only)"
 .El
 .Pp
@@ -513,6 +514,13 @@ returns any pending error on the socket and clears
 the error status.
 It may be used to check for asynchronous errors on connected
 datagram sockets or for other asynchronous errors.
+.Dv SO_RERROR
+indicates that receive buffer overflows should be handled as errors.
+Historically receive buffer overflows have been ignored and programs
+could not tell if they missed messages or messages had been truncated
+because of overflows.
+Since programs historically do not expect to get receive overflow errors,
+this behavior is not the default.
 .Pp
 .Dv SO_LABEL
 returns the MAC label of the socket.
diff --git a/sbin/route/route.c b/sbin/route/route.c
index 9c9e4b304848..14d1fe274ad3 100644
--- a/sbin/route/route.c
+++ b/sbin/route/route.c
@@ -1485,9 +1485,20 @@ monitor(int argc, char *argv[])
 		interfaces();
 		exit(0);
 	}
+
+#ifdef SO_RERROR
+	n = 1;
+	if (setsockopt(s, SOL_SOCKET, SO_RERROR, &n, sizeof(n)) == -1)
+		warn("SO_RERROR");
+#endif
+
 	for (;;) {
 		time_t now;
-		n = read(s, msg, 2048);
+		n = read(s, msg, sizeof(msg));
+		if (n == -1) {
+			warn("read");
+			continue;
+		}
 		now = time(NULL);
 		(void)printf("\ngot message of size %d on %s", n, ctime(&now));
 		print_rtmsg((struct rt_msghdr *)(void *)msg, n);
diff --git a/sys/kern/uipc_sockbuf.c b/sys/kern/uipc_sockbuf.c
index 598fc05f2fcf..92b6d728fcd8 100644
--- a/sys/kern/uipc_sockbuf.c
+++ b/sys/kern/uipc_sockbuf.c
@@ -254,6 +254,30 @@ socantrcvmore(struct socket *so)
 	mtx_assert(SOCKBUF_MTX(&so->so_rcv), MA_NOTOWNED);
 }
 
+void
+soroverflow_locked(struct socket *so)
+{
+	/* Called with so_rcv locked; returns with it unlocked. */
+	SOCKBUF_LOCK_ASSERT(&so->so_rcv);
+
+	if (so->so_options & SO_RERROR) {
+		so->so_rerror = ENOBUFS;
+		sorwakeup_locked(so);
+	} else
+		SOCKBUF_UNLOCK(&so->so_rcv);
+
+	mtx_assert(SOCKBUF_MTX(&so->so_rcv), MA_NOTOWNED);
+}
+
+void
+soroverflow(struct socket *so)
+{
+	/* Takes the so_rcv lock itself, unlike soroverflow_locked(). */
+	SOCKBUF_LOCK(&so->so_rcv);
+	soroverflow_locked(so);
+	mtx_assert(SOCKBUF_MTX(&so->so_rcv), MA_NOTOWNED);
+}
+
 /*
  * Wait for data to arrive at/drain from a socket buffer.
  */
diff --git a/sys/kern/uipc_socket.c b/sys/kern/uipc_socket.c
index dd028c660d2b..620d81246062 100644
--- a/sys/kern/uipc_socket.c
+++ b/sys/kern/uipc_socket.c
@@ -1771,12 +1771,19 @@ restart:
 		KASSERT(m != NULL || !sbavail(&so->so_rcv),
 		    ("receive: m == %p sbavail == %u",
 		    m, sbavail(&so->so_rcv)));
-		if (so->so_error) {
+		if (so->so_error || so->so_rerror) {
 			if (m != NULL)
 				goto dontblock;
-			error = so->so_error;
-			if ((flags & MSG_PEEK) == 0)
-				so->so_error = 0;
+			if (so->so_error)
+				error = so->so_error;
+			else
+				error = so->so_rerror;
+			if ((flags & MSG_PEEK) == 0) {
+				if (so->so_error)
+					so->so_error = 0;
+				else
+					so->so_rerror = 0;
+			}
 			SOCKBUF_UNLOCK(&so->so_rcv);
 			goto release;
 		}
@@ -2084,7 +2091,7 @@ dontblock:
 		while (flags & MSG_WAITALL && m == NULL && uio->uio_resid > 0 &&
 		    !sosendallatonce(so) && nextrecord == NULL) {
 			SOCKBUF_LOCK_ASSERT(&so->so_rcv);
-			if (so->so_error ||
+			if (so->so_error || so->so_rerror ||
 			    so->so_rcv.sb_state & SBS_CANTRCVMORE)
 				break;
 			/*
@@ -2805,6 +2812,7 @@ sosetopt(struct socket *so, struct sockopt *sopt)
 		case SO_NOSIGPIPE:
 		case SO_NO_DDP:
 		case SO_NO_OFFLOAD:
+		case SO_RERROR:
 			error = sooptcopyin(sopt, &optval, sizeof optval,
 			    sizeof optval);
 			if (error)
@@ -3026,6 +3034,7 @@ sogetopt(struct socket *so, struct sockopt *sopt)
 		case SO_NOSIGPIPE:
 		case SO_NO_DDP:
 		case SO_NO_OFFLOAD:
+		case SO_RERROR:
 			optval = so->so_options & sopt->sopt_name;
 integer:
 			error = sooptcopyout(sopt, &optval, sizeof optval);
@@ -3045,8 +3054,13 @@ integer:
 
 		case SO_ERROR:
 			SOCK_LOCK(so);
-			optval = so->so_error;
-			so->so_error = 0;
+			if (so->so_error) {
+				optval = so->so_error;
+				so->so_error = 0;
+			} else {
+				optval = so->so_rerror;
+				so->so_rerror = 0;
+			}
 			SOCK_UNLOCK(so);
 			goto integer;
 
@@ -3595,7 +3609,7 @@ filt_soread(struct knote *kn, long hint)
 		kn->kn_flags |= EV_EOF;
 		kn->kn_fflags = so->so_error;
 		return (1);
-	} else if (so->so_error)	/* temporary udp error */
+	} else if (so->so_error || so->so_rerror)
 		return (1);
 
 	if (kn->kn_sfflags & NOTE_LOWAT) {
diff --git a/sys/kern/uipc_usrreq.c b/sys/kern/uipc_usrreq.c
index 25f27837d6d5..329e4346c425 100644
--- a/sys/kern/uipc_usrreq.c
+++ b/sys/kern/uipc_usrreq.c
@@ -1047,7 +1047,7 @@ uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
 			m = NULL;
 			control = NULL;
 		} else {
-			SOCKBUF_UNLOCK(&so2->so_rcv);
+			soroverflow_locked(so2);
 			error = ENOBUFS;
 		}
 		if (nam != NULL)
diff --git a/sys/net/raw_usrreq.c b/sys/net/raw_usrreq.c
index f43de7dae9ca..5d4e223e5a0a 100644
--- a/sys/net/raw_usrreq.c
+++ b/sys/net/raw_usrreq.c
@@ -100,10 +100,10 @@ raw_input_ext(struct mbuf *m0, struct sockproto *proto, struct sockaddr *src,
 			n = m_copym(m, 0, M_COPYALL, M_NOWAIT);
 			if (n) {
 				if (sbappendaddr(&last->so_rcv, src,
-				    n, (struct mbuf *)0) == 0)
-					/* should notify about lost packet */
+				    n, (struct mbuf *)0) == 0) {
+					soroverflow(last);
 					m_freem(n);
-				else
+				} else
 					sorwakeup(last);
 			}
 		}
@@ -111,9 +111,10 @@ raw_input_ext(struct mbuf *m0, struct sockproto *proto, struct sockaddr *src,
 	}
 	if (last) {
 		if (sbappendaddr(&last->so_rcv, src,
-		    m, (struct mbuf *)0) == 0)
+		    m, (struct mbuf *)0) == 0) {
+			soroverflow(last);
 			m_freem(m);
-		else
+		} else
 			sorwakeup(last);
 	} else
 		m_freem(m);
diff --git a/sys/netgraph/bluetooth/socket/ng_btsocket_hci_raw.c b/sys/netgraph/bluetooth/socket/ng_btsocket_hci_raw.c
index 9523d88af633..a78905705fd0 100644
--- a/sys/netgraph/bluetooth/socket/ng_btsocket_hci_raw.c
+++ b/sys/netgraph/bluetooth/socket/ng_btsocket_hci_raw.c
@@ -539,6 +539,7 @@ ng_btsocket_hci_raw_data_input(struct mbuf *nam)
 
 				NG_FREE_M(m);
 				NG_FREE_M(ctl);
+				soroverflow(pcb->so);
 			}
 		}
 next:
diff --git a/sys/netgraph/ng_socket.c b/sys/netgraph/ng_socket.c
index 08ee76300ae8..a10993a8fab6 100644
--- a/sys/netgraph/ng_socket.c
+++ b/sys/netgraph/ng_socket.c
@@ -974,7 +974,7 @@ ngs_rcvmsg(node_p node, item_p item, hook_p lasthook)
 	/* Send it up to the socket. */
 	if (sbappendaddr_locked(&so->so_rcv, (struct sockaddr *)&addr, m,
 	    NULL) == 0) {
-		SOCKBUF_UNLOCK(&so->so_rcv);
+		soroverflow_locked(so);
 		TRAP_ERROR;
 		m_freem(m);
 		return (ENOBUFS);
diff --git a/sys/netinet/ip_divert.c b/sys/netinet/ip_divert.c
index 81e70177e641..54cd0f509b51 100644
--- a/sys/netinet/ip_divert.c
+++ b/sys/netinet/ip_divert.c
@@ -287,7 +287,7 @@ divert_packet(struct mbuf *m, int incoming)
 			if (sbappendaddr_locked(&sa->so_rcv,
 			    (struct sockaddr *)&divsrc, m,
 			    (struct mbuf *)0) == 0) {
-				SOCKBUF_UNLOCK(&sa->so_rcv);
+				soroverflow_locked(sa);
 				sa = NULL;	/* force mbuf reclaim below */
 			} else
 				sorwakeup_locked(sa);
diff --git a/sys/netinet/ip_mroute.c b/sys/netinet/ip_mroute.c
index d024cdb16772..60b18cdbd35c 100644
--- a/sys/netinet/ip_mroute.c
+++ b/sys/netinet/ip_mroute.c
@@ -1195,7 +1195,7 @@ socket_send(struct socket *s, struct mbuf *mm, struct sockaddr_in *src)
 	    sorwakeup_locked(s);
 	    return 0;
 	}
-	SOCKBUF_UNLOCK(&s->so_rcv);
+	soroverflow_locked(s);
     }
     m_freem(mm);
     return -1;
diff --git a/sys/netinet/raw_ip.c b/sys/netinet/raw_ip.c
index f15a3bca3a90..84dd4d7a764c 100644
--- a/sys/netinet/raw_ip.c
+++ b/sys/netinet/raw_ip.c
@@ -261,11 +261,10 @@ rip_append(struct inpcb *last, struct ip *ip, struct mbuf *n,
 		SOCKBUF_LOCK(&so->so_rcv);
 		if (sbappendaddr_locked(&so->so_rcv,
 		    (struct sockaddr *)ripsrc, n, opts) == 0) {
-			/* should notify about lost packet */
+			soroverflow_locked(so);
 			m_freem(n);
 			if (opts)
 				m_freem(opts);
-			SOCKBUF_UNLOCK(&so->so_rcv);
 		} else
 			sorwakeup_locked(so);
 	} else
diff --git a/sys/netinet/udp_usrreq.c b/sys/netinet/udp_usrreq.c
index c271392c225d..2a5e8fdd25c2 100644
--- a/sys/netinet/udp_usrreq.c
+++ b/sys/netinet/udp_usrreq.c
@@ -376,7 +376,7 @@ udp_append(struct inpcb *inp, struct ip *ip, struct mbuf *n, int off,
 	so = inp->inp_socket;
 	SOCKBUF_LOCK(&so->so_rcv);
 	if (sbappendaddr_locked(&so->so_rcv, append_sa, n, opts) == 0) {
-		SOCKBUF_UNLOCK(&so->so_rcv);
+		soroverflow_locked(so);	/* so_rcv is held; drops it */
 		m_freem(n);
 		if (opts)
 			m_freem(opts);
diff --git a/sys/netinet6/icmp6.c b/sys/netinet6/icmp6.c
index 75e7ef7c2039..923f5dd63ddc 100644
--- a/sys/netinet6/icmp6.c
+++ b/sys/netinet6/icmp6.c
@@ -1976,13 +1976,11 @@ icmp6_rip6_input(struct mbuf **mp, int off)
 				    &last->inp_socket->so_rcv,
 				    (struct sockaddr *)&fromsa, n, opts)
 				    == 0) {
-					/* should notify about lost packet */
+					soroverflow_locked(last->inp_socket);
 					m_freem(n);
 					if (opts) {
 						m_freem(opts);
 					}
-					SOCKBUF_UNLOCK(
-					    &last->inp_socket->so_rcv);
 				} else
 					sorwakeup_locked(last->inp_socket);
 				opts = NULL;
@@ -2023,7 +2021,7 @@ icmp6_rip6_input(struct mbuf **mp, int off)
 			m_freem(m);
 			if (opts)
 				m_freem(opts);
-			SOCKBUF_UNLOCK(&last->inp_socket->so_rcv);
+			soroverflow_locked(last->inp_socket);
 		} else
 			sorwakeup_locked(last->inp_socket);
 		INP_RUNLOCK(last);
diff --git a/sys/netinet6/ip6_input.c b/sys/netinet6/ip6_input.c
index 74d275f7e66e..9ef4620565a1 100644
--- a/sys/netinet6/ip6_input.c
+++ b/sys/netinet6/ip6_input.c
@@ -1579,6 +1579,7 @@ ip6_notify_pmtu(struct inpcb *inp, struct sockaddr_in6 *dst, u_int32_t mtu)
 	so =  inp->inp_socket;
 	if (sbappendaddr(&so->so_rcv, (struct sockaddr *)dst, NULL, m_mtu)
 	    == 0) {
+		soroverflow(so);
 		m_freem(m_mtu);
 		/* XXX: should count statistics */
 	} else
diff --git a/sys/netinet6/ip6_mroute.c b/sys/netinet6/ip6_mroute.c
index 82ca908d4553..bce491d7fa5b 100644
--- a/sys/netinet6/ip6_mroute.c
+++ b/sys/netinet6/ip6_mroute.c
@@ -1039,7 +1039,8 @@ socket_send(struct socket *s, struct mbuf *mm, struct sockaddr_in6 *src)
 				 mm, (struct mbuf *)0) != 0) {
 			sorwakeup(s);
 			return (0);
-		}
+		} else
+			soroverflow(s);
 	}
 	m_freem(mm);
 	return (-1);
diff --git a/sys/netinet6/raw_ip6.c b/sys/netinet6/raw_ip6.c
index 304ee4a8899b..47e106f02651 100644
--- a/sys/netinet6/raw_ip6.c
+++ b/sys/netinet6/raw_ip6.c
@@ -212,6 +212,7 @@ rip6_input(struct mbuf **mp, int *offp, int proto)
 				if (sbappendaddr(&last->inp_socket->so_rcv,
 						(struct sockaddr *)&fromsa,
 						 n, opts) == 0) {
+					soroverflow(last->inp_socket);
 					m_freem(n);
 					if (opts)
 						m_freem(opts);
@@ -324,6 +325,7 @@ skip_2:
 		m_adj(m, *offp);
 		if (sbappendaddr(&last->inp_socket->so_rcv,
 		    (struct sockaddr *)&fromsa, m, opts) == 0) {
+			soroverflow(last->inp_socket);
 			m_freem(m);
 			if (opts)
 				m_freem(opts);
diff --git a/sys/netinet6/send.c b/sys/netinet6/send.c
index bc9880c82267..8458ef367cba 100644
--- a/sys/netinet6/send.c
+++ b/sys/netinet6/send.c
@@ -291,7 +291,7 @@ send_input(struct mbuf *m, struct ifnet *ifp, int direction, int msglen __unused
 	SOCKBUF_LOCK(&V_send_so->so_rcv);
 	if (sbappendaddr_locked(&V_send_so->so_rcv,
 	    (struct sockaddr *)&sendsrc, m, NULL) == 0) {
-		SOCKBUF_UNLOCK(&V_send_so->so_rcv);
+		soroverflow_locked(V_send_so);
 		/* XXX stats. */
 		m_freem(m);
 	} else {
diff --git a/sys/netinet6/udp6_usrreq.c b/sys/netinet6/udp6_usrreq.c
index 98588e48c511..02d4359dc92f 100644
--- a/sys/netinet6/udp6_usrreq.c
+++ b/sys/netinet6/udp6_usrreq.c
@@ -191,7 +191,7 @@ udp6_append(struct inpcb *inp, struct mbuf *n, int off,
 	SOCKBUF_LOCK(&so->so_rcv);
 	if (sbappendaddr_locked(&so->so_rcv, (struct sockaddr *)&fromsa[0], n,
 	    opts) == 0) {
-		SOCKBUF_UNLOCK(&so->so_rcv);
+		soroverflow_locked(so);
 		m_freem(n);
 		if (opts)
 			m_freem(opts);
diff --git a/sys/netipsec/keysock.c b/sys/netipsec/keysock.c
index a216f085c1bc..84a65967eea2 100644
--- a/sys/netipsec/keysock.c
+++ b/sys/netipsec/keysock.c
@@ -141,7 +141,6 @@ end:
 static int
 key_sendup0(struct rawcb *rp, struct mbuf *m, int promisc)
 {
-	int error;
 
 	if (promisc) {
 		struct sadb_msg *pmsg;
@@ -165,11 +164,12 @@ key_sendup0(struct rawcb *rp, struct mbuf *m, int promisc)
 	    m, NULL)) {
 		PFKEYSTAT_INC(in_nomem);
 		m_freem(m);
-		error = ENOBUFS;
-	} else
-		error = 0;
+		soroverflow(rp->rcb_socket);
+		return ENOBUFS;
+	}
+
 	sorwakeup(rp->rcb_socket);
-	return error;
+	return 0;
 }
 
 /* so can be NULL if target != KEY_SENDUP_ONE */
diff --git a/sys/sys/socket.h b/sys/sys/socket.h
index eaad9b1bacdb..17a5099424f4 100644
--- a/sys/sys/socket.h
+++ b/sys/sys/socket.h
@@ -147,6 +147,7 @@ typedef	__uintptr_t	uintptr_t;
 #define	SO_NO_OFFLOAD	0x00004000	/* socket cannot be offloaded */
 #define	SO_NO_DDP	0x00008000	/* disable direct data placement */
 #define	SO_REUSEPORT_LB	0x00010000	/* reuse with load balancing */
+#define	SO_RERROR	0x00020000	/* keep track of receive errors */
 
 /*
  * Additional options, not kept in so_options.
diff --git a/sys/sys/socketvar.h b/sys/sys/socketvar.h
index 99d8839f8beb..99d2d554143e 100644
--- a/sys/sys/socketvar.h
+++ b/sys/sys/socketvar.h
@@ -99,6 +99,7 @@ struct socket {
 	struct	protosw *so_proto;	/* (a) protocol handle */
 	short	so_timeo;		/* (g) connection timeout */
 	u_short	so_error;		/* (f) error affecting connection */
+	u_short so_rerror;		/* (f) error affecting receiving */
 	struct	sigio *so_sigio;	/* [sg] information for async I/O or
 					   out of band data (SIGURG) */
 	struct	ucred *so_cred;		/* (a) user credentials */
@@ -258,7 +259,8 @@ struct socket {
 
 /* can we read something from so? */
 #define	soreadabledata(so) \
-	(sbavail(&(so)->so_rcv) >= (so)->so_rcv.sb_lowat ||  (so)->so_error)
+	(sbavail(&(so)->so_rcv) >= (so)->so_rcv.sb_lowat || \
+	(so)->so_error || (so)->so_rerror)
 #define	soreadable(so) \
 	(soreadabledata(so) || ((so)->so_rcv.sb_state & SBS_CANTRCVMORE))
 
@@ -471,6 +473,8 @@ void	socantrcvmore(struct socket *so);
 void	socantrcvmore_locked(struct socket *so);
 void	socantsendmore(struct socket *so);
 void	socantsendmore_locked(struct socket *so);
+void	soroverflow(struct socket *so);
+void	soroverflow_locked(struct socket *so);
 
 /*
  * Accept filter functions (duh).


More information about the dev-commits-src-branches mailing list