git: 4328318445ae - main - sockets: use socket buffer mutexes in struct socket directly

From: Gleb Smirnoff <glebius_at_FreeBSD.org>
Date: Thu, 12 May 2022 20:22:51 UTC
The branch main has been updated by glebius:

URL: https://cgit.FreeBSD.org/src/commit/?id=4328318445aec29c87f33ebfa11f5253a73a7896

commit 4328318445aec29c87f33ebfa11f5253a73a7896
Author:     Gleb Smirnoff <glebius@FreeBSD.org>
AuthorDate: 2022-05-12 20:22:12 +0000
Commit:     Gleb Smirnoff <glebius@FreeBSD.org>
CommitDate: 2022-05-12 20:22:12 +0000

    sockets: use socket buffer mutexes in struct socket directly
    
    Since c67f3b8b78e the sockbuf mutexes belong to the containing socket,
    and socket buffers just point to it.  In 74a68313b50 macros that access
    this mutex directly were added.  Go over the core socket code and
    eliminate code that reaches the mutex by dereferencing the sockbuf
    compatibility pointer.
    
    This change requires a KPI change, as some functions were given only the
    sockbuf pointer, without any hint whether it is a receive or send buffer.
    
    This change doesn't cover the whole kernel; many protocols still use
    compatibility pointers internally.  However, it allows operation of a
    protocol that doesn't use them.
    
    Reviewed by:            markj
    Differential revision:  https://reviews.freebsd.org/D35152
---
 sys/dev/cxgbe/tom/t4_cpl_io.c                  |   4 +-
 sys/dev/cxgbe/tom/t4_ddp.c                     |   2 +-
 sys/dev/hyperv/hvsock/hv_sock.c                |   4 +-
 sys/kern/kern_sendfile.c                       |   2 +-
 sys/kern/sys_socket.c                          |  31 +++---
 sys/kern/uipc_sockbuf.c                        | 142 ++++++++++++++++---------
 sys/kern/uipc_socket.c                         |  48 ++++-----
 sys/kern/uipc_usrreq.c                         |   2 +-
 sys/netinet/sctp_output.c                      |   4 +-
 sys/netinet/sctputil.c                         |   4 +-
 sys/netinet/tcp_input.c                        |   8 +-
 sys/netinet/tcp_output.c                       |   4 +-
 sys/ofed/drivers/infiniband/ulp/sdp/sdp_main.c |   4 +-
 sys/rpc/clnt_bck.c                             |   2 +-
 sys/rpc/clnt_vc.c                              |   2 +-
 sys/sys/sockbuf.h                              |  16 +--
 sys/sys/socketvar.h                            |  61 +++++------
 17 files changed, 191 insertions(+), 149 deletions(-)

diff --git a/sys/dev/cxgbe/tom/t4_cpl_io.c b/sys/dev/cxgbe/tom/t4_cpl_io.c
index 130c2468b20b..59d1c367f94c 100644
--- a/sys/dev/cxgbe/tom/t4_cpl_io.c
+++ b/sys/dev/cxgbe/tom/t4_cpl_io.c
@@ -803,7 +803,7 @@ t4_push_frames(struct adapter *sc, struct toepcb *toep, int drop)
 			int newsize = min(sb->sb_hiwat + V_tcp_autosndbuf_inc,
 			    V_tcp_autosndbuf_max);
 
-			if (!sbreserve_locked(sb, newsize, so, NULL))
+			if (!sbreserve_locked(so, SO_SND, newsize, NULL))
 				sb->sb_flags &= ~SB_AUTOSIZE;
 			else
 				sowwakeup = 1;	/* room available */
@@ -1770,7 +1770,7 @@ do_rx_data(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
 		unsigned int newsize = min(hiwat + sc->tt.autorcvbuf_inc,
 		    V_tcp_autorcvbuf_max);
 
-		if (!sbreserve_locked(sb, newsize, so, NULL))
+		if (!sbreserve_locked(so, SO_RCV, newsize, NULL))
 			sb->sb_flags &= ~SB_AUTOSIZE;
 	}
 
diff --git a/sys/dev/cxgbe/tom/t4_ddp.c b/sys/dev/cxgbe/tom/t4_ddp.c
index 05bb903a28aa..11fea91b060e 100644
--- a/sys/dev/cxgbe/tom/t4_ddp.c
+++ b/sys/dev/cxgbe/tom/t4_ddp.c
@@ -555,7 +555,7 @@ handle_ddp_data(struct toepcb *toep, __be32 ddp_report, __be32 rcv_nxt, int len)
 		unsigned int newsize = min(hiwat + sc->tt.autorcvbuf_inc,
 		    V_tcp_autorcvbuf_max);
 
-		if (!sbreserve_locked(sb, newsize, so, NULL))
+		if (!sbreserve_locked(so, SO_RCV, newsize, NULL))
 			sb->sb_flags &= ~SB_AUTOSIZE;
 	}
 	SOCKBUF_UNLOCK(sb);
diff --git a/sys/dev/hyperv/hvsock/hv_sock.c b/sys/dev/hyperv/hvsock/hv_sock.c
index f0238c233181..8c327a22e6fd 100644
--- a/sys/dev/hyperv/hvsock/hv_sock.c
+++ b/sys/dev/hyperv/hvsock/hv_sock.c
@@ -763,7 +763,7 @@ hvs_trans_soreceive(struct socket *so, struct sockaddr **paddr,
 		 * Wait and block until (more) data comes in.
 		 * Note: Drops the sockbuf lock during wait.
 		 */
-		error = sbwait(sb);
+		error = sbwait(so, SO_RCV);
 
 		if (error)
 			break;
@@ -859,7 +859,7 @@ hvs_trans_sosend(struct socket *so, struct sockaddr *addr, struct uio *uio,
 				 * Sleep wait until space avaiable to send
 				 * Note: Drops the sockbuf lock during wait.
 				 */
-				error = sbwait(sb);
+				error = sbwait(so, SO_SND);
 
 				if (error)
 					break;
diff --git a/sys/kern/kern_sendfile.c b/sys/kern/kern_sendfile.c
index 30383490ca41..2de015254ab9 100644
--- a/sys/kern/kern_sendfile.c
+++ b/sys/kern/kern_sendfile.c
@@ -814,7 +814,7 @@ retry_space:
 			 * state may have changed and we retest
 			 * for it.
 			 */
-			error = sbwait(&so->so_snd);
+			error = sbwait(so, SO_SND);
 			/*
 			 * An error from sbwait usually indicates that we've
 			 * been interrupted by a signal. If we've sent anything
diff --git a/sys/kern/sys_socket.c b/sys/kern/sys_socket.c
index 774b317c6ecb..b1b47d3d3c26 100644
--- a/sys/kern/sys_socket.c
+++ b/sys/kern/sys_socket.c
@@ -179,12 +179,12 @@ soo_ioctl(struct file *fp, u_long cmd, void *data, struct ucred *active_cred,
 				so->sol_sbrcv_flags |= SB_ASYNC;
 				so->sol_sbsnd_flags |= SB_ASYNC;
 			} else {
-				SOCKBUF_LOCK(&so->so_rcv);
+				SOCK_RECVBUF_LOCK(so);
 				so->so_rcv.sb_flags |= SB_ASYNC;
-				SOCKBUF_UNLOCK(&so->so_rcv);
-				SOCKBUF_LOCK(&so->so_snd);
+				SOCK_RECVBUF_UNLOCK(so);
+				SOCK_SENDBUF_LOCK(so);
 				so->so_snd.sb_flags |= SB_ASYNC;
-				SOCKBUF_UNLOCK(&so->so_snd);
+				SOCK_SENDBUF_UNLOCK(so);
 			}
 			SOCK_UNLOCK(so);
 		} else {
@@ -194,12 +194,12 @@ soo_ioctl(struct file *fp, u_long cmd, void *data, struct ucred *active_cred,
 				so->sol_sbrcv_flags &= ~SB_ASYNC;
 				so->sol_sbsnd_flags &= ~SB_ASYNC;
 			} else {
-				SOCKBUF_LOCK(&so->so_rcv);
+				SOCK_RECVBUF_LOCK(so);
 				so->so_rcv.sb_flags &= ~SB_ASYNC;
-				SOCKBUF_UNLOCK(&so->so_rcv);
-				SOCKBUF_LOCK(&so->so_snd);
+				SOCK_RECVBUF_UNLOCK(so);
+				SOCK_SENDBUF_LOCK(so);
 				so->so_snd.sb_flags &= ~SB_ASYNC;
-				SOCKBUF_UNLOCK(&so->so_snd);
+				SOCK_SENDBUF_UNLOCK(so);
 			}
 			SOCK_UNLOCK(so);
 		}
@@ -751,10 +751,12 @@ soaio_snd(void *context, int pending)
 }
 
 void
-sowakeup_aio(struct socket *so, struct sockbuf *sb)
+sowakeup_aio(struct socket *so, sb_which which)
 {
+	struct sockbuf *sb = sobuf(so, which);
+
+	SOCK_BUF_LOCK_ASSERT(so, which);
 
-	SOCKBUF_LOCK_ASSERT(sb);
 	sb->sb_flags &= ~SB_AIO;
 	if (sb->sb_flags & SB_AIO_RUNNING)
 		return;
@@ -799,6 +801,7 @@ soo_aio_queue(struct file *fp, struct kaiocb *job)
 {
 	struct socket *so;
 	struct sockbuf *sb;
+	sb_which which;
 	int error;
 
 	so = fp->f_data;
@@ -809,12 +812,14 @@ soo_aio_queue(struct file *fp, struct kaiocb *job)
 	/* Lock through the socket, since this may be a listening socket. */
 	switch (job->uaiocb.aio_lio_opcode & (LIO_WRITE | LIO_READ)) {
 	case LIO_READ:
-		sb = &so->so_rcv;
 		SOCK_RECVBUF_LOCK(so);
+		sb = &so->so_rcv;
+		which = SO_RCV;
 		break;
 	case LIO_WRITE:
-		sb = &so->so_snd;
 		SOCK_SENDBUF_LOCK(so);
+		sb = &so->so_snd;
+		which = SO_SND;
 		break;
 	default:
 		return (EINVAL);
@@ -833,7 +838,7 @@ soo_aio_queue(struct file *fp, struct kaiocb *job)
 	TAILQ_INSERT_TAIL(&sb->sb_aiojobq, job, list);
 	if (!(sb->sb_flags & SB_AIO_RUNNING)) {
 		if (soaio_ready(so, sb))
-			sowakeup_aio(so, sb);
+			sowakeup_aio(so, which);
 		else
 			sb->sb_flags |= SB_AIO;
 	}
diff --git a/sys/kern/uipc_sockbuf.c b/sys/kern/uipc_sockbuf.c
index 5ac6c79a928f..421fa5da37d9 100644
--- a/sys/kern/uipc_sockbuf.c
+++ b/sys/kern/uipc_sockbuf.c
@@ -396,27 +396,27 @@ void
 socantsendmore_locked(struct socket *so)
 {
 
-	SOCKBUF_LOCK_ASSERT(&so->so_snd);
+	SOCK_SENDBUF_LOCK_ASSERT(so);
 
 	so->so_snd.sb_state |= SBS_CANTSENDMORE;
 	sowwakeup_locked(so);
-	mtx_assert(SOCKBUF_MTX(&so->so_snd), MA_NOTOWNED);
+	SOCK_SENDBUF_UNLOCK_ASSERT(so);
 }
 
 void
 socantsendmore(struct socket *so)
 {
 
-	SOCKBUF_LOCK(&so->so_snd);
+	SOCK_SENDBUF_LOCK(so);
 	socantsendmore_locked(so);
-	mtx_assert(SOCKBUF_MTX(&so->so_snd), MA_NOTOWNED);
+	SOCK_SENDBUF_UNLOCK_ASSERT(so);
 }
 
 void
 socantrcvmore_locked(struct socket *so)
 {
 
-	SOCKBUF_LOCK_ASSERT(&so->so_rcv);
+	SOCK_RECVBUF_LOCK_ASSERT(so);
 
 	so->so_rcv.sb_state |= SBS_CANTRCVMORE;
 #ifdef KERN_TLS
@@ -424,53 +424,55 @@ socantrcvmore_locked(struct socket *so)
 		ktls_check_rx(&so->so_rcv);
 #endif
 	sorwakeup_locked(so);
-	mtx_assert(SOCKBUF_MTX(&so->so_rcv), MA_NOTOWNED);
+	SOCK_RECVBUF_UNLOCK_ASSERT(so);
 }
 
 void
 socantrcvmore(struct socket *so)
 {
 
-	SOCKBUF_LOCK(&so->so_rcv);
+	SOCK_RECVBUF_LOCK(so);
 	socantrcvmore_locked(so);
-	mtx_assert(SOCKBUF_MTX(&so->so_rcv), MA_NOTOWNED);
+	SOCK_RECVBUF_UNLOCK_ASSERT(so);
 }
 
 void
 soroverflow_locked(struct socket *so)
 {
 
-	SOCKBUF_LOCK_ASSERT(&so->so_rcv);
+	SOCK_RECVBUF_LOCK_ASSERT(so);
 
 	if (so->so_options & SO_RERROR) {
 		so->so_rerror = ENOBUFS;
 		sorwakeup_locked(so);
 	} else
-		SOCKBUF_UNLOCK(&so->so_rcv);
+		SOCK_RECVBUF_UNLOCK(so);
 
-	mtx_assert(SOCKBUF_MTX(&so->so_rcv), MA_NOTOWNED);
+	SOCK_RECVBUF_UNLOCK_ASSERT(so);
 }
 
 void
 soroverflow(struct socket *so)
 {
 
-	SOCKBUF_LOCK(&so->so_rcv);
+	SOCK_RECVBUF_LOCK(so);
 	soroverflow_locked(so);
-	mtx_assert(SOCKBUF_MTX(&so->so_rcv), MA_NOTOWNED);
+	SOCK_RECVBUF_UNLOCK_ASSERT(so);
 }
 
 /*
  * Wait for data to arrive at/drain from a socket buffer.
  */
 int
-sbwait(struct sockbuf *sb)
+sbwait(struct socket *so, sb_which which)
 {
+	struct sockbuf *sb;
 
-	SOCKBUF_LOCK_ASSERT(sb);
+	SOCK_BUF_LOCK_ASSERT(so, which);
 
+	sb = sobuf(so, which);
 	sb->sb_flags |= SB_WAIT;
-	return (msleep_sbt(&sb->sb_acc, SOCKBUF_MTX(sb),
+	return (msleep_sbt(&sb->sb_acc, soeventmtx(so, which),
 	    (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH, "sbwait",
 	    sb->sb_timeo, 0, 0));
 }
@@ -487,13 +489,15 @@ sbwait(struct sockbuf *sb)
  * then release it to avoid lock order issues.  It's not clear that's
  * correct.
  */
-void
-sowakeup(struct socket *so, struct sockbuf *sb)
+static __always_inline void
+sowakeup(struct socket *so, const sb_which which)
 {
+	struct sockbuf *sb;
 	int ret;
 
-	SOCKBUF_LOCK_ASSERT(sb);
+	SOCK_BUF_LOCK_ASSERT(so, which);
 
+	sb = sobuf(so, which);
 	selwakeuppri(sb->sb_sel, PSOCK);
 	if (!SEL_WAITING(sb->sb_sel))
 		sb->sb_flags &= ~SB_SEL;
@@ -512,13 +516,43 @@ sowakeup(struct socket *so, struct sockbuf *sb)
 	} else
 		ret = SU_OK;
 	if (sb->sb_flags & SB_AIO)
-		sowakeup_aio(so, sb);
-	SOCKBUF_UNLOCK(sb);
+		sowakeup_aio(so, which);
+	SOCK_BUF_UNLOCK(so, which);
 	if (ret == SU_ISCONNECTED)
 		soisconnected(so);
 	if ((so->so_state & SS_ASYNC) && so->so_sigio != NULL)
 		pgsigio(&so->so_sigio, SIGIO, 0);
-	mtx_assert(SOCKBUF_MTX(sb), MA_NOTOWNED);
+	SOCK_BUF_UNLOCK_ASSERT(so, which);
+}
+
+/*
+ * Do we need to notify the other side when I/O is possible?
+ */
+static __always_inline bool
+sb_notify(const struct sockbuf *sb)
+{
+	return ((sb->sb_flags & (SB_WAIT | SB_SEL | SB_ASYNC |
+	    SB_UPCALL | SB_AIO | SB_KNOTE)) != 0);
+}
+
+void
+sorwakeup_locked(struct socket *so)
+{
+	SOCK_RECVBUF_LOCK_ASSERT(so);
+	if (sb_notify(&so->so_rcv))
+		sowakeup(so, SO_RCV);
+	else
+		SOCK_RECVBUF_UNLOCK(so);
+}
+
+void
+sowwakeup_locked(struct socket *so)
+{
+	SOCK_SENDBUF_LOCK_ASSERT(so);
+	if (sb_notify(&so->so_snd))
+		sowakeup(so, SO_SND);
+	else
+		SOCK_SENDBUF_UNLOCK(so);
 }
 
 /*
@@ -557,11 +591,11 @@ soreserve(struct socket *so, u_long sndcc, u_long rcvcc)
 {
 	struct thread *td = curthread;
 
-	SOCKBUF_LOCK(&so->so_snd);
-	SOCKBUF_LOCK(&so->so_rcv);
-	if (sbreserve_locked(&so->so_snd, sndcc, so, td) == 0)
+	SOCK_SENDBUF_LOCK(so);
+	SOCK_RECVBUF_LOCK(so);
+	if (sbreserve_locked(so, SO_SND, sndcc, td) == 0)
 		goto bad;
-	if (sbreserve_locked(&so->so_rcv, rcvcc, so, td) == 0)
+	if (sbreserve_locked(so, SO_RCV, rcvcc, td) == 0)
 		goto bad2;
 	if (so->so_rcv.sb_lowat == 0)
 		so->so_rcv.sb_lowat = 1;
@@ -569,14 +603,14 @@ soreserve(struct socket *so, u_long sndcc, u_long rcvcc)
 		so->so_snd.sb_lowat = MCLBYTES;
 	if (so->so_snd.sb_lowat > so->so_snd.sb_hiwat)
 		so->so_snd.sb_lowat = so->so_snd.sb_hiwat;
-	SOCKBUF_UNLOCK(&so->so_rcv);
-	SOCKBUF_UNLOCK(&so->so_snd);
+	SOCK_RECVBUF_UNLOCK(so);
+	SOCK_SENDBUF_UNLOCK(so);
 	return (0);
 bad2:
-	sbrelease_locked(&so->so_snd, so);
+	sbrelease_locked(so, SO_SND);
 bad:
-	SOCKBUF_UNLOCK(&so->so_rcv);
-	SOCKBUF_UNLOCK(&so->so_snd);
+	SOCK_RECVBUF_UNLOCK(so);
+	SOCK_SENDBUF_UNLOCK(so);
 	return (ENOBUFS);
 }
 
@@ -600,13 +634,14 @@ sysctl_handle_sb_max(SYSCTL_HANDLER_ARGS)
  * Allot mbufs to a sockbuf.  Attempt to scale mbmax so that mbcnt doesn't
  * become limiting if buffering efficiency is near the normal case.
  */
-int
-sbreserve_locked(struct sockbuf *sb, u_long cc, struct socket *so,
+bool
+sbreserve_locked(struct socket *so, sb_which which, u_long cc,
     struct thread *td)
 {
+	struct sockbuf *sb = sobuf(so, which);
 	rlim_t sbsize_limit;
 
-	SOCKBUF_LOCK_ASSERT(sb);
+	SOCK_BUF_LOCK_ASSERT(so, which);
 
 	/*
 	 * When a thread is passed, we take into account the thread's socket
@@ -616,24 +651,25 @@ sbreserve_locked(struct sockbuf *sb, u_long cc, struct socket *so,
 	 * we don't apply a process limit.
 	 */
 	if (cc > sb_max_adj)
-		return (0);
+		return (false);
 	if (td != NULL) {
 		sbsize_limit = lim_cur(td, RLIMIT_SBSIZE);
 	} else
 		sbsize_limit = RLIM_INFINITY;
 	if (!chgsbsize(so->so_cred->cr_uidinfo, &sb->sb_hiwat, cc,
 	    sbsize_limit))
-		return (0);
+		return (false);
 	sb->sb_mbmax = min(cc * sb_efficiency, sb_max);
 	if (sb->sb_lowat > sb->sb_hiwat)
 		sb->sb_lowat = sb->sb_hiwat;
-	return (1);
+	return (true);
 }
 
 int
 sbsetopt(struct socket *so, int cmd, u_long cc)
 {
 	struct sockbuf *sb;
+	sb_which wh;
 	short *flags;
 	u_int *hiwat, *lowat;
 	int error;
@@ -660,16 +696,18 @@ sbsetopt(struct socket *so, int cmd, u_long cc)
 			case SO_SNDLOWAT:
 			case SO_SNDBUF:
 				sb = &so->so_snd;
+				wh = SO_SND;
 				break;
 			case SO_RCVLOWAT:
 			case SO_RCVBUF:
 				sb = &so->so_rcv;
+				wh = SO_RCV;
 				break;
 		}
 		flags = &sb->sb_flags;
 		hiwat = &sb->sb_hiwat;
 		lowat = &sb->sb_lowat;
-		SOCKBUF_LOCK(sb);
+		SOCK_BUF_LOCK(so, wh);
 	}
 
 	error = 0;
@@ -685,7 +723,7 @@ sbsetopt(struct socket *so, int cmd, u_long cc)
 			if (*lowat > *hiwat)
 				*lowat = *hiwat;
 		} else {
-			if (!sbreserve_locked(sb, cc, so, curthread))
+			if (!sbreserve_locked(so, wh, cc, curthread))
 				error = ENOBUFS;
 		}
 		if (error == 0)
@@ -702,7 +740,7 @@ sbsetopt(struct socket *so, int cmd, u_long cc)
 	}
 
 	if (!SOLISTENING(so))
-		SOCKBUF_UNLOCK(sb);
+		SOCK_BUF_UNLOCK(so, wh);
 	SOCK_UNLOCK(so);
 	return (error);
 }
@@ -711,8 +749,9 @@ sbsetopt(struct socket *so, int cmd, u_long cc)
  * Free mbufs held by a socket, and reserved mbuf space.
  */
 static void
-sbrelease_internal(struct sockbuf *sb, struct socket *so)
+sbrelease_internal(struct socket *so, sb_which which)
 {
+	struct sockbuf *sb = sobuf(so, which);
 
 	sbflush_internal(sb);
 	(void)chgsbsize(so->so_cred->cr_uidinfo, &sb->sb_hiwat, 0,
@@ -721,33 +760,34 @@ sbrelease_internal(struct sockbuf *sb, struct socket *so)
 }
 
 void
-sbrelease_locked(struct sockbuf *sb, struct socket *so)
+sbrelease_locked(struct socket *so, sb_which which)
 {
 
-	SOCKBUF_LOCK_ASSERT(sb);
+	SOCK_BUF_LOCK_ASSERT(so, which);
 
-	sbrelease_internal(sb, so);
+	sbrelease_internal(so, which);
 }
 
 void
-sbrelease(struct sockbuf *sb, struct socket *so)
+sbrelease(struct socket *so, sb_which which)
 {
 
-	SOCKBUF_LOCK(sb);
-	sbrelease_locked(sb, so);
-	SOCKBUF_UNLOCK(sb);
+	SOCK_BUF_LOCK(so, which);
+	sbrelease_locked(so, which);
+	SOCK_BUF_UNLOCK(so, which);
 }
 
 void
-sbdestroy(struct sockbuf *sb, struct socket *so)
+sbdestroy(struct socket *so, sb_which which)
 {
-
-	sbrelease_internal(sb, so);
 #ifdef KERN_TLS
+	struct sockbuf *sb = sobuf(so, which);
+
 	if (sb->sb_tls_info != NULL)
 		ktls_free(sb->sb_tls_info);
 	sb->sb_tls_info = NULL;
 #endif
+	sbrelease_internal(so, which);
 }
 
 /*
diff --git a/sys/kern/uipc_socket.c b/sys/kern/uipc_socket.c
index 628730171715..49a2b5773cc6 100644
--- a/sys/kern/uipc_socket.c
+++ b/sys/kern/uipc_socket.c
@@ -420,8 +420,8 @@ soalloc(struct vnet *vnet)
 	mtx_init(&so->so_lock, "socket", NULL, MTX_DEF | MTX_DUPOK);
 	so->so_snd.sb_mtx = &so->so_snd_mtx;
 	so->so_rcv.sb_mtx = &so->so_rcv_mtx;
-	SOCKBUF_LOCK_INIT(&so->so_snd, "so_snd");
-	SOCKBUF_LOCK_INIT(&so->so_rcv, "so_rcv");
+	mtx_init(&so->so_snd_mtx, "so_snd", NULL, MTX_DEF);
+	mtx_init(&so->so_rcv_mtx, "so_rcv", NULL, MTX_DEF);
 	so->so_rcv.sb_sel = &so->so_rdsel;
 	so->so_snd.sb_sel = &so->so_wrsel;
 	sx_init(&so->so_snd_sx, "so_snd_sx");
@@ -491,8 +491,8 @@ sodealloc(struct socket *so)
 			    &so->so_snd.sb_hiwat, 0, RLIM_INFINITY);
 		sx_destroy(&so->so_snd_sx);
 		sx_destroy(&so->so_rcv_sx);
-		SOCKBUF_LOCK_DESTROY(&so->so_snd);
-		SOCKBUF_LOCK_DESTROY(&so->so_rcv);
+		mtx_destroy(&so->so_snd_mtx);
+		mtx_destroy(&so->so_rcv_mtx);
 	}
 	crfree(so->so_cred);
 	mtx_destroy(&so->so_lock);
@@ -990,8 +990,8 @@ solisten_proto(struct socket *so, int backlog)
 	sbrcv_timeo = so->so_rcv.sb_timeo;
 	sbsnd_timeo = so->so_snd.sb_timeo;
 
-	sbdestroy(&so->so_snd, so);
-	sbdestroy(&so->so_rcv, so);
+	sbdestroy(so, SO_SND);
+	sbdestroy(so, SO_RCV);
 
 #ifdef INVARIANTS
 	bzero(&so->so_rcv,
@@ -1208,8 +1208,8 @@ sofree(struct socket *so)
 	 * to be acquired or held.
 	 */
 	if (!SOLISTENING(so)) {
-		sbdestroy(&so->so_snd, so);
-		sbdestroy(&so->so_rcv, so);
+		sbdestroy(so, SO_SND);
+		sbdestroy(so, SO_RCV);
 	}
 	seldrain(&so->so_rdsel);
 	seldrain(&so->so_wrsel);
@@ -1735,7 +1735,7 @@ restart:
 				error = EWOULDBLOCK;
 				goto release;
 			}
-			error = sbwait(&so->so_snd);
+			error = sbwait(so, SO_SND);
 			SOCKBUF_UNLOCK(&so->so_snd);
 			if (error)
 				goto release;
@@ -2067,7 +2067,7 @@ restart:
 		}
 		SBLASTRECORDCHK(&so->so_rcv);
 		SBLASTMBUFCHK(&so->so_rcv);
-		error = sbwait(&so->so_rcv);
+		error = sbwait(so, SO_RCV);
 		SOCKBUF_UNLOCK(&so->so_rcv);
 		if (error)
 			goto release;
@@ -2389,7 +2389,7 @@ dontblock:
 			 * the protocol. Skip blocking in this case.
 			 */
 			if (so->so_rcv.sb_mb == NULL) {
-				error = sbwait(&so->so_rcv);
+				error = sbwait(so, SO_RCV);
 				if (error) {
 					SOCKBUF_UNLOCK(&so->so_rcv);
 					goto release;
@@ -2570,7 +2570,7 @@ restart:
 	 * Wait and block until (more) data comes in.
 	 * NB: Drops the sockbuf lock during wait.
 	 */
-	error = sbwait(sb);
+	error = sbwait(so, SO_RCV);
 	if (error)
 		goto out;
 	goto restart;
@@ -2742,7 +2742,7 @@ soreceive_dgram(struct socket *so, struct sockaddr **psa, struct uio *uio,
 		}
 		SBLASTRECORDCHK(&so->so_rcv);
 		SBLASTMBUFCHK(&so->so_rcv);
-		error = sbwait(&so->so_rcv);
+		error = sbwait(so, SO_RCV);
 		if (error) {
 			SOCKBUF_UNLOCK(&so->so_rcv);
 			return (error);
@@ -2960,7 +2960,7 @@ sorflush(struct socket *so)
 		MPASS(pr->pr_domain->dom_dispose != NULL);
 		(*pr->pr_domain->dom_dispose)(so);
 	} else {
-		sbrelease(&so->so_rcv, so);
+		sbrelease(so, SO_RCV);
 		SOCK_IO_RECV_UNLOCK(so);
 	}
 
@@ -3610,8 +3610,8 @@ sopoll_generic(struct socket *so, int events, struct ucred *active_cred,
 		}
 	} else {
 		revents = 0;
-		SOCKBUF_LOCK(&so->so_snd);
-		SOCKBUF_LOCK(&so->so_rcv);
+		SOCK_SENDBUF_LOCK(so);
+		SOCK_RECVBUF_LOCK(so);
 		if (events & (POLLIN | POLLRDNORM))
 			if (soreadabledata(so))
 				revents |= events & (POLLIN | POLLRDNORM);
@@ -3642,8 +3642,8 @@ sopoll_generic(struct socket *so, int events, struct ucred *active_cred,
 				so->so_snd.sb_flags |= SB_SEL;
 			}
 		}
-		SOCKBUF_UNLOCK(&so->so_rcv);
-		SOCKBUF_UNLOCK(&so->so_snd);
+		SOCK_RECVBUF_UNLOCK(so);
+		SOCK_SENDBUF_UNLOCK(so);
 	}
 	SOCK_UNLOCK(so);
 	return (revents);
@@ -4297,12 +4297,12 @@ so_rdknl_assert_lock(void *arg, int what)
 		if (SOLISTENING(so))
 			SOCK_LOCK_ASSERT(so);
 		else
-			SOCKBUF_LOCK_ASSERT(&so->so_rcv);
+			SOCK_RECVBUF_LOCK_ASSERT(so);
 	} else {
 		if (SOLISTENING(so))
 			SOCK_UNLOCK_ASSERT(so);
 		else
-			SOCKBUF_UNLOCK_ASSERT(&so->so_rcv);
+			SOCK_RECVBUF_UNLOCK_ASSERT(so);
 	}
 }
 
@@ -4314,7 +4314,7 @@ so_wrknl_lock(void *arg)
 	if (SOLISTENING(so))
 		SOCK_LOCK(so);
 	else
-		SOCKBUF_LOCK(&so->so_snd);
+		SOCK_SENDBUF_LOCK(so);
 }
 
 static void
@@ -4325,7 +4325,7 @@ so_wrknl_unlock(void *arg)
 	if (SOLISTENING(so))
 		SOCK_UNLOCK(so);
 	else
-		SOCKBUF_UNLOCK(&so->so_snd);
+		SOCK_SENDBUF_UNLOCK(so);
 }
 
 static void
@@ -4337,12 +4337,12 @@ so_wrknl_assert_lock(void *arg, int what)
 		if (SOLISTENING(so))
 			SOCK_LOCK_ASSERT(so);
 		else
-			SOCKBUF_LOCK_ASSERT(&so->so_snd);
+			SOCK_SENDBUF_LOCK_ASSERT(so);
 	} else {
 		if (SOLISTENING(so))
 			SOCK_UNLOCK_ASSERT(so);
 		else
-			SOCKBUF_UNLOCK_ASSERT(&so->so_snd);
+			SOCK_SENDBUF_UNLOCK_ASSERT(so);
 	}
 }
 
diff --git a/sys/kern/uipc_usrreq.c b/sys/kern/uipc_usrreq.c
index efa586d346c5..b326dbd825a6 100644
--- a/sys/kern/uipc_usrreq.c
+++ b/sys/kern/uipc_usrreq.c
@@ -2771,7 +2771,7 @@ unp_dispose(struct socket *so)
 	KASSERT(sb->sb_ccc == 0 && sb->sb_mb == 0 && sb->sb_mbcnt == 0,
 	    ("%s: ccc %u mb %p mbcnt %u", __func__,
 	    sb->sb_ccc, (void *)sb->sb_mb, sb->sb_mbcnt));
-	sbrelease_locked(sb, so);
+	sbrelease_locked(so, SO_RCV);
 	SOCK_RECVBUF_UNLOCK(so);
 	if (SOCK_IO_RECV_OWNED(so))
 		SOCK_IO_RECV_UNLOCK(so);
diff --git a/sys/netinet/sctp_output.c b/sys/netinet/sctp_output.c
index 9a8927160441..9e2e70313be3 100644
--- a/sys/netinet/sctp_output.c
+++ b/sys/netinet/sctp_output.c
@@ -12992,7 +12992,7 @@ sctp_lower_sosend(struct socket *so,
 			stcb->block_entry = &be;
 			SCTP_TCB_UNLOCK(stcb);
 			hold_tcblock = false;
-			error = sbwait(&so->so_snd);
+			error = sbwait(so, SO_SND);
 			if (error == 0) {
 				if (so->so_error != 0) {
 					error = so->so_error;
@@ -13352,7 +13352,7 @@ skip_preblock:
 				stcb->block_entry = &be;
 				SCTP_TCB_UNLOCK(stcb);
 				hold_tcblock = false;
-				error = sbwait(&so->so_snd);
+				error = sbwait(so, SO_SND);
 				if (error == 0) {
 					if (so->so_error != 0)
 						error = so->so_error;
diff --git a/sys/netinet/sctputil.c b/sys/netinet/sctputil.c
index 8451ed5e2007..e20a49be1adb 100644
--- a/sys/netinet/sctputil.c
+++ b/sys/netinet/sctputil.c
@@ -5640,7 +5640,7 @@ restart_nosblocks:
 			}
 		}
 		if (block_allowed) {
-			error = sbwait(&so->so_rcv);
+			error = sbwait(so, SO_RCV);
 			if (error) {
 				goto out;
 			}
@@ -6255,7 +6255,7 @@ wait_some_more:
 			goto release;
 		}
 		if (so->so_rcv.sb_cc <= control->held_length) {
-			error = sbwait(&so->so_rcv);
+			error = sbwait(so, SO_RCV);
 			if (error) {
 				goto release;
 			}
diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c
index 695cd5a916db..be86ceca445c 100644
--- a/sys/netinet/tcp_input.c
+++ b/sys/netinet/tcp_input.c
@@ -1921,8 +1921,8 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
 				 * Give up when limit is reached.
 				 */
 				if (newsize)
-					if (!sbreserve_locked(&so->so_rcv,
-					    newsize, so, NULL))
+					if (!sbreserve_locked(so, SO_RCV,
+					    newsize, NULL))
 						so->so_rcv.sb_flags &= ~SB_AUTOSIZE;
 				m_adj(m, drop_hdrlen);	/* delayed header drop */
 				sbappendstream_locked(&so->so_rcv, m, 0);
@@ -3848,7 +3848,7 @@ tcp_mss(struct tcpcb *tp, int offer)
 		if (bufsize > sb_max)
 			bufsize = sb_max;
 		if (bufsize > so->so_snd.sb_hiwat)
-			(void)sbreserve_locked(&so->so_snd, bufsize, so, NULL);
+			(void)sbreserve_locked(so, SO_SND, bufsize, NULL);
 	}
 	SOCKBUF_UNLOCK(&so->so_snd);
 	/*
@@ -3871,7 +3871,7 @@ tcp_mss(struct tcpcb *tp, int offer)
 		if (bufsize > sb_max)
 			bufsize = sb_max;
 		if (bufsize > so->so_rcv.sb_hiwat)
-			(void)sbreserve_locked(&so->so_rcv, bufsize, so, NULL);
+			(void)sbreserve_locked(so, SO_RCV, bufsize, NULL);
 	}
 	SOCKBUF_UNLOCK(&so->so_rcv);
 
diff --git a/sys/netinet/tcp_output.c b/sys/netinet/tcp_output.c
index 299f1d034717..b0dedb1a24d1 100644
--- a/sys/netinet/tcp_output.c
+++ b/sys/netinet/tcp_output.c
@@ -2166,9 +2166,9 @@ tcp_sndbuf_autoscale(struct tcpcb *tp, struct socket *so, uint32_t sendwin)
 		    sbused(&so->so_snd) < V_tcp_autosndbuf_max &&
 		    sendwin >= (sbused(&so->so_snd) -
 		    (tp->snd_nxt - tp->snd_una))) {
-			if (!sbreserve_locked(&so->so_snd,
+			if (!sbreserve_locked(so, SO_SND,
 			    min(so->so_snd.sb_hiwat + V_tcp_autosndbuf_inc,
-			     V_tcp_autosndbuf_max), so, curthread))
+			     V_tcp_autosndbuf_max), curthread))
 				so->so_snd.sb_flags &= ~SB_AUTOSIZE;
 		}
 	}
diff --git a/sys/ofed/drivers/infiniband/ulp/sdp/sdp_main.c b/sys/ofed/drivers/infiniband/ulp/sdp/sdp_main.c
index c822276185cb..def5edb98983 100644
--- a/sys/ofed/drivers/infiniband/ulp/sdp/sdp_main.c
+++ b/sys/ofed/drivers/infiniband/ulp/sdp/sdp_main.c
@@ -1142,7 +1142,7 @@ restart:
 				error = EWOULDBLOCK;
 				goto release;
 			}
-			error = sbwait(&so->so_snd);
+			error = sbwait(so, SO_SND);
 			SOCKBUF_UNLOCK(&so->so_snd);
 			if (error)
 				goto release;
@@ -1336,7 +1336,7 @@ restart:
 	 * Wait and block until (more) data comes in.
 	 * NB: Drops the sockbuf lock during wait.
 	 */
-	error = sbwait(sb);
+	error = sbwait(so, SO_RCV);
 	if (error)
 		goto out;
 	goto restart;
diff --git a/sys/rpc/clnt_bck.c b/sys/rpc/clnt_bck.c
index 514905bf1cc2..810a957bb97b 100644
--- a/sys/rpc/clnt_bck.c
+++ b/sys/rpc/clnt_bck.c
@@ -326,7 +326,7 @@ if (error != 0) printf("sosend=%d\n", error);
 	if (error == EMSGSIZE) {
 printf("emsgsize\n");
 		SOCKBUF_LOCK(&xprt->xp_socket->so_snd);
-		sbwait(&xprt->xp_socket->so_snd);
+		sbwait(xprt->xp_socket, SO_SND);
 		SOCKBUF_UNLOCK(&xprt->xp_socket->so_snd);
 		sx_xunlock(&xprt->xp_lock);
 		AUTH_VALIDATE(auth, xid, NULL, NULL);
diff --git a/sys/rpc/clnt_vc.c b/sys/rpc/clnt_vc.c
index dfada2bea388..f565de06f4bd 100644
--- a/sys/rpc/clnt_vc.c
+++ b/sys/rpc/clnt_vc.c
@@ -447,7 +447,7 @@ call_again:
 	if (error == EMSGSIZE || (error == ERESTART &&
 	    (ct->ct_waitflag & PCATCH) == 0 && trycnt-- > 0)) {
 		SOCKBUF_LOCK(&ct->ct_socket->so_snd);
-		sbwait(&ct->ct_socket->so_snd);
+		sbwait(ct->ct_socket, SO_SND);
 		SOCKBUF_UNLOCK(&ct->ct_socket->so_snd);
 		AUTH_VALIDATE(auth, xid, NULL, NULL);
 		mtx_lock(&ct->ct_lock);
diff --git a/sys/sys/sockbuf.h b/sys/sys/sockbuf.h
index 372f04eba54c..2484407d557c 100644
--- a/sys/sys/sockbuf.h
+++ b/sys/sys/sockbuf.h
@@ -116,6 +116,9 @@ struct sockbuf {
 #endif	/* defined(_KERNEL) || defined(_WANT_SOCKET) */
 #ifdef _KERNEL
 
+/* 'which' values for KPIs that operate on one buffer of a socket. */
+typedef enum { SO_RCV, SO_SND } sb_which;
+
 /*
  * Per-socket buffer mutex used to protect most fields in the socket buffer.
  * These make use of the mutex pointer embedded in struct sockbuf, which
@@ -124,9 +127,6 @@ struct sockbuf {
  * these locking macros.
  */
 #define	SOCKBUF_MTX(_sb)		((_sb)->sb_mtx)
-#define	SOCKBUF_LOCK_INIT(_sb, _name) \
-	mtx_init(SOCKBUF_MTX(_sb), _name, NULL, MTX_DEF)
-#define	SOCKBUF_LOCK_DESTROY(_sb)	mtx_destroy(SOCKBUF_MTX(_sb))
 #define	SOCKBUF_LOCK(_sb)		mtx_lock(SOCKBUF_MTX(_sb))
 #define	SOCKBUF_OWNED(_sb)		mtx_owned(SOCKBUF_MTX(_sb))
 #define	SOCKBUF_UNLOCK(_sb)		mtx_unlock(SOCKBUF_MTX(_sb))
@@ -162,7 +162,7 @@ struct mbuf *
 struct mbuf *
 	sbcreatecontrol_how(void *p, int size, int type, int level,
 	    int wait);
-void	sbdestroy(struct sockbuf *sb, struct socket *so);
+void	sbdestroy(struct socket *, sb_which);
 void	sbdrop(struct sockbuf *sb, int len);
 void	sbdrop_locked(struct sockbuf *sb, int len);
 struct mbuf *
@@ -171,17 +171,17 @@ void	sbdroprecord(struct sockbuf *sb);
 void	sbdroprecord_locked(struct sockbuf *sb);
 void	sbflush(struct sockbuf *sb);
 void	sbflush_locked(struct sockbuf *sb);
-void	sbrelease(struct sockbuf *sb, struct socket *so);
-void	sbrelease_locked(struct sockbuf *sb, struct socket *so);
+void	sbrelease(struct socket *, sb_which);
+void	sbrelease_locked(struct socket *, sb_which);
 int	sbsetopt(struct socket *so, int cmd, u_long cc);
-int	sbreserve_locked(struct sockbuf *sb, u_long cc, struct socket *so,
+bool	sbreserve_locked(struct socket *so, sb_which which, u_long cc,
 	    struct thread *td);
 void	sbsndptr_adv(struct sockbuf *sb, struct mbuf *mb, u_int len);
 struct mbuf *
 	sbsndptr_noadv(struct sockbuf *sb, u_int off, u_int *moff);
 struct mbuf *
 	sbsndmbuf(struct sockbuf *sb, u_int off, u_int *moff);
-int	sbwait(struct sockbuf *sb);
+int	sbwait(struct socket *, sb_which);
 void	sballoc(struct sockbuf *, struct mbuf *);
 void	sbfree(struct sockbuf *, struct mbuf *);
 void	sballoc_ktls_rx(struct sockbuf *sb, struct mbuf *m);
diff --git a/sys/sys/socketvar.h b/sys/sys/socketvar.h
index fe6faa842bda..05eefd7e4fd4 100644
--- a/sys/sys/socketvar.h
+++ b/sys/sys/socketvar.h
@@ -77,8 +77,8 @@ enum socket_qstate {
  * Locking key to struct socket:
  * (a) constant after allocation, no locking required.
  * (b) locked by SOCK_LOCK(so).
- * (cr) locked by SOCK_RECVBUF_LOCK(so)/SOCKBUF_LOCK(&so->so_rcv).
- * (cs) locked by SOCK_SENDBUF_LOCK(so)/SOCKBUF_LOCK(&so->so_snd).
+ * (cr) locked by SOCK_RECVBUF_LOCK(so)
+ * (cs) locked by SOCK_SENDBUF_LOCK(so)
  * (e) locked by SOLISTEN_LOCK() of corresponding listening socket.
  * (f) not locked since integer reads/writes are atomic.
  * (g) used only as a sleep/wakeup address, no value.
@@ -256,8 +256,8 @@ struct socket {
 } while (0)
 
 /*
- * Socket buffer locks.  These manipulate the same mutexes as SOCKBUF_LOCK()
- * and related macros.
+ * Socket buffer locks.  These are strongly preferred over SOCKBUF_LOCK(sb)
+ * macros, as we are moving towards protocol specific socket buffers.
  */
 #define	SOCK_RECVBUF_MTX(so)						\
 	(&(so)->so_rcv_mtx)
@@ -281,8 +281,26 @@ struct socket {
 #define	SOCK_SENDBUF_UNLOCK_ASSERT(so)					\
 	mtx_assert(SOCK_SENDBUF_MTX(so), MA_NOTOWNED)
 
-/* 'which' values for socket buffer events and upcalls. */
-typedef enum { SO_RCV, SO_SND } sb_which;
+#define	SOCK_BUF_LOCK(so, which)					\
+	mtx_lock(soeventmtx(so, which))
+#define	SOCK_BUF_UNLOCK(so, which)					\
+	mtx_unlock(soeventmtx(so, which))
+#define	SOCK_BUF_LOCK_ASSERT(so, which)					\
+	mtx_assert(soeventmtx(so, which), MA_OWNED)
+#define	SOCK_BUF_UNLOCK_ASSERT(so, which)				\
+	mtx_assert(soeventmtx(so, which), MA_NOTOWNED)
+
+static inline struct sockbuf *
+sobuf(struct socket *so, const sb_which which)
+{
+	return (which == SO_RCV ? &so->so_rcv : &so->so_snd);
+}
+
+static inline struct mtx *
+soeventmtx(struct socket *so, const sb_which which)
+{
+	return (which == SO_RCV ? SOCK_RECVBUF_MTX(so) : SOCK_SENDBUF_MTX(so));
+}
 
 /*
  * Macros for sockets and socket buffering.
*** 57 LINES SKIPPED ***