git: 1a8d176432e7 - main - inpcb: fully retire inp_ppcb pointer

From: Gleb Smirnoff <glebius_at_FreeBSD.org>
Date: Fri, 29 Mar 2024 19:19:57 UTC
The branch main has been updated by glebius:

URL: https://cgit.FreeBSD.org/src/commit/?id=1a8d176432e76d7725ed1ac0b44f63ac6cc82397

commit 1a8d176432e76d7725ed1ac0b44f63ac6cc82397
Author:     Gleb Smirnoff <glebius@FreeBSD.org>
AuthorDate: 2024-03-29 19:16:59 +0000
Commit:     Gleb Smirnoff <glebius@FreeBSD.org>
CommitDate: 2024-03-29 19:18:32 +0000

    inpcb: fully retire inp_ppcb pointer
    
    Before a protocol specific control block started to embed inpcb in self
    (see 0aa120d52f3c, e68b3792440c, 483fe96511ec) this pointer used to point
    at it.
    
    Retain kf_sock_inpcb field in the struct kinfo_file in <sys/user.h>.  The
    exp-run detected a minimal use of the field in ports:
      * sysutils/lsof - patched upstream
      * net-mgmt/netdata  - patch accepted upstream
      * emulators/qemu-user-static - upstream master branch seems not using
        the field anymore
    We can keep the field around for some time, but eventually it may be
    reused for something else.
    
    PR:                     277659 (exp-run)
    Reviewed by:            tuexen
    Differential Revision:  https://reviews.freebsd.org/D44491
---
 lib/libprocstat/libprocstat.c        | 29 ++++-------------------------
 lib/libprocstat/libprocstat.h        |  1 -
 lib/libprocstat/libprocstat_compat.c |  2 +-
 sys/kern/sys_socket.c                |  8 ++------
 sys/netinet/in_pcb.c                 |  9 ---------
 sys/netinet/in_pcb.h                 |  9 ++++-----
 sys/netinet/tcp_subr.c               |  7 -------
 sys/sys/user.h                       |  2 +-
 usr.bin/fstat/fstat.c                | 15 +++++----------
 usr.bin/systat/netstat.c             | 28 +++++++++++++---------------
 10 files changed, 30 insertions(+), 80 deletions(-)

diff --git a/lib/libprocstat/libprocstat.c b/lib/libprocstat/libprocstat.c
index 8f1f58d12957..0c7d28540e43 100644
--- a/lib/libprocstat/libprocstat.c
+++ b/lib/libprocstat/libprocstat.c
@@ -83,8 +83,6 @@
 #include <netinet/in.h>
 #include <netinet/in_systm.h>
 #include <netinet/ip.h>
-#define	_WANT_INPCB
-#include <netinet/in_pcb.h>
 
 #include <assert.h>
 #include <ctype.h>
@@ -1473,7 +1471,6 @@ procstat_get_socket_info_kvm(kvm_t *kd, struct filestat *fst,
     struct sockstat *sock, char *errbuf)
 {
 	struct domain dom;
-	struct inpcb inpcb;
 	struct protosw proto;
 	struct socket s;
 	struct unpcb unpcb;
@@ -1522,28 +1519,15 @@ procstat_get_socket_info_kvm(kvm_t *kd, struct filestat *fst,
 	sock->proto = proto.pr_protocol;
 	sock->dom_family = dom.dom_family;
 	sock->so_pcb = (uintptr_t)s.so_pcb;
+	sock->sendq = s.so_snd.sb_ccc;
+	sock->recvq = s.so_rcv.sb_ccc;
+	sock->so_rcv_sb_state = s.so_rcv.sb_state;
+	sock->so_snd_sb_state = s.so_snd.sb_state;
 
 	/*
 	 * Protocol specific data.
 	 */
 	switch (dom.dom_family) {
-	case AF_INET:
-	case AF_INET6:
-		if (proto.pr_protocol == IPPROTO_TCP) {
-			if (s.so_pcb) {
-				if (kvm_read(kd, (u_long)s.so_pcb,
-				    (char *)&inpcb, sizeof(struct inpcb))
-				    != sizeof(struct inpcb)) {
-					warnx("can't read inpcb at %p",
-					    (void *)s.so_pcb);
-				} else
-					sock->inp_ppcb =
-					    (uintptr_t)inpcb.inp_ppcb;
-				sock->sendq = s.so_snd.sb_ccc;
-				sock->recvq = s.so_rcv.sb_ccc;
-			}
-		}
-		break;
 	case AF_UNIX:
 		if (s.so_pcb) {
 			if (kvm_read(kd, (u_long)s.so_pcb, (char *)&unpcb,
@@ -1551,11 +1535,7 @@ procstat_get_socket_info_kvm(kvm_t *kd, struct filestat *fst,
 				warnx("can't read unpcb at %p",
 				    (void *)s.so_pcb);
 			} else if (unpcb.unp_conn) {
-				sock->so_rcv_sb_state = s.so_rcv.sb_state;
-				sock->so_snd_sb_state = s.so_snd.sb_state;
 				sock->unp_conn = (uintptr_t)unpcb.unp_conn;
-				sock->sendq = s.so_snd.sb_ccc;
-				sock->recvq = s.so_rcv.sb_ccc;
 			}
 		}
 		break;
@@ -1603,7 +1583,6 @@ procstat_get_socket_info_sysctl(struct filestat *fst, struct sockstat *sock,
 	case AF_INET:
 	case AF_INET6:
 		if (sock->proto == IPPROTO_TCP) {
-			sock->inp_ppcb = kif->kf_un.kf_sock.kf_sock_inpcb;
 			sock->sendq = kif->kf_un.kf_sock.kf_sock_sendq;
 			sock->recvq = kif->kf_un.kf_sock.kf_sock_recvq;
 		}
diff --git a/lib/libprocstat/libprocstat.h b/lib/libprocstat/libprocstat.h
index 3d30b4db4018..4aba1610a05f 100644
--- a/lib/libprocstat/libprocstat.h
+++ b/lib/libprocstat/libprocstat.h
@@ -154,7 +154,6 @@ struct shmstat {
 	uint16_t	mode;
 };
 struct sockstat {
-	uint64_t	inp_ppcb;
 	uint64_t	so_addr;
 	uint64_t	so_pcb;
 	uint64_t	unp_conn;
diff --git a/lib/libprocstat/libprocstat_compat.c b/lib/libprocstat/libprocstat_compat.c
index 472527973323..63eb35678752 100644
--- a/lib/libprocstat/libprocstat_compat.c
+++ b/lib/libprocstat/libprocstat_compat.c
@@ -182,7 +182,7 @@ freebsd11_procstat_get_socket_info(struct procstat *procstat, struct filestat *f
 	r = procstat_get_socket_info(procstat, fst, &sock, errbuf);
 	if (r != 0)
 		return (r);
-	sock_compat->inp_ppcb = sock.inp_ppcb;
+	sock_compat->inp_ppcb = sock.so_pcb;
 	sock_compat->so_addr = sock.so_addr;
 	sock_compat->so_pcb = sock.so_pcb;
 	sock_compat->unp_conn = sock.unp_conn;
diff --git a/sys/kern/sys_socket.c b/sys/kern/sys_socket.c
index 7854a4ebdf52..55837e4cb6ce 100644
--- a/sys/kern/sys_socket.c
+++ b/sys/kern/sys_socket.c
@@ -357,7 +357,6 @@ static int
 soo_fill_kinfo(struct file *fp, struct kinfo_file *kif, struct filedesc *fdp)
 {
 	struct sockaddr_storage ss = { .ss_len = sizeof(ss) };
-	struct inpcb *inpcb;
 	struct unpcb *unpcb;
 	struct socket *so;
 	int error;
@@ -373,11 +372,8 @@ soo_fill_kinfo(struct file *fp, struct kinfo_file *kif, struct filedesc *fdp)
 	switch (kif->kf_un.kf_sock.kf_sock_domain0) {
 	case AF_INET:
 	case AF_INET6:
-		if (so->so_pcb != NULL) {
-			inpcb = (struct inpcb *)(so->so_pcb);
-			kif->kf_un.kf_sock.kf_sock_inpcb =
-			    (uintptr_t)inpcb->inp_ppcb;
-		}
+		/* XXX: kf_sock_inpcb is obsolete.  It may be removed. */
+		kif->kf_un.kf_sock.kf_sock_inpcb = (uintptr_t)so->so_pcb;
 		kif->kf_un.kf_sock.kf_sock_rcv_sb_state =
 		    so->so_rcv.sb_state;
 		kif->kf_un.kf_sock.kf_sock_snd_sb_state =
diff --git a/sys/netinet/in_pcb.c b/sys/netinet/in_pcb.c
index d966f8e386fe..d9caad6417ef 100644
--- a/sys/netinet/in_pcb.c
+++ b/sys/netinet/in_pcb.c
@@ -1805,10 +1805,6 @@ in_pcbdrop(struct inpcb *inp)
 {
 
 	INP_WLOCK_ASSERT(inp);
-#ifdef INVARIANTS
-	if (inp->inp_socket != NULL && inp->inp_ppcb != NULL)
-		MPASS(inp->inp_refcount > 1);
-#endif
 
 	inp->inp_flags |= INP_DROPPED;
 	if (inp->inp_flags & INP_INHASHLIST)
@@ -2865,7 +2861,6 @@ in_pcbtoxinpcb(const struct inpcb *inp, struct xinpcb *xi)
 		sotoxsocket(inp->inp_socket, &xi->xi_socket);
 	bcopy(&inp->inp_inc, &xi->inp_inc, sizeof(struct in_conninfo));
 	xi->inp_gencnt = inp->inp_gencnt;
-	xi->inp_ppcb = (uintptr_t)inp->inp_ppcb;
 	xi->inp_flow = inp->inp_flow;
 	xi->inp_flowid = inp->inp_flowid;
 	xi->inp_flowtype = inp->inp_flowtype;
@@ -3150,10 +3145,6 @@ db_print_inpcb(struct inpcb *inp, const char *name, int indent)
 
 	db_print_inconninfo(&inp->inp_inc, "inp_conninfo", indent);
 
-	db_print_indent(indent);
-	db_printf("inp_ppcb: %p   inp_pcbinfo: %p   inp_socket: %p\n",
-	    inp->inp_ppcb, inp->inp_pcbinfo, inp->inp_socket);
-
 	db_print_indent(indent);
 	db_printf("inp_label: %p   inp_flags: 0x%x (",
 	   inp->inp_label, inp->inp_flags);
diff --git a/sys/netinet/in_pcb.h b/sys/netinet/in_pcb.h
index 7e9de02b0c7c..a4b4075b3501 100644
--- a/sys/netinet/in_pcb.h
+++ b/sys/netinet/in_pcb.h
@@ -133,8 +133,9 @@ struct in_conninfo {
 /*
  * struct inpcb captures the network layer state for TCP, UDP, and raw IPv4 and
  * IPv6 sockets.  In the case of TCP and UDP, further per-connection state is
- * hung off of inp_ppcb most of the time.  Almost all fields of struct inpcb
- * are static after creation or protected by a per-inpcb rwlock, inp_lock.
+ * located in a larger protocol specific structure that embeds inpcb in it.
+ * Almost all fields of struct inpcb are static after creation or protected by
+ * a per-inpcb rwlock, inp_lock.
  *
  * A inpcb database is indexed by addresses/ports hash as well as list of
  * all pcbs that belong to a certain proto. Database lookups or list traversals
@@ -177,7 +178,6 @@ struct inpcb {
 	int	inp_flags;		/* (i) generic IP/datagram flags */
 	int	inp_flags2;		/* (i) generic IP/datagram flags #2*/
 	uint8_t inp_numa_domain;	/* numa domain */
-	void	*inp_ppcb;		/* (i) pointer to per-protocol pcb */
 	struct	socket *inp_socket;	/* (i) back pointer to socket */
 	struct	inpcbinfo *inp_pcbinfo;	/* (c) PCB list info */
 	struct	ucred	*inp_cred;	/* (c) cache of socket cred */
@@ -266,8 +266,7 @@ struct xinpcb {
 	struct xsocket	xi_socket;		/* (s,p) */
 	struct in_conninfo inp_inc;		/* (s,p) */
 	uint64_t	inp_gencnt;		/* (s,p) */
-	kvaddr_t	inp_ppcb;		/* (s) netstat(1) */
-	int64_t		inp_spare64[4];
+	int64_t		inp_spare64[5];
 	uint32_t	inp_flow;		/* (s) */
 	uint32_t	inp_flowid;		/* (s) */
 	uint32_t	inp_flowtype;		/* (s) */
diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c
index a6f84c297688..db335a890b28 100644
--- a/sys/netinet/tcp_subr.c
+++ b/sys/netinet/tcp_subr.c
@@ -3980,13 +3980,6 @@ tcp_inptoxtp(const struct inpcb *inp, struct xtcpcb *xt)
 
 	xt->xt_len = sizeof(struct xtcpcb);
 	in_pcbtoxinpcb(inp, &xt->xt_inp);
-	/*
-	 * TCP doesn't use inp_ppcb pointer, we embed inpcb into tcpcb.
-	 * Fixup the pointer that in_pcbtoxinpcb() has set.  When printing
-	 * TCP netstat(1) used to use this pointer, so this fixup needs to
-	 * stay for stable/14.
-	 */
-	xt->xt_inp.inp_ppcb = (uintptr_t)tp;
 }
 
 void
diff --git a/sys/sys/user.h b/sys/sys/user.h
index d70cfe3bd718..e76b2a66ae94 100644
--- a/sys/sys/user.h
+++ b/sys/sys/user.h
@@ -371,7 +371,7 @@ struct kinfo_file {
 				struct sockaddr_storage	kf_sa_peer;
 				/* Address of so_pcb. */
 				uint64_t	kf_sock_pcb;
-				/* Address of inp_ppcb. */
+				/* Obsolete! May be reused as a spare. */
 				uint64_t	kf_sock_inpcb;
 				/* Address of unp_conn. */
 				uint64_t	kf_sock_unpconn;
diff --git a/usr.bin/fstat/fstat.c b/usr.bin/fstat/fstat.c
index 328e35e9138f..e5d0755062d0 100644
--- a/usr.bin/fstat/fstat.c
+++ b/usr.bin/fstat/fstat.c
@@ -400,11 +400,10 @@ print_socket_info(struct procstat *procstat, struct filestat *fst)
 	/*
 	 * protocol specific formatting
 	 *
-	 * Try to find interesting things to print.  For tcp, the interesting
-	 * thing is the address of the tcpcb, for udp and others, just the
-	 * inpcb (socket pcb).  For unix domain, its the address of the socket
-	 * pcb and the address of the connected pcb (if connected).  Otherwise
-	 * just print the protocol number and address of the socket itself.
+	 * Try to find interesting things to print.  For internet and unix
+	 * sockets, its the address of the socket pcb.  For unix it is also the
+	 * address of the connected pcb (if connected).  Otherwise just print
+	 * the protocol number and address of the socket itself.
 	 * The idea is not to duplicate netstat, but to make available enough
 	 * information for further analysis.
 	 */
@@ -417,11 +416,7 @@ print_socket_info(struct procstat *procstat, struct filestat *fst)
 			printf(" %s", pe->p_name);
 		else
 			printf(" %d", sock.proto);
-		if (sock.proto == IPPROTO_TCP ) {
-			if (sock.inp_ppcb != 0)
-				printf(" %lx", (u_long)sock.inp_ppcb);
-		}
-		else if (sock.so_pcb != 0)
+		if (sock.so_pcb != 0)
 			printf(" %lx", (u_long)sock.so_pcb);
 		if (!sflg)
 			break;
diff --git a/usr.bin/systat/netstat.c b/usr.bin/systat/netstat.c
index bc4acabe4eda..6d188bf1dd0d 100644
--- a/usr.bin/systat/netstat.c
+++ b/usr.bin/systat/netstat.c
@@ -168,12 +168,11 @@ fetchnetstat(void)
 static void
 fetchnetstat_kvm(void)
 {
-	struct inpcb *next;
 	struct netinfo *p;
 	struct inpcbhead head;
-	struct inpcb inpcb;
 	struct socket sockb;
 	struct tcpcb tcpcb;
+	struct inpcb *inpcb;
 	void *off;
 	int istcp;
 
@@ -195,32 +194,31 @@ fetchnetstat_kvm(void)
 	}
 again:
 	KREAD(off, &head, sizeof (struct inpcbhead));
-	LIST_FOREACH(next, &head, inp_list) {
-		KREAD(next, &inpcb, sizeof (inpcb));
-		next = &inpcb;
+	LIST_FOREACH(inpcb, &head, inp_list) {
+		KREAD(inpcb, &tcpcb, istcp ? sizeof(tcpcb) : sizeof(inpcb));
+		inpcb = (struct inpcb *)&tcpcb;
 		if (!aflag) {
-			if (inpcb.inp_vflag & INP_IPV4) {
-				if (inpcb.inp_laddr.s_addr == INADDR_ANY)
+			if (inpcb->inp_vflag & INP_IPV4) {
+				if (inpcb->inp_laddr.s_addr == INADDR_ANY)
 					continue;
 			}
 #ifdef INET6
-			else if (inpcb.inp_vflag & INP_IPV6) {
-				if (memcmp(&inpcb.in6p_laddr,
+			else if (inpcb->inp_vflag & INP_IPV6) {
+				if (memcmp(&inpcb->in6p_laddr,
 				    &in6addr_any, sizeof(in6addr_any)) == 0)
 					continue;
 			}
 #endif
 		}
-		if (nhosts && !checkhost(&inpcb.inp_inc))
+		if (nhosts && !checkhost(&inpcb->inp_inc))
 			continue;
-		if (nports && !checkport(&inpcb.inp_inc))
+		if (nports && !checkport(&inpcb->inp_inc))
 			continue;
 		if (istcp) {
-			KREAD(inpcb.inp_socket, &sockb, sizeof (sockb));
-			KREAD(inpcb.inp_ppcb, &tcpcb, sizeof (tcpcb));
-			enter_kvm(&inpcb, &sockb, tcpcb.t_state, "tcp");
+			KREAD(inpcb->inp_socket, &sockb, sizeof (sockb));
+			enter_kvm(inpcb, &sockb, tcpcb.t_state, "tcp");
 		} else
-			enter_kvm(&inpcb, &sockb, 0, "udp");
+			enter_kvm(inpcb, &sockb, 0, "udp");
 	}
 	if (istcp && (protos&UDP)) {
 		istcp = 0;