Re: fibnum3
- Reply: Paul Vixie : "Re: fibnum3"
- In reply to: Paul Vixie : "fibnum3"
- Go to: [ bottom of page ] [ top of archives ] [ this month ]
Date: Mon, 09 Jun 2025 17:19:53 UTC
On Mon, Jun 09, 2025 at 04:42:55AM +0000, Paul Vixie wrote:
> ok so i've learned a lot by trying to get fibnum2 upstreamed. needed ipv6,
> needed to be more minimalistic about unrelated changes, needed to keep in
> synch with the freebsd-current tree -- the works. i also scrambled my git tree
> somehow in a way that i found unrecoverable, thus i ended up closing that PR
> after fixing everything i learned about other than man page changes.
I've made a number of inline comments.
> before i try again i'd like to go back to where it all started -- this e-mail
> list. visual review would be great, testing would be even better. for those
> who didn't keep state from last time, the intent is to let every interface
> have its own default route, and ensure inbound/outbound path symmetry. i use
> different interfaces for serving http-et-al than for doing ssh for operations/
> management, and i've never been comfortable with a single default route.
>
> diff is attached. "patch -p1" is your friend.
>
> --
> Paul Vixie
> diff --git a/sys/kern/uipc_socket.c b/sys/kern/uipc_socket.c
> index 6c9eb7139cd1..518ee7ee3e75 100644
> --- a/sys/kern/uipc_socket.c
> +++ b/sys/kern/uipc_socket.c
> @@ -1003,13 +1003,13 @@ SYSCTL_TIMEVAL_SEC(_kern_ipc, OID_AUTO, sooverinterval, CTLFLAG_RW,
> * accept(2), the protocol has two options:
> * 1) Call legacy sonewconn() function, which would call protocol attach
> * method, same as used for socket(2).
> - * 2) Call solisten_clone(), do attach that is specific to a cloned connection,
> - * and then call solisten_enqueue().
> + * 2) Call solisten_clone(), do an attach that is specific to a cloned
> + * connection, and then call solisten_enqueue().
> *
> * Note: the ref count on the socket is 0 on return.
> */
> struct socket *
> -solisten_clone(struct socket *head)
> +solisten_clone(struct socket *head, int fibnum)
> {
> struct sbuf descrsb;
> struct socket *so;
> @@ -1180,7 +1180,8 @@ solisten_clone(struct socket *head)
> SO_DONTROUTE | SO_LINGER | SO_OOBINLINE | SO_NOSIGPIPE);
> so->so_linger = head->so_linger;
> so->so_state = head->so_state;
> - so->so_fibnum = head->so_fibnum;
> + if ((so->so_fibnum = head->so_fibnum) == 0)
> + so->so_fibnum = fibnum;
The double assignment looks odd to me. It'd be neater to write:
so->so_fibnum == head->so_fibnum != 0 ? head->so_fibnum : fibnum;
> so->so_proto = head->so_proto;
> so->so_cred = crhold(head->so_cred);
> #ifdef SOCKET_HHOOK
> @@ -1226,7 +1227,7 @@ sonewconn(struct socket *head, int connstatus)
> {
> struct socket *so;
>
> - if ((so = solisten_clone(head)) == NULL)
> + if ((so = solisten_clone(head, head->so_fibnum)) == NULL)
> return (NULL);
>
> if (so->so_proto->pr_attach(so, 0, NULL) != 0) {
> diff --git a/sys/kern/uipc_usrreq.c b/sys/kern/uipc_usrreq.c
> index 95d857b2625d..29b6d2abb090 100644
> --- a/sys/kern/uipc_usrreq.c
> +++ b/sys/kern/uipc_usrreq.c
> @@ -2916,7 +2916,7 @@ unp_connectat(int fd, struct socket *so, struct sockaddr *nam,
> }
> if (connreq) {
> if (SOLISTENING(so2))
> - so2 = solisten_clone(so2);
> + so2 = solisten_clone(so2, 0);
> else
> so2 = NULL;
> if (so2 == NULL) {
> diff --git a/sys/net/if.c b/sys/net/if.c
> index 0255c27a3136..0eb8d028cba7 100644
> --- a/sys/net/if.c
> +++ b/sys/net/if.c
> @@ -1805,11 +1805,11 @@ sa_dl_equal(const struct sockaddr *a, const struct sockaddr *b)
> }
>
> /*
> - * Locate an interface based on a complete address.
> + * Locate an interface based on a complete address,
> + * and optionally retrieve its FIB number.
> */
> -/*ARGSUSED*/
> struct ifaddr *
> -ifa_ifwithaddr(const struct sockaddr *addr)
> +ifa_ifwithaddr_getfib(const struct sockaddr *addr, uint16_t *fibnum)
> {
> struct ifnet *ifp;
> struct ifaddr *ifa;
> @@ -1834,17 +1834,23 @@ ifa_ifwithaddr(const struct sockaddr *addr)
> }
> ifa = NULL;
> done:
> + if (fibnum != NULL && ifp != NULL)
> + *fibnum = ifp->if_fib;
Rather than complicating this function to add an additional return
value, why not let the caller get the fib number directly? They can
just get ifa->ifa_ifp->if_fib.
> return (ifa);
> }
>
> +/*
> + * Test for existence of an interface having this complete address,
> + * optionally retrieving its FIB number.
> + */
> int
> -ifa_ifwithaddr_check(const struct sockaddr *addr)
> +ifa_ifwithaddr_check_getfib(const struct sockaddr *addr, uint16_t *fibnum)
> {
> struct epoch_tracker et;
> int rc;
>
> NET_EPOCH_ENTER(et);
> - rc = (ifa_ifwithaddr(addr) != NULL);
> + rc = (ifa_ifwithaddr_getfib(addr, fibnum) != NULL);
> NET_EPOCH_EXIT(et);
> return (rc);
> }
Similarly, I think adding this extra helper is unnecessary. Callers
which want the FIB number can use ifa_ifwithaddr() and extract it
directly. That's how in6_pcbbind_avail() handles fetching ifaddr flags,
for instance.
> diff --git a/sys/net/if_var.h b/sys/net/if_var.h
> index 83d33330987e..e825be3180a3 100644
> --- a/sys/net/if_var.h
> +++ b/sys/net/if_var.h
> @@ -545,8 +545,9 @@ int ifa_add_loopback_route(struct ifaddr *, struct sockaddr *);
> int ifa_del_loopback_route(struct ifaddr *, struct sockaddr *);
> int ifa_switch_loopback_route(struct ifaddr *, struct sockaddr *);
>
> -struct ifaddr *ifa_ifwithaddr(const struct sockaddr *);
> -int ifa_ifwithaddr_check(const struct sockaddr *);
> +struct ifaddr *ifa_ifwithaddr_getfib(const struct sockaddr *, uint16_t *);
> +int ifa_ifwithaddr_check_getfib(const struct sockaddr *,
> + uint16_t *);
> struct ifaddr *ifa_ifwithbroadaddr(const struct sockaddr *, int);
> struct ifaddr *ifa_ifwithdstaddr(const struct sockaddr *, int);
> struct ifaddr *ifa_ifwithnet(const struct sockaddr *, int, int);
> @@ -555,6 +556,18 @@ struct ifaddr *ifa_ifwithroute(int, const struct sockaddr *,
> struct ifaddr *ifaof_ifpforaddr(const struct sockaddr *, if_t);
> int ifa_preferred(struct ifaddr *, struct ifaddr *);
>
> +static inline struct ifaddr *
> +ifa_ifwithaddr(const struct sockaddr *sa)
> +{
> + return (ifa_ifwithaddr_getfib(sa, NULL));
> +}
> +
> +static inline int
> +ifa_ifwithaddr_check(const struct sockaddr *sa)
> +{
> + return (ifa_ifwithaddr_check_getfib(sa, NULL));
> +}
> +
> int if_simloop(if_t ifp, struct mbuf *m, int af, int hlen);
>
> typedef void *if_com_alloc_t(u_char type, if_t ifp);
> diff --git a/sys/netinet/in_pcb.c b/sys/netinet/in_pcb.c
> index bccd4b84561a..e469c3e1275a 100644
> --- a/sys/netinet/in_pcb.c
> +++ b/sys/netinet/in_pcb.c
> @@ -646,7 +646,8 @@ in_pcballoc(struct socket *so, struct inpcbinfo *pcbinfo)
> inp->inp_pcbinfo = pcbinfo;
> inp->inp_socket = so;
> inp->inp_cred = crhold(so->so_cred);
> - inp->inp_inc.inc_fibnum = so->so_fibnum;
> + /* The FIB number is in both inp_inc and inp_socket; synchronize. */
> + inp->inp_inc.inc_fibnum = inp->inp_socket->so_fibnum;
so == inp->inp_socket, so this part of the change is redundant.
> #ifdef MAC
> error = mac_inpcb_init(inp, M_NOWAIT);
> if (error != 0)
> @@ -729,7 +730,8 @@ in_pcbbind(struct inpcb *inp, struct sockaddr_in *sin, int flags,
> return (EINVAL);
> anonport = sin == NULL || sin->sin_port == 0;
> error = in_pcbbind_setup(inp, sin, &inp->inp_laddr.s_addr,
> - &inp->inp_lport, flags, cred);
> + &inp->inp_lport, flags,
> + &inp->inp_inc.inc_fibnum, cred);
> if (error)
> return (error);
> if (__predict_false((error = in_pcbinshash(inp)) != 0)) {
> @@ -741,6 +743,8 @@ in_pcbbind(struct inpcb *inp, struct sockaddr_in *sin, int flags,
> }
> if (anonport)
> inp->inp_flags |= INP_ANONPORT;
> + /* The FIB number is in both inp_inc and inp_socket; synchronize. */
> + sosetfib(inp->inp_socket, inp->inp_inc.inc_fibnum);
Why is there no such call in the IPv6 case?
> return (0);
> }
> #endif
> @@ -910,8 +914,8 @@ in_pcb_lport(struct inpcb *inp, struct in_addr *laddrp, u_short *lportp,
> */
> static int
> in_pcbbind_avail(struct inpcb *inp, const struct in_addr laddr,
> - const u_short lport, const int fib, int sooptions, int lookupflags,
> - struct ucred *cred)
> + const u_short lport, const int fib, int sooptions,
> + int lookupflags, uint16_t *fibnum, struct ucred *cred)
> {
> int reuseport, reuseport_lb;
>
> @@ -951,7 +955,8 @@ in_pcbbind_avail(struct inpcb *inp, const struct in_addr laddr,
> * to any endpoint address, local or not.
> */
> if ((inp->inp_flags & INP_BINDANY) == 0 &&
> - ifa_ifwithaddr_check((const struct sockaddr *)&sin) == 0)
> + ifa_ifwithaddr_check_getfib((const struct sockaddr *)&sin,
> + fibnum) == 0)
This excludes the IN_MULTICAST() case--is that intentional?
> return (EADDRNOTAVAIL);
> }
>
> @@ -1005,11 +1010,12 @@ in_pcbbind_avail(struct inpcb *inp, const struct in_addr laddr,
> * calling in_pcbinshash(), or they can just use the resulting
> * port and address to authorise the sending of a once-off packet.
> *
> - * On error, the values of *laddrp and *lportp are not changed.
> + * On error, the values of *laddrp, *lportp and *fibnum are not changed.
> */
> int
> in_pcbbind_setup(struct inpcb *inp, struct sockaddr_in *sin, in_addr_t *laddrp,
> - u_short *lportp, int flags, struct ucred *cred)
> + u_short *lportp, int flags, uint16_t *fibnum,
> + struct ucred *cred)
> {
> struct socket *so = inp->inp_socket;
> struct in_addr laddr;
> @@ -1055,7 +1061,7 @@ in_pcbbind_setup(struct inpcb *inp, struct sockaddr_in *sin, in_addr_t *laddrp,
>
> /* See if this address/port combo is available. */
> error = in_pcbbind_avail(inp, laddr, lport, fib, sooptions,
> - lookupflags, cred);
> + lookupflags, fibnum, cred);
Indentation on continuing lines should be by four spaces.
> if (error != 0)
> return (error);
> }
> diff --git a/sys/netinet/in_pcb.h b/sys/netinet/in_pcb.h
> index 5fe12c4f1e76..2bb36b41a60b 100644
> --- a/sys/netinet/in_pcb.h
> +++ b/sys/netinet/in_pcb.h
> @@ -638,7 +638,7 @@ int in_pcballoc(struct socket *, struct inpcbinfo *);
> #define INPBIND_FIB 0x0001 /* bind to the PCB's FIB only */
> int in_pcbbind(struct inpcb *, struct sockaddr_in *, int, struct ucred *);
> int in_pcbbind_setup(struct inpcb *, struct sockaddr_in *, in_addr_t *,
> - u_short *, int, struct ucred *);
> + u_short *, int, uint16_t *, struct ucred *);
> int in_pcbconnect(struct inpcb *, struct sockaddr_in *, struct ucred *);
> void in_pcbdisconnect(struct inpcb *);
> void in_pcbdrop(struct inpcb *);
> diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c
> index c00a102e8520..4c3d26c58dc4 100644
> --- a/sys/netinet/tcp_input.c
> +++ b/sys/netinet/tcp_input.c
> @@ -1047,7 +1047,8 @@ tcp_input_with_port(struct mbuf **mp, int *offp, int proto, uint16_t port)
> }
> inc.inc_fport = th->th_sport;
> inc.inc_lport = th->th_dport;
> - inc.inc_fibnum = so->so_fibnum;
> + if ((inc.inc_fibnum = so->so_fibnum) == 0)
> + inc.inc_fibnum = m->m_pkthdr.fibnum;
Same comment about double assignment.
>
> /*
> * Check for an existing connection attempt in syncache if
> diff --git a/sys/netinet/tcp_syncache.c b/sys/netinet/tcp_syncache.c
> index 606d808676e1..f0b64edcdacc 100644
> --- a/sys/netinet/tcp_syncache.c
> +++ b/sys/netinet/tcp_syncache.c
> @@ -794,7 +794,7 @@ syncache_socket(struct syncache *sc, struct socket *lso, struct mbuf *m)
> * as they would have been set up if we had created the
> * connection when the SYN arrived.
> */
> - if ((so = solisten_clone(lso)) == NULL)
> + if ((so = solisten_clone(lso, sc->sc_inc.inc_fibnum)) == NULL)
> goto allocfail;
> #ifdef MAC
> mac_socketpeer_set_from_mbuf(m, so);
> diff --git a/sys/netinet/udp_usrreq.c b/sys/netinet/udp_usrreq.c
> index dafbaf6dc672..8ffc1015a125 100644
> --- a/sys/netinet/udp_usrreq.c
> +++ b/sys/netinet/udp_usrreq.c
> @@ -1261,7 +1261,7 @@ udp_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr,
> }
> INP_HASH_WLOCK(pcbinfo);
> error = in_pcbbind_setup(inp, &src, &laddr.s_addr, &lport,
> - V_udp_bind_all_fibs ? 0 : INPBIND_FIB, td->td_ucred);
> + V_udp_bind_all_fibs ? 0 : INPBIND_FIB, NULL, td->td_ucred);
> INP_HASH_WUNLOCK(pcbinfo);
> if ((flags & PRUS_IPV6) != 0)
> inp->inp_vflag = vflagsav;
> diff --git a/sys/netinet6/in6_pcb.c b/sys/netinet6/in6_pcb.c
> index dfda0c60c0ba..90862c79b13a 100644
> --- a/sys/netinet6/in6_pcb.c
> +++ b/sys/netinet6/in6_pcb.c
> @@ -164,8 +164,9 @@ in6_pcbsetport(struct in6_addr *laddr, struct inpcb *inp, struct ucred *cred)
> * Determine whether the inpcb can be bound to the specified address/port tuple.
> */
> static int
> -in6_pcbbind_avail(struct inpcb *inp, const struct sockaddr_in6 *sin6, int fib,
> - int sooptions, int lookupflags, struct ucred *cred)
> +in6_pcbbind_avail(struct inpcb *inp, const struct sockaddr_in6 *sin6,
> + int fib, int sooptions, int lookupflags,
> + uint16_t *fibnum, struct ucred *cred)
Indentation of continuation lines should be four spaces.
> {
> const struct in6_addr *laddr;
> int reuseport, reuseport_lb;
> @@ -207,8 +208,9 @@ in6_pcbbind_avail(struct inpcb *inp, const struct sockaddr_in6 *sin6, int fib,
> sin6.sin6_addr = *laddr;
>
> NET_EPOCH_ENTER(et);
> - if ((ifa = ifa_ifwithaddr((const struct sockaddr *)&sin6)) ==
> - NULL && (inp->inp_flags & INP_BINDANY) == 0) {
> + ifa = ifa_ifwithaddr_getfib((const struct sockaddr *)&sin6,
> + fibnum);
> + if ((inp->inp_flags & INP_BINDANY) == 0 && ifa == NULL) {
> NET_EPOCH_EXIT(et);
> return (EADDRNOTAVAIL);
> }
> @@ -337,8 +339,9 @@ in6_pcbbind(struct inpcb *inp, struct sockaddr_in6 *sin6, int flags,
> RT_ALL_FIBS;
>
> /* See if this address/port combo is available. */
> - error = in6_pcbbind_avail(inp, sin6, fib, sooptions, lookupflags,
> - cred);
> + error = in6_pcbbind_avail(inp, sin6, fib, sooptions,
> + lookupflags,
> + &inp->inp_inc.inc_fibnum, cred);
> if (error != 0)
> return (error);
>
> diff --git a/sys/sys/socketvar.h b/sys/sys/socketvar.h
> index 6512a2d69fd5..c0243d799c5a 100644
> --- a/sys/sys/socketvar.h
> +++ b/sys/sys/socketvar.h
> @@ -521,7 +521,7 @@ int solisten_proto_check(struct socket *so);
> bool solisten_enqueue(struct socket *, int);
> int solisten_dequeue(struct socket *, struct socket **, int);
> struct socket *
> - solisten_clone(struct socket *);
> + solisten_clone(struct socket *, int fibnum);
> struct socket *
> sonewconn(struct socket *head, int connstatus);
> struct socket *