Re: git: e1882428dcbb - main - ifnet/mbuf: provide KPI to serialize/restore m->m_pkthdr.rcvif
- Reply: Gleb Smirnoff : "Re: git: e1882428dcbb - main - ifnet/mbuf: provide KPI to serialize/restore m->m_pkthdr.rcvif"
- Reply: Gleb Smirnoff : "Re: git: e1882428dcbb - main - ifnet/mbuf: provide KPI to serialize/restore m->m_pkthdr.rcvif"
- In reply to: Gleb Smirnoff : "git: e1882428dcbb - main - ifnet/mbuf: provide KPI to serialize/restore m->m_pkthdr.rcvif"
- Go to: [ bottom of page ] [ top of archives ] [ this month ]
Date: Thu, 27 Jan 2022 16:59:03 UTC
On 1/27/22, Gleb Smirnoff <glebius@freebsd.org> wrote:
> The branch main has been updated by glebius:
>
> URL:
> https://cgit.FreeBSD.org/src/commit/?id=e1882428dcbbafd2814d7e17b977a8f686784b39
>
> commit e1882428dcbbafd2814d7e17b977a8f686784b39
> Author: Gleb Smirnoff <glebius@FreeBSD.org>
> AuthorDate: 2022-01-27 05:58:50 +0000
> Commit: Gleb Smirnoff <glebius@FreeBSD.org>
> CommitDate: 2022-01-27 05:58:50 +0000
>
> ifnet/mbuf: provide KPI to serialize/restore m->m_pkthdr.rcvif
>
> Supplement ifindex table with generation count and use it to
> serialize & restore an ifnet pointer.
>
This commit or something from the series reliably causes my box to
crash on boot:
Fatal trap 12: page fault while in kernel mode
cpuid = 7; apic id = 07
fault virtual address = 0x54
fault code = supervisor read data, page not present
instruction pointer = 0x20:0xffffffff80616d98
stack pointer = 0x28:0xfffffe06462889e0
frame pointer = 0x28:0xfffffe06462889e0
code segment = base 0x0, limit 0xfffff, type 0x1b
= DPL 0, pres 1, long 1, def32 0, gran 1
processor eflags = interrupt enabled, resume, IOPL = 0
current process = 1855 (route)
trap number = 12
panic: page fault
cpuid = 7
time = 1643302734
KDB: stack backtrace:
db_trace_self_wrapper() at db_trace_self_wrapper+0x2b/frame 0xfffffe06462887a0
vpanic() at vpanic+0x17f/frame 0xfffffe06462887f0
panic() at panic+0x43/frame 0xfffffe0646288850
trap_fatal() at trap_fatal+0x385/frame 0xfffffe06462888b0
trap_pfault() at trap_pfault+0x4f/frame 0xfffffe0646288910
calltrap() at calltrap+0x8/frame 0xfffffe0646288910
--- trap 0xc, rip = 0xffffffff80616d98, rsp = 0xfffffe06462889e0, rbp
= 0xfffffe06462889e0 ---
m_rcvif_serialize() at m_rcvif_serialize+0x8/frame 0xfffffe06462889e0
netisr_queue_src() at netisr_queue_src+0x15c/frame 0xfffffe0646288a30
route_output() at route_output+0x890/frame 0xfffffe0646288c70
sosend_generic() at sosend_generic+0x456/frame 0xfffffe0646288d10
soo_write() at soo_write+0x43/frame 0xfffffe0646288d40
dofilewrite() at dofilewrite+0x81/frame 0xfffffe0646288d90
sys_write() at sys_write+0xbc/frame 0xfffffe0646288e00
amd64_syscall() at amd64_syscall+0x107/frame 0xfffffe0646288f30
fast_syscall_common() at fast_syscall_common+0xf8/frame 0xfffffe0646288f30
--- syscall (4, FreeBSD ELF64, sys_write), rip = 0x3b000ba6703a, rsp =
0x3b0009378e48, rbp = 0x3b00093796f0 ---
KDB: enter: panic
[ thread pid 1855 tid 102265 ]
Stopped at kdb_enter+0x37: movq $0,0x9cc17e(%rip)
> Reviewed by: kp
> Differential revision: https://reviews.freebsd.org/D33266
> Fun note: git show e6abef09187a
> ---
> sys/kern/kern_mbuf.c | 22 ++++++++++++++++++++++
> sys/net/if.c | 49
> ++++++++++++++++++++++++++++++++++++-------------
> sys/net/if_var.h | 9 ++++++++-
> sys/sys/mbuf.h | 6 ++++++
> 4 files changed, 72 insertions(+), 14 deletions(-)
>
> diff --git a/sys/kern/kern_mbuf.c b/sys/kern/kern_mbuf.c
> index f1e76ef00c65..5c69f663c0e2 100644
> --- a/sys/kern/kern_mbuf.c
> +++ b/sys/kern/kern_mbuf.c
> @@ -1635,6 +1635,28 @@ m_snd_tag_destroy(struct m_snd_tag *mst)
> counter_u64_add(snd_tag_count, -1);
> }
>
> +void
> +m_rcvif_serialize(struct mbuf *m)
> +{
> + u_short idx, gen;
> +
> + M_ASSERTPKTHDR(m);
> + idx = m->m_pkthdr.rcvif->if_index;
> + gen = m->m_pkthdr.rcvif->if_idxgen;
> + m->m_pkthdr.rcvidx = idx;
> + m->m_pkthdr.rcvgen = gen;
> +}
> +
> +struct ifnet *
> +m_rcvif_restore(struct mbuf *m)
> +{
> +
> + M_ASSERTPKTHDR(m);
> +
> + return ((m->m_pkthdr.rcvif = ifnet_byindexgen(m->m_pkthdr.rcvidx,
> + m->m_pkthdr.rcvgen)));
> +}
> +
> /*
> * Allocate an mbuf with anonymous external pages.
> */
> diff --git a/sys/net/if.c b/sys/net/if.c
> index f148ae8c9c6d..e8d65e64518a 100644
> --- a/sys/net/if.c
> +++ b/sys/net/if.c
> @@ -313,7 +313,10 @@ VNET_DEFINE(struct ifgrouphead, ifg_head);
> /* Table of ifnet by index. */
> static int if_index;
> static int if_indexlim = 8;
> -static struct ifnet **ifindex_table;
> +static struct ifindex_entry {
> + struct ifnet *ife_ifnet;
> + uint16_t ife_gencnt;
> +} *ifindex_table;
>
> SYSCTL_NODE(_net_link_generic, IFMIB_SYSTEM, system,
> CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
> @@ -325,8 +328,8 @@ sysctl_ifcount(SYSCTL_HANDLER_ARGS)
>
> IFNET_RLOCK();
> for (int i = 1; i <= if_index; i++)
> - if (ifindex_table[i] != NULL &&
> - ifindex_table[i]->if_vnet == curvnet)
> + if (ifindex_table[i].ife_ifnet != NULL &&
> + ifindex_table[i].ife_ifnet->if_vnet == curvnet)
> rv = i;
> IFNET_RUNLOCK();
>
> @@ -370,7 +373,7 @@ ifnet_byindex(u_int idx)
> if (__predict_false(idx > if_index))
> return (NULL);
>
> - ifp = ck_pr_load_ptr(&ifindex_table[idx]);
> + ifp = ck_pr_load_ptr(&ifindex_table[idx].ife_ifnet);
>
> if (curvnet != NULL && ifp != NULL && ifp->if_vnet != curvnet)
> ifp = NULL;
> @@ -391,6 +394,24 @@ ifnet_byindex_ref(u_int idx)
> return (ifp);
> }
>
> +struct ifnet *
> +ifnet_byindexgen(uint16_t idx, uint16_t gen)
> +{
> + struct ifnet *ifp;
> +
> + NET_EPOCH_ASSERT();
> +
> + if (__predict_false(idx > if_index))
> + return (NULL);
> +
> + ifp = ck_pr_load_ptr(&ifindex_table[idx].ife_ifnet);
> +
> + if (ifindex_table[idx].ife_gencnt == gen)
> + return (ifp);
> + else
> + return (NULL);
> +}
> +
> struct ifaddr *
> ifaddr_byindex(u_short idx)
> {
> @@ -571,13 +592,13 @@ if_alloc_domain(u_char type, int numa_domain)
> * next slot.
> */
> for (idx = 1; idx <= if_index; idx++) {
> - if (ifindex_table[idx] == NULL)
> + if (ifindex_table[idx].ife_ifnet == NULL)
> break;
> }
>
> /* Catch if_index overflow. */
> if (idx >= if_indexlim) {
> - struct ifnet **new, **old;
> + struct ifindex_entry *new, *old;
> int newlim;
>
> newlim = if_indexlim * 2;
> @@ -593,7 +614,8 @@ if_alloc_domain(u_char type, int numa_domain)
> if_index = idx;
>
> ifp->if_index = idx;
> - ck_pr_store_ptr(&ifindex_table[idx], ifp);
> + ifp->if_idxgen = ifindex_table[idx].ife_gencnt;
> + ck_pr_store_ptr(&ifindex_table[idx].ife_ifnet, ifp);
> IFNET_WUNLOCK();
>
> return (ifp);
> @@ -668,9 +690,10 @@ if_free(struct ifnet *ifp)
> * virtualized and interface would outlive the vnet.
> */
> IFNET_WLOCK();
> - MPASS(ifindex_table[ifp->if_index] == ifp);
> - ck_pr_store_ptr(&ifindex_table[ifp->if_index], NULL);
> - while (if_index > 0 && ifindex_table[if_index] == NULL)
> + MPASS(ifindex_table[ifp->if_index].ife_ifnet == ifp);
> + ck_pr_store_ptr(&ifindex_table[ifp->if_index].ife_ifnet, NULL);
> + ifindex_table[ifp->if_index].ife_gencnt++;
> + while (if_index > 0 && ifindex_table[if_index].ife_ifnet == NULL)
> if_index--;
> IFNET_WUNLOCK();
>
> @@ -819,7 +842,7 @@ if_attach_internal(struct ifnet *ifp, bool vmove)
> struct sockaddr_dl *sdl;
> struct ifaddr *ifa;
>
> - MPASS(ifindex_table[ifp->if_index] == ifp);
> + MPASS(ifindex_table[ifp->if_index].ife_ifnet == ifp);
>
> #ifdef VIMAGE
> ifp->if_vnet = curvnet;
> @@ -4508,8 +4531,8 @@ if_show_ifnet(struct ifnet *ifp)
> IF_DB_PRINTF("%d", if_dunit);
> IF_DB_PRINTF("%s", if_description);
> IF_DB_PRINTF("%u", if_index);
> + IF_DB_PRINTF("%d", if_idxgen);
> IF_DB_PRINTF("%u", if_refcount);
> - IF_DB_PRINTF("%d", if_index_reserved);
> IF_DB_PRINTF("%p", if_softc);
> IF_DB_PRINTF("%p", if_l2com);
> IF_DB_PRINTF("%p", if_llsoftc);
> @@ -4564,7 +4587,7 @@ DB_SHOW_ALL_COMMAND(ifnets, db_show_all_ifnets)
> u_short idx;
>
> for (idx = 1; idx <= if_index; idx++) {
> - ifp = ifindex_table[idx];
> + ifp = ifindex_table[idx].ife_ifnet;
> if (ifp == NULL)
> continue;
> db_printf( "%20s ifp=%p\n", ifp->if_xname, ifp);
> diff --git a/sys/net/if_var.h b/sys/net/if_var.h
> index dedc73718125..21b3687f62c1 100644
> --- a/sys/net/if_var.h
> +++ b/sys/net/if_var.h
> @@ -334,7 +334,7 @@ struct ifnet {
> const char *if_dname; /* driver name */
> int if_dunit; /* unit or IF_DUNIT_NONE */
> u_short if_index; /* numeric abbreviation for this if */
> - short if_index_reserved; /* spare space to grow if_index */
> + u_short if_idxgen; /* ... and its generation count */
> char if_xname[IFNAMSIZ]; /* external name (name + unit) */
> char *if_description; /* interface description */
>
> @@ -644,6 +644,13 @@ extern struct sx ifnet_sxlock;
> struct ifnet *ifnet_byindex(u_int);
> struct ifnet *ifnet_byindex_ref(u_int);
>
> +/*
> + * ifnet_byindexgen() looks up ifnet by index and generation count,
> + * attempting to restore a weak pointer that had been stored across
> + * the epoch.
> + */
> +struct ifnet *ifnet_byindexgen(uint16_t idx, uint16_t gen);
> +
> /*
> * Given the index, ifaddr_byindex() returns the one and only
> * link-level ifaddr for the interface. You are not supposed to use
> diff --git a/sys/sys/mbuf.h b/sys/sys/mbuf.h
> index 77364f428b12..ebe8ef205055 100644
> --- a/sys/sys/mbuf.h
> +++ b/sys/sys/mbuf.h
> @@ -159,6 +159,10 @@ struct pkthdr {
> union {
> struct m_snd_tag *snd_tag; /* send tag, if any */
> struct ifnet *rcvif; /* rcv interface */
> + struct {
> + uint16_t rcvidx; /* rcv interface index ... */
> + uint16_t rcvgen; /* ... and generation count */
> + };
> };
> SLIST_HEAD(packet_tags, m_tag) tags; /* list of packet tags */
> int32_t len; /* total packet length */
> @@ -862,6 +866,8 @@ int m_snd_tag_alloc(struct ifnet *,
> void m_snd_tag_init(struct m_snd_tag *, struct ifnet *,
> const struct if_snd_tag_sw *);
> void m_snd_tag_destroy(struct m_snd_tag *);
> +void m_rcvif_serialize(struct mbuf *);
> +struct ifnet *m_rcvif_restore(struct mbuf *);
>
> static __inline int
> m_gettype(int size)
>
--
Mateusz Guzik <mjguzik gmail.com>