5.4-RELEASE lockups on amd64 SMP

Matthew Grooms mgrooms at seton.org
Thu Jun 9 13:47:18 GMT 2005


Max,

      Not a problem. Looks good so far. It's been up for an hour and a 
half with all the debug options turned on. I will let it cook in my 
production environment over the weekend and update you on Monday. Thanks 
for your help.

Matthew Grooms

Max Laier wrote:
> On Thursday 09 June 2005 01:23, Grooms, Matthew wrote:
> 
>>Max,
>>
>>     With your patch applied, I get a panic very quickly during the boot
>>cycle with output that looks like this ...
> 
> 
> My bad, missed the mtx_init() ... 
> | @@ -216,6 +219,9 @@
> |         callout_init(&sc->sc_tmo, 0);
> |         callout_init(&sc->sc_bulk_tmo, 0);
> |         callout_init(&sc->sc_bulkfail_tmo, 0);
> | +       callout_init(&sc->sc_send_tmo, 0);
> | +       mtx_init(&sc->sc_ifq.ifq_mtx, ifp->if_xname, "pfsync send queue",
> | +           MTX_DEF);
> |         if_attach(&sc->sc_if);
> |  
> |         LIST_INSERT_HEAD(&pfsync_list, sc, sc_next);
> 
> Complete updated patch attached and uploaded to:
> http://people.freebsd.org/~mlaier/if_pfsync.senddef5.diff
> 
> Sorry.
> 
> 
>>net.inet.carp.preempt: 0 -> 1
>>Setting hostname: ---.
>>em: Link is up 100 Mbps Full Duplex
>>panic: mtx_lock() of spin mutex (null) @ ../../../net/if.c:1983
>>cpuid = 1
>>KDB: enter: panic
>>[thread pid 282 tid 100157 ]
>>Stopped at      kdb_enter+0x2f: nop
>>db> trace
>>Tracing pid 282 tid 100157 td 0xffffff000af78280
>>kdb_enter() at kdb_enter+0x2f
>>panic() at panic+0x249
>>_mtx_lock_flags() at _mtx_lock_flags+0xd6
>>if_handoff() at if_handoff+0x49
>>pfsync_sendout() at pfsync_sendout+0x268
>>pfsyncioctl() at pfsyncioctl+0x497
>>in_control() at in_control+0x8cb
>>ifioctl() at ifioctl+0x178
>>soo_ioctl() at soo_ioctl+0x2d6
>>ioctl() at ioctl+0xfc
>>syscall() at syscall+0x4ab
>>Xfast_syscall() at Xfast_syscall+0xa8
>>--- syscall (54, FreeBSD ELF64, ioctl), rip = 0x800793340, rsp =
>>0x7fffffffeca8, rbp = 0x7fffffffef8b ---
>>db> show locks
>>exclusive sleep mutex pf task mtx r = 0 (0xffffffff80752f60) locked @
>>contrib/pf/net/if_pfsync.c:973
>>
>>Rebooting the machine with the same kernel produces an identical panic. Let
>>me know what else I can do to help. Right now I have just been rebooting
>>back to a UP kernel which has never shown any sign of problems.
>>
>>Matthew Grooms
>>
>>-----Original Message-----
>>From: Grooms, Matthew
>>Sent: Wed 6/8/2005 6:22 PM
>>To: Max Laier
>>Cc: Palle Girgensohn; Kris Kennaway; freebsd-stable at freebsd.org;
>>glebius at freebsd.org; pf at freebsd.org Subject: RE: 5.4-RELEASE lockups on
>>amd64 SMP
>>
>>Matthew,
>>
>>can you try the attached diff.  Available for 5 and CURRENT.  I recall that
>>this problem was seen before, strange that I didn't see the problem. 
>>Sounds familiar to you?  Please try the patch and let me know if that
>>helps.  Thanks a lot.
>>
>>On Wednesday 08 June 2005 01:35, Matthew Grooms wrote:
>>
>>>Once again, here are the backtraces for the panic and lor ...
>>>
>>>Tracing id 110 tid 100089 td 0xffffff012f3f0c80
>>>kdb_enter() at kdb_enter+0x2f
>>>panic() at panic+0x249
>>>uma_dbg_free() at uma_dbg_free+0x188
>>>uma_zfree_arg() at uma_zfree_arg+0x1b0
>>>pf_purge_expired_states() at pf_purge_expired_states+0x41
>>>pfsync_input() at pfsync_input+0xb35
>>>ip_input() at ip_input+0x10f
>>>netisr_processqueue() at netisr_processqueue+0x17
>>>swi_net() at swi_net+0xa8
>>>ithread_loop() at ithread_loop+0xd9
>>>fork_exit() at fork_exit+0xc3
>>>fork_trampoline() at fork_trampoline+0xe
>>>--- trap 0, rip = 0, rsp = 0xffffffffb44f9d00, rbp = 0 ---
>>>db> continue
>>>boot() called on cpu#0
>>>Uptime: 13h42m43s
>>>Dumping 4864 MB
>>>  16 32 ...
>>>
>>>lock order reversal
>>
>>...
>>
>>
>>>alltraps_with_regs_pushed() at alltraps_with_regs_pushed+0x5
>>>pf_state_tree_lan_ext_RB_REMOVE() at
>>>pf_state_tree_lan_ext_RB_REMOVE+0x10c
>>
>>This LOR is a consequence of the fault, so it can be disregarded.
> 
> 
> 
> ------------------------------------------------------------------------
> 
> Index: if_pfsync.c
> ===================================================================
> RCS file: /usr/store/mlaier/fcvs/src/sys/contrib/pf/net/if_pfsync.c,v
> retrieving revision 1.11.2.2
> diff -u -r1.11.2.2 if_pfsync.c
> --- if_pfsync.c	19 May 2005 10:59:22 -0000	1.11.2.2
> +++ if_pfsync.c	8 Jun 2005 23:42:45 -0000
> @@ -130,6 +130,7 @@
>  
>  static void	pfsync_clone_destroy(struct ifnet *);
>  static int	pfsync_clone_create(struct if_clone *, int);
> +static void	pfsync_senddef(void *);
>  #else
>  void	pfsyncattach(int);
>  #endif
> @@ -170,6 +171,8 @@
>  	callout_stop(&sc->sc_bulk_tmo);
>  	callout_stop(&sc->sc_bulkfail_tmo);
>  
> +	callout_stop(&sc->sc_send_tmo);
> +
>  #if NBPFILTER > 0
>          bpfdetach(ifp);
>  #endif
> @@ -216,6 +219,9 @@
>  	callout_init(&sc->sc_tmo, 0);
>  	callout_init(&sc->sc_bulk_tmo, 0);
>  	callout_init(&sc->sc_bulkfail_tmo, 0);
> +	callout_init(&sc->sc_send_tmo, 0);
> +	mtx_init(&sc->sc_ifq.ifq_mtx, ifp->if_xname, "pfsync send queue",
> +	    MTX_DEF);
>  	if_attach(&sc->sc_if);
>  
>  	LIST_INSERT_HEAD(&pfsync_list, sc, sc_next);
> @@ -913,6 +919,7 @@
>  		if (pfsyncr.pfsyncr_maxupdates > 255)
>  			return (EINVAL);
>  #ifdef __FreeBSD__
> +		callout_drain(&sc->sc_send_tmo);
>  		PF_LOCK();
>  #endif
>  		sc->sc_maxupdates = pfsyncr.pfsyncr_maxupdates;
> @@ -1634,15 +1641,14 @@
>  #endif
>  
>  		pfsyncstats.pfsyncs_opackets++;
> -
>  #ifdef __FreeBSD__
> -		PF_UNLOCK();
> -#endif
> +		if (IF_HANDOFF(&sc->sc_ifq, m, NULL))
> +			pfsyncstats.pfsyncs_oerrors++;
> +		else
> +			callout_reset(&sc->sc_send_tmo, 1, pfsync_senddef, sc);
> +#else
>  		if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL))
>  			pfsyncstats.pfsyncs_oerrors++;
> -
> -#ifdef __FreeBSD__
> -		PF_LOCK();
>  #endif
>  	} else
>  		m_freem(m);
> @@ -1652,6 +1658,22 @@
>  
>  
>  #ifdef __FreeBSD__
> +static void
> +pfsync_senddef(void *arg)
> +{
> +	struct pfsync_softc *sc = (struct pfsync_softc *)arg;
> +	struct mbuf *m;
> +
> +	for(;;) {
> +		IF_DEQUEUE(&sc->sc_ifq, m);
> +		if (m == NULL)
> +			break;
> +		if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL))
> +			pfsyncstats.pfsyncs_oerrors++;
> +	}
> +}
> +
> +
>  static int
>  pfsync_modevent(module_t mod, int type, void *data)
>  {
> Index: if_pfsync.h
> ===================================================================
> RCS file: /usr/store/mlaier/fcvs/src/sys/contrib/pf/net/if_pfsync.h,v
> retrieving revision 1.4
> diff -u -r1.4 if_pfsync.h
> --- if_pfsync.h	16 Jun 2004 23:24:00 -0000	1.4
> +++ if_pfsync.h	8 Jun 2005 23:42:59 -0000
> @@ -158,8 +158,12 @@
>  	struct timeout		 sc_bulkfail_tmo;
>  #endif
>  	struct in_addr		 sc_sendaddr;
> -	struct mbuf		*sc_mbuf;	/* current cummulative mbuf */
> -	struct mbuf		*sc_mbuf_net;	/* current cummulative mbuf */
> +	struct mbuf		*sc_mbuf;	/* current cumulative mbuf */
> +	struct mbuf		*sc_mbuf_net;	/* current cumulative mbuf */
> +#ifdef __FreeBSD__
> +	struct ifqueue		 sc_ifq;
> +	struct callout		 sc_send_tmo;
> +#endif
>  	union sc_statep		 sc_statep;
>  	union sc_statep		 sc_statep_net;
>  	u_int32_t		 sc_ureq_received;


More information about the freebsd-pf mailing list