Re: git: 9ce46cbc95d7 - main - ip_mroute: move ip_mrouter_done outside lock

From: Kristof Provost <kp_at_FreeBSD.org>
Date: Fri, 21 Jan 2022 13:33:27 UTC
On 21 Jan 2022, at 14:01, Kristof Provost wrote:
> Hi Wojciech,
>
> On 21 Jan 2022, at 6:19, Wojciech Macek wrote:
>> The branch main has been updated by wma:
>>
>> URL: 
>> https://cgit.FreeBSD.org/src/commit/?id=9ce46cbc95d7a6fccb55af0d42cbb85c29f10639
>>
>> commit 9ce46cbc95d7a6fccb55af0d42cbb85c29f10639
>> Author:     Wojciech Macek <wma@FreeBSD.org>
>> AuthorDate: 2022-01-21 05:15:08 +0000
>> Commit:     Wojciech Macek <wma@FreeBSD.org>
>> CommitDate: 2022-01-21 05:17:19 +0000
>>
>>     ip_mroute: move ip_mrouter_done outside lock
>>
>>     X_ip_mrouter_done might sleep, which triggers INVARIANTS to
>>     print additional errors on the screen.
>>     Move it outside the lock, but provide some basic synchronization
>>     to avoid race condition during module uninit/unload.
>>
>>     Obtained from:          Semihalf
>>     Sponsored by:           Stormshield
>
> I suspect this change causes panics like this one: 
> https://ci.freebsd.org/job/FreeBSD-main-amd64-test/20437/consoleText
>
>>  sys/netinet/ip_mroute.c | 11 ++++++++---
>>  sys/netinet/ip_mroute.h |  4 +++-
>>  sys/netinet/raw_ip.c    | 11 ++++++++---
>>  3 files changed, 19 insertions(+), 7 deletions(-)
>>
>> diff --git a/sys/netinet/ip_mroute.c b/sys/netinet/ip_mroute.c
>> index 8cd0b2ac7449..0566048621ad 100644
>> --- a/sys/netinet/ip_mroute.c
>> +++ b/sys/netinet/ip_mroute.c
>> @@ -300,7 +300,7 @@ VNET_DEFINE_STATIC(struct ifnet *, 
>> multicast_register_if);
>>  static u_long	X_ip_mcast_src(int);
>>  static int	X_ip_mforward(struct ip *, struct ifnet *, struct mbuf *,
>>  		    struct ip_moptions *);
>> -static int	X_ip_mrouter_done(void);
>> +static int	X_ip_mrouter_done(void *);
>>  static int	X_ip_mrouter_get(struct socket *, struct sockopt *);
>>  static int	X_ip_mrouter_set(struct socket *, struct sockopt *);
>>  static int	X_legal_vif_num(int);
>> @@ -431,7 +431,7 @@ X_ip_mrouter_set(struct socket *so, struct 
>> sockopt *sopt)
>>  	break;
>>
>>      case MRT_DONE:
>> -	error = ip_mrouter_done();
>> +	error = ip_mrouter_done(NULL);
>>  	break;
>>
>>      case MRT_ADD_VIF:
>> @@ -734,7 +734,7 @@ ip_mrouter_init(struct socket *so, int version)
>>   * Disable multicast forwarding.
>>   */
>>  static int
>> -X_ip_mrouter_done(void)
>> +X_ip_mrouter_done(void *locked)
>>  {
>>      struct ifnet *ifp;
>>      u_long i;
>> @@ -751,6 +751,11 @@ X_ip_mrouter_done(void)
>>      atomic_subtract_int(&ip_mrouter_cnt, 1);
>>      V_mrt_api_config = 0;
>>
>> +    if (locked) {
>> +	struct epoch_tracker *mrouter_et = locked;
>> +	MROUTER_RUNLOCK_PARAM(mrouter_et);
>> +    }
>> +
>>      MROUTER_WAIT();
>>
>>      /* Stop and drain task queue */
>> diff --git a/sys/netinet/ip_mroute.h b/sys/netinet/ip_mroute.h
>> index 65c5bdd3a025..016d026d184c 100644
>> --- a/sys/netinet/ip_mroute.h
>> +++ b/sys/netinet/ip_mroute.h
>> @@ -363,12 +363,14 @@ struct sockopt;
>>
>>  extern int	(*ip_mrouter_set)(struct socket *, struct sockopt *);
>>  extern int	(*ip_mrouter_get)(struct socket *, struct sockopt *);
>> -extern int	(*ip_mrouter_done)(void);
>> +extern int	(*ip_mrouter_done)(void *);
>>  extern int	(*mrt_ioctl)(u_long, caddr_t, int);
>>
>>  #define	MROUTER_RLOCK_TRACKER	struct epoch_tracker mrouter_et
>> +#define	MROUTER_RLOCK_PARAM_PTR		&mrouter_et
>>  #define	MROUTER_RLOCK()	epoch_enter_preempt(net_epoch_preempt, 
>> &mrouter_et)
>>  #define	MROUTER_RUNLOCK()	epoch_exit_preempt(net_epoch_preempt, 
>> &mrouter_et)
>> +#define	MROUTER_RUNLOCK_PARAM(param)	epoch_exit_preempt(net_epoch_preempt, 
>> param)
>>  #define	MROUTER_WAIT()	epoch_wait_preempt(net_epoch_preempt)
>>
>>  #endif /* _KERNEL */
>> diff --git a/sys/netinet/raw_ip.c b/sys/netinet/raw_ip.c
>> index 7c495745806e..08ce848a63f7 100644
>> --- a/sys/netinet/raw_ip.c
>> +++ b/sys/netinet/raw_ip.c
>> @@ -119,7 +119,7 @@ VNET_DEFINE(struct socket *, ip_mrouter);
>>   */
>>  int (*ip_mrouter_set)(struct socket *, struct sockopt *);
>>  int (*ip_mrouter_get)(struct socket *, struct sockopt *);
>> -int (*ip_mrouter_done)(void);
>> +int (*ip_mrouter_done)(void *locked);
>>  int (*ip_mforward)(struct ip *, struct ifnet *, struct mbuf *,
>>  		   struct ip_moptions *);
>>  int (*mrt_ioctl)(u_long, caddr_t, int);
>> @@ -879,18 +879,23 @@ static void
>>  rip_detach(struct socket *so)
>>  {
>>  	struct inpcb *inp;
>> +	MROUTER_RLOCK_TRACKER;
>>
>>  	inp = sotoinpcb(so);
>>  	KASSERT(inp != NULL, ("rip_detach: inp == NULL"));
>>  	KASSERT(inp->inp_faddr.s_addr == INADDR_ANY,
>>  	    ("rip_detach: not closed"));
>>
>> +	/* Disable mrouter first, lock released inside ip_mrouter_done */
>> +	MROUTER_RLOCK();
>> +	if (so == V_ip_mrouter && ip_mrouter_done)
>> +		ip_mrouter_done(MROUTER_RLOCK_PARAM_PTR);
>> +
>
> I believe this is the problem.
>
> If we do not enter ip_mrouter_done() here we’ll exit the function 
> without exiting epoch. The epoch tracker on the stack will be 
> overwritten, and that could produce the panic we see in 
> ci.freebsd.org.
>
I’m currently running with this patch:

	diff --git a/sys/netinet/ip_mroute.c b/sys/netinet/ip_mroute.c
	index 0566048621ad..ff68b140af7e 100644
	--- a/sys/netinet/ip_mroute.c
	+++ b/sys/netinet/ip_mroute.c
	@@ -741,8 +741,13 @@ X_ip_mrouter_done(void *locked)
	     vifi_t vifi;
	     struct bw_upcall *bu;

	-    if (V_ip_mrouter == NULL)
	-       return EINVAL;
	+    if (V_ip_mrouter == NULL) {
	+        if (locked) {
	+            struct epoch_tracker *mrouter_et = locked;
	+            MROUTER_RUNLOCK_PARAM(mrouter_et);
	+        }
	+        return (EINVAL);
	+    }

	     /*
	      * Detach/disable hooks to the reset of the system.
	diff --git a/sys/netinet/raw_ip.c b/sys/netinet/raw_ip.c
	index 08ce848a63f7..4354bee3cfcc 100644
	--- a/sys/netinet/raw_ip.c
	+++ b/sys/netinet/raw_ip.c
	@@ -887,9 +887,10 @@ rip_detach(struct socket *so)
	            ("rip_detach: not closed"));

	        /* Disable mrouter first, lock released inside ip_mrouter_done 
*/
	-       MROUTER_RLOCK();
	-       if (so == V_ip_mrouter && ip_mrouter_done)
	+       if (so == V_ip_mrouter && ip_mrouter_done) {
	+               MROUTER_RLOCK();
	                ip_mrouter_done(MROUTER_RLOCK_PARAM_PTR);
	+       }

	        INP_WLOCK(inp);
	        INP_HASH_WLOCK(&V_ripcbinfo);

However, it’s not at all clear to me what we’re actually 
accomplishing by entering the net epoch here. As far as I can tell 
that’s basically a no-op.

Kristof