PERFORCE change 167260 for review

Julian Elischer julian at elischer.org
Wed Aug 12 21:58:49 UTC 2009


Marko Zec wrote:
> http://perforce.freebsd.org/chv.cgi?CH=167260
> 
> Change 167260 by zec at zec_tpx32 on 2009/08/12 21:08:10
> 
> 	Significanlty simplify / reduce previous patch aimed at
> 	making ipdivert work with VIMAGE.  The restriction is that
> 	with current patch it is not permitted to kldunload -f
> 	ipdivert if built with options VIMAGE enabled.  The patch
> 	is verified to work with natd running in a non-default vimage.
> 	
> 	Diff size against head/sys/netinet/ip_divert.c:
> 	
> 	tpx32% wc before.diff 
> 	     287    1073    7891 before.diff
> 	tpx32% wc after.diff 
> 	      90     261    2370 after.diff
> 
> Affected files ...
> 
> .. //depot/projects/vimage-commit2/src/sys/netinet/ip_divert.c#38 edit
> 
> Differences ...
> 
> ==== //depot/projects/vimage-commit2/src/sys/netinet/ip_divert.c#38 (text+ko) ====
> 
> @@ -125,11 +125,13 @@
>  static u_long	div_sendspace = DIVSNDQ;	/* XXX sysctl ? */
>  static u_long	div_recvspace = DIVRCVQ;	/* XXX sysctl ? */
>  
> +static eventhandler_tag ip_divert_event_tag;
> +
>  /*
>   * Initialize divert connection block queue.
>   */
>  static void
> -div_zone_change(struct vnet *vnet)
> +div_zone_change(void *tag)
>  {
>  	VNET_ITERATOR_DECL(vnet_iter);
>  
> @@ -139,7 +141,7 @@
>  		uma_zone_set_max(V_divcbinfo.ipi_zone, maxsockets);
>  		CURVNET_RESTORE();
>  	}
> -	VNET_LIST_RUNLOCK_NOSLEEP();	
> +	VNET_LIST_RUNLOCK_NOSLEEP();
>  }
>  
>  static int
> @@ -159,6 +161,30 @@
>  	INP_LOCK_DESTROY(inp);
>  }
>  
> +static void
> +div_init(void)
> +{
> +
> +	INP_INFO_LOCK_INIT(&V_divcbinfo, "div");
> +	LIST_INIT(&V_divcb);
> +	V_divcbinfo.ipi_listhead = &V_divcb;
> +#ifdef VIMAGE
> +	V_divcbinfo.ipi_vnet = curvnet;
> +#endif
> +	/*
> +	 * XXX We don't use the hash list for divert IP, but it's easier
> +	 * to allocate a one entry hash list than it is to check all
> +	 * over the place for hashbase == NULL.
> +	 */
> +	V_divcbinfo.ipi_hashbase = hashinit(1, M_PCB, &V_divcbinfo.ipi_hashmask);
> +	V_divcbinfo.ipi_porthashbase = hashinit(1, M_PCB,
> +	    &V_divcbinfo.ipi_porthashmask);
> +	V_divcbinfo.ipi_zone = uma_zcreate("divcb", sizeof(struct inpcb),
> +	    NULL, NULL, div_inpcb_init, div_inpcb_fini, UMA_ALIGN_PTR,
> +	    UMA_ZONE_NOFREE);
> +	uma_zone_set_max(V_divcbinfo.ipi_zone, maxsockets);
> +}
> +
>  /*


> -	}
> +	error = in_pcbbind(inp, nam, td->td_ucred);
>  	INP_WUNLOCK(inp);
>  	INP_INFO_WUNLOCK(&V_divcbinfo);
>  	return error;
> @@ -710,22 +715,36 @@
>  	.pr_input =		div_input,
>  	.pr_ctlinput =		div_ctlinput,
>  	.pr_ctloutput =		ip_ctloutput,
> -	.pr_init =		NULL,
> +	.pr_init =		div_init,
>  	.pr_usrreqs =		&div_usrreqs



If you are going to make pr_init() called for every vnet then 
pr_destroy should be as well. But in fact that is not really safe.
(either of them)

The trouble is that we can not guarantee that other protocols can 
handle being called multiple times in their init and destroy methods.
Especially 3rd party protocols.

We need to ensure only protocols that have been converted to run
with multiple vnets are ever called with multiple vnets.

for this reason the only safe way to do this is via the VNET_SYSINIT
and VNET_SYSUNINIT calls.



>  };
>  
> -static int div_loaded = 0;
> -static eventhandler_tag div_evh_tag;
>  static int
>  div_modevent(module_t mod, int type, void *unused)
>  {
>  	int err = 0;
> +#ifndef VIMAGE
> +	int n;
> +#endif
>  
>  	switch (type) {
>  	case MOD_LOAD:
> -
> +		/*
> +		 * Protocol will be initialized by pf_proto_register().
> +		 * We don't have to register ip_protox because we are not
> +		 * a true IP protocol that goes over the wire.
> +		 */
> +		err = pf_proto_register(PF_INET, &div_protosw);
> +		if (err != 0)
> +			return (err);
> +		ip_divert_ptr = divert_packet;
> +		ip_divert_event_tag = EVENTHANDLER_REGISTER(maxsockets_change,
> +		    div_zone_change, NULL, EVENTHANDLER_PRI_ANY);
>  		break;
>  	case MOD_QUIESCE:
> +#ifdef VIMAGE
> +	case MOD_UNLOAD:
> +#endif
>  		/*
>  		 * IPDIVERT may normally not be unloaded because of the
>  		 * potential race conditions.  Tell kldunload we can't be
> @@ -733,8 +752,34 @@
>  		 */
>  		err = EPERM;
>  		break;
> +#ifndef VIMAGE
>  	case MOD_UNLOAD:
> +		/*
> +		 * Forced unload.
> +		 *
> +		 * Module ipdivert can only be unloaded if no sockets are
> +		 * connected.  Maybe this can be changed later to forcefully
> +		 * disconnect any open sockets.
> +		 *
> +		 * XXXRW: Note that there is a slight race here, as a new
> +		 * socket open request could be spinning on the lock and then
> +		 * we destroy the lock.
> +		 */
> +		INP_INFO_WLOCK(&V_divcbinfo);
> +		n = V_divcbinfo.ipi_count;
> +		if (n != 0) {
> +			err = EBUSY;
> +			INP_INFO_WUNLOCK(&V_divcbinfo);
> +			break;
> +		}
> +		ip_divert_ptr = NULL;
> +		err = pf_proto_unregister(PF_INET, IPPROTO_DIVERT, SOCK_RAW);
> +		INP_INFO_WUNLOCK(&V_divcbinfo);
> +		INP_INFO_LOCK_DESTROY(&V_divcbinfo);
> +		uma_zdestroy(V_divcbinfo.ipi_zone);
> +		EVENTHANDLER_DEREGISTER(maxsockets_change, ip_divert_event_tag);
>  		break;
> +#endif /* !VIMAGE */
>  	default:
>  		err = EOPNOTSUPP;
>  		break;
> @@ -748,124 +793,6 @@
>          0
>  };
>  
> -/* init on boot or module load */
> -static void 
> -div_init(void)
> -{
> -	int err;
> -
> -	/*
> - 	 * Protocol will be initialized by pf_proto_register().
> - 	 * We don't have to register ip_protox because we are not
> - 	 * a true IP protocol that goes over the wire.
> - 	 */
> -	err = pf_proto_register(PF_INET, &div_protosw);
> -	if (err == 0) {
> -		ip_divert_ptr = divert_packet;
> -		div_evh_tag =
> -		    EVENTHANDLER_REGISTER(maxsockets_change, div_zone_change,
> -			NULL, EVENTHANDLER_PRI_ANY);
> -		div_loaded = 1;
> -	}
> -	return;
> -}
> -
> -/****************
> - * Stuff that must be initialized for every instance
> - * (including the first of course).
> - */
> -static int
> -div_vnet_init(const void *unused)
> -{
> -	if (div_loaded == 0)
> -		return (0);
> -	INP_INFO_LOCK_INIT(&V_divcbinfo, "div");
> -	LIST_INIT(&V_divcb);
> -	V_divcbinfo.ipi_listhead = &V_divcb;
> -#ifdef VIMAGE
> -	V_divcbinfo.ipi_vnet = curvnet;
> -#endif
> -	/*
> -	 * XXX We don't use the hash list for divert IP, but it's easier
> -	 * to allocate a one entry hash list than it is to check all
> -	 * over the place for hashbase == NULL.
> -	 */
> -	V_divcbinfo.ipi_hashbase = hashinit(1, M_PCB, &V_divcbinfo.ipi_hashmask);
> -	V_divcbinfo.ipi_porthashbase = hashinit(1, M_PCB,
> -	    &V_divcbinfo.ipi_porthashmask);
> -	V_divcbinfo.ipi_zone = uma_zcreate("divcb", sizeof(struct inpcb),
> -	    NULL, NULL, div_inpcb_init, div_inpcb_fini, UMA_ALIGN_PTR,
> -	    UMA_ZONE_NOFREE);
> -	uma_zone_set_max(V_divcbinfo.ipi_zone, maxsockets);
> -	return (0);
> -}
> -
> -/**********************
> - * Called for the removal of the last instance only on module unload.
> - */
> -static void
> -div_uninit(void)
> -{
> -	int err;
> -	
> -	if (div_loaded == 0)
> -		return;
> -	div_loaded = 0;
> -	ip_divert_ptr = NULL;
> -	EVENTHANDLER_DEREGISTER(maxsockets_change, div_evh_tag);
> -	err = pf_proto_unregister(PF_INET, IPPROTO_DIVERT, SOCK_RAW);
> -}
> -
> -/***********************
> - * Called for the removal of each instance.
> - */
> -static int
> -div_vnet_uninit(const void *unused)
> -{
> -	int err = 0;
> -	int n;
> -
> -	if (div_loaded == 0)
> -		return (0);
> -	/*
> -	 * Forced unload.
> -	 *
> -	 * Module ipdivert can only be unloaded if no sockets are
> -	 * connected.  Maybe this can be changed later to forcefully
> -	 * disconnect any open sockets.
> -	 *
> -	 * XXXRW: Note that there is a slight race here, as a new
> -	 * socket open request could be spinning on the lock and then
> -	 * we destroy the lock.
> -	 */
> -	INP_INFO_WLOCK(&V_divcbinfo);
> -	n = V_divcbinfo.ipi_count;
> -	INP_INFO_WUNLOCK(&V_divcbinfo);
> -	if (n != 0) {
> -		err = EBUSY;
> -	} else {
> -		INP_INFO_LOCK_DESTROY(&V_divcbinfo);
> -		uma_zdestroy(V_divcbinfo.ipi_zone);
> -	}
> -	return (err);
> -}
> -
> -
> -#define	DIV_MAJOR_ORDER		SI_SUB_PROTO_IFATTACHDOMAIN
> -#define	DIV_MODULE_ORDER	(SI_ORDER_ANY + 64)
> -#define	DIV_SYSINIT_ORDER	(DIV_MODULE_ORDER  + 1)
> -#define	DIV_VNET_ORDER		(DIV_SYSINIT_ORDER + 1 )
> -
> -DECLARE_MODULE(ipdivert, ipdivertmod, DIV_MAJOR_ORDER, DIV_MODULE_ORDER);
> +DECLARE_MODULE(ipdivert, ipdivertmod, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY);
>  MODULE_DEPEND(dummynet, ipfw, 2, 2, 2);
>  MODULE_VERSION(ipdivert, 1);
> -
> -SYSINIT(div_init, DIV_MAJOR_ORDER, DIV_SYSINIT_ORDER, div_init, NULL);
> -SYSUNINIT(div_uninit, DIV_MAJOR_ORDER, DIV_SYSINIT_ORDER,
> -    div_uninit, NULL);
> -
> -VNET_SYSINIT(div_vnet_init, DIV_MAJOR_ORDER, DIV_VNET_ORDER,
> -    div_vnet_init, NULL);
> -VNET_SYSUNINIT(div_vnet_uninit, DIV_MAJOR_ORDER, DIV_VNET_ORDER,
> -    div_vnet_uninit, NULL);
> -



More information about the p4-projects mailing list