pf + vimage patch

Mikolaj Golub trociny at FreeBSD.org
Wed Jun 5 08:52:27 UTC 2013


Hi,

On Mon, Jun 03, 2013 at 01:58:38PM +0200, Nikos Vassiliadis wrote:
> Hi,
> 
> Please review this patch. It fixes some problems with pf and vimage.
> For the time being only pf works. ALTQ, pflog, pfsync are not changed
> nor tested but as time permits, I'll work on them. Basic packet
> filtering functionality per VNET should be ok.
> 

Thank you for working on this. I'd really like to see pf and vimage
integrated, as it looks like one of the main blockers to having vimage
in GENERIC.

I had hoped more knowledgeable people would comment on this,
though. Anyway, not being familiar with pf myself, here are a couple of
things I would recommend to make your patch more attractive to
potential reviewers:

1) It looks like the patch could be split into several parts. A log
message for every change, describing why it is needed and what problem
it solves, would be very helpful. As a tool for maintaining such a
series of changes I personally prefer git.

2) You use spaces for indentation where tabs should be used. This adds
unnecessary noise and makes the patch harder to read, and someone will
have to fix it when the patch is eventually committed to the tree.

3) When generating a diff from svn, passing the -x -pu options (-p and
-u for diff: unified output showing which function each hunk is in)
will make the diff more readable.

Also see some questions/comments below.

> Index: sys/net/pfvar.h
> ===================================================================
> --- sys/net/pfvar.h	(revision 251294)
> +++ sys/net/pfvar.h	(working copy)
> @@ -901,7 +901,6 @@
>      struct pf_ruleset *, struct pf_pdesc *, int);
>  extern pflog_packet_t		*pflog_packet_ptr;
>  
> -#define	V_pf_end_threads	VNET(pf_end_threads)
>  #endif /* _KERNEL */
>  
>  #define	PFSYNC_FLAG_SRCNODE	0x04
> Index: sys/netpfil/pf/pf.c
> ===================================================================
> --- sys/netpfil/pf/pf.c	(revision 251294)
> +++ sys/netpfil/pf/pf.c	(working copy)
> @@ -300,8 +300,6 @@
>  
>  int in4_cksum(struct mbuf *m, u_int8_t nxt, int off, int len);
>  
> -VNET_DECLARE(int, pf_end_threads);
> -
>  VNET_DEFINE(struct pf_limit, pf_limits[PF_LIMIT_MAX]);
>  
>  #define	PACKET_LOOPED(pd)	((pd)->pf_mtag &&			\
> @@ -359,15 +357,13 @@
>  
>  SYSCTL_NODE(_net, OID_AUTO, pf, CTLFLAG_RW, 0, "pf(4)");
>  
> -VNET_DEFINE(u_long, pf_hashsize);
> -#define	V_pf_hashsize	VNET(pf_hashsize)
> -SYSCTL_VNET_UINT(_net_pf, OID_AUTO, states_hashsize, CTLFLAG_RDTUN,
> -    &VNET_NAME(pf_hashsize), 0, "Size of pf(4) states hashtable");
> +u_long		pf_hashsize;
> +SYSCTL_UINT(_net_pf, OID_AUTO, states_hashsize, CTLFLAG_RDTUN,
> +    &pf_hashsize, 0, "Size of pf(4) states hashtable");
>  
> -VNET_DEFINE(u_long, pf_srchashsize);
> -#define	V_pf_srchashsize	VNET(pf_srchashsize)
> -SYSCTL_VNET_UINT(_net_pf, OID_AUTO, source_nodes_hashsize, CTLFLAG_RDTUN,
> -    &VNET_NAME(pf_srchashsize), 0, "Size of pf(4) source nodes hashtable");
> +u_long		pf_srchashsize;
> +SYSCTL_UINT(_net_pf, OID_AUTO, source_nodes_hashsize, CTLFLAG_RDTUN,
> +    &pf_srchashsize, 0, "Size of pf(4) source nodes hashtable");
>  

Why do you have to devirtualize these variables? Are per-vnet
hashtable sizes not useful, or do they cause issues?

>  VNET_DEFINE(void *, pf_swi_cookie);
>  
> @@ -698,12 +694,12 @@
>  	struct pf_srchash	*sh;
>  	u_int i;
>  
> -	TUNABLE_ULONG_FETCH("net.pf.states_hashsize", &V_pf_hashsize);
> -	if (V_pf_hashsize == 0 || !powerof2(V_pf_hashsize))
> -		V_pf_hashsize = PF_HASHSIZ;
> -	TUNABLE_ULONG_FETCH("net.pf.source_nodes_hashsize", &V_pf_srchashsize);
> -	if (V_pf_srchashsize == 0 || !powerof2(V_pf_srchashsize))
> -		V_pf_srchashsize = PF_HASHSIZ / 4;
> +	TUNABLE_ULONG_FETCH("net.pf.states_hashsize", &pf_hashsize);
> +	if (pf_hashsize == 0 || !powerof2(pf_hashsize))
> +		pf_hashsize = PF_HASHSIZ;
> +	TUNABLE_ULONG_FETCH("net.pf.source_nodes_hashsize", &pf_srchashsize);
> +	if (pf_srchashsize == 0 || !powerof2(pf_srchashsize))
> +		pf_srchashsize = PF_HASHSIZ / 4;
>  
>  	V_pf_hashseed = arc4random();
>  
> @@ -717,11 +713,11 @@
>  	V_pf_state_key_z = uma_zcreate("pf state keys",
>  	    sizeof(struct pf_state_key), pf_state_key_ctor, NULL, NULL, NULL,
>  	    UMA_ALIGN_PTR, 0);
> -	V_pf_keyhash = malloc(V_pf_hashsize * sizeof(struct pf_keyhash),
> +	V_pf_keyhash = malloc(pf_hashsize * sizeof(struct pf_keyhash),
>  	    M_PFHASH, M_WAITOK | M_ZERO);
> -	V_pf_idhash = malloc(V_pf_hashsize * sizeof(struct pf_idhash),
> +	V_pf_idhash = malloc(pf_hashsize * sizeof(struct pf_idhash),
>  	    M_PFHASH, M_WAITOK | M_ZERO);
> -	V_pf_hashmask = V_pf_hashsize - 1;
> +	V_pf_hashmask = pf_hashsize - 1;
>  	for (i = 0, kh = V_pf_keyhash, ih = V_pf_idhash; i <= V_pf_hashmask;
>  	    i++, kh++, ih++) {
>  		mtx_init(&kh->lock, "pf_keyhash", NULL, MTX_DEF);
> @@ -735,9 +731,9 @@
>  	V_pf_limits[PF_LIMIT_SRC_NODES].zone = V_pf_sources_z;
>  	uma_zone_set_max(V_pf_sources_z, PFSNODE_HIWAT);
>  	uma_zone_set_warning(V_pf_sources_z, "PF source nodes limit reached");
> -	V_pf_srchash = malloc(V_pf_srchashsize * sizeof(struct pf_srchash),
> +	V_pf_srchash = malloc(pf_srchashsize * sizeof(struct pf_srchash),
>  	  M_PFHASH, M_WAITOK|M_ZERO);
> -	V_pf_srchashmask = V_pf_srchashsize - 1;
> +	V_pf_srchashmask = pf_srchashsize - 1;
>  	for (i = 0, sh = V_pf_srchash; i <= V_pf_srchashmask; i++, sh++)
>  		mtx_init(&sh->lock, "pf_srchash", NULL, MTX_DEF);
>  
> @@ -757,13 +753,17 @@
>  	STAILQ_INIT(&V_pf_sendqueue);
>  	SLIST_INIT(&V_pf_overloadqueue);
>  	TASK_INIT(&V_pf_overloadtask, 0, pf_overload_task, &V_pf_overloadqueue);
> -	mtx_init(&pf_sendqueue_mtx, "pf send queue", NULL, MTX_DEF);
> -	mtx_init(&pf_overloadqueue_mtx, "pf overload/flush queue", NULL,
> -	    MTX_DEF);
> +	if (IS_DEFAULT_VNET(curvnet)) {
> +	    mtx_init(&pf_sendqueue_mtx, "pf send queue", NULL, MTX_DEF);
> +	    mtx_init(&pf_overloadqueue_mtx, "pf overload/flush queue", NULL,
> +		MTX_DEF);
> +	}
>  
>  	/* Unlinked, but may be referenced rules. */
>  	TAILQ_INIT(&V_pf_unlinked_rules);
> -	mtx_init(&pf_unlnkdrules_mtx, "pf unlinked rules", NULL, MTX_DEF);
> +	if (IS_DEFAULT_VNET(curvnet))
> +	    mtx_init(&pf_unlnkdrules_mtx, "pf unlinked rules", NULL, MTX_DEF);
> +
>  }

"if (IS_DEFAULT_VNET(curvnet))" constructions look a little ugly for
me. Isn't possible to split these initialization functions on two
parts: one (not "virtualized") to run by pf_load() and the other by
vnet_pf_init()?
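
Just to illustrate what I mean, a very rough sketch (the helper names
pf_mutexes_init() and vnet_pf_queues_init() are made up; the real code
would hang off the existing pf_load()/vnet_pf_init() paths):

/* Global (non-virtualized) locks, initialized once at module load. */
static void
pf_mutexes_init(void)
{
	mtx_init(&pf_sendqueue_mtx, "pf send queue", NULL, MTX_DEF);
	mtx_init(&pf_overloadqueue_mtx, "pf overload/flush queue", NULL,
	    MTX_DEF);
	mtx_init(&pf_unlnkdrules_mtx, "pf unlinked rules", NULL, MTX_DEF);
}

/* Per-vnet queues and lists, initialized for every vnet. */
static void
vnet_pf_queues_init(void)
{
	STAILQ_INIT(&V_pf_sendqueue);
	SLIST_INIT(&V_pf_overloadqueue);
	TASK_INIT(&V_pf_overloadtask, 0, pf_overload_task,
	    &V_pf_overloadqueue);
	TAILQ_INIT(&V_pf_unlinked_rules);
}

That way the per-vnet path stays free of IS_DEFAULT_VNET() checks.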

>  
>  void
> @@ -1309,68 +1309,35 @@
>  pf_purge_thread(void *v)
>  {
>  	u_int idx = 0;
> +	VNET_ITERATOR_DECL(vnet_iter);
>  
> -	CURVNET_SET((struct vnet *)v);
> -
>  	for (;;) {
> -		PF_RULES_RLOCK();
> -		rw_sleep(pf_purge_thread, &pf_rules_lock, 0, "pftm", hz / 10);
> +	    tsleep(pf_purge_thread, PWAIT, "pftm", hz / 10);
> +	    VNET_LIST_RLOCK();
> +	    VNET_FOREACH(vnet_iter) {
> +		CURVNET_SET(vnet_iter);
>  
> -		if (V_pf_end_threads) {
> -			/*
> -			 * To cleanse up all kifs and rules we need
> -			 * two runs: first one clears reference flags,
> -			 * then pf_purge_expired_states() doesn't
> -			 * raise them, and then second run frees.
> -			 */
> -			PF_RULES_RUNLOCK();
> -			pf_purge_unlinked_rules();
> -			pfi_kif_purge();
> -
> -			/*
> -			 * Now purge everything.
> -			 */
> -			pf_purge_expired_states(0, V_pf_hashmask);
> -			pf_purge_expired_fragments();
> -			pf_purge_expired_src_nodes();
> -
> -			/*
> -			 * Now all kifs & rules should be unreferenced,
> -			 * thus should be successfully freed.
> -			 */
> -			pf_purge_unlinked_rules();
> -			pfi_kif_purge();
> -
> -			/*
> -			 * Announce success and exit.
> -			 */
> -			PF_RULES_RLOCK();
> -			V_pf_end_threads++;
> -			PF_RULES_RUNLOCK();
> -			wakeup(pf_purge_thread);
> -			kproc_exit(0);
> -		}

Running only one instance of pf_purge_thread, which iterates over all
vnets, looks like a good idea to me, but do we really not need this
cleanup code for our only thread? Don't you run into issues, e.g. on
pf module unload?
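
If the thread still has to go away on unload, I would expect the loop
to keep something like the old pf_end_threads handshake, just with a
global (non-virtualized) flag now. Very roughly, reusing the code your
patch removes:

	for (;;) {
		tsleep(pf_purge_thread, PWAIT, "pftm", hz / 10);

		if (pf_end_threads) {
			/*
			 * Iterate over all vnets doing the same two-pass
			 * purge the removed per-vnet code did, then announce
			 * completion so pf_unload() can proceed.
			 */
			pf_end_threads++;
			wakeup(pf_purge_thread);
			kproc_exit(0);
		}

		/* ... the normal periodic purge over all vnets, as above ... */
	}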

> -		PF_RULES_RUNLOCK();
> -
>  		/* Process 1/interval fraction of the state table every run. */
>  		idx = pf_purge_expired_states(idx, V_pf_hashmask /
> -			    (V_pf_default_rule.timeout[PFTM_INTERVAL] * 10));
> +		    (V_pf_default_rule.timeout[PFTM_INTERVAL] * 10));
>  
>  		/* Purge other expired types every PFTM_INTERVAL seconds. */
>  		if (idx == 0) {
> -			/*
> -			 * Order is important:
> -			 * - states and src nodes reference rules
> -			 * - states and rules reference kifs
> -			 */
> -			pf_purge_expired_fragments();
> -			pf_purge_expired_src_nodes();
> -			pf_purge_unlinked_rules();
> -			pfi_kif_purge();
> +		    /*
> +		     * Order is important:
> +		     * - states and src nodes reference rules
> +		     * - states and rules reference kifs
> +		     */
> +		    pf_purge_expired_fragments();
> +		    pf_purge_expired_src_nodes();
> +		    pf_purge_unlinked_rules();
> +		    pfi_kif_purge();

This is a good example of unnecessary whitespace noise.

>  		}
> +		CURVNET_RESTORE();
> +	    }
> +	    VNET_LIST_RUNLOCK();
>  	}
>  	/* not reached */
> -	CURVNET_RESTORE();
>  }
>  

-- 
Mikolaj Golub

