pf + vimage patch

Nikos Vassiliadis nvass at gmx.com
Thu Jun 6 10:36:09 UTC 2013


Hi,

Comments below.

On 06/05/2013 10:52 AM, Mikolaj Golub wrote:
> 1) It looks like the patch can be split into several parts. A log
> message for every change, describing why it is needed and what problem
> it solves, would be very helpful. As a tool to maintain such changes I
> personally prefer git.

I'll try to break it into parts; that should be easy to do now.
Getting familiar with git will take some time, so I'll manage the
split by hand this time.

> 2) You use spaces for indentation, where tabs should be. This adds
> unnecessary noise and makes the patch less readable. Also someone
> will need to fix this when eventually committing to the tree.

Yes, I wrongly assumed that pf didn't follow style(9) strictly.
Will fix that.

> 3) When generating a diff from svn, adding the -x-pu options will make
> the diff more readable.

Yes indeed, thanks!
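For reference, that would be something like (paths illustrative):

    svn diff -x-pu sys/net/pfvar.h sys/netpfil/pf > pf_vimage.diff

so that the hunk headers carry the enclosing function names.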

> Also see some questions/comments below.
>
>> Index: sys/net/pfvar.h
>> ===================================================================
>> --- sys/net/pfvar.h	(revision 251294)
>> +++ sys/net/pfvar.h	(working copy)
>> @@ -901,7 +901,6 @@
>>       struct pf_ruleset *, struct pf_pdesc *, int);
>>   extern pflog_packet_t		*pflog_packet_ptr;
>>
>> -#define	V_pf_end_threads	VNET(pf_end_threads)
>>   #endif /* _KERNEL */
>>
>>   #define	PFSYNC_FLAG_SRCNODE	0x04
>> Index: sys/netpfil/pf/pf.c
>> ===================================================================
>> --- sys/netpfil/pf/pf.c	(revision 251294)
>> +++ sys/netpfil/pf/pf.c	(working copy)
>> @@ -300,8 +300,6 @@
>>
>>   int in4_cksum(struct mbuf *m, u_int8_t nxt, int off, int len);
>>
>> -VNET_DECLARE(int, pf_end_threads);
>> -
>>   VNET_DEFINE(struct pf_limit, pf_limits[PF_LIMIT_MAX]);
>>
>>   #define	PACKET_LOOPED(pd)	((pd)->pf_mtag &&			\
>> @@ -359,15 +357,13 @@
>>
>>   SYSCTL_NODE(_net, OID_AUTO, pf, CTLFLAG_RW, 0, "pf(4)");
>>
>> -VNET_DEFINE(u_long, pf_hashsize);
>> -#define	V_pf_hashsize	VNET(pf_hashsize)
>> -SYSCTL_VNET_UINT(_net_pf, OID_AUTO, states_hashsize, CTLFLAG_RDTUN,
>> -    &VNET_NAME(pf_hashsize), 0, "Size of pf(4) states hashtable");
>> +u_long		pf_hashsize;
>> +SYSCTL_UINT(_net_pf, OID_AUTO, states_hashsize, CTLFLAG_RDTUN,
>> +    &pf_hashsize, 0, "Size of pf(4) states hashtable");
>>
>> -VNET_DEFINE(u_long, pf_srchashsize);
>> -#define	V_pf_srchashsize	VNET(pf_srchashsize)
>> -SYSCTL_VNET_UINT(_net_pf, OID_AUTO, source_nodes_hashsize, CTLFLAG_RDTUN,
>> -    &VNET_NAME(pf_srchashsize), 0, "Size of pf(4) source nodes hashtable");
>> +u_long		pf_srchashsize;
>> +SYSCTL_UINT(_net_pf, OID_AUTO, source_nodes_hashsize, CTLFLAG_RDTUN,
>> +    &pf_srchashsize, 0, "Size of pf(4) source nodes hashtable");
>>
>
> Why do you have to devirtualize these variables? Are per-vnet
> hashtable sizes not useful, or do they cause issues?

Per-VNET variables are not useful for pf_hashsize and pf_srchashsize:
they are read-only tunables (CTLFLAG_RDTUN), so they cannot be modified
at runtime, and since the value is fetched from the loader environment
at boot, every vnet would end up with the same setting anyway.
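The only way to change them is from loader.conf before boot, e.g.
(illustrative values; they must be powers of two or the default is
used):

    # /boot/loader.conf
    net.pf.states_hashsize="65536"
    net.pf.source_nodes_hashsize="16384"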

>>   VNET_DEFINE(void *, pf_swi_cookie);
>>
>> @@ -698,12 +694,12 @@
>>   	struct pf_srchash	*sh;
>>   	u_int i;
>>
>> -	TUNABLE_ULONG_FETCH("net.pf.states_hashsize", &V_pf_hashsize);
>> -	if (V_pf_hashsize == 0 || !powerof2(V_pf_hashsize))
>> -		V_pf_hashsize = PF_HASHSIZ;
>> -	TUNABLE_ULONG_FETCH("net.pf.source_nodes_hashsize", &V_pf_srchashsize);
>> -	if (V_pf_srchashsize == 0 || !powerof2(V_pf_srchashsize))
>> -		V_pf_srchashsize = PF_HASHSIZ / 4;
>> +	TUNABLE_ULONG_FETCH("net.pf.states_hashsize", &pf_hashsize);
>> +	if (pf_hashsize == 0 || !powerof2(pf_hashsize))
>> +		pf_hashsize = PF_HASHSIZ;
>> +	TUNABLE_ULONG_FETCH("net.pf.source_nodes_hashsize", &pf_srchashsize);
>> +	if (pf_srchashsize == 0 || !powerof2(pf_srchashsize))
>> +		pf_srchashsize = PF_HASHSIZ / 4;
>>
>>   	V_pf_hashseed = arc4random();
>>
>> @@ -717,11 +713,11 @@
>>   	V_pf_state_key_z = uma_zcreate("pf state keys",
>>   	    sizeof(struct pf_state_key), pf_state_key_ctor, NULL, NULL, NULL,
>>   	    UMA_ALIGN_PTR, 0);
>> -	V_pf_keyhash = malloc(V_pf_hashsize * sizeof(struct pf_keyhash),
>> +	V_pf_keyhash = malloc(pf_hashsize * sizeof(struct pf_keyhash),
>>   	    M_PFHASH, M_WAITOK | M_ZERO);
>> -	V_pf_idhash = malloc(V_pf_hashsize * sizeof(struct pf_idhash),
>> +	V_pf_idhash = malloc(pf_hashsize * sizeof(struct pf_idhash),
>>   	    M_PFHASH, M_WAITOK | M_ZERO);
>> -	V_pf_hashmask = V_pf_hashsize - 1;
>> +	V_pf_hashmask = pf_hashsize - 1;
>>   	for (i = 0, kh = V_pf_keyhash, ih = V_pf_idhash; i <= V_pf_hashmask;
>>   	    i++, kh++, ih++) {
>>   		mtx_init(&kh->lock, "pf_keyhash", NULL, MTX_DEF);
>> @@ -735,9 +731,9 @@
>>   	V_pf_limits[PF_LIMIT_SRC_NODES].zone = V_pf_sources_z;
>>   	uma_zone_set_max(V_pf_sources_z, PFSNODE_HIWAT);
>>   	uma_zone_set_warning(V_pf_sources_z, "PF source nodes limit reached");
>> -	V_pf_srchash = malloc(V_pf_srchashsize * sizeof(struct pf_srchash),
>> +	V_pf_srchash = malloc(pf_srchashsize * sizeof(struct pf_srchash),
>>   	  M_PFHASH, M_WAITOK|M_ZERO);
>> -	V_pf_srchashmask = V_pf_srchashsize - 1;
>> +	V_pf_srchashmask = pf_srchashsize - 1;
>>   	for (i = 0, sh = V_pf_srchash; i <= V_pf_srchashmask; i++, sh++)
>>   		mtx_init(&sh->lock, "pf_srchash", NULL, MTX_DEF);
>>
>> @@ -757,13 +753,17 @@
>>   	STAILQ_INIT(&V_pf_sendqueue);
>>   	SLIST_INIT(&V_pf_overloadqueue);
>>   	TASK_INIT(&V_pf_overloadtask, 0, pf_overload_task, &V_pf_overloadqueue);
>> -	mtx_init(&pf_sendqueue_mtx, "pf send queue", NULL, MTX_DEF);
>> -	mtx_init(&pf_overloadqueue_mtx, "pf overload/flush queue", NULL,
>> -	    MTX_DEF);
>> +	if (IS_DEFAULT_VNET(curvnet)) {
>> +	    mtx_init(&pf_sendqueue_mtx, "pf send queue", NULL, MTX_DEF);
>> +	    mtx_init(&pf_overloadqueue_mtx, "pf overload/flush queue", NULL,
>> +		MTX_DEF);
>> +	}
>>
>>   	/* Unlinked, but may be referenced rules. */
>>   	TAILQ_INIT(&V_pf_unlinked_rules);
>> -	mtx_init(&pf_unlnkdrules_mtx, "pf unlinked rules", NULL, MTX_DEF);
>> +	if (IS_DEFAULT_VNET(curvnet))
>> +	    mtx_init(&pf_unlnkdrules_mtx, "pf unlinked rules", NULL, MTX_DEF);
>> +
>>   }
>
> "if (IS_DEFAULT_VNET(curvnet))" constructions look a little ugly for
> me. Isn't possible to split these initialization functions on two
> parts: one (not "virtualized") to run by pf_load() and the other by
> vnet_pf_init()?

It is indeed ugly. I was trying not to diverge too much from the
original code. I will do it properly.
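Roughly along these lines (just a sketch, final function names and
placement still to be decided):

        /* Done once, at module load: global locks. */
        static void
        pf_load(void)
        {
                mtx_init(&pf_sendqueue_mtx, "pf send queue", NULL, MTX_DEF);
                mtx_init(&pf_overloadqueue_mtx, "pf overload/flush queue",
                    NULL, MTX_DEF);
                mtx_init(&pf_unlnkdrules_mtx, "pf unlinked rules", NULL,
                    MTX_DEF);
        }

        /* Done once per vnet: per-vnet queues and lists. */
        static void
        vnet_pf_init(void)
        {
                STAILQ_INIT(&V_pf_sendqueue);
                SLIST_INIT(&V_pf_overloadqueue);
                TASK_INIT(&V_pf_overloadtask, 0, pf_overload_task,
                    &V_pf_overloadqueue);
                TAILQ_INIT(&V_pf_unlinked_rules);
        }

This would avoid the IS_DEFAULT_VNET() checks entirely.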

>>
>>   void
>> @@ -1309,68 +1309,35 @@
>>   pf_purge_thread(void *v)
>>   {
>>   	u_int idx = 0;
>> +	VNET_ITERATOR_DECL(vnet_iter);
>>
>> -	CURVNET_SET((struct vnet *)v);
>> -
>>   	for (;;) {
>> -		PF_RULES_RLOCK();
>> -		rw_sleep(pf_purge_thread, &pf_rules_lock, 0, "pftm", hz / 10);
>> +	    tsleep(pf_purge_thread, PWAIT, "pftm", hz / 10);
>> +	    VNET_LIST_RLOCK();
>> +	    VNET_FOREACH(vnet_iter) {
>> +		CURVNET_SET(vnet_iter);
>>
>> -		if (V_pf_end_threads) {
>> -			/*
>> -			 * To cleanse up all kifs and rules we need
>> -			 * two runs: first one clears reference flags,
>> -			 * then pf_purge_expired_states() doesn't
>> -			 * raise them, and then second run frees.
>> -			 */
>> -			PF_RULES_RUNLOCK();
>> -			pf_purge_unlinked_rules();
>> -			pfi_kif_purge();
>> -
>> -			/*
>> -			 * Now purge everything.
>> -			 */
>> -			pf_purge_expired_states(0, V_pf_hashmask);
>> -			pf_purge_expired_fragments();
>> -			pf_purge_expired_src_nodes();
>> -
>> -			/*
>> -			 * Now all kifs & rules should be unreferenced,
>> -			 * thus should be successfully freed.
>> -			 */
>> -			pf_purge_unlinked_rules();
>> -			pfi_kif_purge();
>> -
>> -			/*
>> -			 * Announce success and exit.
>> -			 */
>> -			PF_RULES_RLOCK();
>> -			V_pf_end_threads++;
>> -			PF_RULES_RUNLOCK();
>> -			wakeup(pf_purge_thread);
>> -			kproc_exit(0);
>> -		}
>
> Running only one instance of pf_purge_thread, which iterates over all
> vnets, looks like a good idea to me, but do we really not need this
> cleanup code for our only thread? Don't you have issues, e.g., on pf
> module unload?

Module unload is broken :( Maybe it can be fixed at a (slightly) later
date?
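For the single global thread, one possible direction (illustrative
only, not part of this patch) would be to bring the old per-vnet
V_pf_end_threads counter back as a plain global that the thread checks
once per loop, so a future pf_unload() can ask it to exit:

        int pf_end_threads;     /* set by a future pf_unload() */

        void
        pf_purge_thread(void *v __unused)
        {
                VNET_ITERATOR_DECL(vnet_iter);

                for (;;) {
                        tsleep(pf_purge_thread, PWAIT, "pftm", hz / 10);

                        if (pf_end_threads) {
                                /*
                                 * The old two-pass kif/rule cleanup would
                                 * still have to run here for every vnet.
                                 */
                                pf_end_threads++;
                                wakeup(pf_purge_thread);
                                kproc_exit(0);
                        }

                        VNET_LIST_RLOCK();
                        VNET_FOREACH(vnet_iter) {
                                CURVNET_SET(vnet_iter);
                                /* Periodic purges, as in the hunks below. */
                                CURVNET_RESTORE();
                        }
                        VNET_LIST_RUNLOCK();
                }
        }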

>> -		PF_RULES_RUNLOCK();
>> -
>>   		/* Process 1/interval fraction of the state table every run. */
>>   		idx = pf_purge_expired_states(idx, V_pf_hashmask /
>> -			    (V_pf_default_rule.timeout[PFTM_INTERVAL] * 10));
>> +		    (V_pf_default_rule.timeout[PFTM_INTERVAL] * 10));
>>
>>   		/* Purge other expired types every PFTM_INTERVAL seconds. */
>>   		if (idx == 0) {
>> -			/*
>> -			 * Order is important:
>> -			 * - states and src nodes reference rules
>> -			 * - states and rules reference kifs
>> -			 */
>> -			pf_purge_expired_fragments();
>> -			pf_purge_expired_src_nodes();
>> -			pf_purge_unlinked_rules();
>> -			pfi_kif_purge();
>> +		    /*
>> +		     * Order is important:
>> +		     * - states and src nodes reference rules
>> +		     * - states and rules reference kifs
>> +		     */
>> +		    pf_purge_expired_fragments();
>> +		    pf_purge_expired_src_nodes();
>> +		    pf_purge_unlinked_rules();
>> +		    pfi_kif_purge();
>
> This is a good example of unnecessary whitespace noise.

I will fix these.

>>   		}
>> +		CURVNET_RESTORE();
>> +	    }
>> +	    VNET_LIST_RUNLOCK();
>>   	}
>>   	/* not reached */
>> -	CURVNET_RESTORE();
>>   }
>>
>

Thank you for your comments. I was informed by bz@ that there
is a security issue with pf being exposed in a jail, so I'll
start there and then continue with your comments.

Nikos


