divert and deadlock issues

Andre Oppermann andre at freebsd.org
Thu Aug 2 08:12:56 UTC 2007


Christian S.J. Peron wrote:
> Group,
> 
> I've come up with a basic patch, here are the highlights as per our discussion:
> 
> - Check for the presence of socket options, if they are present duplicate
>   them using m_dup(9)
> - Drop the INP/INFO locks after duplication
> - Activate ip_output() with the cloned mbuf (for socket options).  Also,
>   set the multicast options to NULL
> - Add div_cltoutput() to handle any calls to setsockopt(2) that might be
>   changing multicast parameters.  If we see any multicast parameters,
>   return EOPNOTSUPP (Operation Not Supported), otherwise wrap the call
>   into ip_ctloutput() (as it was before).
> 
> One portion that is missing with rwatson's netisr change. I've done some very
> basic testing on this end and things appear to work.  If this group is OK
> with this patch, I would like to forward it off to current@ for some
> potential testers and comment.

Looks good.

-- 
Andre

> Thanks!
> 
> 
> 
> ------------------------------------------------------------------------
> 
> Index: ip_divert.c
> ===================================================================
> RCS file: /usr/ncvs/src/sys/netinet/ip_divert.c,v
> retrieving revision 1.128
> diff -u -r1.128 ip_divert.c
> --- ip_divert.c	11 May 2007 10:20:50 -0000	1.128
> +++ ip_divert.c	1 Aug 2007 22:16:56 -0000
> @@ -305,6 +305,7 @@
>  	struct m_tag *mtag;
>  	struct divert_tag *dt;
>  	int error = 0;
> +	struct mbuf *clone;
>  
>  	/*
>  	 * An mbuf may hasn't come from userland, but we pretend
> @@ -373,15 +374,39 @@
>  #ifdef MAC
>  			mac_create_mbuf_from_inpcb(inp, m);
>  #endif
> -			error = ip_output(m,
> -				    inp->inp_options, NULL,
> -				    ((so->so_options & SO_DONTROUTE) ?
> -				    IP_ROUTETOIF : 0) |
> -				    IP_ALLOWBROADCAST | IP_RAWOUTPUT,
> -				    inp->inp_moptions, NULL);
> +			/*
> +			 * Get ready to inject the packet into ip_output().
> +			 * Just in case socket options were specified on the
> +			 * divert socket, we duplicate them.  This is done
> +			 * to avoid having to hold the PCB locks over the call
> +			 * to ip_output(), as doing this results in a number of
> +			 * lock ordering complexities.
> +			 *
> +			 * Note that we set the multicast options argument for
> +			 * ip_output() to NULL since it should be invariant that
> +			 * they are not present.
> +			 */
> +			KASSERT(inp->inp_moptions == NULL,
> +			    ("multicast options set on a divert socket"));
> +			clone = NULL;
> +			if (inp->inp_options != NULL) {
> +				clone = m_dup(inp->inp_options, M_DONTWAIT);
> +				if (clone == NULL)
> +					error = ENOBUFS;
> +			}
> +			INP_UNLOCK(inp);
> +			INP_INFO_WUNLOCK(&divcbinfo);
> +			if (error == ENOBUFS) {
> +				m_freem(m);
> +				return (error);
> +			}
> +			error = ip_output(m, clone, NULL,
> +			    ((so->so_options & SO_DONTROUTE) ?
> +			    IP_ROUTETOIF : 0) | IP_ALLOWBROADCAST |
> +			    IP_RAWOUTPUT, NULL, NULL);
> +			if (clone != NULL)
> +				m_freem(clone);
>  		}
> -		INP_UNLOCK(inp);
> -		INP_INFO_WUNLOCK(&divcbinfo);
>  	} else {
>  		dt->info |= IP_FW_DIVERT_LOOPBACK_FLAG;
>  		if (m->m_pkthdr.rcvif == NULL) {
> @@ -517,6 +542,34 @@
>  	return div_output(so, m, (struct sockaddr_in *)nam, control);
>  }
>  
> +static int
> +div_ctloutput(struct socket *so, struct sockopt *sopt)
> +{
> +
> +	/* Do not allow multicast options to be set on divert sockets. */
> +	switch (sopt->sopt_name) {
> +	case IP_MULTICAST_VIF:
> +	case IP_MULTICAST_IF:
> +	case IP_MULTICAST_TTL:
> +	case IP_MULTICAST_LOOP:
> +	case IP_ADD_MEMBERSHIP:
> +	case IP_ADD_SOURCE_MEMBERSHIP:
> +	case MCAST_JOIN_GROUP:
> +	case MCAST_JOIN_SOURCE_GROUP:
> +	case IP_DROP_MEMBERSHIP:
> +	case IP_DROP_SOURCE_MEMBERSHIP:
> +	case MCAST_LEAVE_GROUP:
> +	case MCAST_LEAVE_SOURCE_GROUP:
> +	case IP_BLOCK_SOURCE:
> +	case IP_UNBLOCK_SOURCE:
> +	case MCAST_BLOCK_SOURCE:
> +	case MCAST_UNBLOCK_SOURCE:
> +	case IP_MSFILTER:
> +		return (EOPNOTSUPP);
> +	}
> +	return (ip_ctloutput(so, sopt));
> +}
> +
>  void
>  div_ctlinput(int cmd, struct sockaddr *sa, void *vip)
>  {
> @@ -648,7 +701,7 @@
>  	.pr_flags =		PR_ATOMIC|PR_ADDR,
>  	.pr_input =		div_input,
>  	.pr_ctlinput =		div_ctlinput,
> -	.pr_ctloutput =		ip_ctloutput,
> +	.pr_ctloutput =		div_ctloutput,
>  	.pr_init =		div_init,
>  	.pr_usrreqs =		&div_usrreqs
>  };
> 
> 
> ------------------------------------------------------------------------
> 
> _______________________________________________
> freebsd-net at freebsd.org mailing list
> http://lists.freebsd.org/mailman/listinfo/freebsd-net
> To unsubscribe, send any mail to "freebsd-net-unsubscribe at freebsd.org"



More information about the freebsd-net mailing list