divert and deadlock issues

Christian S.J. Peron csjp at FreeBSD.org
Wed Aug 1 22:26:30 UTC 2007


Group,

I've come up with a basic patch, here are the highlights as per our discussion:

- Check for the presence of socket options, if they are present duplicate
  them using m_dup(9)
- Drop the INP/INFO locks after duplication
- Activate ip_output() with the cloned mbuf (for socket options).  Also,
  set the multicast options to NULL
- Add div_cltoutput() to handle any calls to setsockopt(2) that might be
  changing multicast parameters.  If we see any multicast parameters,
  return EOPNOTSUPP (Operation Not Supported), otherwise wrap the call
  into ip_ctloutput() (as it was before).

One portion that is missing with rwatson's netisr change. I've done some very
basic testing on this end and things appear to work.  If this group is OK
with this patch, I would like to forward it off to current@ for some
potential testers and comment.

Thanks!

-- 
Christian S.J. Peron
csjp at FreeBSD.ORG
FreeBSD Committer
-------------- next part --------------
Index: ip_divert.c
===================================================================
RCS file: /usr/ncvs/src/sys/netinet/ip_divert.c,v
retrieving revision 1.128
diff -u -r1.128 ip_divert.c
--- ip_divert.c	11 May 2007 10:20:50 -0000	1.128
+++ ip_divert.c	1 Aug 2007 22:16:56 -0000
@@ -305,6 +305,7 @@
 	struct m_tag *mtag;
 	struct divert_tag *dt;
 	int error = 0;
+	struct mbuf *clone;
 
 	/*
 	 * An mbuf may hasn't come from userland, but we pretend
@@ -373,15 +374,39 @@
 #ifdef MAC
 			mac_create_mbuf_from_inpcb(inp, m);
 #endif
-			error = ip_output(m,
-				    inp->inp_options, NULL,
-				    ((so->so_options & SO_DONTROUTE) ?
-				    IP_ROUTETOIF : 0) |
-				    IP_ALLOWBROADCAST | IP_RAWOUTPUT,
-				    inp->inp_moptions, NULL);
+			/*
+			 * Get ready to inject the packet into ip_output().
+			 * Just in case socket options were specified on the
+			 * divert socket, we duplicate them.  This is done
+			 * to avoid having to hold the PCB locks over the call
+			 * to ip_output(), as doing this results in a number of
+			 * lock ordering complexities.
+			 *
+			 * Note that we set the multicast options argument for
+			 * ip_output() to NULL since it should be invariant that
+			 * they are not present.
+			 */
+			KASSERT(inp->inp_moptions == NULL,
+			    ("multicast options set on a divert socket"));
+			clone = NULL;
+			if (inp->inp_options != NULL) {
+				clone = m_dup(inp->inp_options, M_DONTWAIT);
+				if (clone == NULL)
+					error = ENOBUFS;
+			}
+			INP_UNLOCK(inp);
+			INP_INFO_WUNLOCK(&divcbinfo);
+			if (error == ENOBUFS) {
+				m_freem(m);
+				return (error);
+			}
+			error = ip_output(m, clone, NULL,
+			    ((so->so_options & SO_DONTROUTE) ?
+			    IP_ROUTETOIF : 0) | IP_ALLOWBROADCAST |
+			    IP_RAWOUTPUT, NULL, NULL);
+			if (clone != NULL)
+				m_freem(clone);
 		}
-		INP_UNLOCK(inp);
-		INP_INFO_WUNLOCK(&divcbinfo);
 	} else {
 		dt->info |= IP_FW_DIVERT_LOOPBACK_FLAG;
 		if (m->m_pkthdr.rcvif == NULL) {
@@ -517,6 +542,34 @@
 	return div_output(so, m, (struct sockaddr_in *)nam, control);
 }
 
+static int
+div_ctloutput(struct socket *so, struct sockopt *sopt)
+{
+
+	/* Do not allow multicast options to be set on divert sockets. */
+	switch (sopt->sopt_name) {
+	case IP_MULTICAST_VIF:
+	case IP_MULTICAST_IF:
+	case IP_MULTICAST_TTL:
+	case IP_MULTICAST_LOOP:
+	case IP_ADD_MEMBERSHIP:
+	case IP_ADD_SOURCE_MEMBERSHIP:
+	case MCAST_JOIN_GROUP:
+	case MCAST_JOIN_SOURCE_GROUP:
+	case IP_DROP_MEMBERSHIP:
+	case IP_DROP_SOURCE_MEMBERSHIP:
+	case MCAST_LEAVE_GROUP:
+	case MCAST_LEAVE_SOURCE_GROUP:
+	case IP_BLOCK_SOURCE:
+	case IP_UNBLOCK_SOURCE:
+	case MCAST_BLOCK_SOURCE:
+	case MCAST_UNBLOCK_SOURCE:
+	case IP_MSFILTER:
+		return (EOPNOTSUPP);
+	}
+	return (ip_ctloutput(so, sopt));
+}
+
 void
 div_ctlinput(int cmd, struct sockaddr *sa, void *vip)
 {
@@ -648,7 +701,7 @@
 	.pr_flags =		PR_ATOMIC|PR_ADDR,
 	.pr_input =		div_input,
 	.pr_ctlinput =		div_ctlinput,
-	.pr_ctloutput =		ip_ctloutput,
+	.pr_ctloutput =		div_ctloutput,
 	.pr_init =		div_init,
 	.pr_usrreqs =		&div_usrreqs
 };


More information about the freebsd-net mailing list