PERFORCE change 167211 for review

Marko Zec zec at FreeBSD.org
Tue Aug 11 14:38:04 UTC 2009


http://perforce.freebsd.org/chv.cgi?CH=167211

Change 167211 by zec at zec_tpx32 on 2009/08/11 14:37:08

	Merge in Julian's ip_divert V_ work for further testing.

Affected files ...

.. //depot/projects/vimage-commit2/src/sys/netinet/ip_divert.c#36 edit
.. //depot/projects/vimage-commit2/src/sys/netinet/ip_divert.h#2 edit

Differences ...

==== //depot/projects/vimage-commit2/src/sys/netinet/ip_divert.c#36 (text+ko) ====

@@ -129,10 +129,18 @@
  * Initialize divert connection block queue.
  */
 static void
-div_zone_change(void *tag)
+div_zone_change(struct vnet *vnet)
 {
+        VNET_ITERATOR_DECL(vnet_iter);
+
+        VNET_LIST_RLOCK_NOSLEEP();
+        VNET_FOREACH(vnet_iter) {
+                CURVNET_SET(vnet_iter);
+		uma_zone_set_max(V_divcbinfo.ipi_zone, maxsockets);
+                CURVNET_RESTORE();
+        }
+        VNET_LIST_RUNLOCK_NOSLEEP();	
 
-	uma_zone_set_max(V_divcbinfo.ipi_zone, maxsockets);
 }
 
 static int
@@ -152,32 +160,6 @@
 	INP_LOCK_DESTROY(inp);
 }
 
-void
-div_init(void)
-{
-
-	INP_INFO_LOCK_INIT(&V_divcbinfo, "div");
-	LIST_INIT(&V_divcb);
-	V_divcbinfo.ipi_listhead = &V_divcb;
-#ifdef VIMAGE
-	V_divcbinfo.ipi_vnet = curvnet;
-#endif
-	/*
-	 * XXX We don't use the hash list for divert IP, but it's easier
-	 * to allocate a one entry hash list than it is to check all
-	 * over the place for hashbase == NULL.
-	 */
-	V_divcbinfo.ipi_hashbase = hashinit(1, M_PCB, &V_divcbinfo.ipi_hashmask);
-	V_divcbinfo.ipi_porthashbase = hashinit(1, M_PCB,
-	    &V_divcbinfo.ipi_porthashmask);
-	V_divcbinfo.ipi_zone = uma_zcreate("divcb", sizeof(struct inpcb),
-	    NULL, NULL, div_inpcb_init, div_inpcb_fini, UMA_ALIGN_PTR,
-	    UMA_ZONE_NOFREE);
-	uma_zone_set_max(V_divcbinfo.ipi_zone, maxsockets);
-	EVENTHANDLER_REGISTER(maxsockets_change, div_zone_change,
-		NULL, EVENTHANDLER_PRI_ANY);
-}
-
 /*
  * IPPROTO_DIVERT is not in the real IP protocol number space; this
  * function should never be called.  Just in case, drop any packets.
@@ -524,7 +506,8 @@
 div_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
 {
 	struct inpcb *inp;
-	int error;
+	int error = 0;
+	struct in_addr laddr;
 
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("div_bind: inp == NULL"));
@@ -535,12 +518,32 @@
 	 * XXX -- divert should not be abusing in_pcbind
 	 * and should probably have its own family.
 	 */
+        if (inp->inp_lport != 0 || inp->inp_laddr.s_addr != INADDR_ANY)
+                return (EINVAL);
+	/* Possibly not needed, but play it safe. */
+        inp->inp_fport = 0;
+	inp->inp_faddr.s_addr = INADDR_ANY;
+
 	if (nam->sa_family != AF_INET)
 		return EAFNOSUPPORT;
+
+	laddr.s_addr = INADDR_ANY;
+	/* Just to be sure: the man page says the address is ignored. */
 	((struct sockaddr_in *)nam)->sin_addr.s_addr = INADDR_ANY;
+
 	INP_INFO_WLOCK(&V_divcbinfo);
 	INP_WLOCK(inp);
-	error = in_pcbbind(inp, nam, td->td_ucred);
+	if (in_pcblookup_local(&V_divcbinfo, laddr, 
+	    ((struct sockaddr_in *)nam)->sin_port, 0/*not wild ok */,
+	    td->td_ucred)) {
+		error = EADDRNOTAVAIL;
+	} else {
+        	inp->inp_lport = ((struct sockaddr_in *)nam)->sin_port;
+		if (in_pcbinshash(inp) != 0) {
+			inp->inp_lport = 0;
+			error = EAGAIN;
+		}
+	}
 	INP_WUNLOCK(inp);
 	INP_INFO_WUNLOCK(&V_divcbinfo);
 	return error;
@@ -708,25 +711,20 @@
 	.pr_input =		div_input,
 	.pr_ctlinput =		div_ctlinput,
 	.pr_ctloutput =		ip_ctloutput,
-	.pr_init =		div_init,
+	.pr_init =		NULL,
 	.pr_usrreqs =		&div_usrreqs
 };
 
+static int div_loaded = 0;
+static eventhandler_tag div_evh_tag;
 static int
 div_modevent(module_t mod, int type, void *unused)
 {
 	int err = 0;
-	int n;
 
 	switch (type) {
 	case MOD_LOAD:
-		/*
-		 * Protocol will be initialized by pf_proto_register().
-		 * We don't have to register ip_protox because we are not
-		 * a true IP protocol that goes over the wire.
-		 */
-		err = pf_proto_register(PF_INET, &div_protosw);
-		ip_divert_ptr = divert_packet;
+
 		break;
 	case MOD_QUIESCE:
 		/*
@@ -737,29 +735,6 @@
 		err = EPERM;
 		break;
 	case MOD_UNLOAD:
-		/*
-		 * Forced unload.
-		 *
-		 * Module ipdivert can only be unloaded if no sockets are
-		 * connected.  Maybe this can be changed later to forcefully
-		 * disconnect any open sockets.
-		 *
-		 * XXXRW: Note that there is a slight race here, as a new
-		 * socket open request could be spinning on the lock and then
-		 * we destroy the lock.
-		 */
-		INP_INFO_WLOCK(&V_divcbinfo);
-		n = V_divcbinfo.ipi_count;
-		if (n != 0) {
-			err = EBUSY;
-			INP_INFO_WUNLOCK(&V_divcbinfo);
-			break;
-		}
-		ip_divert_ptr = NULL;
-		err = pf_proto_unregister(PF_INET, IPPROTO_DIVERT, SOCK_RAW);
-		INP_INFO_WUNLOCK(&V_divcbinfo);
-		INP_INFO_LOCK_DESTROY(&V_divcbinfo);
-		uma_zdestroy(V_divcbinfo.ipi_zone);
 		break;
 	default:
 		err = EOPNOTSUPP;
@@ -774,6 +749,125 @@
         0
 };
 
-DECLARE_MODULE(ipdivert, ipdivertmod, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY);
+/* init on boot or module load */
+static void 
+div_init(void)
+{
+	int err;
+
+	/*
+ 	 * Protocol will be initialized by pf_proto_register().
+ 	 * We don't have to register ip_protox because we are not
+ 	 * a true IP protocol that goes over the wire.
+ 	 */
+	err = pf_proto_register(PF_INET, &div_protosw);
+	if (err == 0) {
+		ip_divert_ptr = divert_packet;
+		div_evh_tag =
+		    EVENTHANDLER_REGISTER(maxsockets_change, div_zone_change,
+		            NULL, EVENTHANDLER_PRI_ANY);
+		div_loaded = 1;
+	}
+	return;
+}
+
+/****************
+ * Stuff that must be initialized for every instance
+ * (including the first of course).
+ */
+static int
+div_vnet_init(const void *unused)
+{
+	if (div_loaded == 0)
+		return (0);
+	INP_INFO_LOCK_INIT(&V_divcbinfo, "div");
+	LIST_INIT(&V_divcb);
+	V_divcbinfo.ipi_listhead = &V_divcb;
+#ifdef VIMAGE
+	V_divcbinfo.ipi_vnet = curvnet;
+#endif
+	/*
+	 * XXX We don't use the hash list for divert IP, but it's easier
+	 * to allocate a one entry hash list than it is to check all
+	 * over the place for hashbase == NULL.
+	 */
+	V_divcbinfo.ipi_hashbase = hashinit(1, M_PCB, &V_divcbinfo.ipi_hashmask);
+	V_divcbinfo.ipi_porthashbase = hashinit(1, M_PCB,
+	    &V_divcbinfo.ipi_porthashmask);
+	V_divcbinfo.ipi_zone = uma_zcreate("divcb", sizeof(struct inpcb),
+	    NULL, NULL, div_inpcb_init, div_inpcb_fini, UMA_ALIGN_PTR,
+	    UMA_ZONE_NOFREE);
+	uma_zone_set_max(V_divcbinfo.ipi_zone, maxsockets);
+	return (0);
+}
+
+/**********************
+ * Called for the removal of the last instance only on module unload.
+ */
+static void
+div_uninit(void)
+{
+	int err;
+	
+	if (div_loaded == 0)
+		return;
+	div_loaded = 0;
+	ip_divert_ptr = NULL;
+	EVENTHANDLER_DEREGISTER(maxsockets_change, div_evh_tag);
+	err = pf_proto_unregister(PF_INET, IPPROTO_DIVERT, SOCK_RAW);
+}
+
+/***********************
+ * Called for the removal of each instance.
+ */
+static int
+div_vnet_uninit(const void *unused)
+{
+	int err = 0;
+	int n;
+
+	if (div_loaded == 0)
+		return (0);
+	/*
+	 * Forced unload.
+	 *
+	 * Module ipdivert can only be unloaded if no sockets are
+	 * connected.  Maybe this can be changed later to forcefully
+	 * disconnect any open sockets.
+	 *
+	 * XXXRW: Note that there is a slight race here, as a new
+	 * socket open request could be spinning on the lock and then
+	 * we destroy the lock.
+	 */
+	INP_INFO_WLOCK(&V_divcbinfo);
+	n = V_divcbinfo.ipi_count;
+	INP_INFO_WUNLOCK(&V_divcbinfo);
+	if (n != 0) {
+		err = EBUSY;
+	} else {
+		INP_INFO_LOCK_DESTROY(&V_divcbinfo);
+		uma_zdestroy(V_divcbinfo.ipi_zone);
+	}
+	return (err);
+}
+
+
+#define DIV_MAJOR_ORDER     SI_SUB_PROTO_IFATTACHDOMAIN
+#define DIV_MODULE_ORDER       (SI_ORDER_ANY + 64)
+#define DIV_SYSINIT_ORDER      (DIV_MODULE_ORDER  + 1)
+#define DIV_VNET_ORDER         (DIV_SYSINIT_ORDER + 1 )
+
+DECLARE_MODULE(ipdivert, ipdivertmod, DIV_MAJOR_ORDER, DIV_MODULE_ORDER);
 MODULE_DEPEND(dummynet, ipfw, 2, 2, 2);
 MODULE_VERSION(ipdivert, 1);
+
+SYSINIT(div_init, DIV_MAJOR_ORDER, DIV_SYSINIT_ORDER,
+    div_init, NULL);
+SYSUNINIT(div_uninit, DIV_MAJOR_ORDER, DIV_SYSINIT_ORDER,
+    div_uninit, NULL);
+
+VNET_SYSINIT(div_vnet_init, DIV_MAJOR_ORDER, DIV_VNET_ORDER,
+    div_vnet_init, NULL);
+VNET_SYSUNINIT(div_vnet_uninit, DIV_MAJOR_ORDER, DIV_VNET_ORDER,
+    div_vnet_uninit, NULL);
+

==== //depot/projects/vimage-commit2/src/sys/netinet/ip_divert.h#2 (text+ko) ====

@@ -83,7 +83,6 @@
 typedef	void ip_divert_packet_t(struct mbuf *m, int incoming);
 extern	ip_divert_packet_t *ip_divert_ptr;
 
-extern	void div_init(void);
 extern	void div_input(struct mbuf *, int);
 extern	void div_ctlinput(int, struct sockaddr *, void *);
 #endif /* _NETINET_IP_DIVERT_H_ */


More information about the p4-projects mailing list