PERFORCE change 176829 for review

Marko Zec zec at FreeBSD.org
Mon Apr 12 11:18:40 UTC 2010


http://p4web.freebsd.org/@@176829?ac=10

Change 176829 by zec at zec_tpx32 on 2010/04/12 11:18:05

	Remove a crude hack which once upon a time allowed one to
	directly connect a bpf tap to an ifnet in a remote vnet, but
	which stopped working a year ago due to some strange locking
	issues related to jail-vnet integration.
	
	This was never intended to be committed to head, and a patch
	has been committed to IMUNES which allows us to run wireshark
	in a remote vnet without this crap, so there's no point in
	keeping this dead code here any longer.
	
	Moreover, back out the now-obsolete V_irtualization patches
	for dummynet that anchie@ originally started developing back
	in September, but which were never fully completed, and which
	now collide with the new version of dummynet in the main tree.

Affected files ...

.. //depot/projects/vimage/src/sys/net/bpf.c#45 edit
.. //depot/projects/vimage/src/sys/netinet/ipfw/ip_dummynet.c#6 edit

Differences ...

==== //depot/projects/vimage/src/sys/net/bpf.c#45 (text+ko) ====

@@ -35,7 +35,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/net/bpf.c,v 1.219 2010/02/20 00:19:21 jkim Exp $");
+__FBSDID("$FreeBSD: head/sys/net/bpf.c 205858 2010-03-29 20:24:03Z jkim $");
 
 #include "opt_bpf.h"
 #include "opt_netgraph.h"
@@ -44,7 +44,6 @@
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/conf.h>
-#include <sys/ctype.h>
 #include <sys/fcntl.h>
 #include <sys/jail.h>
 #include <sys/malloc.h>
@@ -615,6 +614,7 @@
 	mac_bpfdesc_destroy(d);
 #endif /* MAC */
 	knlist_destroy(&d->bd_sel.si_note);
+	callout_drain(&d->bd_callout);
 	bpf_freed(d);
 	free(d, M_BPF);
 }
@@ -652,7 +652,7 @@
 	mac_bpfdesc_create(td->td_ucred, d);
 #endif
 	mtx_init(&d->bd_mtx, devtoname(dev), "bpf cdev lock", MTX_DEF);
-	callout_init(&d->bd_callout, CALLOUT_MPSAFE);
+	callout_init_mtx(&d->bd_callout, &d->bd_mtx, 0);
 	knlist_init_mtx(&d->bd_sel.si_note, &d->bd_mtx);
 
 	return (0);
@@ -808,13 +808,15 @@
 {
 	struct bpf_d *d = (struct bpf_d *)arg;
 
-	BPFD_LOCK(d);
+	BPFD_LOCK_ASSERT(d);
+
+	if (callout_pending(&d->bd_callout) || !callout_active(&d->bd_callout))
+		return;
 	if (d->bd_state == BPF_WAITING) {
 		d->bd_state = BPF_TIMED_OUT;
 		if (d->bd_slen != 0)
 			bpf_wakeup(d);
 	}
-	BPFD_UNLOCK(d);
 }
 
 static int
@@ -1448,34 +1450,9 @@
 	struct bpf_if *bp;
 	struct ifnet *theywant;
 
-#define XVNET_BPF_SNOOPING
-#if defined(VIMAGE) && defined(XVNET_BPF_SNOOPING)
-	struct vnet *target_vnet = curvnet;
-	char *c;
-
-	/* Attempt to attach to an ifnet in a foreign vnet, specified as @ */
-	c = rindex(ifr->ifr_name, '@');
-	if ( c != NULL ) {
-		struct prison *target_pr;
-
-		*c++ = 0;
-		if (!isascii(*c) && !isdigit(*c))
-			return ENXIO;
-		target_pr = prison_find_name(curthread->td_ucred->cr_prison, c);
-		if (target_pr == NULL)
-			return ENXIO;
-		target_vnet = target_pr->pr_vnet;
-	}
-	CURVNET_SET_QUIET(target_vnet);
-#endif
-
 	theywant = ifunit(ifr->ifr_name);
-	if (theywant == NULL || theywant->if_bpf == NULL) {
-#if defined(VIMAGE) && defined(XVNET_BPF_SNOOPING)
-		CURVNET_RESTORE();
-#endif
+	if (theywant == NULL || theywant->if_bpf == NULL)
 		return (ENXIO);
-	}
 
 	bp = theywant->if_bpf;
 
@@ -1515,9 +1492,6 @@
 	BPFD_LOCK(d);
 	reset_d(d);
 	BPFD_UNLOCK(d);
-#if defined(VIMAGE) && defined(XVNET_BPF_SNOOPING)
-	CURVNET_RESTORE();
-#endif
 	return (0);
 }
 
@@ -1606,8 +1580,7 @@
 		kn->kn_data = d->bd_slen;
 		if (d->bd_hbuf)
 			kn->kn_data += d->bd_hlen;
-	}
-	else if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) {
+	} else if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) {
 		callout_reset(&d->bd_callout, d->bd_rtout,
 		    bpf_timed_out, d);
 		d->bd_state = BPF_WAITING;
@@ -1894,13 +1867,14 @@
 	 * free.
 	 */
 	bpf_free(d);
-	if (d->bd_rfilter) {
+	if (d->bd_rfilter != NULL) {
 		free((caddr_t)d->bd_rfilter, M_BPF);
 #ifdef BPF_JITTER
-		bpf_destroy_jit_filter(d->bd_bfilter);
+		if (d->bd_bfilter != NULL)
+			bpf_destroy_jit_filter(d->bd_bfilter);
 #endif
 	}
-	if (d->bd_wfilter)
+	if (d->bd_wfilter != NULL)
 		free((caddr_t)d->bd_wfilter, M_BPF);
 	mtx_destroy(&d->bd_mtx);
 }

==== //depot/projects/vimage/src/sys/netinet/ipfw/ip_dummynet.c#6 (text+ko) ====

@@ -1,5 +1,5 @@
 /*-
- * Copyright (c) 1998-2002 Luigi Rizzo, Universita` di Pisa
+ * Copyright (c) 1998-2002,2010 Luigi Rizzo, Universita` di Pisa
  * Portions Copyright (c) 2000 Akamba Corp.
  * All rights reserved
  *
@@ -26,36 +26,14 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/netinet/ipfw/ip_dummynet.c,v 1.5 2009/06/24 22:57:07 oleg Exp $");
+__FBSDID("$FreeBSD: head/sys/netinet/ipfw/ip_dummynet.c 206428 2010-04-09 18:02:19Z luigi $");
 
-#define	DUMMYNET_DEBUG
+/*
+ * Configuration and internal object management for dummynet.
+ */
 
 #include "opt_inet6.h"
 
-/*
- * This module implements IP dummynet, a bandwidth limiter/delay emulator
- * used in conjunction with the ipfw package.
- * Description of the data structures used is in ip_dummynet.h
- * Here you mainly find the following blocks of code:
- *  + variable declarations;
- *  + heap management functions;
- *  + scheduler and dummynet functions;
- *  + configuration and initialization.
- *
- * NOTA BENE: critical sections are protected by the "dummynet lock".
- *
- * Most important Changes:
- *
- * 011004: KLDable
- * 010124: Fixed WF2Q behaviour
- * 010122: Fixed spl protection.
- * 000601: WF2Q support
- * 000106: large rewrite, use heaps to handle very many pipes.
- * 980513:	initial release
- *
- * include files marked with XXX are probably not needed
- */
-
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/malloc.h>
@@ -69,2258 +47,2147 @@
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/time.h>
-#include <sys/sysctl.h>
 #include <sys/taskqueue.h>
 #include <net/if.h>	/* IFNAMSIZ, struct ifaddr, ifq head, lock.h mutex.h */
-#include <net/netisr.h>
-#include <net/vnet.h>
 #include <netinet/in.h>
-#include <netinet/ip.h>		/* ip_len, ip_off */
+#include <netinet/ip_var.h>	/* ip_output(), IP_FORWARDING */
 #include <netinet/ip_fw.h>
+#include <netinet/ipfw/ip_fw_private.h>
+#include <netinet/ipfw/dn_heap.h>
 #include <netinet/ip_dummynet.h>
-#include <netinet/ip_var.h>	/* ip_output(), IP_FORWARDING */
+#include <netinet/ipfw/ip_dn_private.h>
+#include <netinet/ipfw/dn_sched.h>
 
-#include <netinet/if_ether.h> /* various ether_* routines */
+/* which objects to copy */
+#define DN_C_LINK 	0x01
+#define DN_C_SCH	0x02
+#define DN_C_FLOW	0x04
+#define DN_C_FS		0x08
+#define DN_C_QUEUE	0x10
 
-#include <netinet/ip6.h>       /* for ip6_input, ip6_output prototypes */
-#include <netinet6/ip6_var.h>
+/* we use this argument in case of a schk_new */
+struct schk_new_arg {
+	struct dn_alg *fp;
+	struct dn_sch *sch;
+};
 
-/*
- * We keep a private variable for the simulation time, but we could
- * probably use an existing one ("softticks" in sys/kern/kern_timeout.c)
- */
-static VNET_DEFINE(dn_key, curr_time) = 0 ; /* current simulation time */
-#define	V_curr_time		VNET(curr_time)
+/*---- callout hooks. ----*/
+static struct callout dn_timeout;
+static struct task	dn_task;
+static struct taskqueue	*dn_tq = NULL;
 
-static VNET_DEFINE(int, dn_hash_size) = 64 ;	/* default hash size */
-#define	V_dn_hash_size		VNET(dn_hash_size)
+static void
+dummynet(void * __unused unused)
+{
 
-/* statistics on number of queue searches and search steps */
-static VNET_DEFINE(long, searches);
-static VNET_DEFINE(long, search_steps);
-static VNET_DEFINE(int, pipe_expire) = 1 ;   /* expire queue if empty */
-static VNET_DEFINE(int, dn_max_ratio) = 16 ; /* max queues/buckets ratio */
-#define	V_searches		VNET(searches)
-#define	V_search_steps		VNET(search_steps)
-#define	V_pipe_expire		VNET(pipe_expire)
-#define	V_dn_max_ratio		VNET(dn_max_ratio)
+	taskqueue_enqueue(dn_tq, &dn_task);
+}
 
-static VNET_DEFINE(long, pipe_slot_limit) = 100; /* Foot shooting limit for pipe queues. */
-static VNET_DEFINE(long, pipe_byte_limit) = 1024 * 1024;
-#define V_pipe_slot_limit	VNET(pipe_slot_limit)
-#define V_pipe_byte_limit	VNET(pipe_byte_limit)
+void
+dn_reschedule(void)
+{
+	callout_reset(&dn_timeout, 1, dummynet, NULL);
+}
+/*----- end of callout hooks -----*/
 
-static VNET_DEFINE(int, red_lookup_depth) = 256;	/* RED - default lookup table depth */
-static VNET_DEFINE(int, red_avg_pkt_size) = 512;      /* RED - default medium packet size */
-static VNET_DEFINE(int, red_max_pkt_size) = 1500;     /* RED - default max packet size */
-#define	V_red_lookup_depth	VNET(red_lookup_depth)
-#define V_red_avg_pkt_size	VNET(red_avg_pkt_size)
-#define V_red_max_pkt_size	VNET(red_max_pkt_size)
+/* Return a scheduler descriptor given the type or name. */
+static struct dn_alg *
+find_sched_type(int type, char *name)
+{
+	struct dn_alg *d;
 
-static VNET_DEFINE(struct timeval, prev_t);
-static VNET_DEFINE(struct timeval, t);
-static VNET_DEFINE(long, tick_last);			/* Last tick duration (usec). */
-static VNET_DEFINE(long, tick_delta);			/* Last vs standard tick diff (usec). */
-static VNET_DEFINE(long, tick_delta_sum);		/* Accumulated tick difference (usec).*/
-static VNET_DEFINE(long, tick_adjustment);		/* Tick adjustments done. */
-static VNET_DEFINE(long, tick_lost);			/* Lost(coalesced) ticks number. */
-/* Adjusted vs non-adjusted curr_time difference (ticks). */
-static VNET_DEFINE(long, tick_diff);
-#define	V_prev_t		VNET(prev_t)
-#define	V_t			VNET(t)
-#define	V_tick_last		VNET(tick_last)
-#define	V_tick_delta		VNET(tick_delta)
-#define	V_tick_delta_sum	VNET(tick_delta_sum)
-#define	V_tick_adjustment	VNET(tick_adjustment)
-#define	V_tick_lost		VNET(tick_lost)
-#define	V_tick_diff		VNET(tick_diff)
+	SLIST_FOREACH(d, &dn_cfg.schedlist, next) {
+		if (d->type == type || (name && !strcmp(d->name, name)))
+			return d;
+	}
+	return NULL; /* not found */
+}
 
-static VNET_DEFINE(int, io_fast);
-static VNET_DEFINE(unsigned long, io_pkt);
-static VNET_DEFINE(unsigned long, io_pkt_fast);
-static VNET_DEFINE(long, io_pkt_drop);
-#define V_io_fast		VNET(io_fast)
-#define	V_io_pkt		VNET(io_pkt)
-#define	V_io_pkt_fast		VNET(io_pkt_fast)
-#define	V_io_pkt_drop		VNET(io_pkt_drop)
+int
+ipdn_bound_var(int *v, int dflt, int lo, int hi, const char *msg)
+{
+	int oldv = *v;
+	const char *op = NULL;
+	if (oldv < lo) {
+		*v = dflt;
+		op = "Bump";
+	} else if (oldv > hi) {
+		*v = hi;
+		op = "Clamp";
+	} else
+		return *v;
+	if (op && msg)
+		printf("%s %s to %d (was %d)\n", op, msg, *v, oldv);
+	return *v;
+}
 
+/*---- flow_id mask, hash and compare functions ---*/
 /*
- * Three heaps contain queues and pipes that the scheduler handles:
- *
- * ready_heap contains all dn_flow_queue related to fixed-rate pipes.
- *
- * wfq_ready_heap contains the pipes associated with WF2Q flows
- *
- * extract_heap contains pipes associated with delay lines.
- *
+ * The flow_id includes the 5-tuple, the queue/pipe number
+ * which we store in the extra area in host order,
+ * and for ipv6 also the flow_id6.
+ * XXX see if we want the tos byte (can store in 'flags')
  */
+static struct ipfw_flow_id *
+flow_id_mask(struct ipfw_flow_id *mask, struct ipfw_flow_id *id)
+{
+	int is_v6 = IS_IP6_FLOW_ID(id);
 
-MALLOC_DEFINE(M_DUMMYNET, "dummynet", "dummynet heap");
+	id->dst_port &= mask->dst_port;
+	id->src_port &= mask->src_port;
+	id->proto &= mask->proto;
+	id->extra &= mask->extra;
+	if (is_v6) {
+		APPLY_MASK(&id->dst_ip6, &mask->dst_ip6);
+		APPLY_MASK(&id->src_ip6, &mask->src_ip6);
+		id->flow_id6 &= mask->flow_id6;
+	} else {
+		id->dst_ip &= mask->dst_ip;
+		id->src_ip &= mask->src_ip;
+	}
+	return id;
+}
 
-static VNET_DEFINE(struct dn_heap, ready_heap);
-static VNET_DEFINE(struct dn_heap, extract_heap);
-static VNET_DEFINE(struct dn_heap, wfq_ready_heap);
+/* computes an OR of two masks, result in dst and also returned */
+static struct ipfw_flow_id *
+flow_id_or(struct ipfw_flow_id *src, struct ipfw_flow_id *dst)
+{
+	int is_v6 = IS_IP6_FLOW_ID(dst);
 
-static int	heap_init(struct dn_heap *h, int size);
-static int	heap_insert (struct dn_heap *h, dn_key key1, void *p);
-static void	heap_extract(struct dn_heap *h, void *obj);
-static void	transmit_event(struct dn_pipe *pipe, struct mbuf **head,
-		    struct mbuf **tail);
-static void	ready_event(struct dn_flow_queue *q, struct mbuf **head,
-		    struct mbuf **tail);
-static void	ready_event_wfq(struct dn_pipe *p, struct mbuf **head,
-		    struct mbuf **tail);
+	dst->dst_port |= src->dst_port;
+	dst->src_port |= src->src_port;
+	dst->proto |= src->proto;
+	dst->extra |= src->extra;
+	if (is_v6) {
+#define OR_MASK(_d, _s)                          \
+    (_d)->__u6_addr.__u6_addr32[0] |= (_s)->__u6_addr.__u6_addr32[0]; \
+    (_d)->__u6_addr.__u6_addr32[1] |= (_s)->__u6_addr.__u6_addr32[1]; \
+    (_d)->__u6_addr.__u6_addr32[2] |= (_s)->__u6_addr.__u6_addr32[2]; \
+    (_d)->__u6_addr.__u6_addr32[3] |= (_s)->__u6_addr.__u6_addr32[3];
+		OR_MASK(&dst->dst_ip6, &src->dst_ip6);
+		OR_MASK(&dst->src_ip6, &src->src_ip6);
+#undef OR_MASK
+		dst->flow_id6 |= src->flow_id6;
+	} else {
+		dst->dst_ip |= src->dst_ip;
+		dst->src_ip |= src->src_ip;
+	}
+	return dst;
+}
 
-#define	HASHSIZE	16
-#define	HASH(num)	((((num) >> 8) ^ ((num) >> 4) ^ (num)) & 0x0f)
-static VNET_DEFINE(struct dn_pipe_head, pipehash[HASHSIZE]);	/* all pipes */
-static VNET_DEFINE(struct dn_flow_set_head, flowsethash[HASHSIZE]);	/* all flowsets */
+static int
+nonzero_mask(struct ipfw_flow_id *m)
+{
+	if (m->dst_port || m->src_port || m->proto || m->extra)
+		return 1;
+	if (IS_IP6_FLOW_ID(m)) {
+		return
+			m->dst_ip6.__u6_addr.__u6_addr32[0] ||
+			m->dst_ip6.__u6_addr.__u6_addr32[1] ||
+			m->dst_ip6.__u6_addr.__u6_addr32[2] ||
+			m->dst_ip6.__u6_addr.__u6_addr32[3] ||
+			m->src_ip6.__u6_addr.__u6_addr32[0] ||
+			m->src_ip6.__u6_addr.__u6_addr32[1] ||
+			m->src_ip6.__u6_addr.__u6_addr32[2] ||
+			m->src_ip6.__u6_addr.__u6_addr32[3] ||
+			m->flow_id6;
+	} else {
+		return m->dst_ip || m->src_ip;
+	}
+}
 
-static VNET_DEFINE(struct callout, dn_timeout);
+/* XXX we may want a better hash function */
+static uint32_t
+flow_id_hash(struct ipfw_flow_id *id)
+{
+    uint32_t i;
 
-extern	void (*bridge_dn_p)(struct mbuf *, struct ifnet *);
+    if (IS_IP6_FLOW_ID(id)) {
+	uint32_t *d = (uint32_t *)&id->dst_ip6;
+	uint32_t *s = (uint32_t *)&id->src_ip6;
+        i = (d[0]      ) ^ (d[1])       ^
+            (d[2]      ) ^ (d[3])       ^
+            (d[0] >> 15) ^ (d[1] >> 15) ^
+            (d[2] >> 15) ^ (d[3] >> 15) ^
+            (s[0] <<  1) ^ (s[1] <<  1) ^
+            (s[2] <<  1) ^ (s[3] <<  1) ^
+            (s[0] << 16) ^ (s[1] << 16) ^
+            (s[2] << 16) ^ (s[3] << 16) ^
+            (id->dst_port << 1) ^ (id->src_port) ^
+	    (id->extra) ^
+            (id->proto ) ^ (id->flow_id6);
+    } else {
+        i = (id->dst_ip)        ^ (id->dst_ip >> 15) ^
+            (id->src_ip << 1)   ^ (id->src_ip >> 16) ^
+	    (id->extra) ^
+            (id->dst_port << 1) ^ (id->src_port)     ^ (id->proto);
+    }
+    return i;
+}
 
-#ifdef SYSCTL_NODE
-SYSCTL_DECL(_net_inet);
-SYSCTL_DECL(_net_inet_ip);
+/* Like bcmp, returns 0 if ids match, 1 otherwise. */
+static int
+flow_id_cmp(struct ipfw_flow_id *id1, struct ipfw_flow_id *id2)
+{
+	int is_v6 = IS_IP6_FLOW_ID(id1);
 
-SYSCTL_NODE(_net_inet_ip, OID_AUTO, dummynet, CTLFLAG_RW, 0, "Dummynet");
-SYSCTL_VNET_INT(_net_inet_ip_dummynet, OID_AUTO, hash_size,
-    CTLFLAG_RW, &VNET_NAME(dn_hash_size), 0, "Default hash table size");
-#if 0	/* curr_time is 64 bit */
-SYSCTL_VNET_LONG(_net_inet_ip_dummynet, OID_AUTO, curr_time,
-    CTLFLAG_RD, &VNET_NAME(curr_time), 0, "Current tick");
-#endif
-SYSCTL_VNET_INT(_net_inet_ip_dummynet, OID_AUTO, ready_heap,
-    CTLFLAG_RD, &VNET_NAME(ready_heap).size, 0, "Size of ready heap");
-SYSCTL_VNET_INT(_net_inet_ip_dummynet, OID_AUTO, extract_heap,
-    CTLFLAG_RD, &VNET_NAME(extract_heap).size, 0, "Size of extract heap");
-SYSCTL_VNET_LONG(_net_inet_ip_dummynet, OID_AUTO, searches,
-    CTLFLAG_RD, &VNET_NAME(searches), 0, "Number of queue searches");
-SYSCTL_VNET_LONG(_net_inet_ip_dummynet, OID_AUTO, search_steps,
-    CTLFLAG_RD, &VNET_NAME(search_steps), 0, "Number of queue search steps");
-SYSCTL_VNET_INT(_net_inet_ip_dummynet, OID_AUTO, expire,
-    CTLFLAG_RW, &VNET_NAME(pipe_expire), 0, "Expire queue if empty");
-SYSCTL_VNET_INT(_net_inet_ip_dummynet, OID_AUTO, max_chain_len,
-    CTLFLAG_RW, &VNET_NAME(dn_max_ratio), 0,
-    "Max ratio between dynamic queues and buckets");
-SYSCTL_VNET_INT(_net_inet_ip_dummynet, OID_AUTO, red_lookup_depth,
-    CTLFLAG_RD, &VNET_NAME(red_lookup_depth), 0, "Depth of RED lookup table");
-SYSCTL_VNET_INT(_net_inet_ip_dummynet, OID_AUTO, red_avg_pkt_size,
-    CTLFLAG_RD, &VNET_NAME(red_avg_pkt_size), 0, "RED Medium packet size");
-SYSCTL_VNET_INT(_net_inet_ip_dummynet, OID_AUTO, red_max_pkt_size,
-    CTLFLAG_RD, &VNET_NAME(red_max_pkt_size), 0, "RED Max packet size");
-SYSCTL_VNET_LONG(_net_inet_ip_dummynet, OID_AUTO, tick_delta,
-    CTLFLAG_RD, &VNET_NAME(tick_delta), 0, "Last vs standard tick difference (usec).");
-SYSCTL_VNET_LONG(_net_inet_ip_dummynet, OID_AUTO, tick_delta_sum,
-    CTLFLAG_RD, &VNET_NAME(tick_delta_sum), 0, "Accumulated tick difference (usec).");
-SYSCTL_VNET_LONG(_net_inet_ip_dummynet, OID_AUTO, tick_adjustment,
-    CTLFLAG_RD, &VNET_NAME(tick_adjustment), 0, "Tick adjustments done.");
-SYSCTL_VNET_LONG(_net_inet_ip_dummynet, OID_AUTO, tick_diff,
-    CTLFLAG_RD, &VNET_NAME(tick_diff), 0,
-    "Adjusted vs non-adjusted curr_time difference (ticks).");
-SYSCTL_VNET_LONG(_net_inet_ip_dummynet, OID_AUTO, tick_lost,
-    CTLFLAG_RD, &VNET_NAME(tick_lost), 0,
-    "Number of ticks coalesced by dummynet taskqueue.");
-SYSCTL_VNET_INT(_net_inet_ip_dummynet, OID_AUTO, io_fast,
-    CTLFLAG_RW, &VNET_NAME(io_fast), 0, "Enable fast dummynet io.");
-SYSCTL_VNET_ULONG(_net_inet_ip_dummynet, OID_AUTO, io_pkt,
-    CTLFLAG_RD, &VNET_NAME(io_pkt), 0,
-    "Number of packets passed to dummynet.");
-SYSCTL_VNET_ULONG(_net_inet_ip_dummynet, OID_AUTO, io_pkt_fast,
-    CTLFLAG_RD, &VNET_NAME(io_pkt_fast), 0,
-    "Number of packets bypassed dummynet scheduler.");
-SYSCTL_VNET_ULONG(_net_inet_ip_dummynet, OID_AUTO, io_pkt_drop,
-    CTLFLAG_RD, &VNET_NAME(io_pkt_drop), 0,
-    "Number of packets dropped by dummynet.");
-SYSCTL_VNET_LONG(_net_inet_ip_dummynet, OID_AUTO, pipe_slot_limit,
-    CTLFLAG_RW, &VNET_NAME(pipe_slot_limit), 0, "Upper limit in slots for pipe queue.");
-SYSCTL_VNET_LONG(_net_inet_ip_dummynet, OID_AUTO, pipe_byte_limit,
-    CTLFLAG_RW, &VNET_NAME(pipe_byte_limit), 0, "Upper limit in bytes for pipe queue.");
-#endif
+	if (!is_v6) {
+	    if (IS_IP6_FLOW_ID(id2))
+		return 1; /* different address families */
 
-#ifdef DUMMYNET_DEBUG
-VNET_DEFINE(int, dummynet_debug) = 0;
-#ifdef SYSCTL_NODE
-SYSCTL_VNET_INT(_net_inet_ip_dummynet, OID_AUTO, debug, CTLFLAG_RW, &VNET_NAME(dummynet_debug),
-	    0, "control debugging printfs");
-#endif
-#define	DPRINTF(X)	if (V_dummynet_debug) printf X
-#else
-#define	DPRINTF(X)
-#endif
+	    return (id1->dst_ip == id2->dst_ip &&
+		    id1->src_ip == id2->src_ip &&
+		    id1->dst_port == id2->dst_port &&
+		    id1->src_port == id2->src_port &&
+		    id1->proto == id2->proto &&
+		    id1->extra == id2->extra) ? 0 : 1;
+	}
+	/* the ipv6 case */
+	return (
+	    !bcmp(&id1->dst_ip6,&id2->dst_ip6, sizeof(id1->dst_ip6)) &&
+	    !bcmp(&id1->src_ip6,&id2->src_ip6, sizeof(id1->src_ip6)) &&
+	    id1->dst_port == id2->dst_port &&
+	    id1->src_port == id2->src_port &&
+	    id1->proto == id2->proto &&
+	    id1->extra == id2->extra &&
+	    id1->flow_id6 == id2->flow_id6) ? 0 : 1;
+}
+/*--------- end of flow-id mask, hash and compare ---------*/
 
-static VNET_DEFINE(struct task, dn_task);
-static VNET_DEFINE(struct taskqueue *, dn_tq) = NULL;
-static void dummynet_task(void *, int);
+/*--- support functions for the qht hashtable ----
+ * Entries are hashed by flow-id
+ */
+static uint32_t
+q_hash(uintptr_t key, int flags, void *arg)
+{
+	/* compute the hash slot from the flow id */
+	struct ipfw_flow_id *id = (flags & DNHT_KEY_IS_OBJ) ?
+		&((struct dn_queue *)key)->ni.fid :
+		(struct ipfw_flow_id *)key;
 
-static struct mtx dummynet_mtx;
-#define	DUMMYNET_LOCK_INIT() \
-	mtx_init(&dummynet_mtx, "dummynet", NULL, MTX_DEF)
-#define	DUMMYNET_LOCK_DESTROY()	mtx_destroy(&dummynet_mtx)
-#define	DUMMYNET_LOCK()		mtx_lock(&dummynet_mtx)
-#define	DUMMYNET_UNLOCK()	mtx_unlock(&dummynet_mtx)
-#define	DUMMYNET_LOCK_ASSERT()	mtx_assert(&dummynet_mtx, MA_OWNED)
+	return flow_id_hash(id);
+}
 
-static int	config_pipe(struct dn_pipe *p);
-static int	ip_dn_ctl(struct sockopt *sopt);
+static int
+q_match(void *obj, uintptr_t key, int flags, void *arg)
+{
+	struct dn_queue *o = (struct dn_queue *)obj;
+	struct ipfw_flow_id *id2;
 
-static void	dummynet(void *);
-static void	dummynet_flush(void);
-static void	dummynet_send(struct mbuf *);
-void		dummynet_drain(void);
-static int	dummynet_io(struct mbuf **, int , struct ip_fw_args *);
+	if (flags & DNHT_KEY_IS_OBJ) {
+		/* compare pointers */
+		id2 = &((struct dn_queue *)key)->ni.fid;
+	} else {
+		id2 = (struct ipfw_flow_id *)key;
+	}
+	return (0 == flow_id_cmp(&o->ni.fid,  id2));
+}
 
 /*
- * Heap management functions.
- *
- * In the heap, first node is element 0. Children of i are 2i+1 and 2i+2.
- * Some macros help finding parent/children so we can optimize them.
- *
- * heap_init() is called to expand the heap when needed.
- * Increment size in blocks of 16 entries.
- * XXX failure to allocate a new element is a pretty bad failure
- * as we basically stall a whole queue forever!!
- * Returns 1 on error, 0 on success
+ * create a new queue instance for the given 'key'.
  */
-#define HEAP_FATHER(x) ( ( (x) - 1 ) / 2 )
-#define HEAP_LEFT(x) ( 2*(x) + 1 )
-#define HEAP_IS_LEFT(x) ( (x) & 1 )
-#define HEAP_RIGHT(x) ( 2*(x) + 2 )
-#define	HEAP_SWAP(a, b, buffer) { buffer = a ; a = b ; b = buffer ; }
-#define HEAP_INCREMENT	15
+static void *
+q_new(uintptr_t key, int flags, void *arg)
+{   
+	struct dn_queue *q, *template = arg;
+	struct dn_fsk *fs = template->fs;
+	int size = sizeof(*q) + fs->sched->fp->q_datalen;
+
+	q = malloc(size, M_DUMMYNET, M_NOWAIT | M_ZERO);
+	if (q == NULL) {
+		D("no memory for new queue");
+		return NULL;
+	}
 
-static int
-heap_init(struct dn_heap *h, int new_size)
-{
-    struct dn_heap_entry *p;
+	set_oid(&q->ni.oid, DN_QUEUE, size);
+	if (fs->fs.flags & DN_QHT_HASH)
+		q->ni.fid = *(struct ipfw_flow_id *)key;
+	q->fs = fs;
+	q->_si = template->_si;
+	q->_si->q_count++;
 
-    if (h->size >= new_size ) {
-	printf("dummynet: %s, Bogus call, have %d want %d\n", __func__,
-		h->size, new_size);
-	return 0 ;
-    }
-    new_size = (new_size + HEAP_INCREMENT ) & ~HEAP_INCREMENT ;
-    p = malloc(new_size * sizeof(*p), M_DUMMYNET, M_NOWAIT);
-    if (p == NULL) {
-	printf("dummynet: %s, resize %d failed\n", __func__, new_size );
-	return 1 ; /* error */
-    }
-    if (h->size > 0) {
-	bcopy(h->p, p, h->size * sizeof(*p) );
-	free(h->p, M_DUMMYNET);
-    }
-    h->p = p ;
-    h->size = new_size ;
-    return 0 ;
+	if (fs->sched->fp->new_queue)
+		fs->sched->fp->new_queue(q);
+	dn_cfg.queue_count++;
+	return q;
 }
 
 /*
- * Insert element in heap. Normally, p != NULL, we insert p in
- * a new position and bubble up. If p == NULL, then the element is
- * already in place, and key is the position where to start the
- * bubble-up.
- * Returns 1 on failure (cannot allocate new heap entry)
- *
- * If offset > 0 the position (index, int) of the element in the heap is
- * also stored in the element itself at the given offset in bytes.
+ * Notify schedulers that a queue is going away.
+ * If (flags & DN_DESTROY), also free the packets.
+ * The version for callbacks is called q_delete_cb().
  */
-#define SET_OFFSET(heap, node) \
-    if (heap->offset > 0) \
-	    *((int *)((char *)(heap->p[node].object) + heap->offset)) = node ;
-/*
- * RESET_OFFSET is used for sanity checks. It sets offset to an invalid value.
- */
-#define RESET_OFFSET(heap, node) \
-    if (heap->offset > 0) \
-	    *((int *)((char *)(heap->p[node].object) + heap->offset)) = -1 ;
-static int
-heap_insert(struct dn_heap *h, dn_key key1, void *p)
+static void
+dn_delete_queue(struct dn_queue *q, int flags)
 {
-    int son = h->elements ;
+	struct dn_fsk *fs = q->fs;
 
-    if (p == NULL)	/* data already there, set starting point */
-	son = key1 ;
-    else {		/* insert new element at the end, possibly resize */
-	son = h->elements ;
-	if (son == h->size) /* need resize... */
-	    if (heap_init(h, h->elements+1) )
-		return 1 ; /* failure... */
-	h->p[son].object = p ;
-	h->p[son].key = key1 ;
-	h->elements++ ;
-    }
-    while (son > 0) {				/* bubble up */
-	int father = HEAP_FATHER(son) ;
-	struct dn_heap_entry tmp  ;
+	// D("fs %p si %p\n", fs, q->_si);
+	/* notify the parent scheduler that the queue is going away */
+	if (fs && fs->sched->fp->free_queue)
+		fs->sched->fp->free_queue(q);
+	q->_si->q_count--;
+	q->_si = NULL;
+	if (flags & DN_DESTROY) {
+		if (q->mq.head)
+			dn_free_pkts(q->mq.head);
+		bzero(q, sizeof(*q));	// safety
+		free(q, M_DUMMYNET);
+		dn_cfg.queue_count--;
+	}
+}
 
-	if (DN_KEY_LT( h->p[father].key, h->p[son].key ) )
-	    break ; /* found right position */
-	/* son smaller than father, swap and repeat */
-	HEAP_SWAP(h->p[son], h->p[father], tmp) ;
-	SET_OFFSET(h, son);
-	son = father ;
-    }
-    SET_OFFSET(h, son);
-    return 0 ;
+static int
+q_delete_cb(void *q, void *arg)
+{
+	int flags = (int)(uintptr_t)arg;
+	dn_delete_queue(q, flags);
+	return (flags & DN_DESTROY) ? DNHT_SCAN_DEL : 0;
 }
 
 /*
- * remove top element from heap, or obj if obj != NULL
+ * calls dn_delete_queue/q_delete_cb on all queues,
+ * which notifies the parent scheduler and possibly drains packets.
+ * flags & DN_DESTROY: drains queues and destroy qht;
  */
 static void
-heap_extract(struct dn_heap *h, void *obj)
+qht_delete(struct dn_fsk *fs, int flags)
 {
-    int child, father, max = h->elements - 1 ;
-
-    if (max < 0) {
-	printf("dummynet: warning, extract from empty heap 0x%p\n", h);
-	return ;
-    }
-    father = 0 ; /* default: move up smallest child */
-    if (obj != NULL) { /* extract specific element, index is at offset */
-	if (h->offset <= 0)
-	    panic("dummynet: heap_extract from middle not supported on this heap!!!\n");
-	father = *((int *)((char *)obj + h->offset)) ;
-	if (father < 0 || father >= h->elements) {
-	    printf("dummynet: heap_extract, father %d out of bound 0..%d\n",
-		father, h->elements);
-	    panic("dummynet: heap_extract");
+	ND("fs %d start flags %d qht %p",
+		fs->fs.fs_nr, flags, fs->qht);
+	if (!fs->qht)
+		return;
+	if (fs->fs.flags & DN_QHT_HASH) {
+		dn_ht_scan(fs->qht, q_delete_cb, (void *)(uintptr_t)flags);
+		if (flags & DN_DESTROY) {
+			dn_ht_free(fs->qht, 0);
+			fs->qht = NULL;
+		}
+	} else {
+		dn_delete_queue((struct dn_queue *)(fs->qht), flags);
+		if (flags & DN_DESTROY)
+			fs->qht = NULL;
 	}
-    }
-    RESET_OFFSET(h, father);
-    child = HEAP_LEFT(father) ;		/* left child */
-    while (child <= max) {		/* valid entry */
-	if (child != max && DN_KEY_LT(h->p[child+1].key, h->p[child].key) )
-	    child = child+1 ;		/* take right child, otherwise left */
-	h->p[father] = h->p[child] ;
-	SET_OFFSET(h, father);
-	father = child ;
-	child = HEAP_LEFT(child) ;   /* left child for next loop */
-    }
-    h->elements-- ;
-    if (father != max) {
-	/*
-	 * Fill hole with last entry and bubble up, reusing the insert code
-	 */
-	h->p[father] = h->p[max] ;
-	heap_insert(h, father, NULL); /* this one cannot fail */
-    }
 }
 
-#if 0
 /*
- * change object position and update references
- * XXX this one is never used!
+ * Find and possibly create the queue for a MULTIQUEUE scheduler.
+ * We never call it for !MULTIQUEUE (the queue is in the sch_inst).
  */
-static void
-heap_move(struct dn_heap *h, dn_key new_key, void *object)
+struct dn_queue *
+ipdn_q_find(struct dn_fsk *fs, struct dn_sch_inst *si,
+	struct ipfw_flow_id *id)
 {
-    int temp;
-    int i ;
-    int max = h->elements-1 ;
-    struct dn_heap_entry buf ;
+	struct dn_queue template;
 
-    if (h->offset <= 0)
-	panic("cannot move items on this heap");
+	template._si = si;
+	template.fs = fs;
 
-    i = *((int *)((char *)object + h->offset));
-    if (DN_KEY_LT(new_key, h->p[i].key) ) { /* must move up */
-	h->p[i].key = new_key ;
-	for (; i>0 && DN_KEY_LT(new_key, h->p[(temp = HEAP_FATHER(i))].key) ;
-		 i = temp ) { /* bubble up */
-	    HEAP_SWAP(h->p[i], h->p[temp], buf) ;
-	    SET_OFFSET(h, i);
-	}
-    } else {		/* must move down */
-	h->p[i].key = new_key ;
-	while ( (temp = HEAP_LEFT(i)) <= max ) { /* found left child */
-	    if ((temp != max) && DN_KEY_GT(h->p[temp].key, h->p[temp+1].key))
-		temp++ ; /* select child with min key */
-	    if (DN_KEY_GT(new_key, h->p[temp].key)) { /* go down */
-		HEAP_SWAP(h->p[i], h->p[temp], buf) ;
-		SET_OFFSET(h, i);
-	    } else
-		break ;
-	    i = temp ;
+	if (fs->fs.flags & DN_QHT_HASH) {
+		struct ipfw_flow_id masked_id;
+		if (fs->qht == NULL) {
+			fs->qht = dn_ht_init(NULL, fs->fs.buckets,
+				offsetof(struct dn_queue, q_next),
+				q_hash, q_match, q_new);
+			if (fs->qht == NULL)
+				return NULL;
+		}
+		masked_id = *id;
+		flow_id_mask(&fs->fsk_mask, &masked_id);
+		return dn_ht_find(fs->qht, (uintptr_t)&masked_id,
+			DNHT_INSERT, &template);
+	} else {
+		if (fs->qht == NULL)
+			fs->qht = q_new(0, 0, &template);
+		return (struct dn_queue *)fs->qht;
 	}
-    }
-    SET_OFFSET(h, i);
 }
-#endif /* heap_move, unused */
+/*--- end of queue hash table ---*/
 
-/*
- * heapify() will reorganize data inside an array to maintain the
- * heap property. It is needed when we delete a bunch of entries.
+/*--- support functions for the sch_inst hashtable ----
+ *
+ * These are hashed by flow-id
  */
-static void
-heapify(struct dn_heap *h)
+static uint32_t
+si_hash(uintptr_t key, int flags, void *arg)
 {
-    int i ;
+	/* compute the hash slot from the flow id */
+	struct ipfw_flow_id *id = (flags & DNHT_KEY_IS_OBJ) ?
+		&((struct dn_sch_inst *)key)->ni.fid :
+		(struct ipfw_flow_id *)key;
 
-    for (i = 0 ; i < h->elements ; i++ )
-	heap_insert(h, i , NULL) ;
+	return flow_id_hash(id);
 }
 
-/*
- * cleanup the heap and free data structure
- */
-static void
-heap_free(struct dn_heap *h)
+static int
+si_match(void *obj, uintptr_t key, int flags, void *arg)
 {
-    if (h->size >0 )
-	free(h->p, M_DUMMYNET);
-    bzero(h, sizeof(*h) );
-}
+	struct dn_sch_inst *o = obj;
+	struct ipfw_flow_id *id2;
 
-/*
- * --- end of heap management functions ---
- */
-
-/*
- * Return the mbuf tag holding the dummynet state.  As an optimization
- * this is assumed to be the first tag on the list.  If this turns out
- * wrong we'll need to search the list.
- */
-static struct dn_pkt_tag *
-dn_tag_get(struct mbuf *m)
-{
-    struct m_tag *mtag = m_tag_first(m);
-    KASSERT(mtag != NULL &&
-	    mtag->m_tag_cookie == MTAG_ABI_COMPAT &&
-	    mtag->m_tag_id == PACKET_TAG_DUMMYNET,
-	    ("packet on dummynet queue w/o dummynet tag!"));
-    return (struct dn_pkt_tag *)(mtag+1);
+	id2 = (flags & DNHT_KEY_IS_OBJ) ?
+		&((struct dn_sch_inst *)key)->ni.fid :
+		(struct ipfw_flow_id *)key;
+	return flow_id_cmp(&o->ni.fid,  id2) == 0;
 }
 
 /*
- * Scheduler functions:
- *
- * transmit_event() is called when the delay-line needs to enter
- * the scheduler, either because of existing pkts getting ready,
- * or new packets entering the queue. The event handled is the delivery
- * time of the packet.
- *
- * ready_event() does something similar with fixed-rate queues, and the
- * event handled is the finish time of the head pkt.
- *
- * wfq_ready_event() does something similar with WF2Q queues, and the
- * event handled is the start time of the head pkt.
- *
- * In all cases, we make sure that the data structures are consistent
- * before passing pkts out, because this might trigger recursive
- * invocations of the procedures.
+ * create a new instance for the given 'key'
+ * Allocate memory for instance, delay line and scheduler private data.
  */
-static void
-transmit_event(struct dn_pipe *pipe, struct mbuf **head, struct mbuf **tail)
+static void *
+si_new(uintptr_t key, int flags, void *arg)
 {
-	struct mbuf *m;
-	struct dn_pkt_tag *pkt;
+	struct dn_schk *s = arg;
+	struct dn_sch_inst *si;
+	int l = sizeof(*si) + s->fp->si_datalen;
 
-	DUMMYNET_LOCK_ASSERT();
+	si = malloc(l, M_DUMMYNET, M_NOWAIT | M_ZERO);
+	if (si == NULL)
+		goto error;
+	/* Set length only for the part passed up to userland. */
+	set_oid(&si->ni.oid, DN_SCH_I, sizeof(struct dn_flow));
+	set_oid(&(si->dline.oid), DN_DELAY_LINE,
+		sizeof(struct delay_line));
+	/* mark si and dline as outside the event queue */
+	si->ni.oid.id = si->dline.oid.id = -1;
 
-	while ((m = pipe->head) != NULL) {
-		pkt = dn_tag_get(m);
-		if (!DN_KEY_LEQ(pkt->output_time, V_curr_time))
-			break;
+	si->sched = s;
+	si->dline.si = si;
 
-		pipe->head = m->m_nextpkt;
-		if (*tail != NULL)
-			(*tail)->m_nextpkt = m;
-		else

>>> TRUNCATED FOR MAIL (1000 lines) <<<


More information about the p4-projects mailing list