PERFORCE change 176829 for review
Marko Zec
zec at FreeBSD.org
Mon Apr 12 11:18:40 UTC 2010
http://p4web.freebsd.org/@@176829?ac=10
Change 176829 by zec at zec_tpx32 on 2010/04/12 11:18:05
Remove a crude hack which once upon a time allowed one to
directly connect a bpf tap to an ifnet in a remote vnet, but
which stopped working a year ago due to some strange locking
issues related to jail-vnet integration.
This was never intended to be committed to head, and a patch
has been committed to IMUNES which allows us to run wireshark
in a remote vnet without this crap, so there's no point in
keeping this dead code here any longer.
Moreover, back out the now-obsolete V_irtualization patches
for dummynet that anchie@ originally started developing back
in September, but which were never fully completed, and which
now collide with the new version of dummynet in the main tree.
Affected files ...
.. //depot/projects/vimage/src/sys/net/bpf.c#45 edit
.. //depot/projects/vimage/src/sys/netinet/ipfw/ip_dummynet.c#6 edit
Differences ...
==== //depot/projects/vimage/src/sys/net/bpf.c#45 (text+ko) ====
@@ -35,7 +35,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/net/bpf.c,v 1.219 2010/02/20 00:19:21 jkim Exp $");
+__FBSDID("$FreeBSD: head/sys/net/bpf.c 205858 2010-03-29 20:24:03Z jkim $");
#include "opt_bpf.h"
#include "opt_netgraph.h"
@@ -44,7 +44,6 @@
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/conf.h>
-#include <sys/ctype.h>
#include <sys/fcntl.h>
#include <sys/jail.h>
#include <sys/malloc.h>
@@ -615,6 +614,7 @@
mac_bpfdesc_destroy(d);
#endif /* MAC */
knlist_destroy(&d->bd_sel.si_note);
+ callout_drain(&d->bd_callout);
bpf_freed(d);
free(d, M_BPF);
}
@@ -652,7 +652,7 @@
mac_bpfdesc_create(td->td_ucred, d);
#endif
mtx_init(&d->bd_mtx, devtoname(dev), "bpf cdev lock", MTX_DEF);
- callout_init(&d->bd_callout, CALLOUT_MPSAFE);
+ callout_init_mtx(&d->bd_callout, &d->bd_mtx, 0);
knlist_init_mtx(&d->bd_sel.si_note, &d->bd_mtx);
return (0);
@@ -808,13 +808,15 @@
{
struct bpf_d *d = (struct bpf_d *)arg;
- BPFD_LOCK(d);
+ BPFD_LOCK_ASSERT(d);
+
+ if (callout_pending(&d->bd_callout) || !callout_active(&d->bd_callout))
+ return;
if (d->bd_state == BPF_WAITING) {
d->bd_state = BPF_TIMED_OUT;
if (d->bd_slen != 0)
bpf_wakeup(d);
}
- BPFD_UNLOCK(d);
}
static int
@@ -1448,34 +1450,9 @@
struct bpf_if *bp;
struct ifnet *theywant;
-#define XVNET_BPF_SNOOPING
-#if defined(VIMAGE) && defined(XVNET_BPF_SNOOPING)
- struct vnet *target_vnet = curvnet;
- char *c;
-
- /* Attempt to attach to an ifnet in a foreign vnet, specified as @ */
- c = rindex(ifr->ifr_name, '@');
- if ( c != NULL ) {
- struct prison *target_pr;
-
- *c++ = 0;
- if (!isascii(*c) && !isdigit(*c))
- return ENXIO;
- target_pr = prison_find_name(curthread->td_ucred->cr_prison, c);
- if (target_pr == NULL)
- return ENXIO;
- target_vnet = target_pr->pr_vnet;
- }
- CURVNET_SET_QUIET(target_vnet);
-#endif
-
theywant = ifunit(ifr->ifr_name);
- if (theywant == NULL || theywant->if_bpf == NULL) {
-#if defined(VIMAGE) && defined(XVNET_BPF_SNOOPING)
- CURVNET_RESTORE();
-#endif
+ if (theywant == NULL || theywant->if_bpf == NULL)
return (ENXIO);
- }
bp = theywant->if_bpf;
@@ -1515,9 +1492,6 @@
BPFD_LOCK(d);
reset_d(d);
BPFD_UNLOCK(d);
-#if defined(VIMAGE) && defined(XVNET_BPF_SNOOPING)
- CURVNET_RESTORE();
-#endif
return (0);
}
@@ -1606,8 +1580,7 @@
kn->kn_data = d->bd_slen;
if (d->bd_hbuf)
kn->kn_data += d->bd_hlen;
- }
- else if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) {
+ } else if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) {
callout_reset(&d->bd_callout, d->bd_rtout,
bpf_timed_out, d);
d->bd_state = BPF_WAITING;
@@ -1894,13 +1867,14 @@
* free.
*/
bpf_free(d);
- if (d->bd_rfilter) {
+ if (d->bd_rfilter != NULL) {
free((caddr_t)d->bd_rfilter, M_BPF);
#ifdef BPF_JITTER
- bpf_destroy_jit_filter(d->bd_bfilter);
+ if (d->bd_bfilter != NULL)
+ bpf_destroy_jit_filter(d->bd_bfilter);
#endif
}
- if (d->bd_wfilter)
+ if (d->bd_wfilter != NULL)
free((caddr_t)d->bd_wfilter, M_BPF);
mtx_destroy(&d->bd_mtx);
}
==== //depot/projects/vimage/src/sys/netinet/ipfw/ip_dummynet.c#6 (text+ko) ====
@@ -1,5 +1,5 @@
/*-
- * Copyright (c) 1998-2002 Luigi Rizzo, Universita` di Pisa
+ * Copyright (c) 1998-2002,2010 Luigi Rizzo, Universita` di Pisa
* Portions Copyright (c) 2000 Akamba Corp.
* All rights reserved
*
@@ -26,36 +26,14 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/netinet/ipfw/ip_dummynet.c,v 1.5 2009/06/24 22:57:07 oleg Exp $");
+__FBSDID("$FreeBSD: head/sys/netinet/ipfw/ip_dummynet.c 206428 2010-04-09 18:02:19Z luigi $");
-#define DUMMYNET_DEBUG
+/*
+ * Configuration and internal object management for dummynet.
+ */
#include "opt_inet6.h"
-/*
- * This module implements IP dummynet, a bandwidth limiter/delay emulator
- * used in conjunction with the ipfw package.
- * Description of the data structures used is in ip_dummynet.h
- * Here you mainly find the following blocks of code:
- * + variable declarations;
- * + heap management functions;
- * + scheduler and dummynet functions;
- * + configuration and initialization.
- *
- * NOTA BENE: critical sections are protected by the "dummynet lock".
- *
- * Most important Changes:
- *
- * 011004: KLDable
- * 010124: Fixed WF2Q behaviour
- * 010122: Fixed spl protection.
- * 000601: WF2Q support
- * 000106: large rewrite, use heaps to handle very many pipes.
- * 980513: initial release
- *
- * include files marked with XXX are probably not needed
- */
-
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
@@ -69,2258 +47,2147 @@
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/time.h>
-#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <net/if.h> /* IFNAMSIZ, struct ifaddr, ifq head, lock.h mutex.h */
-#include <net/netisr.h>
-#include <net/vnet.h>
#include <netinet/in.h>
-#include <netinet/ip.h> /* ip_len, ip_off */
+#include <netinet/ip_var.h> /* ip_output(), IP_FORWARDING */
#include <netinet/ip_fw.h>
+#include <netinet/ipfw/ip_fw_private.h>
+#include <netinet/ipfw/dn_heap.h>
#include <netinet/ip_dummynet.h>
-#include <netinet/ip_var.h> /* ip_output(), IP_FORWARDING */
+#include <netinet/ipfw/ip_dn_private.h>
+#include <netinet/ipfw/dn_sched.h>
-#include <netinet/if_ether.h> /* various ether_* routines */
+/* which objects to copy */
+#define DN_C_LINK 0x01
+#define DN_C_SCH 0x02
+#define DN_C_FLOW 0x04
+#define DN_C_FS 0x08
+#define DN_C_QUEUE 0x10
-#include <netinet/ip6.h> /* for ip6_input, ip6_output prototypes */
-#include <netinet6/ip6_var.h>
+/* we use this argument in case of a schk_new */
+struct schk_new_arg {
+ struct dn_alg *fp;
+ struct dn_sch *sch;
+};
-/*
- * We keep a private variable for the simulation time, but we could
- * probably use an existing one ("softticks" in sys/kern/kern_timeout.c)
- */
-static VNET_DEFINE(dn_key, curr_time) = 0 ; /* current simulation time */
-#define V_curr_time VNET(curr_time)
+/*---- callout hooks. ----*/
+static struct callout dn_timeout;
+static struct task dn_task;
+static struct taskqueue *dn_tq = NULL;
-static VNET_DEFINE(int, dn_hash_size) = 64 ; /* default hash size */
-#define V_dn_hash_size VNET(dn_hash_size)
+static void
+dummynet(void * __unused unused)
+{
-/* statistics on number of queue searches and search steps */
-static VNET_DEFINE(long, searches);
-static VNET_DEFINE(long, search_steps);
-static VNET_DEFINE(int, pipe_expire) = 1 ; /* expire queue if empty */
-static VNET_DEFINE(int, dn_max_ratio) = 16 ; /* max queues/buckets ratio */
-#define V_searches VNET(searches)
-#define V_search_steps VNET(search_steps)
-#define V_pipe_expire VNET(pipe_expire)
-#define V_dn_max_ratio VNET(dn_max_ratio)
+ taskqueue_enqueue(dn_tq, &dn_task);
+}
-static VNET_DEFINE(long, pipe_slot_limit) = 100; /* Foot shooting limit for pipe queues. */
-static VNET_DEFINE(long, pipe_byte_limit) = 1024 * 1024;
-#define V_pipe_slot_limit VNET(pipe_slot_limit)
-#define V_pipe_byte_limit VNET(pipe_byte_limit)
+void
+dn_reschedule(void)
+{
+ callout_reset(&dn_timeout, 1, dummynet, NULL);
+}
+/*----- end of callout hooks -----*/
-static VNET_DEFINE(int, red_lookup_depth) = 256; /* RED - default lookup table depth */
-static VNET_DEFINE(int, red_avg_pkt_size) = 512; /* RED - default medium packet size */
-static VNET_DEFINE(int, red_max_pkt_size) = 1500; /* RED - default max packet size */
-#define V_red_lookup_depth VNET(red_lookup_depth)
-#define V_red_avg_pkt_size VNET(red_avg_pkt_size)
-#define V_red_max_pkt_size VNET(red_max_pkt_size)
+/* Return a scheduler descriptor given the type or name. */
+static struct dn_alg *
+find_sched_type(int type, char *name)
+{
+ struct dn_alg *d;
-static VNET_DEFINE(struct timeval, prev_t);
-static VNET_DEFINE(struct timeval, t);
-static VNET_DEFINE(long, tick_last); /* Last tick duration (usec). */
-static VNET_DEFINE(long, tick_delta); /* Last vs standard tick diff (usec). */
-static VNET_DEFINE(long, tick_delta_sum); /* Accumulated tick difference (usec).*/
-static VNET_DEFINE(long, tick_adjustment); /* Tick adjustments done. */
-static VNET_DEFINE(long, tick_lost); /* Lost(coalesced) ticks number. */
-/* Adjusted vs non-adjusted curr_time difference (ticks). */
-static VNET_DEFINE(long, tick_diff);
-#define V_prev_t VNET(prev_t)
-#define V_t VNET(t)
-#define V_tick_last VNET(tick_last)
-#define V_tick_delta VNET(tick_delta)
-#define V_tick_delta_sum VNET(tick_delta_sum)
-#define V_tick_adjustment VNET(tick_adjustment)
-#define V_tick_lost VNET(tick_lost)
-#define V_tick_diff VNET(tick_diff)
+ SLIST_FOREACH(d, &dn_cfg.schedlist, next) {
+ if (d->type == type || (name && !strcmp(d->name, name)))
+ return d;
+ }
+ return NULL; /* not found */
+}
-static VNET_DEFINE(int, io_fast);
-static VNET_DEFINE(unsigned long, io_pkt);
-static VNET_DEFINE(unsigned long, io_pkt_fast);
-static VNET_DEFINE(long, io_pkt_drop);
-#define V_io_fast VNET(io_fast)
-#define V_io_pkt VNET(io_pkt)
-#define V_io_pkt_fast VNET(io_pkt_fast)
-#define V_io_pkt_drop VNET(io_pkt_drop)
+int
+ipdn_bound_var(int *v, int dflt, int lo, int hi, const char *msg)
+{
+ int oldv = *v;
+ const char *op = NULL;
+ if (oldv < lo) {
+ *v = dflt;
+ op = "Bump";
+ } else if (oldv > hi) {
+ *v = hi;
+ op = "Clamp";
+ } else
+ return *v;
+ if (op && msg)
+ printf("%s %s to %d (was %d)\n", op, msg, *v, oldv);
+ return *v;
+}
+/*---- flow_id mask, hash and compare functions ---*/
/*
- * Three heaps contain queues and pipes that the scheduler handles:
- *
- * ready_heap contains all dn_flow_queue related to fixed-rate pipes.
- *
- * wfq_ready_heap contains the pipes associated with WF2Q flows
- *
- * extract_heap contains pipes associated with delay lines.
- *
+ * The flow_id includes the 5-tuple, the queue/pipe number
+ * which we store in the extra area in host order,
+ * and for ipv6 also the flow_id6.
+ * XXX see if we want the tos byte (can store in 'flags')
*/
+static struct ipfw_flow_id *
+flow_id_mask(struct ipfw_flow_id *mask, struct ipfw_flow_id *id)
+{
+ int is_v6 = IS_IP6_FLOW_ID(id);
-MALLOC_DEFINE(M_DUMMYNET, "dummynet", "dummynet heap");
+ id->dst_port &= mask->dst_port;
+ id->src_port &= mask->src_port;
+ id->proto &= mask->proto;
+ id->extra &= mask->extra;
+ if (is_v6) {
+ APPLY_MASK(&id->dst_ip6, &mask->dst_ip6);
+ APPLY_MASK(&id->src_ip6, &mask->src_ip6);
+ id->flow_id6 &= mask->flow_id6;
+ } else {
+ id->dst_ip &= mask->dst_ip;
+ id->src_ip &= mask->src_ip;
+ }
+ return id;
+}
-static VNET_DEFINE(struct dn_heap, ready_heap);
-static VNET_DEFINE(struct dn_heap, extract_heap);
-static VNET_DEFINE(struct dn_heap, wfq_ready_heap);
+/* computes an OR of two masks, result in dst and also returned */
+static struct ipfw_flow_id *
+flow_id_or(struct ipfw_flow_id *src, struct ipfw_flow_id *dst)
+{
+ int is_v6 = IS_IP6_FLOW_ID(dst);
-static int heap_init(struct dn_heap *h, int size);
-static int heap_insert (struct dn_heap *h, dn_key key1, void *p);
-static void heap_extract(struct dn_heap *h, void *obj);
-static void transmit_event(struct dn_pipe *pipe, struct mbuf **head,
- struct mbuf **tail);
-static void ready_event(struct dn_flow_queue *q, struct mbuf **head,
- struct mbuf **tail);
-static void ready_event_wfq(struct dn_pipe *p, struct mbuf **head,
- struct mbuf **tail);
+ dst->dst_port |= src->dst_port;
+ dst->src_port |= src->src_port;
+ dst->proto |= src->proto;
+ dst->extra |= src->extra;
+ if (is_v6) {
+#define OR_MASK(_d, _s) \
+ (_d)->__u6_addr.__u6_addr32[0] |= (_s)->__u6_addr.__u6_addr32[0]; \
+ (_d)->__u6_addr.__u6_addr32[1] |= (_s)->__u6_addr.__u6_addr32[1]; \
+ (_d)->__u6_addr.__u6_addr32[2] |= (_s)->__u6_addr.__u6_addr32[2]; \
+ (_d)->__u6_addr.__u6_addr32[3] |= (_s)->__u6_addr.__u6_addr32[3];
+ OR_MASK(&dst->dst_ip6, &src->dst_ip6);
+ OR_MASK(&dst->src_ip6, &src->src_ip6);
+#undef OR_MASK
+ dst->flow_id6 |= src->flow_id6;
+ } else {
+ dst->dst_ip |= src->dst_ip;
+ dst->src_ip |= src->src_ip;
+ }
+ return dst;
+}
-#define HASHSIZE 16
-#define HASH(num) ((((num) >> 8) ^ ((num) >> 4) ^ (num)) & 0x0f)
-static VNET_DEFINE(struct dn_pipe_head, pipehash[HASHSIZE]); /* all pipes */
-static VNET_DEFINE(struct dn_flow_set_head, flowsethash[HASHSIZE]); /* all flowsets */
+static int
+nonzero_mask(struct ipfw_flow_id *m)
+{
+ if (m->dst_port || m->src_port || m->proto || m->extra)
+ return 1;
+ if (IS_IP6_FLOW_ID(m)) {
+ return
+ m->dst_ip6.__u6_addr.__u6_addr32[0] ||
+ m->dst_ip6.__u6_addr.__u6_addr32[1] ||
+ m->dst_ip6.__u6_addr.__u6_addr32[2] ||
+ m->dst_ip6.__u6_addr.__u6_addr32[3] ||
+ m->src_ip6.__u6_addr.__u6_addr32[0] ||
+ m->src_ip6.__u6_addr.__u6_addr32[1] ||
+ m->src_ip6.__u6_addr.__u6_addr32[2] ||
+ m->src_ip6.__u6_addr.__u6_addr32[3] ||
+ m->flow_id6;
+ } else {
+ return m->dst_ip || m->src_ip;
+ }
+}
-static VNET_DEFINE(struct callout, dn_timeout);
+/* XXX we may want a better hash function */
+static uint32_t
+flow_id_hash(struct ipfw_flow_id *id)
+{
+ uint32_t i;
-extern void (*bridge_dn_p)(struct mbuf *, struct ifnet *);
+ if (IS_IP6_FLOW_ID(id)) {
+ uint32_t *d = (uint32_t *)&id->dst_ip6;
+ uint32_t *s = (uint32_t *)&id->src_ip6;
+ i = (d[0] ) ^ (d[1]) ^
+ (d[2] ) ^ (d[3]) ^
+ (d[0] >> 15) ^ (d[1] >> 15) ^
+ (d[2] >> 15) ^ (d[3] >> 15) ^
+ (s[0] << 1) ^ (s[1] << 1) ^
+ (s[2] << 1) ^ (s[3] << 1) ^
+ (s[0] << 16) ^ (s[1] << 16) ^
+ (s[2] << 16) ^ (s[3] << 16) ^
+ (id->dst_port << 1) ^ (id->src_port) ^
+ (id->extra) ^
+ (id->proto ) ^ (id->flow_id6);
+ } else {
+ i = (id->dst_ip) ^ (id->dst_ip >> 15) ^
+ (id->src_ip << 1) ^ (id->src_ip >> 16) ^
+ (id->extra) ^
+ (id->dst_port << 1) ^ (id->src_port) ^ (id->proto);
+ }
+ return i;
+}
-#ifdef SYSCTL_NODE
-SYSCTL_DECL(_net_inet);
-SYSCTL_DECL(_net_inet_ip);
+/* Like bcmp, returns 0 if ids match, 1 otherwise. */
+static int
+flow_id_cmp(struct ipfw_flow_id *id1, struct ipfw_flow_id *id2)
+{
+ int is_v6 = IS_IP6_FLOW_ID(id1);
-SYSCTL_NODE(_net_inet_ip, OID_AUTO, dummynet, CTLFLAG_RW, 0, "Dummynet");
-SYSCTL_VNET_INT(_net_inet_ip_dummynet, OID_AUTO, hash_size,
- CTLFLAG_RW, &VNET_NAME(dn_hash_size), 0, "Default hash table size");
-#if 0 /* curr_time is 64 bit */
-SYSCTL_VNET_LONG(_net_inet_ip_dummynet, OID_AUTO, curr_time,
- CTLFLAG_RD, &VNET_NAME(curr_time), 0, "Current tick");
-#endif
-SYSCTL_VNET_INT(_net_inet_ip_dummynet, OID_AUTO, ready_heap,
- CTLFLAG_RD, &VNET_NAME(ready_heap).size, 0, "Size of ready heap");
-SYSCTL_VNET_INT(_net_inet_ip_dummynet, OID_AUTO, extract_heap,
- CTLFLAG_RD, &VNET_NAME(extract_heap).size, 0, "Size of extract heap");
-SYSCTL_VNET_LONG(_net_inet_ip_dummynet, OID_AUTO, searches,
- CTLFLAG_RD, &VNET_NAME(searches), 0, "Number of queue searches");
-SYSCTL_VNET_LONG(_net_inet_ip_dummynet, OID_AUTO, search_steps,
- CTLFLAG_RD, &VNET_NAME(search_steps), 0, "Number of queue search steps");
-SYSCTL_VNET_INT(_net_inet_ip_dummynet, OID_AUTO, expire,
- CTLFLAG_RW, &VNET_NAME(pipe_expire), 0, "Expire queue if empty");
-SYSCTL_VNET_INT(_net_inet_ip_dummynet, OID_AUTO, max_chain_len,
- CTLFLAG_RW, &VNET_NAME(dn_max_ratio), 0,
- "Max ratio between dynamic queues and buckets");
-SYSCTL_VNET_INT(_net_inet_ip_dummynet, OID_AUTO, red_lookup_depth,
- CTLFLAG_RD, &VNET_NAME(red_lookup_depth), 0, "Depth of RED lookup table");
-SYSCTL_VNET_INT(_net_inet_ip_dummynet, OID_AUTO, red_avg_pkt_size,
- CTLFLAG_RD, &VNET_NAME(red_avg_pkt_size), 0, "RED Medium packet size");
-SYSCTL_VNET_INT(_net_inet_ip_dummynet, OID_AUTO, red_max_pkt_size,
- CTLFLAG_RD, &VNET_NAME(red_max_pkt_size), 0, "RED Max packet size");
-SYSCTL_VNET_LONG(_net_inet_ip_dummynet, OID_AUTO, tick_delta,
- CTLFLAG_RD, &VNET_NAME(tick_delta), 0, "Last vs standard tick difference (usec).");
-SYSCTL_VNET_LONG(_net_inet_ip_dummynet, OID_AUTO, tick_delta_sum,
- CTLFLAG_RD, &VNET_NAME(tick_delta_sum), 0, "Accumulated tick difference (usec).");
-SYSCTL_VNET_LONG(_net_inet_ip_dummynet, OID_AUTO, tick_adjustment,
- CTLFLAG_RD, &VNET_NAME(tick_adjustment), 0, "Tick adjustments done.");
-SYSCTL_VNET_LONG(_net_inet_ip_dummynet, OID_AUTO, tick_diff,
- CTLFLAG_RD, &VNET_NAME(tick_diff), 0,
- "Adjusted vs non-adjusted curr_time difference (ticks).");
-SYSCTL_VNET_LONG(_net_inet_ip_dummynet, OID_AUTO, tick_lost,
- CTLFLAG_RD, &VNET_NAME(tick_lost), 0,
- "Number of ticks coalesced by dummynet taskqueue.");
-SYSCTL_VNET_INT(_net_inet_ip_dummynet, OID_AUTO, io_fast,
- CTLFLAG_RW, &VNET_NAME(io_fast), 0, "Enable fast dummynet io.");
-SYSCTL_VNET_ULONG(_net_inet_ip_dummynet, OID_AUTO, io_pkt,
- CTLFLAG_RD, &VNET_NAME(io_pkt), 0,
- "Number of packets passed to dummynet.");
-SYSCTL_VNET_ULONG(_net_inet_ip_dummynet, OID_AUTO, io_pkt_fast,
- CTLFLAG_RD, &VNET_NAME(io_pkt_fast), 0,
- "Number of packets bypassed dummynet scheduler.");
-SYSCTL_VNET_ULONG(_net_inet_ip_dummynet, OID_AUTO, io_pkt_drop,
- CTLFLAG_RD, &VNET_NAME(io_pkt_drop), 0,
- "Number of packets dropped by dummynet.");
-SYSCTL_VNET_LONG(_net_inet_ip_dummynet, OID_AUTO, pipe_slot_limit,
- CTLFLAG_RW, &VNET_NAME(pipe_slot_limit), 0, "Upper limit in slots for pipe queue.");
-SYSCTL_VNET_LONG(_net_inet_ip_dummynet, OID_AUTO, pipe_byte_limit,
- CTLFLAG_RW, &VNET_NAME(pipe_byte_limit), 0, "Upper limit in bytes for pipe queue.");
-#endif
+ if (!is_v6) {
+ if (IS_IP6_FLOW_ID(id2))
+ return 1; /* different address families */
-#ifdef DUMMYNET_DEBUG
-VNET_DEFINE(int, dummynet_debug) = 0;
-#ifdef SYSCTL_NODE
-SYSCTL_VNET_INT(_net_inet_ip_dummynet, OID_AUTO, debug, CTLFLAG_RW, &VNET_NAME(dummynet_debug),
- 0, "control debugging printfs");
-#endif
-#define DPRINTF(X) if (V_dummynet_debug) printf X
-#else
-#define DPRINTF(X)
-#endif
+ return (id1->dst_ip == id2->dst_ip &&
+ id1->src_ip == id2->src_ip &&
+ id1->dst_port == id2->dst_port &&
+ id1->src_port == id2->src_port &&
+ id1->proto == id2->proto &&
+ id1->extra == id2->extra) ? 0 : 1;
+ }
+ /* the ipv6 case */
+ return (
+ !bcmp(&id1->dst_ip6,&id2->dst_ip6, sizeof(id1->dst_ip6)) &&
+ !bcmp(&id1->src_ip6,&id2->src_ip6, sizeof(id1->src_ip6)) &&
+ id1->dst_port == id2->dst_port &&
+ id1->src_port == id2->src_port &&
+ id1->proto == id2->proto &&
+ id1->extra == id2->extra &&
+ id1->flow_id6 == id2->flow_id6) ? 0 : 1;
+}
+/*--------- end of flow-id mask, hash and compare ---------*/
-static VNET_DEFINE(struct task, dn_task);
-static VNET_DEFINE(struct taskqueue *, dn_tq) = NULL;
-static void dummynet_task(void *, int);
+/*--- support functions for the qht hashtable ----
+ * Entries are hashed by flow-id
+ */
+static uint32_t
+q_hash(uintptr_t key, int flags, void *arg)
+{
+ /* compute the hash slot from the flow id */
+ struct ipfw_flow_id *id = (flags & DNHT_KEY_IS_OBJ) ?
+ &((struct dn_queue *)key)->ni.fid :
+ (struct ipfw_flow_id *)key;
-static struct mtx dummynet_mtx;
-#define DUMMYNET_LOCK_INIT() \
- mtx_init(&dummynet_mtx, "dummynet", NULL, MTX_DEF)
-#define DUMMYNET_LOCK_DESTROY() mtx_destroy(&dummynet_mtx)
-#define DUMMYNET_LOCK() mtx_lock(&dummynet_mtx)
-#define DUMMYNET_UNLOCK() mtx_unlock(&dummynet_mtx)
-#define DUMMYNET_LOCK_ASSERT() mtx_assert(&dummynet_mtx, MA_OWNED)
+ return flow_id_hash(id);
+}
-static int config_pipe(struct dn_pipe *p);
-static int ip_dn_ctl(struct sockopt *sopt);
+static int
+q_match(void *obj, uintptr_t key, int flags, void *arg)
+{
+ struct dn_queue *o = (struct dn_queue *)obj;
+ struct ipfw_flow_id *id2;
-static void dummynet(void *);
-static void dummynet_flush(void);
-static void dummynet_send(struct mbuf *);
-void dummynet_drain(void);
-static int dummynet_io(struct mbuf **, int , struct ip_fw_args *);
+ if (flags & DNHT_KEY_IS_OBJ) {
+ /* compare pointers */
+ id2 = &((struct dn_queue *)key)->ni.fid;
+ } else {
+ id2 = (struct ipfw_flow_id *)key;
+ }
+ return (0 == flow_id_cmp(&o->ni.fid, id2));
+}
/*
- * Heap management functions.
- *
- * In the heap, first node is element 0. Children of i are 2i+1 and 2i+2.
- * Some macros help finding parent/children so we can optimize them.
- *
- * heap_init() is called to expand the heap when needed.
- * Increment size in blocks of 16 entries.
- * XXX failure to allocate a new element is a pretty bad failure
- * as we basically stall a whole queue forever!!
- * Returns 1 on error, 0 on success
+ * create a new queue instance for the given 'key'.
*/
-#define HEAP_FATHER(x) ( ( (x) - 1 ) / 2 )
-#define HEAP_LEFT(x) ( 2*(x) + 1 )
-#define HEAP_IS_LEFT(x) ( (x) & 1 )
-#define HEAP_RIGHT(x) ( 2*(x) + 2 )
-#define HEAP_SWAP(a, b, buffer) { buffer = a ; a = b ; b = buffer ; }
-#define HEAP_INCREMENT 15
+static void *
+q_new(uintptr_t key, int flags, void *arg)
+{
+ struct dn_queue *q, *template = arg;
+ struct dn_fsk *fs = template->fs;
+ int size = sizeof(*q) + fs->sched->fp->q_datalen;
+
+ q = malloc(size, M_DUMMYNET, M_NOWAIT | M_ZERO);
+ if (q == NULL) {
+ D("no memory for new queue");
+ return NULL;
+ }
-static int
-heap_init(struct dn_heap *h, int new_size)
-{
- struct dn_heap_entry *p;
+ set_oid(&q->ni.oid, DN_QUEUE, size);
+ if (fs->fs.flags & DN_QHT_HASH)
+ q->ni.fid = *(struct ipfw_flow_id *)key;
+ q->fs = fs;
+ q->_si = template->_si;
+ q->_si->q_count++;
- if (h->size >= new_size ) {
- printf("dummynet: %s, Bogus call, have %d want %d\n", __func__,
- h->size, new_size);
- return 0 ;
- }
- new_size = (new_size + HEAP_INCREMENT ) & ~HEAP_INCREMENT ;
- p = malloc(new_size * sizeof(*p), M_DUMMYNET, M_NOWAIT);
- if (p == NULL) {
- printf("dummynet: %s, resize %d failed\n", __func__, new_size );
- return 1 ; /* error */
- }
- if (h->size > 0) {
- bcopy(h->p, p, h->size * sizeof(*p) );
- free(h->p, M_DUMMYNET);
- }
- h->p = p ;
- h->size = new_size ;
- return 0 ;
+ if (fs->sched->fp->new_queue)
+ fs->sched->fp->new_queue(q);
+ dn_cfg.queue_count++;
+ return q;
}
/*
- * Insert element in heap. Normally, p != NULL, we insert p in
- * a new position and bubble up. If p == NULL, then the element is
- * already in place, and key is the position where to start the
- * bubble-up.
- * Returns 1 on failure (cannot allocate new heap entry)
- *
- * If offset > 0 the position (index, int) of the element in the heap is
- * also stored in the element itself at the given offset in bytes.
+ * Notify schedulers that a queue is going away.
+ * If (flags & DN_DESTROY), also free the packets.
+ * The version for callbacks is called q_delete_cb().
*/
-#define SET_OFFSET(heap, node) \
- if (heap->offset > 0) \
- *((int *)((char *)(heap->p[node].object) + heap->offset)) = node ;
-/*
- * RESET_OFFSET is used for sanity checks. It sets offset to an invalid value.
- */
-#define RESET_OFFSET(heap, node) \
- if (heap->offset > 0) \
- *((int *)((char *)(heap->p[node].object) + heap->offset)) = -1 ;
-static int
-heap_insert(struct dn_heap *h, dn_key key1, void *p)
+static void
+dn_delete_queue(struct dn_queue *q, int flags)
{
- int son = h->elements ;
+ struct dn_fsk *fs = q->fs;
- if (p == NULL) /* data already there, set starting point */
- son = key1 ;
- else { /* insert new element at the end, possibly resize */
- son = h->elements ;
- if (son == h->size) /* need resize... */
- if (heap_init(h, h->elements+1) )
- return 1 ; /* failure... */
- h->p[son].object = p ;
- h->p[son].key = key1 ;
- h->elements++ ;
- }
- while (son > 0) { /* bubble up */
- int father = HEAP_FATHER(son) ;
- struct dn_heap_entry tmp ;
+ // D("fs %p si %p\n", fs, q->_si);
+ /* notify the parent scheduler that the queue is going away */
+ if (fs && fs->sched->fp->free_queue)
+ fs->sched->fp->free_queue(q);
+ q->_si->q_count--;
+ q->_si = NULL;
+ if (flags & DN_DESTROY) {
+ if (q->mq.head)
+ dn_free_pkts(q->mq.head);
+ bzero(q, sizeof(*q)); // safety
+ free(q, M_DUMMYNET);
+ dn_cfg.queue_count--;
+ }
+}
- if (DN_KEY_LT( h->p[father].key, h->p[son].key ) )
- break ; /* found right position */
- /* son smaller than father, swap and repeat */
- HEAP_SWAP(h->p[son], h->p[father], tmp) ;
- SET_OFFSET(h, son);
- son = father ;
- }
- SET_OFFSET(h, son);
- return 0 ;
+static int
+q_delete_cb(void *q, void *arg)
+{
+ int flags = (int)(uintptr_t)arg;
+ dn_delete_queue(q, flags);
+ return (flags & DN_DESTROY) ? DNHT_SCAN_DEL : 0;
}
/*
- * remove top element from heap, or obj if obj != NULL
+ * calls dn_delete_queue/q_delete_cb on all queues,
+ * which notifies the parent scheduler and possibly drains packets.
+ * flags & DN_DESTROY: drains queues and destroy qht;
*/
static void
-heap_extract(struct dn_heap *h, void *obj)
+qht_delete(struct dn_fsk *fs, int flags)
{
- int child, father, max = h->elements - 1 ;
-
- if (max < 0) {
- printf("dummynet: warning, extract from empty heap 0x%p\n", h);
- return ;
- }
- father = 0 ; /* default: move up smallest child */
- if (obj != NULL) { /* extract specific element, index is at offset */
- if (h->offset <= 0)
- panic("dummynet: heap_extract from middle not supported on this heap!!!\n");
- father = *((int *)((char *)obj + h->offset)) ;
- if (father < 0 || father >= h->elements) {
- printf("dummynet: heap_extract, father %d out of bound 0..%d\n",
- father, h->elements);
- panic("dummynet: heap_extract");
+ ND("fs %d start flags %d qht %p",
+ fs->fs.fs_nr, flags, fs->qht);
+ if (!fs->qht)
+ return;
+ if (fs->fs.flags & DN_QHT_HASH) {
+ dn_ht_scan(fs->qht, q_delete_cb, (void *)(uintptr_t)flags);
+ if (flags & DN_DESTROY) {
+ dn_ht_free(fs->qht, 0);
+ fs->qht = NULL;
+ }
+ } else {
+ dn_delete_queue((struct dn_queue *)(fs->qht), flags);
+ if (flags & DN_DESTROY)
+ fs->qht = NULL;
}
- }
- RESET_OFFSET(h, father);
- child = HEAP_LEFT(father) ; /* left child */
- while (child <= max) { /* valid entry */
- if (child != max && DN_KEY_LT(h->p[child+1].key, h->p[child].key) )
- child = child+1 ; /* take right child, otherwise left */
- h->p[father] = h->p[child] ;
- SET_OFFSET(h, father);
- father = child ;
- child = HEAP_LEFT(child) ; /* left child for next loop */
- }
- h->elements-- ;
- if (father != max) {
- /*
- * Fill hole with last entry and bubble up, reusing the insert code
- */
- h->p[father] = h->p[max] ;
- heap_insert(h, father, NULL); /* this one cannot fail */
- }
}
-#if 0
/*
- * change object position and update references
- * XXX this one is never used!
+ * Find and possibly create the queue for a MULTIQUEUE scheduler.
+ * We never call it for !MULTIQUEUE (the queue is in the sch_inst).
*/
-static void
-heap_move(struct dn_heap *h, dn_key new_key, void *object)
+struct dn_queue *
+ipdn_q_find(struct dn_fsk *fs, struct dn_sch_inst *si,
+ struct ipfw_flow_id *id)
{
- int temp;
- int i ;
- int max = h->elements-1 ;
- struct dn_heap_entry buf ;
+ struct dn_queue template;
- if (h->offset <= 0)
- panic("cannot move items on this heap");
+ template._si = si;
+ template.fs = fs;
- i = *((int *)((char *)object + h->offset));
- if (DN_KEY_LT(new_key, h->p[i].key) ) { /* must move up */
- h->p[i].key = new_key ;
- for (; i>0 && DN_KEY_LT(new_key, h->p[(temp = HEAP_FATHER(i))].key) ;
- i = temp ) { /* bubble up */
- HEAP_SWAP(h->p[i], h->p[temp], buf) ;
- SET_OFFSET(h, i);
- }
- } else { /* must move down */
- h->p[i].key = new_key ;
- while ( (temp = HEAP_LEFT(i)) <= max ) { /* found left child */
- if ((temp != max) && DN_KEY_GT(h->p[temp].key, h->p[temp+1].key))
- temp++ ; /* select child with min key */
- if (DN_KEY_GT(new_key, h->p[temp].key)) { /* go down */
- HEAP_SWAP(h->p[i], h->p[temp], buf) ;
- SET_OFFSET(h, i);
- } else
- break ;
- i = temp ;
+ if (fs->fs.flags & DN_QHT_HASH) {
+ struct ipfw_flow_id masked_id;
+ if (fs->qht == NULL) {
+ fs->qht = dn_ht_init(NULL, fs->fs.buckets,
+ offsetof(struct dn_queue, q_next),
+ q_hash, q_match, q_new);
+ if (fs->qht == NULL)
+ return NULL;
+ }
+ masked_id = *id;
+ flow_id_mask(&fs->fsk_mask, &masked_id);
+ return dn_ht_find(fs->qht, (uintptr_t)&masked_id,
+ DNHT_INSERT, &template);
+ } else {
+ if (fs->qht == NULL)
+ fs->qht = q_new(0, 0, &template);
+ return (struct dn_queue *)fs->qht;
}
- }
- SET_OFFSET(h, i);
}
-#endif /* heap_move, unused */
+/*--- end of queue hash table ---*/
-/*
- * heapify() will reorganize data inside an array to maintain the
- * heap property. It is needed when we delete a bunch of entries.
+/*--- support functions for the sch_inst hashtable ----
+ *
+ * These are hashed by flow-id
*/
-static void
-heapify(struct dn_heap *h)
+static uint32_t
+si_hash(uintptr_t key, int flags, void *arg)
{
- int i ;
+ /* compute the hash slot from the flow id */
+ struct ipfw_flow_id *id = (flags & DNHT_KEY_IS_OBJ) ?
+ &((struct dn_sch_inst *)key)->ni.fid :
+ (struct ipfw_flow_id *)key;
- for (i = 0 ; i < h->elements ; i++ )
- heap_insert(h, i , NULL) ;
+ return flow_id_hash(id);
}
-/*
- * cleanup the heap and free data structure
- */
-static void
-heap_free(struct dn_heap *h)
+static int
+si_match(void *obj, uintptr_t key, int flags, void *arg)
{
- if (h->size >0 )
- free(h->p, M_DUMMYNET);
- bzero(h, sizeof(*h) );
-}
+ struct dn_sch_inst *o = obj;
+ struct ipfw_flow_id *id2;
-/*
- * --- end of heap management functions ---
- */
-
-/*
- * Return the mbuf tag holding the dummynet state. As an optimization
- * this is assumed to be the first tag on the list. If this turns out
- * wrong we'll need to search the list.
- */
-static struct dn_pkt_tag *
-dn_tag_get(struct mbuf *m)
-{
- struct m_tag *mtag = m_tag_first(m);
- KASSERT(mtag != NULL &&
- mtag->m_tag_cookie == MTAG_ABI_COMPAT &&
- mtag->m_tag_id == PACKET_TAG_DUMMYNET,
- ("packet on dummynet queue w/o dummynet tag!"));
- return (struct dn_pkt_tag *)(mtag+1);
+ id2 = (flags & DNHT_KEY_IS_OBJ) ?
+ &((struct dn_sch_inst *)key)->ni.fid :
+ (struct ipfw_flow_id *)key;
+ return flow_id_cmp(&o->ni.fid, id2) == 0;
}
/*
- * Scheduler functions:
- *
- * transmit_event() is called when the delay-line needs to enter
- * the scheduler, either because of existing pkts getting ready,
- * or new packets entering the queue. The event handled is the delivery
- * time of the packet.
- *
- * ready_event() does something similar with fixed-rate queues, and the
- * event handled is the finish time of the head pkt.
- *
- * wfq_ready_event() does something similar with WF2Q queues, and the
- * event handled is the start time of the head pkt.
- *
- * In all cases, we make sure that the data structures are consistent
- * before passing pkts out, because this might trigger recursive
- * invocations of the procedures.
+ * create a new instance for the given 'key'
+ * Allocate memory for instance, delay line and scheduler private data.
*/
-static void
-transmit_event(struct dn_pipe *pipe, struct mbuf **head, struct mbuf **tail)
+static void *
+si_new(uintptr_t key, int flags, void *arg)
{
- struct mbuf *m;
- struct dn_pkt_tag *pkt;
+ struct dn_schk *s = arg;
+ struct dn_sch_inst *si;
+ int l = sizeof(*si) + s->fp->si_datalen;
- DUMMYNET_LOCK_ASSERT();
+ si = malloc(l, M_DUMMYNET, M_NOWAIT | M_ZERO);
+ if (si == NULL)
+ goto error;
+ /* Set length only for the part passed up to userland. */
+ set_oid(&si->ni.oid, DN_SCH_I, sizeof(struct dn_flow));
+ set_oid(&(si->dline.oid), DN_DELAY_LINE,
+ sizeof(struct delay_line));
+ /* mark si and dline as outside the event queue */
+ si->ni.oid.id = si->dline.oid.id = -1;
- while ((m = pipe->head) != NULL) {
- pkt = dn_tag_get(m);
- if (!DN_KEY_LEQ(pkt->output_time, V_curr_time))
- break;
+ si->sched = s;
+ si->dline.si = si;
- pipe->head = m->m_nextpkt;
- if (*tail != NULL)
- (*tail)->m_nextpkt = m;
- else
>>> TRUNCATED FOR MAIL (1000 lines) <<<
More information about the p4-projects
mailing list