svn commit: r204591 - in head: sbin/ipfw sys/conf sys/net sys/netinet sys/netinet/ipfw sys/netinet/ipfw/test

Luigi Rizzo luigi at FreeBSD.org
Tue Mar 2 17:40:49 UTC 2010


Author: luigi
Date: Tue Mar  2 17:40:48 2010
New Revision: 204591
URL: http://svn.freebsd.org/changeset/base/204591

Log:
  Bring in the most recent version of ipfw and dummynet, developed
  and tested over the past two months in the ipfw3-head branch.  This
  also happens to be the same code available in the Linux and Windows
  ports of ipfw and dummynet.
  
  The major enhancement is a completely restructured version of
  dummynet, with support for different packet scheduling algorithms
  (loadable at runtime), faster queue/pipe lookup, and a much cleaner
  internal architecture and kernel/userland ABI which simplifies
  future extensions.
  
  In addition to the existing schedulers (FIFO and WF2Q+), we include
  a Deficit Round Robin (DRR or RR for brevity) scheduler, and a new,
  very fast version of WF2Q+ called QFQ.
  
  Some test code is also present (in sys/netinet/ipfw/test) that
  lets you build and test schedulers in userland.
  
  Also, we have added a compatibility layer that understands requests
  from the RELENG_7 and RELENG_8 versions of the /sbin/ipfw binaries,
  and replies correctly (at least, it does its best; sometimes you
  just cannot tell who sent the request and how to answer).
  The compatibility layer should make it possible to MFC this code in a
  relatively short time.
  
  Some minor glitches (e.g. handling of ipfw set enable/disable,
  and a workaround for a bug in RELENG_7's /sbin/ipfw) will be
  fixed with separate commits.
  
  CREDITS:
  This work has been partly supported by the ONELAB2 project, and
  mostly developed by Riccardo Panicucci and myself.
  The code for the qfq scheduler is mostly from Fabio Checconi,
  and Marta Carbone and Francesco Magno have helped with testing,
  debugging and some bug fixes.

Added:
  head/sys/netinet/ipfw/dn_heap.c   (contents, props changed)
  head/sys/netinet/ipfw/dn_heap.h   (contents, props changed)
  head/sys/netinet/ipfw/dn_sched.h   (contents, props changed)
  head/sys/netinet/ipfw/dn_sched_fifo.c   (contents, props changed)
  head/sys/netinet/ipfw/dn_sched_qfq.c   (contents, props changed)
  head/sys/netinet/ipfw/dn_sched_rr.c   (contents, props changed)
  head/sys/netinet/ipfw/dn_sched_wf2q.c   (contents, props changed)
  head/sys/netinet/ipfw/dummynet.txt   (contents, props changed)
  head/sys/netinet/ipfw/ip_dn_glue.c   (contents, props changed)
  head/sys/netinet/ipfw/ip_dn_io.c   (contents, props changed)
  head/sys/netinet/ipfw/ip_dn_private.h   (contents, props changed)
  head/sys/netinet/ipfw/test/
  head/sys/netinet/ipfw/test/Makefile   (contents, props changed)
  head/sys/netinet/ipfw/test/dn_test.h   (contents, props changed)
  head/sys/netinet/ipfw/test/main.c   (contents, props changed)
  head/sys/netinet/ipfw/test/mylist.h   (contents, props changed)
  head/sys/netinet/ipfw/test/test_dn_heap.c   (contents, props changed)
  head/sys/netinet/ipfw/test/test_dn_sched.c   (contents, props changed)
Modified:
  head/sbin/ipfw/Makefile
  head/sbin/ipfw/altq.c
  head/sbin/ipfw/dummynet.c
  head/sbin/ipfw/ipfw.8
  head/sbin/ipfw/ipfw2.c
  head/sbin/ipfw/ipfw2.h
  head/sbin/ipfw/main.c
  head/sys/conf/files
  head/sys/net/if_bridge.c
  head/sys/net/if_ethersubr.c
  head/sys/netinet/ip_dummynet.h
  head/sys/netinet/ip_fw.h
  head/sys/netinet/ipfw/ip_dummynet.c
  head/sys/netinet/ipfw/ip_fw2.c
  head/sys/netinet/ipfw/ip_fw_dynamic.c
  head/sys/netinet/ipfw/ip_fw_log.c
  head/sys/netinet/ipfw/ip_fw_pfil.c
  head/sys/netinet/ipfw/ip_fw_private.h
  head/sys/netinet/ipfw/ip_fw_sockopt.c
  head/sys/netinet/ipfw/ip_fw_table.c

Modified: head/sbin/ipfw/Makefile
==============================================================================
--- head/sbin/ipfw/Makefile	Tue Mar  2 17:34:11 2010	(r204590)
+++ head/sbin/ipfw/Makefile	Tue Mar  2 17:40:48 2010	(r204591)
@@ -3,7 +3,6 @@
 PROG=	ipfw
 SRCS=	ipfw2.c dummynet.c ipv6.c main.c nat.c altq.c
 WARNS?=	2
-DPADD=	${LIBUTIL}
 LDADD=	-lutil
 MAN=	ipfw.8
 

Modified: head/sbin/ipfw/altq.c
==============================================================================
--- head/sbin/ipfw/altq.c	Tue Mar  2 17:34:11 2010	(r204590)
+++ head/sbin/ipfw/altq.c	Tue Mar  2 17:40:48 2010	(r204591)
@@ -39,6 +39,7 @@
 
 #include <net/if.h>		/* IFNAMSIZ */
 #include <net/pfvar.h>
+#include <netinet/in.h>	/* in_addr */
 #include <netinet/ip_fw.h>
 
 /*

Modified: head/sbin/ipfw/dummynet.c
==============================================================================
--- head/sbin/ipfw/dummynet.c	Tue Mar  2 17:34:11 2010	(r204590)
+++ head/sbin/ipfw/dummynet.c	Tue Mar  2 17:40:48 2010	(r204591)
@@ -1,10 +1,5 @@
 /*
- * Copyright (c) 2002-2003 Luigi Rizzo
- * Copyright (c) 1996 Alex Nash, Paul Traina, Poul-Henning Kamp
- * Copyright (c) 1994 Ugen J.S.Antsilevich
- *
- * Idea and grammar partially left from:
- * Copyright (c) 1993 Daniel Boulet
+ * Copyright (c) 2002-2003,2010 Luigi Rizzo
  *
  * Redistribution and use in source forms, with and without modification,
  * are permitted provided that this entire comment appears intact.
@@ -24,7 +19,6 @@
 
 #include <sys/types.h>
 #include <sys/socket.h>
-#include <sys/queue.h>
 /* XXX there are several sysctl leftover here */
 #include <sys/sysctl.h>
 
@@ -46,6 +40,7 @@
 #include <netinet/ip_dummynet.h>
 #include <arpa/inet.h>	/* inet_ntoa */
 
+
 static struct _s_x dummynet_params[] = {
 	{ "plr",		TOK_PLR },
 	{ "noerror",		TOK_NOERROR },
@@ -56,27 +51,59 @@ static struct _s_x dummynet_params[] = {
 	{ "src-port",		TOK_SRCPORT },
 	{ "proto",		TOK_PROTO },
 	{ "weight",		TOK_WEIGHT },
+	{ "lmax",		TOK_LMAX },
+	{ "maxlen",		TOK_LMAX },
 	{ "all",		TOK_ALL },
-	{ "mask",		TOK_MASK },
+	{ "mask",		TOK_MASK }, /* alias for both */
+	{ "sched_mask",		TOK_SCHED_MASK },
+	{ "flow_mask",		TOK_FLOW_MASK },
 	{ "droptail",		TOK_DROPTAIL },
 	{ "red",		TOK_RED },
 	{ "gred",		TOK_GRED },
 	{ "bw",			TOK_BW },
 	{ "bandwidth",		TOK_BW },
 	{ "delay",		TOK_DELAY },
+	{ "link",		TOK_LINK },
 	{ "pipe",		TOK_PIPE },
 	{ "queue",		TOK_QUEUE },
+	{ "flowset",		TOK_FLOWSET },
+	{ "sched",		TOK_SCHED },
+	{ "pri",		TOK_PRI },
+	{ "priority",		TOK_PRI },
+	{ "type",		TOK_TYPE },
 	{ "flow-id",		TOK_FLOWID},
 	{ "dst-ipv6",		TOK_DSTIP6},
 	{ "dst-ip6",		TOK_DSTIP6},
 	{ "src-ipv6",		TOK_SRCIP6},
 	{ "src-ip6",		TOK_SRCIP6},
-	{ "profile",		TOK_PIPE_PROFILE},
+	{ "profile",		TOK_PROFILE},
 	{ "burst",		TOK_BURST},
 	{ "dummynet-params",	TOK_NULL },
 	{ NULL, 0 }	/* terminator */
 };
 
+#define O_NEXT(p, len) ((void *)((char *)p + len))
+
+static void
+oid_fill(struct dn_id *oid, int len, int type, uintptr_t id)
+{
+	oid->len = len;
+	oid->type = type;
+	oid->subtype = 0;
+	oid->id = id;
+}
+
+/* make room in the buffer and move the pointer forward */
+static void *
+o_next(struct dn_id **o, int len, int type)
+{
+	struct dn_id *ret = *o;
+	oid_fill(ret, len, type, 0);
+	*o = O_NEXT(*o, len);
+	return ret;
+}
+
+#if 0
 static int
 sort_q(void *arg, const void *pa, const void *pb)
 {
@@ -108,117 +135,81 @@ sort_q(void *arg, const void *pa, const 
 		res = 1;
 	return (int)(rev ? res : -res);
 }
+#endif
 
+/* print a mask and header for the subsequent list of flows */
 static void
-list_queues(struct dn_flow_set *fs, struct dn_flow_queue *q)
+print_mask(struct ipfw_flow_id *id)
+{
+	if (!IS_IP6_FLOW_ID(id)) {
+		printf("    "
+		    "mask: 0x%02x 0x%08x/0x%04x -> 0x%08x/0x%04x\n",
+		    id->proto,
+		    id->src_ip, id->src_port,
+		    id->dst_ip, id->dst_port);
+
+		printf("BKT Prot ___Source IP/port____ "
+		    "____Dest. IP/port____ "
+		    "Tot_pkt/bytes Pkt/Byte Drp\n");
+	} else {
+		char buf[255];
+		printf("\n        mask: proto: 0x%02x, flow_id: 0x%08x,  ",
+		    id->proto, id->flow_id6);
+		inet_ntop(AF_INET6, &(id->src_ip6), buf, sizeof(buf));
+		printf("%s/0x%04x -> ", buf, id->src_port);
+		inet_ntop(AF_INET6, &(id->dst_ip6), buf, sizeof(buf));
+		printf("%s/0x%04x\n", buf, id->dst_port);
+
+		printf("BKT ___Prot___ _flow-id_ "
+		    "______________Source IPv6/port_______________ "
+		    "_______________Dest. IPv6/port_______________ "
+		    "Tot_pkt/bytes Pkt/Byte Drp\n");
+	}
+}
+
+static void
+list_flow(struct dn_flow *ni)
 {
-	int l;
-	int index_printed, indexes = 0;
 	char buff[255];
 	struct protoent *pe;
+	struct in_addr ina;
+	struct ipfw_flow_id *id = &ni->fid;
 
-	if (fs->rq_elements == 0)
-		return;
-
-	if (co.do_sort != 0)
-		qsort_r(q, fs->rq_elements, sizeof *q, NULL, sort_q);
-
-	/* Print IPv4 flows */
-	index_printed = 0;
-	for (l = 0; l < fs->rq_elements; l++) {
-		struct in_addr ina;
-
+	pe = getprotobynumber(id->proto);
 		/* XXX: Should check for IPv4 flows */
-		if (IS_IP6_FLOW_ID(&(q[l].id)))
-			continue;
-
-		if (!index_printed) {
-			index_printed = 1;
-			if (indexes > 0)	/* currently a no-op */
-				printf("\n");
-			indexes++;
-			printf("    "
-			    "mask: 0x%02x 0x%08x/0x%04x -> 0x%08x/0x%04x\n",
-			    fs->flow_mask.proto,
-			    fs->flow_mask.src_ip, fs->flow_mask.src_port,
-			    fs->flow_mask.dst_ip, fs->flow_mask.dst_port);
-
-			printf("BKT Prot ___Source IP/port____ "
-			    "____Dest. IP/port____ "
-			    "Tot_pkt/bytes Pkt/Byte Drp\n");
-		}
-
-		printf("%3d ", q[l].hash_slot);
-		pe = getprotobynumber(q[l].id.proto);
+	printf("%3u ", (ni->oid.id) & 0xff);
+	if (!IS_IP6_FLOW_ID(id)) {
 		if (pe)
 			printf("%-4s ", pe->p_name);
 		else
-			printf("%4u ", q[l].id.proto);
-		ina.s_addr = htonl(q[l].id.src_ip);
+			printf("%4u ", id->proto);
+		ina.s_addr = htonl(id->src_ip);
 		printf("%15s/%-5d ",
-		    inet_ntoa(ina), q[l].id.src_port);
-		ina.s_addr = htonl(q[l].id.dst_ip);
+		    inet_ntoa(ina), id->src_port);
+		ina.s_addr = htonl(id->dst_ip);
 		printf("%15s/%-5d ",
-		    inet_ntoa(ina), q[l].id.dst_port);
-		printf("%4llu %8llu %2u %4u %3u\n",
-		    align_uint64(&q[l].tot_pkts),
-		    align_uint64(&q[l].tot_bytes),
-		    q[l].len, q[l].len_bytes, q[l].drops);
-		if (co.verbose)
-			printf("   S %20llu  F %20llu\n",
-			    align_uint64(&q[l].S), align_uint64(&q[l].F));
-	}
-
-	/* Print IPv6 flows */
-	index_printed = 0;
-	for (l = 0; l < fs->rq_elements; l++) {
-		if (!IS_IP6_FLOW_ID(&(q[l].id)))
-			continue;
-
-		if (!index_printed) {
-			index_printed = 1;
-			if (indexes > 0)
-				printf("\n");
-			indexes++;
-			printf("\n        mask: proto: 0x%02x, flow_id: 0x%08x,  ",
-			    fs->flow_mask.proto, fs->flow_mask.flow_id6);
-			inet_ntop(AF_INET6, &(fs->flow_mask.src_ip6),
-			    buff, sizeof(buff));
-			printf("%s/0x%04x -> ", buff, fs->flow_mask.src_port);
-			inet_ntop( AF_INET6, &(fs->flow_mask.dst_ip6),
-			    buff, sizeof(buff) );
-			printf("%s/0x%04x\n", buff, fs->flow_mask.dst_port);
-
-			printf("BKT ___Prot___ _flow-id_ "
-			    "______________Source IPv6/port_______________ "
-			    "_______________Dest. IPv6/port_______________ "
-			    "Tot_pkt/bytes Pkt/Byte Drp\n");
-		}
-		printf("%3d ", q[l].hash_slot);
-		pe = getprotobynumber(q[l].id.proto);
+		    inet_ntoa(ina), id->dst_port);
+	} else {
+		/* Print IPv6 flows */
 		if (pe != NULL)
 			printf("%9s ", pe->p_name);
 		else
-			printf("%9u ", q[l].id.proto);
-		printf("%7d  %39s/%-5d ", q[l].id.flow_id6,
-		    inet_ntop(AF_INET6, &(q[l].id.src_ip6), buff, sizeof(buff)),
-		    q[l].id.src_port);
+			printf("%9u ", id->proto);
+		printf("%7d  %39s/%-5d ", id->flow_id6,
+		    inet_ntop(AF_INET6, &(id->src_ip6), buff, sizeof(buff)),
+		    id->src_port);
 		printf(" %39s/%-5d ",
-		    inet_ntop(AF_INET6, &(q[l].id.dst_ip6), buff, sizeof(buff)),
-		    q[l].id.dst_port);
-		printf(" %4llu %8llu %2u %4u %3u\n",
-		    align_uint64(&q[l].tot_pkts),
-		    align_uint64(&q[l].tot_bytes),
-		    q[l].len, q[l].len_bytes, q[l].drops);
-		if (co.verbose)
-			printf("   S %20llu  F %20llu\n",
-			    align_uint64(&q[l].S),
-			    align_uint64(&q[l].F));
+		    inet_ntop(AF_INET6, &(id->dst_ip6), buff, sizeof(buff)),
+		    id->dst_port);
 	}
+	printf("%4llu %8llu %2u %4u %3u\n",
+	    align_uint64(&ni->tot_pkts),
+	    align_uint64(&ni->tot_bytes),
+	    ni->length, ni->len_bytes, ni->drops);
 }
 
 static void
-print_flowset_parms(struct dn_flow_set *fs, char *prefix)
+print_flowset_parms(struct dn_fs *fs, char *prefix)
 {
 	int l;
 	char qs[30];
@@ -226,7 +217,7 @@ print_flowset_parms(struct dn_flow_set *
 	char red[90];	/* Display RED parameters */
 
 	l = fs->qsize;
-	if (fs->flags_fs & DN_QSIZE_IS_BYTES) {
+	if (fs->flags & DN_QSIZE_BYTES) {
 		if (l >= 8192)
 			sprintf(qs, "%d KB", l / 1024);
 		else
@@ -237,23 +228,34 @@ print_flowset_parms(struct dn_flow_set *
 		sprintf(plr, "plr %f", 1.0 * fs->plr / (double)(0x7fffffff));
 	else
 		plr[0] = '\0';
-	if (fs->flags_fs & DN_IS_RED)	/* RED parameters */
+
+	if (fs->flags & DN_IS_RED)	/* RED parameters */
 		sprintf(red,
 		    "\n\t %cRED w_q %f min_th %d max_th %d max_p %f",
-		    (fs->flags_fs & DN_IS_GENTLE_RED) ? 'G' : ' ',
+		    (fs->flags & DN_IS_GENTLE_RED) ? 'G' : ' ',
 		    1.0 * fs->w_q / (double)(1 << SCALE_RED),
-		    SCALE_VAL(fs->min_th),
-		    SCALE_VAL(fs->max_th),
+		    fs->min_th,
+		    fs->max_th,
 		    1.0 * fs->max_p / (double)(1 << SCALE_RED));
 	else
 		sprintf(red, "droptail");
 
-	printf("%s %s%s %d queues (%d buckets) %s\n",
-	    prefix, qs, plr, fs->rq_elements, fs->rq_size, red);
+	if (prefix[0]) {
+	    printf("%s %s%s %d queues (%d buckets) %s\n",
+		prefix, qs, plr, fs->oid.id, fs->buckets, red);
+	    prefix[0] = '\0';
+	} else {
+	    printf("q%05d %s%s %d flows (%d buckets) sched %d "
+			"weight %d lmax %d pri %d %s\n",
+		fs->fs_nr, qs, plr, fs->oid.id, fs->buckets,
+		fs->sched_nr, fs->par[0], fs->par[1], fs->par[2], red);
+	    if (fs->flags & DN_HAVE_MASK)
+		print_mask(&fs->flow_mask);
+	}
 }
 
 static void
-print_extra_delay_parms(struct dn_pipe *p)
+print_extra_delay_parms(struct dn_profile *p)
 {
 	double loss;
 	if (p->samples_no <= 0)
@@ -265,105 +267,126 @@ print_extra_delay_parms(struct dn_pipe *
 		p->name, loss, p->samples_no);
 }
 
-void
-ipfw_list_pipes(void *data, uint nbytes, int ac, char *av[])
+static void
+flush_buf(char *buf)
 {
-	int rulenum;
-	void *next = data;
-	struct dn_pipe *p = (struct dn_pipe *) data;
-	struct dn_flow_set *fs;
-	struct dn_flow_queue *q;
-	int l;
-
-	if (ac > 0)
-		rulenum = strtoul(*av++, NULL, 10);
-	else
-		rulenum = 0;
-	for (; nbytes >= sizeof *p; p = (struct dn_pipe *)next) {
-		double b = p->bandwidth;
-		char buf[30];
-		char prefix[80];
-		char burst[5 + 7];
-
-		if (SLIST_NEXT(p, next) != (struct dn_pipe *)DN_IS_PIPE)
-			break;	/* done with pipes, now queues */
-
-		/*
-		 * compute length, as pipe have variable size
-		 */
-		l = sizeof(*p) + p->fs.rq_elements * sizeof(*q);
-		next = (char *)p + l;
-		nbytes -= l;
-
-		if ((rulenum != 0 && rulenum != p->pipe_nr) || co.do_pipe == 2)
-			continue;
-
-		/*
-		 * Print rate (or clocking interface)
-		 */
-		if (p->if_name[0] != '\0')
-			sprintf(buf, "%s", p->if_name);
-		else if (b == 0)
-			sprintf(buf, "unlimited");
-		else if (b >= 1000000)
-			sprintf(buf, "%7.3f Mbit/s", b/1000000);
-		else if (b >= 1000)
-			sprintf(buf, "%7.3f Kbit/s", b/1000);
-		else
-			sprintf(buf, "%7.3f bit/s ", b);
-
-		sprintf(prefix, "%05d: %s %4d ms ",
-		    p->pipe_nr, buf, p->delay);
-
-		print_flowset_parms(&(p->fs), prefix);
-
-		if (humanize_number(burst, sizeof(burst), p->burst,
-		    "Byte", HN_AUTOSCALE, 0) < 0 || co.verbose)
-			printf("\t burst: %ju Byte\n", p->burst);
-		else
-			printf("\t burst: %s\n", burst);
-
-		print_extra_delay_parms(p);
-
-		q = (struct dn_flow_queue *)(p+1);
-		list_queues(&(p->fs), q);
-	}
-	for (fs = next; nbytes >= sizeof *fs; fs = next) {
-		char prefix[80];
+	if (buf[0])
+		printf("%s\n", buf);
+	buf[0] = '\0';
+}
+	
+/*
+ * generic list routine. We expect objects in a specific order, i.e.
+ * PIPES AND SCHEDULERS:
+ *	link; scheduler; internal flowset if any; instances
+ * we can tell a pipe from the number.
+ *
+ * FLOWSETS:
+ *	flowset; queues;
+ * link i (int queue); scheduler i; si(i) { flowsets() : queues }
+ */
+static void
+list_pipes(struct dn_id *oid, struct dn_id *end)
+{
+    char buf[160];	/* pending buffer */
+    buf[0] = '\0';
 
-		if (SLIST_NEXT(fs, next) != (struct dn_flow_set *)DN_IS_QUEUE)
-			break;
-		l = sizeof(*fs) + fs->rq_elements * sizeof(*q);
-		next = (char *)fs + l;
-		nbytes -= l;
+    for (; oid != end; oid = O_NEXT(oid, oid->len)) {
+	if (oid->len < sizeof(*oid))
+		errx(1, "invalid oid len %d\n", oid->len);
+
+	switch (oid->type) {
+	default:
+	    flush_buf(buf);
+	    printf("unrecognized object %d size %d\n", oid->type, oid->len);
+	    break;
+	case DN_TEXT: /* list of attached flowsets */
+	    {
+		int i, l;
+		struct {
+			struct dn_id id;
+			uint32_t p[0];
+		} *d = (void *)oid;
+		l = (oid->len - sizeof(*oid))/sizeof(d->p[0]);
+		if (l == 0)
+		    break;
+		printf("   Children flowsets: ");
+		for (i = 0; i < l; i++)
+			printf("%u ", d->p[i]);
+		printf("\n");
+		break;
+	    }
+	case DN_CMD_GET:
+	    if (co.verbose)
+		printf("answer for cmd %d, len %d\n", oid->type, oid->id);
+	    break;
+	case DN_SCH: {
+	    struct dn_sch *s = (struct dn_sch *)oid;
+	    flush_buf(buf);
+	    printf(" sched %d type %s flags 0x%x %d buckets %d active\n",
+			s->sched_nr,
+			s->name, s->flags, s->buckets, s->oid.id);
+	    if (s->flags & DN_HAVE_MASK)
+		print_mask(&s->sched_mask);
+	    }
+	    break;
 
-		if (rulenum != 0 && ((rulenum != fs->fs_nr && co.do_pipe == 2) ||
-		    (rulenum != fs->parent_nr && co.do_pipe == 1))) {
-			continue;
-		}
+	case DN_FLOW:
+	    list_flow((struct dn_flow *)oid);
+	    break;
+
+	case DN_LINK: {
+	    struct dn_link *p = (struct dn_link *)oid;
+	    double b = p->bandwidth;
+	    char bwbuf[30];
+	    char burst[5 + 7];
+
+	    /* This starts a new object so flush buffer */
+	    flush_buf(buf);
+	    /* data rate */
+	    if (b == 0)
+		sprintf(bwbuf, "unlimited     ");
+	    else if (b >= 1000000)
+		sprintf(bwbuf, "%7.3f Mbit/s", b/1000000);
+	    else if (b >= 1000)
+		sprintf(bwbuf, "%7.3f Kbit/s", b/1000);
+	    else
+		sprintf(bwbuf, "%7.3f bit/s ", b);
+
+	    if (humanize_number(burst, sizeof(burst), p->burst,
+		    "", HN_AUTOSCALE, 0) < 0 || co.verbose)
+		sprintf(burst, "%d", (int)p->burst);
+	    sprintf(buf, "%05d: %s %4d ms burst %s",
+		p->link_nr % DN_MAX_ID, bwbuf, p->delay, burst);
+	    }
+	    break;
 
-		q = (struct dn_flow_queue *)(fs+1);
-		sprintf(prefix, "q%05d: weight %d pipe %d ",
-		    fs->fs_nr, fs->weight, fs->parent_nr);
-		print_flowset_parms(fs, prefix);
-		list_queues(fs, q);
+	case DN_FS:
+	    print_flowset_parms((struct dn_fs *)oid, buf);
+	    break;
+	case DN_PROFILE:
+	    flush_buf(buf);
+	    print_extra_delay_parms((struct dn_profile *)oid);
 	}
+	flush_buf(buf); // XXX does it really go here ?
+    }
 }
 
 /*
- * Delete pipe or queue i
+ * Delete pipe, queue or scheduler i
  */
 int
-ipfw_delete_pipe(int pipe_or_queue, int i)
+ipfw_delete_pipe(int do_pipe, int i)
 {
-	struct dn_pipe p;
-
-	memset(&p, 0, sizeof p);
-	if (pipe_or_queue == 1)
-		p.pipe_nr = i;		/* pipe */
-	else
-		p.fs.fs_nr = i;		/* queue */
-	i = do_cmd(IP_DUMMYNET_DEL, &p, sizeof p);
+	struct {
+		struct dn_id oid;
+		uintptr_t a[1];	/* add more if we want a list */
+	} cmd;
+	oid_fill((void *)&cmd, sizeof(cmd), DN_CMD_DELETE, DN_API_VERSION);
+	cmd.oid.subtype = (do_pipe == 1) ? DN_LINK :
+		( (do_pipe == 2) ? DN_FS : DN_SCH);
+	cmd.a[0] = i;
+	i = do_cmd(IP_DUMMYNET3, &cmd, cmd.oid.len);
 	if (i) {
 		i = 1;
 		warn("rule %u: setsockopt(IP_DUMMYNET_DEL)", i);
@@ -400,7 +423,7 @@ ipfw_delete_pipe(int pipe_or_queue, int 
  * The empirical curve may have both vertical and horizontal lines.
  * Vertical lines represent constant delay for a range of
  * probabilities; horizontal lines correspond to a discontinuty
- * in the delay distribution: the pipe will use the largest delay
+ * in the delay distribution: the link will use the largest delay
  * for a given probability.
  * 
  * To pass the curve to dummynet, we must store the parameters
@@ -490,9 +513,12 @@ static void
 read_bandwidth(char *arg, int *bandwidth, char *if_name, int namelen)
 {
 	if (*bandwidth != -1)
-		warn("duplicate token, override bandwidth value!");
+		warnx("duplicate token, override bandwidth value!");
 
 	if (arg[0] >= 'a' && arg[0] <= 'z') {
+		if (!if_name) {
+			errx(1, "no if support");
+		}
 		if (namelen >= IFNAMSIZ)
 			warn("interface name truncated");
 		namelen--;
@@ -521,7 +547,8 @@ read_bandwidth(char *arg, int *bandwidth
 			errx(EX_DATAERR, "bandwidth too large");
 
 		*bandwidth = bw;
-		if_name[0] = '\0';
+		if (if_name)
+			if_name[0] = '\0';
 	}
 }
 
@@ -551,7 +578,8 @@ compare_points(const void *vp1, const vo
 #define ED_EFMT(s) EX_DATAERR,"error in %s at line %d: "#s,filename,lineno
 
 static void
-load_extra_delays(const char *filename, struct dn_pipe *p)
+load_extra_delays(const char *filename, struct dn_profile *p,
+	struct dn_link *link)
 {
 	char    line[ED_MAX_LINE_LEN];
 	FILE    *f;
@@ -566,6 +594,9 @@ load_extra_delays(const char *filename, 
 	struct point    points[ED_MAX_SAMPLES_NO];
 	int     points_no = 0;
 
+	/* XXX link never NULL? */
+	p->link_nr = link->link_nr;
+
 	profile_name[0] = '\0';
 	f = fopen(filename, "r");
 	if (f == NULL)
@@ -606,7 +637,8 @@ load_extra_delays(const char *filename, 
 				ED_MAX_SAMPLES_NO);
 		    do_points = 0;
 		} else if (!strcasecmp(name, ED_TOK_BW)) {
-		    read_bandwidth(arg, &p->bandwidth, p->if_name, sizeof(p->if_name));
+		    char buf[IFNAMSIZ];
+		    read_bandwidth(arg, &link->bandwidth, buf, sizeof(buf));
 		} else if (!strcasecmp(name, ED_TOK_LOSS)) {
 		    if (loss != -1.0)
 			errx(ED_EFMT("duplicated token: %s"), name);
@@ -676,17 +708,17 @@ load_extra_delays(const char *filename, 
 	    double y2 = points[i+1].prob * samples;
 	    double x2 = points[i+1].delay;
 
-	    int index = y1;
+	    int ix = y1;
 	    int stop = y2;
 
 	    if (x1 == x2) {
-		for (; index<stop; ++index)
-		    p->samples[index] = x1;
+		for (; ix<stop; ++ix)
+		    p->samples[ix] = x1;
 	    } else {
 		double m = (y2-y1)/(x2-x1);
 		double c = y1 - m*x1;
-		for (; index<stop ; ++index)
-		    p->samples[index] = (index - c)/m;
+		for (; ix<stop ; ++ix)
+		    p->samples[ix] = (ix - c)/m;
 	    }
 	}
 	p->samples_no = samples;
@@ -694,27 +726,120 @@ load_extra_delays(const char *filename, 
 	strncpy(p->name, profile_name, sizeof(p->name));
 }
 
+/*
+ * configuration of pipes, schedulers, flowsets.
+ * When we configure a new scheduler, an empty pipe is created, so:
+ * 
+ * do_pipe = 1 -> "pipe N config ..." only for backward compatibility
+ *	sched N+Delta type fifo sched_mask ...
+ *	pipe N+Delta <parameters>
+ *	flowset N+Delta pipe N+Delta (no parameters)
+ *	sched N type wf2q+ sched_mask ...
+ *	pipe N <parameters>
+ *
+ * do_pipe = 2 -> flowset N config
+ *	flowset N parameters
+ *
+ * do_pipe = 3 -> sched N config
+ *	sched N parameters (default no pipe)
+ *	optional Pipe N config ...
+ * pipe ==>
+ */
 void
 ipfw_config_pipe(int ac, char **av)
 {
-	int samples[ED_MAX_SAMPLES_NO];
-	struct dn_pipe p;
-	int i;
+	int i, j;
 	char *end;
 	void *par = NULL;
-
-	memset(&p, 0, sizeof p);
-	p.bandwidth = -1;
+	struct dn_id *buf, *base;
+	struct dn_sch *sch = NULL;
+	struct dn_link *p = NULL;
+	struct dn_fs *fs = NULL;
+	struct dn_profile *pf = NULL;
+	struct ipfw_flow_id *mask = NULL;
+	int lmax;
+	uint32_t _foo = 0, *flags = &_foo , *buckets = &_foo;
+
+	/*
+	 * allocate space for 1 header,
+	 * 1 scheduler, 1 link, 1 flowset, 1 profile
+	 */
+	lmax = sizeof(struct dn_id);	/* command header */
+	lmax += sizeof(struct dn_sch) + sizeof(struct dn_link) +
+		sizeof(struct dn_fs) + sizeof(struct dn_profile);
 
 	av++; ac--;
 	/* Pipe number */
 	if (ac && isdigit(**av)) {
 		i = atoi(*av); av++; ac--;
-		if (co.do_pipe == 1)
-			p.pipe_nr = i;
-		else
-			p.fs.fs_nr = i;
+	} else
+		i = -1;
+	if (i <= 0)
+		errx(EX_USAGE, "need a pipe/flowset/sched number");
+	base = buf = safe_calloc(1, lmax);
+	/* all commands start with a 'CONFIGURE' and a version */
+	o_next(&buf, sizeof(struct dn_id), DN_CMD_CONFIG);
+	base->id = DN_API_VERSION;
+
+	switch (co.do_pipe) {
+	case 1: /* "pipe N config ..." */
+		/* Allocate space for the WF2Q+ scheduler, its link
+		 * and the FIFO flowset. Set the number, but leave
+		 * the scheduler subtype and other parameters to 0
+		 * so the kernel will use appropriate defaults.
+		 * XXX todo: add a flag to record if a parameter
+		 * is actually configured.
+		 * If we do a 'pipe config' mask -> sched_mask.
+		 * The FIFO scheduler and link are derived from the
+		 * WF2Q+ one in the kernel.
+		 */
+		sch = o_next(&buf, sizeof(*sch), DN_SCH);
+		p = o_next(&buf, sizeof(*p), DN_LINK);
+		fs = o_next(&buf, sizeof(*fs), DN_FS);
+
+		sch->sched_nr = i;
+		sch->oid.subtype = 0;	/* defaults to WF2Q+ */
+		mask = &sch->sched_mask;
+		flags = &sch->flags;
+		buckets = &sch->buckets;
+		*flags |= DN_PIPE_CMD;
+
+		p->link_nr = i;
+
+		/* This flowset is only for the FIFO scheduler */
+		fs->fs_nr = i + 2*DN_MAX_ID;
+		fs->sched_nr = i + DN_MAX_ID;
+		break;
+
+	case 2: /* "queue N config ... " */
+		fs = o_next(&buf, sizeof(*fs), DN_FS);
+		fs->fs_nr = i;
+		mask = &fs->flow_mask;
+		flags = &fs->flags;
+		buckets = &fs->buckets;
+		break;
+
+	case 3: /* "sched N config ..." */
+		sch = o_next(&buf, sizeof(*sch), DN_SCH);
+		fs = o_next(&buf, sizeof(*fs), DN_FS);
+		sch->sched_nr = i;
+		mask = &sch->sched_mask;
+		flags = &sch->flags;
+		buckets = &sch->buckets;
+		/* fs is used only with !MULTIQUEUE schedulers */
+		fs->fs_nr = i + DN_MAX_ID;
+		fs->sched_nr = i;
+		break;
 	}
+	/* set to -1 those fields for which we want to reuse existing
+	 * values from the kernel.
+	 * Also, *_nr and subtype = 0 mean reuse the value from the kernel.
+	 * XXX todo: support reuse of the mask.
+	 */
+	if (p)
+		p->bandwidth = -1;
+	for (j = 0; j < sizeof(fs->par)/sizeof(fs->par[0]); j++)
+		fs->par[j] = -1;
 	while (ac > 0) {
 		double d;
 		int tok = match_token(dummynet_params, *av);
@@ -722,41 +847,48 @@ ipfw_config_pipe(int ac, char **av)
 
 		switch(tok) {
 		case TOK_NOERROR:
-			p.fs.flags_fs |= DN_NOERROR;
+			NEED(fs, "noerror is only for pipes");
+			fs->flags |= DN_NOERROR;
 			break;
 
 		case TOK_PLR:
+			NEED(fs, "plr is only for pipes");
 			NEED1("plr needs argument 0..1\n");
 			d = strtod(av[0], NULL);
 			if (d > 1)
 				d = 1;
 			else if (d < 0)
 				d = 0;
-			p.fs.plr = (int)(d*0x7fffffff);
+			fs->plr = (int)(d*0x7fffffff);
 			ac--; av++;
 			break;
 
 		case TOK_QUEUE:
+			NEED(fs, "queue is only for pipes or flowsets");
 			NEED1("queue needs queue size\n");
 			end = NULL;
-			p.fs.qsize = strtoul(av[0], &end, 0);
+			fs->qsize = strtoul(av[0], &end, 0);
 			if (*end == 'K' || *end == 'k') {
-				p.fs.flags_fs |= DN_QSIZE_IS_BYTES;
-				p.fs.qsize *= 1024;
+				fs->flags |= DN_QSIZE_BYTES;
+				fs->qsize *= 1024;
 			} else if (*end == 'B' ||
 			    _substrcmp2(end, "by", "bytes") == 0) {
-				p.fs.flags_fs |= DN_QSIZE_IS_BYTES;
+				fs->flags |= DN_QSIZE_BYTES;
 			}
 			ac--; av++;
 			break;
 
 		case TOK_BUCKETS:
+			NEED(fs, "buckets is only for pipes or flowsets");
 			NEED1("buckets needs argument\n");
-			p.fs.rq_size = strtoul(av[0], NULL, 0);
+			*buckets = strtoul(av[0], NULL, 0);
 			ac--; av++;
 			break;
 
+		case TOK_FLOW_MASK:
+		case TOK_SCHED_MASK:
 		case TOK_MASK:
+			NEED(mask, "tok_mask");
 			NEED1("mask needs mask specifier\n");
 			/*
 			 * per-flow queue, mask is dst_ip, dst_port,
@@ -764,7 +896,7 @@ ipfw_config_pipe(int ac, char **av)
 			 */
 			par = NULL;
 
-			bzero(&p.fs.flow_mask, sizeof(p.fs.flow_mask));
+			bzero(mask, sizeof(*mask));
 			end = NULL;
 
 			while (ac >= 1) {
@@ -781,43 +913,48 @@ ipfw_config_pipe(int ac, char **av)
 				    /*
 				     * special case, all bits significant
 				     */
-				    p.fs.flow_mask.dst_ip = ~0;
-				    p.fs.flow_mask.src_ip = ~0;
-				    p.fs.flow_mask.dst_port = ~0;
-				    p.fs.flow_mask.src_port = ~0;
-				    p.fs.flow_mask.proto = ~0;
-				    n2mask(&(p.fs.flow_mask.dst_ip6), 128);
-				    n2mask(&(p.fs.flow_mask.src_ip6), 128);
-				    p.fs.flow_mask.flow_id6 = ~0;
-				    p.fs.flags_fs |= DN_HAVE_FLOW_MASK;
+				    mask->dst_ip = ~0;
+				    mask->src_ip = ~0;
+				    mask->dst_port = ~0;
+				    mask->src_port = ~0;
+				    mask->proto = ~0;
+				    n2mask(&mask->dst_ip6, 128);
+				    n2mask(&mask->src_ip6, 128);
+				    mask->flow_id6 = ~0;
+				    *flags |= DN_HAVE_MASK;
 				    goto end_mask;
 
 			    case TOK_DSTIP:
-				    p32 = &p.fs.flow_mask.dst_ip;
+				    mask->addr_type = 4;
+				    p32 = &mask->dst_ip;
 				    break;
 
 			    case TOK_SRCIP:
-				    p32 = &p.fs.flow_mask.src_ip;
+				    mask->addr_type = 4;
+				    p32 = &mask->src_ip;
 				    break;
 
 			    case TOK_DSTIP6:
-				    pa6 = &(p.fs.flow_mask.dst_ip6);
+				    mask->addr_type = 6;
+				    pa6 = &mask->dst_ip6;
 				    break;
 			    
 			    case TOK_SRCIP6:
-				    pa6 = &(p.fs.flow_mask.src_ip6);
+				    mask->addr_type = 6;
+				    pa6 = &mask->src_ip6;
 				    break;
 
 			    case TOK_FLOWID:
-				    p20 = &p.fs.flow_mask.flow_id6;
+				    mask->addr_type = 6;
+				    p20 = &mask->flow_id6;
 				    break;
 
 			    case TOK_DSTPORT:
-				    p16 = &p.fs.flow_mask.dst_port;
+				    p16 = &mask->dst_port;
 				    break;
 
 			    case TOK_SRCPORT:
-				    p16 = &p.fs.flow_mask.src_port;
+				    p16 = &mask->src_port;
 				    break;
 
 			    case TOK_PROTO:
@@ -857,10 +994,10 @@ ipfw_config_pipe(int ac, char **av)
 				    if (a > 0xFF)
 					    errx(EX_DATAERR,
 						"proto mask must be 8 bit");
-				    p.fs.flow_mask.proto = (uint8_t)a;
+				    fs->flow_mask.proto = (uint8_t)a;
 			    }
 			    if (a != 0)
-				    p.fs.flags_fs |= DN_HAVE_FLOW_MASK;
+				    *flags |= DN_HAVE_MASK;
 			    ac--; av++;
 			} /* end while, config masks */
 end_mask:
@@ -869,9 +1006,9 @@ end_mask:
 		case TOK_RED:
 		case TOK_GRED:
 			NEED1("red/gred needs w_q/min_th/max_th/max_p\n");
-			p.fs.flags_fs |= DN_IS_RED;
+			fs->flags |= DN_IS_RED;
 			if (tok == TOK_GRED)
-				p.fs.flags_fs |= DN_IS_GENTLE_RED;
+				fs->flags |= DN_IS_GENTLE_RED;
 			/*
 			 * the format for parameters is w_q/min_th/max_th/max_p
 			 */
@@ -879,82 +1016,108 @@ end_mask:
 			    double w_q = strtod(end, NULL);
 			    if (w_q > 1 || w_q <= 0)
 				errx(EX_DATAERR, "0 < w_q <= 1");
-			    p.fs.w_q = (int) (w_q * (1 << SCALE_RED));
+			    fs->w_q = (int) (w_q * (1 << SCALE_RED));
 			}
 			if ((end = strsep(&av[0], "/"))) {
-			    p.fs.min_th = strtoul(end, &end, 0);
+			    fs->min_th = strtoul(end, &end, 0);
 			    if (*end == 'K' || *end == 'k')
-				p.fs.min_th *= 1024;
+				fs->min_th *= 1024;
 			}
 			if ((end = strsep(&av[0], "/"))) {
-			    p.fs.max_th = strtoul(end, &end, 0);
+			    fs->max_th = strtoul(end, &end, 0);
 			    if (*end == 'K' || *end == 'k')
-				p.fs.max_th *= 1024;
+				fs->max_th *= 1024;
 			}
 			if ((end = strsep(&av[0], "/"))) {
 			    double max_p = strtod(end, NULL);
 			    if (max_p > 1 || max_p <= 0)
 				errx(EX_DATAERR, "0 < max_p <= 1");
-			    p.fs.max_p = (int)(max_p * (1 << SCALE_RED));
+			    fs->max_p = (int)(max_p * (1 << SCALE_RED));
 			}
 			ac--; av++;
 			break;
 
 		case TOK_DROPTAIL:
-			p.fs.flags_fs &= ~(DN_IS_RED|DN_IS_GENTLE_RED);
+			NEED(fs, "droptail is only for flowsets");
+			fs->flags &= ~(DN_IS_RED|DN_IS_GENTLE_RED);
 			break;
 
 		case TOK_BW:
+			NEED(p, "bw is only for links");
 			NEED1("bw needs bandwidth or interface\n");
-			if (co.do_pipe != 1)
-			    errx(EX_DATAERR, "bandwidth only valid for pipes");
-			read_bandwidth(av[0], &p.bandwidth, p.if_name, sizeof(p.if_name));
+			read_bandwidth(av[0], &p->bandwidth, NULL, 0);
 			ac--; av++;
 			break;
 
 		case TOK_DELAY:
-			if (co.do_pipe != 1)
-				errx(EX_DATAERR, "delay only valid for pipes");
+			NEED(p, "delay is only for links");
 			NEED1("delay needs argument 0..10000ms\n");
-			p.delay = strtoul(av[0], NULL, 0);
+			p->delay = strtoul(av[0], NULL, 0);
+			ac--; av++;
+			break;
+
+		case TOK_TYPE: {
+			int l;
+			NEED(sch, "type is only for schedulers");
+			NEED1("type needs a string");
+			l = strlen(av[0]);
+			if (l == 0 || l > 15)
+				errx(1, "type %s too long\n", av[0]);
+			strcpy(sch->name, av[0]);
+			sch->oid.subtype = 0; /* use string */
 			ac--; av++;
 			break;
+		    }
 
 		case TOK_WEIGHT:
-			if (co.do_pipe == 1)
-				errx(EX_DATAERR,"weight only valid for queues");
-			NEED1("weight needs argument 0..100\n");
-			p.fs.weight = strtoul(av[0], &end, 0);
+			NEED(fs, "weight is only for flowsets");
+			NEED1("weight needs argument\n");

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***


More information about the svn-src-all mailing list