ECN marking implementation for dummynet

Midori Kato katoon at sfc.wide.ad.jp
Wed Apr 2 11:48:20 UTC 2014


Hi FreeBSD developers,

I'm Midori Kato. I have been working with Lars Eggert on DCTCP.
I would like to share our patch for an ECN marking mechanism on
dummynet, which I used for DCTCP testing.

My implementation allows ECN marking to be enabled with RED as the AQM
scheme. The following command is an example:
$ ipfw pipe 9999 config red 1/10/10/0.0 ecn

Our implementation supports both the DCTCP and the RFC 3168 ECN marking methods.

If you are interested in our ECN implementation, I would be very happy to
receive your review! (I have already submitted my patch to Luigi and hope he
will merge it in the near future.)

Regards,
-- Midori
-------------- next part --------------
diff --git a/sbin/ipfw/dummynet.c b/sbin/ipfw/dummynet.c
index 28dc2c7..cb62853 100644
--- a/sbin/ipfw/dummynet.c
+++ b/sbin/ipfw/dummynet.c
@@ -56,6 +56,7 @@ static struct _s_x dummynet_params[] = {
 	{ "sched_mask",		TOK_SCHED_MASK },
 	{ "flow_mask",		TOK_FLOW_MASK },
 	{ "droptail",		TOK_DROPTAIL },
+	{ "ecn",		TOK_ECN },
 	{ "red",		TOK_RED },
 	{ "gred",		TOK_GRED },
 	{ "bw",			TOK_BW },
@@ -239,7 +240,7 @@ print_flowset_parms(struct dn_fs *fs, char *prefix)
 	else
 		plr[0] = '\0';
 
-	if (fs->flags & DN_IS_RED)	/* RED parameters */
+	if (fs->flags & DN_IS_RED) {	/* RED parameters */
 		sprintf(red,
 		    "\n\t %cRED w_q %f min_th %d max_th %d max_p %f",
 		    (fs->flags & DN_IS_GENTLE_RED) ? 'G' : ' ',
@@ -247,7 +248,9 @@ print_flowset_parms(struct dn_fs *fs, char *prefix)
 		    fs->min_th,
 		    fs->max_th,
 		    1.0 * fs->max_p / (double)(1 << SCALE_RED));
-	else
+		if (fs->flags & DN_IS_ECN)
+			strncat(red, " (ecn)", 6);
+	} else
 		sprintf(red, "droptail");
 
 	if (prefix[0]) {
@@ -1046,13 +1049,17 @@ end_mask:
 			}
 			if ((end = strsep(&av[0], "/"))) {
 			    double max_p = strtod(end, NULL);
-			    if (max_p > 1 || max_p <= 0)
-				errx(EX_DATAERR, "0 < max_p <= 1");
+			    if (max_p > 1 || max_p < 0)
+				errx(EX_DATAERR, "0 <= max_p <= 1");
 			    fs->max_p = (int)(max_p * (1 << SCALE_RED));
 			}
 			ac--; av++;
 			break;
 
+		case TOK_ECN:
+			fs->flags |= DN_IS_ECN;
+			break;
+
 		case TOK_DROPTAIL:
 			NEED(fs, "droptail is only for flowsets");
 			fs->flags &= ~(DN_IS_RED|DN_IS_GENTLE_RED);
@@ -1175,13 +1182,20 @@ end_mask:
 			errx(EX_DATAERR, "2 <= queue size <= %ld", limit);
 	    }
 
+	    if ((fs->flags & DN_IS_ECN) && !(fs->flags & DN_IS_RED))
+		errx(EX_USAGE, "enable red/gred for ECN");
+
 	    if (fs->flags & DN_IS_RED) {
 		size_t len;
 		int lookup_depth, avg_pkt_size;
 
-		if (fs->min_th >= fs->max_th)
+		if (!(fs->flags & DN_IS_ECN) && (fs->min_th >= fs->max_th))
 		    errx(EX_DATAERR, "min_th %d must be < than max_th %d",
 			fs->min_th, fs->max_th);
+		else if ((fs->flags & DN_IS_ECN) && (fs->min_th > fs->max_th))
+		    errx(EX_DATAERR, "min_th %d must be =< than max_th %d",
+			fs->min_th, fs->max_th);
+
 		if (fs->max_th == 0)
 		    errx(EX_DATAERR, "max_th must be > 0");
 
diff --git a/sbin/ipfw/ipfw2.h b/sbin/ipfw/ipfw2.h
index d592930..b50361f 100644
--- a/sbin/ipfw/ipfw2.h
+++ b/sbin/ipfw/ipfw2.h
@@ -165,6 +165,7 @@ enum tokens {
 	TOK_BURST,
 	TOK_RED,
 	TOK_GRED,
+	TOK_ECN,
 	TOK_DROPTAIL,
 	TOK_PROTO,
 	/* dummynet tokens */
diff --git a/sys/netinet/ip_dummynet.h b/sys/netinet/ip_dummynet.h
index 1c09197..0b37e71 100644
--- a/sys/netinet/ip_dummynet.h
+++ b/sys/netinet/ip_dummynet.h
@@ -102,6 +102,7 @@ enum {	/* user flags */
 	DN_QHT_HASH	= 0x0004,	/* qht is a hash table */
 	DN_QSIZE_BYTES	= 0x0008,	/* queue size is in bytes */
 	DN_HAS_PROFILE	= 0x0010,	/* a link has a profile */
+	DN_IS_ECN	= 0x0080,
 	DN_IS_RED	= 0x0020,
 	DN_IS_GENTLE_RED= 0x0040,
 	DN_PIPE_CMD	= 0x1000,	/* pipe config... */
diff --git a/sys/netpfil/ipfw/ip_dn_glue.c b/sys/netpfil/ipfw/ip_dn_glue.c
index 7d7e695..095758f 100644
--- a/sys/netpfil/ipfw/ip_dn_glue.c
+++ b/sys/netpfil/ipfw/ip_dn_glue.c
@@ -83,6 +83,7 @@ struct dn_flow_set {
 #define DNOLD_QSIZE_IS_BYTES   0x0008  /* queue size is measured in bytes */
 #define DNOLD_NOERROR      0x0010  /* do not report ENOBUFS on drops  */
 #define DNOLD_HAS_PROFILE      0x0020  /* the pipe has a delay profile. */
+#define DNOLD_IS_ECN       0x0040
 #define DNOLD_IS_PIPE      0x4000
 #define DNOLD_IS_QUEUE     0x8000
 
@@ -338,6 +339,8 @@ convertflags2new(int src)
 		dst |= DN_IS_RED;
 	if (src & DNOLD_IS_GENTLE_RED)
 		dst |= DN_IS_GENTLE_RED;
+	if (src & DNOLD_IS_ECN)
+		dst |= DN_IS_ECN;
 	if (src & DNOLD_HAS_PROFILE)
 		dst |= DN_HAS_PROFILE;
 
diff --git a/sys/netpfil/ipfw/ip_dn_io.c b/sys/netpfil/ipfw/ip_dn_io.c
index 9a4b486..446253b 100644
--- a/sys/netpfil/ipfw/ip_dn_io.c
+++ b/sys/netpfil/ipfw/ip_dn_io.c
@@ -302,6 +302,7 @@ red_drops (struct dn_queue *q, int len)
 	 */
 
 	struct dn_fsk *fs = q->fs;
+	struct dn_fs *f = &(q->fs->fs);
 	int64_t p_b = 0;
 
 	/* Queue in bytes or packets? */
@@ -337,6 +338,8 @@ red_drops (struct dn_queue *q, int len)
 		return (0);	/* accept packet */
 	}
 	if (q->avg >= fs->max_th) {	/* average queue >=  max threshold */
+		if (fs->fs.flags & DN_IS_ECN)
+			return (1);
 		if (fs->fs.flags & DN_IS_GENTLE_RED) {
 			/*
 			 * According to Gentle-RED, if avg is greater than
@@ -352,6 +355,8 @@ red_drops (struct dn_queue *q, int len)
 			return (1);
 		}
 	} else if (q->avg > fs->min_th) {
+		if (fs->fs.flags & DN_IS_ECN)
+			return (1);
 		/*
 		 * We compute p_b using the linear dropping function
 		 *	 p_b = c_1 * avg - c_2
@@ -384,6 +389,71 @@ red_drops (struct dn_queue *q, int len)
 }
 
 /*
+ * ECN processing (partly adapted from altq): set CE on an ECN-capable packet.
+ * Returns 1 if the packet carries CE on return, 0 if it could not be marked.
+ */
+static int
+ecn_mark(struct mbuf* m)
+{
+	struct ip *ip;
+	ip = mtod(m, struct ip *);	/* ip_v below selects v4 vs. v6 handling */
+
+	switch (ip->ip_v) {
+	case IPVERSION:
+	{
+		u_int8_t otos;
+		int sum;
+
+		if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_NOTECT)
+			return (0);	/* not ECN-capable: header left untouched */
+		if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_CE)
+			return (1);	/* CE already set: nothing to change */
+
+		/*
+		 * ECN-capable but not yet marked:
+		 * set the CE bits and patch the header checksum
+		 */
+		otos = ip->ip_tos;
+		ip->ip_tos |= IPTOS_ECN_CE;
+		/*
+		 * incremental checksum update (RFC 1624), m = old TOS, m' = new TOS
+		 *	   HC' = ~(~HC + ~m + m')
+		 */
+		sum = ~ntohs(ip->ip_sum) & 0xffff;	/* ~HC */
+		sum += (~otos & 0xffff) + ip->ip_tos;	/* + ~m + m' */
+		sum = (sum >> 16) + (sum & 0xffff);	/* fold into 16 bits */
+		sum += (sum >> 16);  /* fold the remaining carry back in */
+		ip->ip_sum = htons(~sum & 0xffff);	/* HC' */
+		return (1);
+	}
+#ifdef INET6
+	case (IPV6_VERSION >> 4):
+	{
+		struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
+		u_int32_t flowlabel;
+
+		flowlabel = ntohl(ip6->ip6_flow);
+		if ((flowlabel >> 28) != 6)
+			return (0);	/* top nibble of the flow word is not 6 */
+		if ((flowlabel & (IPTOS_ECN_MASK << 20)) ==
+		    (IPTOS_ECN_NOTECT << 20))
+			return (0);	/* not ECN-capable: header left untouched */
+		if ((flowlabel & (IPTOS_ECN_MASK << 20)) ==
+		    (IPTOS_ECN_CE << 20))
+			return (1);	/* CE already set: nothing to change */
+		/*
+		 * ECN-capable but not marked: set CE and store the word back
+		 */
+		flowlabel |= (IPTOS_ECN_CE << 20);
+		ip6->ip6_flow = htonl(flowlabel);
+		return (1);
+	}
+#endif
+	}
+	return (0);	/* unhandled IP version: nothing marked */
+}
+
+
+/*
  * Enqueue a packet in q, subject to space and queue management policy
  * (whose parameters are in q->fs).
  * Update stats for the queue and the scheduler.
@@ -414,8 +484,13 @@ dn_enqueue(struct dn_queue *q, struct mbuf* m, int drop)
 		goto drop;
 	if (f->plr && random() < f->plr)
 		goto drop;
-	if (f->flags & DN_IS_RED && red_drops(q, m->m_pkthdr.len))
-		goto drop;
+	if (f->flags & DN_IS_RED && red_drops(q, m->m_pkthdr.len)) {
+		if (f->flags & DN_IS_ECN) {
+			if (!ecn_mark(m))
+				goto drop;
+		} else
+			goto drop;
+	}
 	if (f->flags & DN_QSIZE_BYTES) {
 		if (q->ni.len_bytes > f->qsize)
 			goto drop;
diff --git a/sys/netpfil/ipfw/ip_dummynet.c b/sys/netpfil/ipfw/ip_dummynet.c
index 4de2156..accbb07 100644
--- a/sys/netpfil/ipfw/ip_dummynet.c
+++ b/sys/netpfil/ipfw/ip_dummynet.c
@@ -1070,7 +1070,7 @@ config_red(struct dn_fsk *fs)
 	fs->min_th = SCALE(fs->fs.min_th);
 	fs->max_th = SCALE(fs->fs.max_th);
 
-	fs->c_1 = fs->max_p / (fs->fs.max_th - fs->fs.min_th);
+	fs->c_1 = fs->max_p / max(fs->fs.max_th - fs->fs.min_th, 1);
 	fs->c_2 = SCALE_MUL(fs->c_1, SCALE(fs->fs.min_th));
 
 	if (fs->fs.flags & DN_IS_GENTLE_RED) {


More information about the freebsd-net mailing list