svn commit: r205173 - in head: sbin/ipfw sys/netinet sys/netinet/ipfw

Luigi Rizzo luigi at FreeBSD.org
Mon Mar 15 17:14:27 UTC 2010


Author: luigi
Date: Mon Mar 15 17:14:27 2010
New Revision: 205173
URL: http://svn.freebsd.org/changeset/base/205173

Log:
  + implement (two lines) the kernel side of 'lookup dscp N' to use the
    dscp as a search key in table lookups;
  
  + (re)implement a sysctl variable to control the expire frequency of
    pipes and queues when they become empty;
  
  + add 'queue number' as an optional part of the flow_id. This can be
    enabled with the command
  
          queue X config mask queue ...
  
    and makes it possible to support priority-based schedulers, where
    packets should be grouped according to the priority and not some
    fields in the 5-tuple.
    This is implemented as follows:
    - redefine a field in the ipfw_flow_id (in sys/netinet/ip_fw.h) but
      without changing the size or shape of the structure, so there are
      no ABI changes. In passing, also document how other fields are
      used, and remove some useless assignments in ip_fw2.c
  
    - implement small changes in the userland code to set/read the field;
  
    - revise the functions in ip_dummynet.c to manipulate masks so they
      also handle the additional field;
  
  There are no ABI changes in this commit.

Modified:
  head/sbin/ipfw/dummynet.c
  head/sys/netinet/ip_fw.h
  head/sys/netinet/ipfw/ip_dn_io.c
  head/sys/netinet/ipfw/ip_dn_private.h
  head/sys/netinet/ipfw/ip_dummynet.c
  head/sys/netinet/ipfw/ip_fw2.c
  head/sys/netinet/ipfw/ip_fw_dynamic.c
  head/sys/netinet/ipfw/ip_fw_log.c

Modified: head/sbin/ipfw/dummynet.c
==============================================================================
--- head/sbin/ipfw/dummynet.c	Mon Mar 15 16:53:09 2010	(r205172)
+++ head/sbin/ipfw/dummynet.c	Mon Mar 15 17:14:27 2010	(r205173)
@@ -141,7 +141,8 @@ print_mask(struct ipfw_flow_id *id)
 {
 	if (!IS_IP6_FLOW_ID(id)) {
 		printf("    "
-		    "mask: 0x%02x 0x%08x/0x%04x -> 0x%08x/0x%04x\n",
+		    "mask: %s 0x%02x 0x%08x/0x%04x -> 0x%08x/0x%04x\n",
+		    id->extra ? "queue," : "",
 		    id->proto,
 		    id->src_ip, id->src_port,
 		    id->dst_ip, id->dst_port);
@@ -151,7 +152,8 @@ print_mask(struct ipfw_flow_id *id)
 		    "Tot_pkt/bytes Pkt/Byte Drp\n");
 	} else {
 		char buf[255];
-		printf("\n        mask: proto: 0x%02x, flow_id: 0x%08x,  ",
+		printf("\n        mask: %sproto: 0x%02x, flow_id: 0x%08x,  ",
+		    id->extra ? "queue," : "",
 		    id->proto, id->flow_id6);
 		inet_ntop(AF_INET6, &(id->src_ip6), buf, sizeof(buf));
 		printf("%s/0x%04x -> ", buf, id->src_port);
@@ -175,7 +177,8 @@ list_flow(struct dn_flow *ni)
 
 	pe = getprotobynumber(id->proto);
 		/* XXX: Should check for IPv4 flows */
-	printf("%3u ", (ni->oid.id) & 0xff);
+	printf("%3u%c", (ni->oid.id) & 0xff,
+		id->extra ? '*' : ' ');
 	if (!IS_IP6_FLOW_ID(id)) {
 		if (pe)
 			printf("%-4s ", pe->p_name);
@@ -910,6 +913,7 @@ ipfw_config_pipe(int ac, char **av)
 			    case TOK_ALL:
 				    /*
 				     * special case, all bits significant
+				     * except 'extra' (the queue number)
 				     */
 				    mask->dst_ip = ~0;
 				    mask->src_ip = ~0;
@@ -922,6 +926,11 @@ ipfw_config_pipe(int ac, char **av)
 				    *flags |= DN_HAVE_MASK;
 				    goto end_mask;
 
+			    case TOK_QUEUE:
+				    mask->extra = ~0;
+				    *flags |= DN_HAVE_MASK;
+				    goto end_mask;
+
 			    case TOK_DSTIP:
 				    mask->addr_type = 4;
 				    p32 = &mask->dst_ip;
@@ -992,7 +1001,7 @@ ipfw_config_pipe(int ac, char **av)
 				    if (a > 0xFF)
 					    errx(EX_DATAERR,
 						"proto mask must be 8 bit");
-				    fs->flow_mask.proto = (uint8_t)a;
+				    mask->proto = (uint8_t)a;
 			    }
 			    if (a != 0)
 				    *flags |= DN_HAVE_MASK;

Modified: head/sys/netinet/ip_fw.h
==============================================================================
--- head/sys/netinet/ip_fw.h	Mon Mar 15 16:53:09 2010	(r205172)
+++ head/sys/netinet/ip_fw.h	Mon Mar 15 17:14:27 2010	(r205173)
@@ -487,24 +487,27 @@ struct ip_fw {
 #define RULESIZE(rule)  (sizeof(struct ip_fw) + \
 	((struct ip_fw *)(rule))->cmd_len * 4 - 4)
 
-#if 1 // moved to in.h
+#if 1 // should be moved to in.h
 /*
  * This structure is used as a flow mask and a flow id for various
  * parts of the code.
+ * addr_type is used in userland and kernel to mark the address type.
+ * fib is used in the kernel to record the fib in use.
+ * _flags is used in the kernel to store tcp flags for dynamic rules.
  */
 struct ipfw_flow_id {
 	uint32_t	dst_ip;
 	uint32_t	src_ip;
 	uint16_t	dst_port;
 	uint16_t	src_port;
-	uint8_t	fib;
-	uint8_t	proto;
-	uint8_t	flags;	/* protocol-specific flags */
-	uint8_t		addr_type; /* 4 = ipv4, 6 = ipv6, 1=ether ? */
+	uint8_t		fib;
+	uint8_t		proto;
+	uint8_t		_flags;	/* protocol-specific flags */
+	uint8_t		addr_type; /* 4=ip4, 6=ip6, 1=ether ? */
 	struct in6_addr dst_ip6;
 	struct in6_addr src_ip6;
 	uint32_t	flow_id6;
-	uint32_t	frag_id6;
+	uint32_t	extra; /* queue/pipe or frag_id */
 };
 #endif
 

Modified: head/sys/netinet/ipfw/ip_dn_io.c
==============================================================================
--- head/sys/netinet/ipfw/ip_dn_io.c	Mon Mar 15 16:53:09 2010	(r205172)
+++ head/sys/netinet/ipfw/ip_dn_io.c	Mon Mar 15 17:14:27 2010	(r205173)
@@ -113,6 +113,10 @@ SYSCTL_INT(_net_inet_ip_dummynet, OID_AU
     CTLFLAG_RW, &dn_cfg.io_fast, 0, "Enable fast dummynet io.");
 SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, debug,
     CTLFLAG_RW, &dn_cfg.debug, 0, "Dummynet debug level");
+SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, expire,
+    CTLFLAG_RW, &dn_cfg.expire, 0, "Expire empty queues/pipes");
+SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, expire_cycle,
+    CTLFLAG_RD, &dn_cfg.expire_cycle, 0, "Expire cycle for queues/pipes");
 
 /* RED parameters */
 SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, red_lookup_depth,
@@ -546,8 +550,11 @@ dummynet_task(void *context, int pending
 			transmit_event(&q, (struct delay_line *)p, dn_cfg.curr_time);
 		}
 	}
-	dn_drain_scheduler();
-	dn_drain_queue();
+	if (dn_cfg.expire && ++dn_cfg.expire_cycle >= dn_cfg.expire) {
+		dn_cfg.expire_cycle = 0;
+		dn_drain_scheduler();
+		dn_drain_queue();
+	}
 
 	DN_BH_WUNLOCK();
 	dn_reschedule();

Modified: head/sys/netinet/ipfw/ip_dn_private.h
==============================================================================
--- head/sys/netinet/ipfw/ip_dn_private.h	Mon Mar 15 16:53:09 2010	(r205172)
+++ head/sys/netinet/ipfw/ip_dn_private.h	Mon Mar 15 17:14:27 2010	(r205173)
@@ -141,10 +141,14 @@ struct dn_parms {
 	struct dn_alg_head	schedlist;	/* list of algorithms */
 
 	/* Store the fs/sch to scan when draining. The value is the
-	 * bucket number of the hash table 
+	 * bucket number of the hash table. Expire can be disabled
+	 * with net.inet.ip.dummynet.expire=0, or it happens every
+	 * expire ticks.
 	 **/
 	int drain_fs;
 	int drain_sch;
+	uint32_t expire;
+	uint32_t expire_cycle;	/* tick count */
 	
 	/* if the upper half is busy doing something long,
 	 * can set the busy flag and we will enqueue packets in

Modified: head/sys/netinet/ipfw/ip_dummynet.c
==============================================================================
--- head/sys/netinet/ipfw/ip_dummynet.c	Mon Mar 15 16:53:09 2010	(r205172)
+++ head/sys/netinet/ipfw/ip_dummynet.c	Mon Mar 15 17:14:27 2010	(r205173)
@@ -122,6 +122,12 @@ ipdn_bound_var(int *v, int dflt, int lo,
 }
 
 /*---- flow_id mask, hash and compare functions ---*/
+/*
+ * The flow_id includes the 5-tuple, the queue/pipe number
+ * which we store in the extra area in host order,
+ * and for ipv6 also the flow_id6.
+ * XXX see if we want the tos byte (can store in 'flags')
+ */
 static struct ipfw_flow_id *
 flow_id_mask(struct ipfw_flow_id *mask, struct ipfw_flow_id *id)
 {
@@ -130,7 +136,7 @@ flow_id_mask(struct ipfw_flow_id *mask, 
 	id->dst_port &= mask->dst_port;
 	id->src_port &= mask->src_port;
 	id->proto &= mask->proto;
-	id->flags = 0; /* we don't care about this one */
+	id->extra &= mask->extra;
 	if (is_v6) {
 		APPLY_MASK(&id->dst_ip6, &mask->dst_ip6);
 		APPLY_MASK(&id->src_ip6, &mask->src_ip6);
@@ -151,7 +157,7 @@ flow_id_or(struct ipfw_flow_id *src, str
 	dst->dst_port |= src->dst_port;
 	dst->src_port |= src->src_port;
 	dst->proto |= src->proto;
-	dst->flags = 0; /* we don't care about this one */
+	dst->extra |= src->extra;
 	if (is_v6) {
 #define OR_MASK(_d, _s)                          \
     (_d)->__u6_addr.__u6_addr32[0] |= (_s)->__u6_addr.__u6_addr32[0]; \
@@ -172,7 +178,7 @@ flow_id_or(struct ipfw_flow_id *src, str
 static int
 nonzero_mask(struct ipfw_flow_id *m)
 {
-	if (m->dst_port || m->src_port || m->proto)
+	if (m->dst_port || m->src_port || m->proto || m->extra)
 		return 1;
 	if (IS_IP6_FLOW_ID(m)) {
 		return
@@ -208,10 +214,12 @@ flow_id_hash(struct ipfw_flow_id *id)
             (s[0] << 16) ^ (s[1] << 16) ^
             (s[2] << 16) ^ (s[3] << 16) ^
             (id->dst_port << 1) ^ (id->src_port) ^
+	    (id->extra) ^
             (id->proto ) ^ (id->flow_id6);
     } else {
         i = (id->dst_ip)        ^ (id->dst_ip >> 15) ^
             (id->src_ip << 1)   ^ (id->src_ip >> 16) ^
+	    (id->extra) ^
             (id->dst_port << 1) ^ (id->src_port)     ^ (id->proto);
     }
     return i;
@@ -223,29 +231,26 @@ flow_id_cmp(struct ipfw_flow_id *id1, st
 {
 	int is_v6 = IS_IP6_FLOW_ID(id1);
 
-	if (is_v6 != IS_IP6_FLOW_ID(id2))
-		return 1; /* a ipv4 and a ipv6 flow */
-
-	if (!is_v6 && id1->dst_ip == id2->dst_ip &&
-	    id1->src_ip == id2->src_ip &&
-	    id1->dst_port == id2->dst_port &&
-	    id1->src_port == id2->src_port &&
-	    id1->proto == id2->proto &&
-	    id1->flags == id2->flags)
-		return 0;
-	    
-	if (is_v6 &&
+	if (!is_v6) {
+	    if (IS_IP6_FLOW_ID(id2))
+		return 1; /* different address families */
+
+	    return (id1->dst_ip == id2->dst_ip &&
+		    id1->src_ip == id2->src_ip &&
+		    id1->dst_port == id2->dst_port &&
+		    id1->src_port == id2->src_port &&
+		    id1->proto == id2->proto &&
+		    id1->extra == id2->extra) ? 0 : 1;
+	}
+	/* the ipv6 case */
+	return (
 	    !bcmp(&id1->dst_ip6,&id2->dst_ip6, sizeof(id1->dst_ip6)) &&
 	    !bcmp(&id1->src_ip6,&id2->src_ip6, sizeof(id1->src_ip6)) &&
 	    id1->dst_port == id2->dst_port &&
 	    id1->src_port == id2->src_port &&
 	    id1->proto == id2->proto &&
-	    id1->flags == id2->flags &&
-	    id1->flow_id6 == id2->flow_id6)
-		return 0;
-     
-	/* Masks differ */
-	return 1;
+	    id1->extra == id2->extra &&
+	    id1->flow_id6 == id2->flow_id6) ? 0 : 1;
 }
 /*--------- end of flow-id mask, hash and compare ---------*/
 
@@ -2111,10 +2116,13 @@ ip_dn_init(void)
 	if (bootverbose)
 		printf("DUMMYNET with IPv6 initialized (100131)\n");
 
-	/* init defaults here, MSVC does not accept initializers */
+	/* Set defaults here. MSVC does not accept initializers,
+	 * and this is also useful for vimages
+	 */
 	/* queue limits */
 	dn_cfg.slot_limit = 100; /* Foot shooting limit for queues. */
 	dn_cfg.byte_limit = 1024 * 1024;
+	dn_cfg.expire = 1;
 
 	/* RED parameters */
 	dn_cfg.red_lookup_depth = 256;	/* default lookup table depth */

Modified: head/sys/netinet/ipfw/ip_fw2.c
==============================================================================
--- head/sys/netinet/ipfw/ip_fw2.c	Mon Mar 15 16:53:09 2010	(r205172)
+++ head/sys/netinet/ipfw/ip_fw2.c	Mon Mar 15 17:14:27 2010	(r205173)
@@ -886,10 +886,13 @@ ipfw_chk(struct ip_fw_args *args)
 	 * ulp is NULL if not found.
 	 */
 	void *ulp = NULL;		/* upper layer protocol pointer. */
+
 	/* XXX ipv6 variables */
 	int is_ipv6 = 0;
-	u_int16_t ext_hd = 0;	/* bits vector for extension header filtering */
+	uint8_t	icmp6_type = 0;
+	uint16_t ext_hd = 0;	/* bits vector for extension header filtering */
 	/* end of ipv6 variables */
+
 	int is_ipv4 = 0;
 
 	int done = 0;		/* flag to exit the outer loop */
@@ -941,14 +944,15 @@ do {								\
 			switch (proto) {
 			case IPPROTO_ICMPV6:
 				PULLUP_TO(hlen, ulp, struct icmp6_hdr);
-				args->f_id.flags = ICMP6(ulp)->icmp6_type;
+				icmp6_type = ICMP6(ulp)->icmp6_type;
 				break;
 
 			case IPPROTO_TCP:
 				PULLUP_TO(hlen, ulp, struct tcphdr);
 				dst_port = TCP(ulp)->th_dport;
 				src_port = TCP(ulp)->th_sport;
-				args->f_id.flags = TCP(ulp)->th_flags;
+				/* save flags for dynamic rules */
+				args->f_id._flags = TCP(ulp)->th_flags;
 				break;
 
 			case IPPROTO_SCTP:
@@ -1012,7 +1016,7 @@ do {								\
 					    return (IP_FW_DENY);
 					break;
 				}
-				args->f_id.frag_id6 =
+				args->f_id.extra =
 				    ntohl(((struct ip6_frag *)ulp)->ip6f_ident);
 				ulp = NULL;
 				break;
@@ -1115,7 +1119,8 @@ do {								\
 				PULLUP_TO(hlen, ulp, struct tcphdr);
 				dst_port = TCP(ulp)->th_dport;
 				src_port = TCP(ulp)->th_sport;
-				args->f_id.flags = TCP(ulp)->th_flags;
+				/* save flags for dynamic rules */
+				args->f_id._flags = TCP(ulp)->th_flags;
 				break;
 
 			case IPPROTO_UDP:
@@ -1126,7 +1131,7 @@ do {								\
 
 			case IPPROTO_ICMP:
 				PULLUP_TO(hlen, ulp, struct icmphdr);
-				args->f_id.flags = ICMP(ulp)->icmp_type;
+				//args->f_id.flags = ICMP(ulp)->icmp_type;
 				break;
 
 			default:
@@ -1362,6 +1367,8 @@ do {								\
 					    key = dst_ip.s_addr;
 					else if (v == 1)
 					    key = src_ip.s_addr;
+					else if (v == 6) /* dscp */
+					    key = (ip->ip_tos >> 2) & 0x3f;
 					else if (offset != 0)
 					    break;
 					else if (proto != IPPROTO_TCP &&
@@ -2034,7 +2041,7 @@ do {								\
 				if (hlen > 0 && is_ipv6 &&
 				    ((offset & IP6F_OFF_MASK) == 0) &&
 				    (proto != IPPROTO_ICMPV6 ||
-				     (is_icmp6_query(args->f_id.flags) == 1)) &&
+				     (is_icmp6_query(icmp6_type) == 1)) &&
 				    !(m->m_flags & (M_BCAST|M_MCAST)) &&
 				    !IN6_IS_ADDR_MULTICAST(&args->f_id.dst_ip6)) {
 					send_reject6(

Modified: head/sys/netinet/ipfw/ip_fw_dynamic.c
==============================================================================
--- head/sys/netinet/ipfw/ip_fw_dynamic.c	Mon Mar 15 16:53:09 2010	(r205172)
+++ head/sys/netinet/ipfw/ip_fw_dynamic.c	Mon Mar 15 17:14:27 2010	(r205173)
@@ -476,7 +476,7 @@ next:
 		V_ipfw_dyn_v[i] = q;
 	}
 	if (pkt->proto == IPPROTO_TCP) { /* update state according to flags */
-		u_char flags = pkt->flags & (TH_FIN|TH_SYN|TH_RST);
+		u_char flags = pkt->_flags & (TH_FIN|TH_SYN|TH_RST);
 
 #define BOTH_SYN	(TH_SYN | (TH_SYN << 8))
 #define BOTH_FIN	(TH_FIN | (TH_FIN << 8))

Modified: head/sys/netinet/ipfw/ip_fw_log.c
==============================================================================
--- head/sys/netinet/ipfw/ip_fw_log.c	Mon Mar 15 16:53:09 2010	(r205172)
+++ head/sys/netinet/ipfw/ip_fw_log.c	Mon Mar 15 17:14:27 2010	(r205173)
@@ -395,7 +395,7 @@ ipfw_log(struct ip_fw *f, u_int hlen, st
 			if (offset & (IP6F_OFF_MASK | IP6F_MORE_FRAG))
 				snprintf(SNPARGS(fragment, 0),
 				    " (frag %08x:%d@%d%s)",
-				    args->f_id.frag_id6,
+				    args->f_id.extra,
 				    ntohs(ip6->ip6_plen) - hlen,
 				    ntohs(offset & IP6F_OFF_MASK) << 3,
 				    (offset & IP6F_MORE_FRAG) ? "+" : "");


More information about the svn-src-head mailing list