PERFORCE change 140645 for review

John Birrell jb at FreeBSD.org
Fri Apr 25 23:08:10 UTC 2008


http://perforce.freebsd.org/chv.cgi?CH=140645

Change 140645 by jb at freebsd3 on 2008/04/25 23:07:20

	IF6

Affected files ...

.. //depot/projects/dtrace6/src/sbin/ipfw/ipfw.8#4 integrate
.. //depot/projects/dtrace6/src/sbin/ipfw/ipfw2.c#4 integrate
.. //depot/projects/dtrace6/src/sys/net/if_bridge.c#4 integrate
.. //depot/projects/dtrace6/src/sys/net/if_ethersubr.c#2 integrate
.. //depot/projects/dtrace6/src/sys/netinet/ip_dummynet.c#2 integrate
.. //depot/projects/dtrace6/src/sys/netinet/ip_dummynet.h#2 integrate
.. //depot/projects/dtrace6/src/sys/netinet/ip_fw_pfil.c#2 integrate

Differences ...

==== //depot/projects/dtrace6/src/sbin/ipfw/ipfw.8#4 (text+ko) ====

@@ -1,5 +1,5 @@
 .\"
-.\" $FreeBSD: src/sbin/ipfw/ipfw.8,v 1.175.2.14 2008/04/04 18:10:50 julian Exp $
+.\" $FreeBSD: src/sbin/ipfw/ipfw.8,v 1.175.2.15 2008/04/25 10:29:26 oleg Exp $
 .\"
 .Dd May 4, 2007
 .Dt IPFW 8
@@ -1750,6 +1750,16 @@
 TCP connection, or from/to a given host, or entire subnet, or a
 protocol type, etc.
 .Pp
+There are two modes of dummynet operation: normal and fast.
+Normal mode tries to emulate a real link: the dummynet scheduler ensures that a
+packet will not leave the pipe faster than it would on a real link with the given bandwidth.
+Fast mode allows certain packets to bypass the dummynet scheduler (if the packet flow
+does not exceed the pipe's bandwidth). Thus fast mode requires fewer CPU cycles
+per packet (on average), but packet latency can be significantly lower compared
+to a real link with the same bandwidth. The default is normal mode; fast mode can be
+enabled by setting the net.inet.ip.dummynet.io_fast sysctl(8) variable to a non-zero
+value.
+.Pp
 Packets belonging to the same flow are then passed to either of two
 different objects, which implement the traffic regulation:
 .Bl -hang -offset XXXX
@@ -2062,6 +2072,14 @@
 This value is used when no
 .Cm buckets
 option is specified when configuring a pipe/queue.
+.It Em net.inet.ip.dummynet.io_fast : No 0
+If set to a non-zero value, enables the "fast" mode of dummynet operation (see above).
+.It Em net.inet.ip.dummynet.io_pkt
+Number of packets passed to dummynet.
+.It Em net.inet.ip.dummynet.io_pkt_drop
+Number of packets dropped by dummynet.
+.It Em net.inet.ip.dummynet.io_pkt_fast
+Number of packets that bypassed the dummynet scheduler.
 .It Em net.inet.ip.dummynet.max_chain_len : No 16
 Target value for the maximum number of pipes/queues in a hash bucket.
 The product

==== //depot/projects/dtrace6/src/sbin/ipfw/ipfw2.c#4 (text+ko) ====

@@ -17,7 +17,7 @@
  *
  * NEW command line interface for IP firewall facility
  *
- * $FreeBSD: src/sbin/ipfw/ipfw2.c,v 1.76.2.21 2008/04/04 18:10:50 julian Exp $
+ * $FreeBSD: src/sbin/ipfw/ipfw2.c,v 1.76.2.22 2008/04/25 10:35:53 oleg Exp $
  */
 
 #include <sys/param.h>
@@ -3541,7 +3541,7 @@
 		if (p.bandwidth==0) /* this is a WF2Q+ queue */
 			s = 0;
 		else
-			s = ck.hz * avg_pkt_size * 8 / p.bandwidth;
+			s = (double)ck.hz * avg_pkt_size * 8 / p.bandwidth;
 
 		/*
 		 * max idle time (in ticks) before avg queue size becomes 0.
@@ -3554,8 +3554,8 @@
 		if (!p.fs.lookup_step)
 			p.fs.lookup_step = 1;
 		weight = 1 - w_q;
-		for (t = p.fs.lookup_step; t > 0; --t)
-			weight *= weight;
+		for (t = p.fs.lookup_step; t > 1; --t)
+			weight *= 1 - w_q;
 		p.fs.lookup_weight = (int)(weight * (1 << SCALE_RED));
 	}
 	i = do_cmd(IP_DUMMYNET_CONFIGURE, &p, sizeof p);

==== //depot/projects/dtrace6/src/sys/net/if_bridge.c#4 (text+ko) ====

@@ -80,7 +80,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/net/if_bridge.c,v 1.11.2.54 2007/12/21 05:30:15 thompsa Exp $");
+__FBSDID("$FreeBSD: src/sys/net/if_bridge.c,v 1.11.2.55 2008/04/25 10:29:26 oleg Exp $");
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
@@ -2949,7 +2949,7 @@
 			 * packet will return to us via bridge_dummynet().
 			 */
 			args.oif = ifp;
-			ip_dn_io_ptr(*mp, DN_TO_IFB_FWD, &args);
+			ip_dn_io_ptr(mp, DN_TO_IFB_FWD, &args);
 			return (error);
 		}
 

==== //depot/projects/dtrace6/src/sys/net/if_ethersubr.c#2 (text+ko) ====

@@ -27,7 +27,7 @@
  * SUCH DAMAGE.
  *
  *	@(#)if_ethersubr.c	8.1 (Berkeley) 6/10/93
- * $FreeBSD: src/sys/net/if_ethersubr.c,v 1.193.2.15 2007/09/17 17:50:49 julian Exp $
+ * $FreeBSD: src/sys/net/if_ethersubr.c,v 1.193.2.16 2008/04/25 10:29:26 oleg Exp $
  */
 
 #include "opt_atalk.h"
@@ -497,7 +497,7 @@
 			 */
 			*m0 = NULL ;
 		}
-		ip_dn_io_ptr(m, dst ? DN_TO_ETH_OUT: DN_TO_ETH_DEMUX, &args);
+		ip_dn_io_ptr(&m, dst ? DN_TO_ETH_OUT: DN_TO_ETH_DEMUX, &args);
 		return 0;
 	}
 	/*

==== //depot/projects/dtrace6/src/sys/netinet/ip_dummynet.c#2 (text+ko) ====

@@ -24,7 +24,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $FreeBSD: src/sys/netinet/ip_dummynet.c,v 1.93.2.6 2007/03/21 17:25:15 oleg Exp $
+ * $FreeBSD: src/sys/netinet/ip_dummynet.c,v 1.93.2.7 2008/04/25 10:29:26 oleg Exp $
  */
 
 #define	DUMMYNET_DEBUG
@@ -55,6 +55,7 @@
  * include files marked with XXX are probably not needed
  */
 
+#include <sys/limits.h>
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/malloc.h>
@@ -110,6 +111,11 @@
 /* Adjusted vs non-adjusted curr_time difference (ticks). */
 static long tick_diff;
 
+static int		io_fast;
+static unsigned long	io_pkt;
+static unsigned long	io_pkt_fast;
+static unsigned long	io_pkt_drop;
+
 /*
  * Three heaps contain queues and pipes that the scheduler handles:
  *
@@ -181,6 +187,17 @@
 SYSCTL_LONG(_net_inet_ip_dummynet, OID_AUTO, tick_lost,
     CTLFLAG_RD, &tick_lost, 0,
     "Number of ticks coalesced by dummynet taskqueue.");
+SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, io_fast,
+    CTLFLAG_RW, &io_fast, 0, "Enable fast dummynet io.");
+SYSCTL_ULONG(_net_inet_ip_dummynet, OID_AUTO, io_pkt,
+    CTLFLAG_RD, &io_pkt, 0,
+    "Number of packets passed to dummynet.");
+SYSCTL_ULONG(_net_inet_ip_dummynet, OID_AUTO, io_pkt_fast,
+    CTLFLAG_RD, &io_pkt_fast, 0,
+    "Number of packets bypassed dummynet scheduler.");
+SYSCTL_ULONG(_net_inet_ip_dummynet, OID_AUTO, io_pkt_drop,
+    CTLFLAG_RD, &io_pkt_drop, 0,
+    "Number of packets dropped by dummynet.");
 #endif
 
 #ifdef DUMMYNET_DEBUG
@@ -209,15 +226,15 @@
 	NET_ASSERT_GIANT();					\
 } while (0)
 
-static int config_pipe(struct dn_pipe *p);
-static int ip_dn_ctl(struct sockopt *sopt);
+static int	config_pipe(struct dn_pipe *p);
+static int	ip_dn_ctl(struct sockopt *sopt);
 
-static void dummynet(void *);
-static void dummynet_flush(void);
-static void dummynet_send(struct mbuf *);
-void dummynet_drain(void);
+static void	dummynet(void *);
+static void	dummynet_flush(void);
+static void	dummynet_send(struct mbuf *);
+void		dummynet_drain(void);
 static ip_dn_io_t dummynet_io;
-static void dn_rule_delete(void *);
+static void	dn_rule_delete(void *);
 
 /*
  * Heap management functions.
@@ -486,7 +503,7 @@
 	if ((m = pipe->head) != NULL) {
 		pkt = dn_tag_get(m);
 		/*
-		 * XXX: Should check errors on heap_insert, by draining the
+		 * XXX Should check errors on heap_insert, by draining the
 		 * whole pipe p and hoping in the future we are more successful.
 		 */
 		heap_insert(&extract_heap, pkt->output_time, pipe);
@@ -499,8 +516,8 @@
  * either a pipe (WF2Q) or a flow_queue (per-flow queueing)
  */
 #define SET_TICKS(_m, q, p)	\
-    ((_m)->m_pkthdr.len*8*hz - (q)->numbytes + p->bandwidth - 1 ) / \
-	    p->bandwidth ;
+    ((_m)->m_pkthdr.len * 8 * hz - (q)->numbytes + p->bandwidth - 1) / \
+    p->bandwidth;
 
 /*
  * extract pkt from queue, compute output time (could be now)
@@ -536,59 +553,61 @@
 static void
 ready_event(struct dn_flow_queue *q, struct mbuf **head, struct mbuf **tail)
 {
-    struct mbuf *pkt;
-    struct dn_pipe *p = q->fs->pipe ;
-    int p_was_empty ;
+	struct mbuf *pkt;
+	struct dn_pipe *p = q->fs->pipe;
+	int p_was_empty;
+
+	DUMMYNET_LOCK_ASSERT();
+
+	if (p == NULL) {
+		printf("dummynet: ready_event- pipe is gone\n");
+		return;
+	}
+	p_was_empty = (p->head == NULL);
+
+	/*
+	 * Schedule fixed-rate queues linked to this pipe:
+	 * account for the bw accumulated since last scheduling, then
+	 * drain as many pkts as allowed by q->numbytes and move to
+	 * the delay line (in p) computing output time.
+	 * bandwidth==0 (no limit) means we can drain the whole queue,
+	 * setting len_scaled = 0 does the job.
+	 */
+	q->numbytes += (curr_time - q->sched_time) * p->bandwidth;
+	while ((pkt = q->head) != NULL) {
+		int len = pkt->m_pkthdr.len;
+		int len_scaled = p->bandwidth ? len * 8 * hz : 0;
 
-    DUMMYNET_LOCK_ASSERT();
+		if (len_scaled > q->numbytes)
+			break;
+		q->numbytes -= len_scaled;
+		move_pkt(pkt, q, p, len);
+	}
+	/*
+	 * If we have more packets queued, schedule next ready event
+	 * (can only occur when bandwidth != 0, otherwise we would have
+	 * flushed the whole queue in the previous loop).
+	 * To this purpose we record the current time and compute how many
+	 * ticks to go for the finish time of the packet.
+	 */
+	if ((pkt = q->head) != NULL) {	/* this implies bandwidth != 0 */
+		dn_key t = SET_TICKS(pkt, q, p); /* ticks i have to wait */
 
-    if (p == NULL) {
-	printf("dummynet: ready_event- pipe is gone\n");
-	return ;
-    }
-    p_was_empty = (p->head == NULL) ;
+		q->sched_time = curr_time;
+		heap_insert(&ready_heap, curr_time + t, (void *)q);
+		/*
+		 * XXX Should check errors on heap_insert, and drain the whole
+		 * queue on error hoping next time we are luckier.
+		 */
+	} else		/* RED needs to know when the queue becomes empty. */
+		q->q_time = curr_time;
 
-    /*
-     * schedule fixed-rate queues linked to this pipe:
-     * Account for the bw accumulated since last scheduling, then
-     * drain as many pkts as allowed by q->numbytes and move to
-     * the delay line (in p) computing output time.
-     * bandwidth==0 (no limit) means we can drain the whole queue,
-     * setting len_scaled = 0 does the job.
-     */
-    q->numbytes += ( curr_time - q->sched_time ) * p->bandwidth;
-    while ( (pkt = q->head) != NULL ) {
-	int len = pkt->m_pkthdr.len;
-	int len_scaled = p->bandwidth ? len*8*hz : 0 ;
-	if (len_scaled > q->numbytes )
-	    break ;
-	q->numbytes -= len_scaled ;
-	move_pkt(pkt, q, p, len);
-    }
-    /*
-     * If we have more packets queued, schedule next ready event
-     * (can only occur when bandwidth != 0, otherwise we would have
-     * flushed the whole queue in the previous loop).
-     * To this purpose we record the current time and compute how many
-     * ticks to go for the finish time of the packet.
-     */
-    if ( (pkt = q->head) != NULL ) { /* this implies bandwidth != 0 */
-	dn_key t = SET_TICKS(pkt, q, p); /* ticks i have to wait */
-	q->sched_time = curr_time ;
-	heap_insert(&ready_heap, curr_time + t, (void *)q );
-	/* XXX should check errors on heap_insert, and drain the whole
-	 * queue on error hoping next time we are luckier.
+	/*
+	 * If the delay line was empty call transmit_event() now.
+	 * Otherwise, the scheduler will take care of it.
 	 */
-    } else {	/* RED needs to know when the queue becomes empty */
-	q->q_time = curr_time;
-	q->numbytes = 0;
-    }
-    /*
-     * If the delay line was empty call transmit_event() now.
-     * Otherwise, the scheduler will take care of it.
-     */
-    if (p_was_empty)
-	transmit_event(p, head, tail);
+	if (p_was_empty)
+		transmit_event(p, head, tail);
 }
 
 /*
@@ -596,123 +615,147 @@
  * the queues at their start time, and enqueue into the delay line.
  * Packets are drained until p->numbytes < 0. As long as
  * len_scaled >= p->numbytes, the packet goes into the delay line
- * with a deadline p->delay. For the last packet, if p->numbytes<0,
+ * with a deadline p->delay. For the last packet, if p->numbytes < 0,
  * there is an additional delay.
  */
 static void
 ready_event_wfq(struct dn_pipe *p, struct mbuf **head, struct mbuf **tail)
 {
-    int p_was_empty = (p->head == NULL) ;
-    struct dn_heap *sch = &(p->scheduler_heap);
-    struct dn_heap *neh = &(p->not_eligible_heap) ;
+	int p_was_empty = (p->head == NULL);
+	struct dn_heap *sch = &(p->scheduler_heap);
+	struct dn_heap *neh = &(p->not_eligible_heap);
+	int64_t p_numbytes = p->numbytes;
 
-    DUMMYNET_LOCK_ASSERT();
+	DUMMYNET_LOCK_ASSERT();
 
-    if (p->if_name[0] == 0) /* tx clock is simulated */
-	p->numbytes += ( curr_time - p->sched_time ) * p->bandwidth;
-    else { /* tx clock is for real, the ifq must be empty or this is a NOP */
-	if (p->ifp && p->ifp->if_snd.ifq_head != NULL)
-	    return ;
-	else {
-	    DPRINTF(("dummynet: pipe %d ready from %s --\n",
-		p->pipe_nr, p->if_name));
+	if (p->if_name[0] == 0)		/* tx clock is simulated */
+		/*
+		 * Since the result may not fit into p->numbytes (32-bit), we
+		 * use a 64-bit variable here.
+		 */
+		p_numbytes += (curr_time - p->sched_time) * p->bandwidth;
+	else {	/*
+		 * tx clock is for real,
+		 * the ifq must be empty or this is a NOP.
+		 */
+		if (p->ifp && p->ifp->if_snd.ifq_head != NULL)
+			return;
+		else {
+			DPRINTF(("dummynet: pipe %d ready from %s --\n",
+			    p->pipe_nr, p->if_name));
+		}
 	}
-    }
+
+	/*
+	 * While we have backlogged traffic AND credit, we need to do
+	 * something on the queue.
+	 */
+	while (p_numbytes >= 0 && (sch->elements > 0 || neh->elements > 0)) {
+		if (sch->elements > 0) {
+			/* Have some eligible pkts to send out. */
+			struct dn_flow_queue *q = sch->p[0].object;
+			struct mbuf *pkt = q->head;
+			struct dn_flow_set *fs = q->fs;
+			uint64_t len = pkt->m_pkthdr.len;
+			int len_scaled = p->bandwidth ? len * 8 * hz : 0;
 
-    /*
-     * While we have backlogged traffic AND credit, we need to do
-     * something on the queue.
-     */
-    while ( p->numbytes >=0 && (sch->elements>0 || neh->elements >0) ) {
-	if (sch->elements > 0) { /* have some eligible pkts to send out */
-	    struct dn_flow_queue *q = sch->p[0].object ;
-	    struct mbuf *pkt = q->head;
-	    struct dn_flow_set *fs = q->fs;
-	    u_int64_t len = pkt->m_pkthdr.len;
-	    int len_scaled = p->bandwidth ? len*8*hz : 0 ;
+			heap_extract(sch, NULL); /* Remove queue from heap. */
+			p_numbytes -= len_scaled;
+			move_pkt(pkt, q, p, len);
 
-	    heap_extract(sch, NULL); /* remove queue from heap */
-	    p->numbytes -= len_scaled ;
-	    move_pkt(pkt, q, p, len);
+			p->V += (len << MY_M) / p->sum;	/* Update V. */
+			q->S = q->F;			/* Update start time. */
+			if (q->len == 0) {
+				/* Flow not backlogged any more. */
+				fs->backlogged--;
+				heap_insert(&(p->idle_heap), q->F, q);
+			} else {
+				/* Still backlogged. */
 
-	    p->V += (len<<MY_M) / p->sum ; /* update V */
-	    q->S = q->F ; /* update start time */
-	    if (q->len == 0) { /* Flow not backlogged any more */
-		fs->backlogged-- ;
-		heap_insert(&(p->idle_heap), q->F, q);
-	    } else { /* still backlogged */
+				/*
+				 * Update F and position in backlogged queue,
+				 * then put flow in not_eligible_heap
+				 * (we will fix this later).
+				 */
+				len = (q->head)->m_pkthdr.len;
+				q->F += (len << MY_M) / (uint64_t)fs->weight;
+				if (DN_KEY_LEQ(q->S, p->V))
+					heap_insert(neh, q->S, q);
+				else
+					heap_insert(sch, q->F, q);
+			}
+		}
 		/*
-		 * update F and position in backlogged queue, then
-		 * put flow in not_eligible_heap (we will fix this later).
+		 * Now compute V = max(V, min(S_i)). Remember that all elements
+		 * in sch have by definition S_i <= V so if sch is not empty,
+		 * V is surely the max and we must not update it. Conversely,
+		 * if sch is empty we only need to look at neh.
 		 */
-		len = (q->head)->m_pkthdr.len;
-		q->F += (len<<MY_M)/(u_int64_t) fs->weight ;
-		if (DN_KEY_LEQ(q->S, p->V))
-		    heap_insert(neh, q->S, q);
-		else
-		    heap_insert(sch, q->F, q);
-	    }
+		if (sch->elements == 0 && neh->elements > 0)
+			p->V = MAX64(p->V, neh->p[0].key);
+		/* Move from neh to sch any packets that have become eligible */
+		while (neh->elements > 0 && DN_KEY_LEQ(neh->p[0].key, p->V)) {
+			struct dn_flow_queue *q = neh->p[0].object;
+			heap_extract(neh, NULL);
+			heap_insert(sch, q->F, q);
+		}
+
+		if (p->if_name[0] != '\0') { /* Tx clock is from a real thing */
+			p_numbytes = -1;	/* Mark not ready for I/O. */
+			break;
+		}
 	}
-	/*
-	 * now compute V = max(V, min(S_i)). Remember that all elements in sch
-	 * have by definition S_i <= V so if sch is not empty, V is surely
-	 * the max and we must not update it. Conversely, if sch is empty
-	 * we only need to look at neh.
-	 */
-	if (sch->elements == 0 && neh->elements > 0)
-	    p->V = MAX64 ( p->V, neh->p[0].key );
-	/* move from neh to sch any packets that have become eligible */
-	while (neh->elements > 0 && DN_KEY_LEQ(neh->p[0].key, p->V) ) {
-	    struct dn_flow_queue *q = neh->p[0].object ;
-	    heap_extract(neh, NULL);
-	    heap_insert(sch, q->F, q);
-	}
+	if (sch->elements == 0 && neh->elements == 0 && p_numbytes >= 0 &&
+	    p->idle_heap.elements > 0) {
+		/*
+		 * No traffic and no events scheduled.
+		 * We can get rid of idle-heap.
+		 */
+		int i;
+
+		for (i = 0; i < p->idle_heap.elements; i++) {
+			struct dn_flow_queue *q = p->idle_heap.p[i].object;
 
-	if (p->if_name[0] != '\0') {/* tx clock is from a real thing */
-	    p->numbytes = -1 ; /* mark not ready for I/O */
-	    break ;
+			q->F = 0;
+			q->S = q->F + 1;
+		}
+		p->sum = 0;
+		p->V = 0;
+		p->idle_heap.elements = 0;
 	}
-    }
-    if (sch->elements == 0 && neh->elements == 0 && p->numbytes >= 0
-	    && p->idle_heap.elements > 0) {
 	/*
-	 * no traffic and no events scheduled. We can get rid of idle-heap.
+	 * If we are getting clocks from dummynet (not a real interface) and
+	 * If we are under credit, schedule the next ready event.
+	 * Also fix the delivery time of the last packet.
 	 */
-	int i ;
+	if (p->if_name[0]==0 && p_numbytes < 0) { /* This implies bw > 0. */
+		dn_key t = 0;		/* Number of ticks i have to wait. */
 
-	for (i = 0 ; i < p->idle_heap.elements ; i++) {
-	    struct dn_flow_queue *q = p->idle_heap.p[i].object ;
+		if (p->bandwidth > 0)
+			t = (p->bandwidth - 1 - p_numbytes) / p->bandwidth;
+		dn_tag_get(p->tail)->output_time += t;
+		p->sched_time = curr_time;
+		heap_insert(&wfq_ready_heap, curr_time + t, (void *)p);
+		/*
+		 * XXX Should check errors on heap_insert, and drain the whole
+		 * queue on error hoping next time we are luckier.
+		 */
+	}
 
-	    q->F = 0 ;
-	    q->S = q->F + 1 ;
-	}
-	p->sum = 0 ;
-	p->V = 0 ;
-	p->idle_heap.elements = 0 ;
-    }
-    /*
-     * If we are getting clocks from dummynet (not a real interface) and
-     * If we are under credit, schedule the next ready event.
-     * Also fix the delivery time of the last packet.
-     */
-    if (p->if_name[0]==0 && p->numbytes < 0) { /* this implies bandwidth >0 */
-	dn_key t=0 ; /* number of ticks i have to wait */
+	/* Fit (adjust if necessary) 64bit result into 32bit variable. */
+	if (p_numbytes > INT_MAX)
+		p->numbytes = INT_MAX;
+	else if (p_numbytes < INT_MIN)
+		p->numbytes = INT_MIN;
+	else
+		p->numbytes = p_numbytes;
 
-	if (p->bandwidth > 0)
-	    t = ( p->bandwidth -1 - p->numbytes) / p->bandwidth ;
-	dn_tag_get(p->tail)->output_time += t ;
-	p->sched_time = curr_time ;
-	heap_insert(&wfq_ready_heap, curr_time + t, (void *)p);
-	/* XXX should check errors on heap_insert, and drain the whole
-	 * queue on error hoping next time we are luckier.
+	/*
+	 * If the delay line was empty call transmit_event() now.
+	 * Otherwise, the scheduler will take care of it.
 	 */
-    }
-    /*
-     * If the delay line was empty call transmit_event() now.
-     * Otherwise, the scheduler will take care of it.
-     */
-    if (p_was_empty)
-	transmit_event(p, head, tail);
+	if (p_was_empty)
+		transmit_event(p, head, tail);
 }
 
 /*
@@ -956,29 +999,28 @@
 static struct dn_flow_queue *
 create_queue(struct dn_flow_set *fs, int i)
 {
-    struct dn_flow_queue *q ;
+	struct dn_flow_queue *q;
 
-    if (fs->rq_elements > fs->rq_size * dn_max_ratio &&
+	if (fs->rq_elements > fs->rq_size * dn_max_ratio &&
 	    expire_queues(fs) == 0) {
-	/*
-	 * No way to get room, use or create overflow queue.
-	 */
-	i = fs->rq_size ;
-	if ( fs->rq[i] != NULL )
-	    return fs->rq[i] ;
-    }
-    q = malloc(sizeof(*q), M_DUMMYNET, M_NOWAIT | M_ZERO);
-    if (q == NULL) {
-	printf("dummynet: sorry, cannot allocate queue for new flow\n");
-	return NULL ;
-    }
-    q->fs = fs ;
-    q->hash_slot = i ;
-    q->next = fs->rq[i] ;
-    q->S = q->F + 1;   /* hack - mark timestamp as invalid */
-    fs->rq[i] = q ;
-    fs->rq_elements++ ;
-    return q ;
+		/* No way to get room, use or create overflow queue. */
+		i = fs->rq_size;
+		if (fs->rq[i] != NULL)
+		    return fs->rq[i];
+	}
+	q = malloc(sizeof(*q), M_DUMMYNET, M_NOWAIT | M_ZERO);
+	if (q == NULL) {
+		printf("dummynet: sorry, cannot allocate queue for new flow\n");
+		return (NULL);
+	}
+	q->fs = fs;
+	q->hash_slot = i;
+	q->next = fs->rq[i];
+	q->S = q->F + 1;	/* hack - mark timestamp as invalid. */
+	q->numbytes = io_fast ? fs->pipe->bandwidth : 0;
+	fs->rq[i] = q;
+	fs->rq_elements++;
+	return (q);
 }
 
 /*
@@ -1233,185 +1275,201 @@
  *		NULL in ip_input, destination interface in ip_output,
  *		real_dst in bdg_forward
  * rule		matching rule, in case of multiple passes
- *
  */
 static int
-dummynet_io(struct mbuf *m, int dir, struct ip_fw_args *fwa)
+dummynet_io(struct mbuf **m0, int dir, struct ip_fw_args *fwa)
 {
-    struct mbuf *head = NULL, *tail = NULL;
-    struct dn_pkt_tag *pkt;
-    struct m_tag *mtag;
-    struct dn_flow_set *fs = NULL;
-    struct dn_pipe *pipe ;
-    u_int64_t len = m->m_pkthdr.len ;
-    struct dn_flow_queue *q = NULL ;
-    int is_pipe;
-    ipfw_insn *cmd = ACTION_PTR(fwa->rule);
+	struct mbuf *m = *m0, *head = NULL, *tail = NULL;
+	struct dn_pkt_tag *pkt;
+	struct m_tag *mtag;
+	struct dn_flow_set *fs = NULL;
+	struct dn_pipe *pipe;
+	uint64_t len = m->m_pkthdr.len;
+	struct dn_flow_queue *q = NULL;
+	int is_pipe;
+	ipfw_insn *cmd = ACTION_PTR(fwa->rule);
 
-    KASSERT(m->m_nextpkt == NULL,
-	("dummynet_io: mbuf queue passed to dummynet"));
+	KASSERT(m->m_nextpkt == NULL,
+	    ("dummynet_io: mbuf queue passed to dummynet"));
 
-    if (cmd->opcode == O_LOG)
-	cmd += F_LEN(cmd);
-    if (cmd->opcode == O_ALTQ)
-	cmd += F_LEN(cmd);
-    if (cmd->opcode == O_TAG)
-	cmd += F_LEN(cmd);
-    is_pipe = (cmd->opcode == O_PIPE);
+	if (cmd->opcode == O_LOG)
+		cmd += F_LEN(cmd);
+	if (cmd->opcode == O_ALTQ)
+		cmd += F_LEN(cmd);
+	if (cmd->opcode == O_TAG)
+		cmd += F_LEN(cmd);
+	is_pipe = (cmd->opcode == O_PIPE);
 
-    DUMMYNET_LOCK();
-    /*
-     * This is a dummynet rule, so we expect an O_PIPE or O_QUEUE rule.
-     *
-     * XXXGL: probably the pipe->fs and fs->pipe logic here
-     * below can be simplified.
-     */
-    if (is_pipe) {
-	pipe = locate_pipe(fwa->cookie);
-	if (pipe != NULL)
-		fs = &(pipe->fs);
-    } else
-	fs = locate_flowset(fwa->cookie);
+	DUMMYNET_LOCK();
+	io_pkt++;
+	/*
+	 * This is a dummynet rule, so we expect an O_PIPE or O_QUEUE rule.
+	 *
+	 * XXXGL: probably the pipe->fs and fs->pipe logic here
+	 * below can be simplified.
+	 */
+	if (is_pipe) {
+		pipe = locate_pipe(fwa->cookie);
+		if (pipe != NULL)
+			fs = &(pipe->fs);
+	} else
+		fs = locate_flowset(fwa->cookie);
 
-    if (fs == NULL)
-	goto dropit;	/* This queue/pipe does not exist! */
-    pipe = fs->pipe;
-    if (pipe == NULL) { /* Must be a queue, try find a matching pipe. */
-	pipe = locate_pipe(fs->parent_nr);
-	if (pipe != NULL)
-	    fs->pipe = pipe;
-	else {
-	    printf("dummynet: no pipe %d for queue %d, drop pkt\n",
-		fs->parent_nr, fs->fs_nr);
-	    goto dropit ;
+	if (fs == NULL)
+		goto dropit;	/* This queue/pipe does not exist! */
+	pipe = fs->pipe;
+	if (pipe == NULL) {	/* Must be a queue, try find a matching pipe. */
+		pipe = locate_pipe(fs->parent_nr);
+		if (pipe != NULL)
+			fs->pipe = pipe;
+		else {
+			printf("dummynet: no pipe %d for queue %d, drop pkt\n",
+			    fs->parent_nr, fs->fs_nr);
+			goto dropit;
+		}
 	}
-    }
-    q = find_queue(fs, &(fwa->f_id));
-    if ( q == NULL )
-	goto dropit ;		/* cannot allocate queue		*/
-    /*
-     * update statistics, then check reasons to drop pkt
-     */
-    q->tot_bytes += len ;
-    q->tot_pkts++ ;
-    if ( fs->plr && random() < fs->plr )
-	goto dropit ;		/* random pkt drop			*/
-    if ( fs->flags_fs & DN_QSIZE_IS_BYTES) {
-    	if (q->len_bytes > fs->qsize)
-	    goto dropit ;	/* queue size overflow			*/
-    } else {
-	if (q->len >= fs->qsize)
-	    goto dropit ;	/* queue count overflow			*/
-    }
-    if ( fs->flags_fs & DN_IS_RED && red_drops(fs, q, len) )
-	goto dropit ;
+	q = find_queue(fs, &(fwa->f_id));
+	if (q == NULL)
+		goto dropit;		/* Cannot allocate queue. */
 
-    /* XXX expensive to zero, see if we can remove it*/
-    mtag = m_tag_get(PACKET_TAG_DUMMYNET,
-		sizeof(struct dn_pkt_tag), M_NOWAIT|M_ZERO);
-    if ( mtag == NULL )
-	goto dropit ;		/* cannot allocate packet header	*/
-    m_tag_prepend(m, mtag);	/* attach to mbuf chain */
+	/* Update statistics, then check reasons to drop pkt. */
+	q->tot_bytes += len;
+	q->tot_pkts++;
+	if (fs->plr && random() < fs->plr)
+		goto dropit;		/* Random pkt drop. */
+	if (fs->flags_fs & DN_QSIZE_IS_BYTES) {
+		if (q->len_bytes > fs->qsize)
+			goto dropit;	/* Queue size overflow. */
+	} else {
+		if (q->len >= fs->qsize)
+			goto dropit;	/* Queue count overflow. */
+	}
+	if (fs->flags_fs & DN_IS_RED && red_drops(fs, q, len))
+		goto dropit;
 
-    pkt = (struct dn_pkt_tag *)(mtag+1);
-    /* ok, i can handle the pkt now... */
-    /* build and enqueue packet + parameters */
-    pkt->rule = fwa->rule ;
-    pkt->dn_dir = dir ;
+	/* XXX expensive to zero, see if we can remove it. */
+	mtag = m_tag_get(PACKET_TAG_DUMMYNET,
+	    sizeof(struct dn_pkt_tag), M_NOWAIT | M_ZERO);
+	if (mtag == NULL)
+		goto dropit;		/* Cannot allocate packet header. */
+	m_tag_prepend(m, mtag);		/* Attach to mbuf chain. */
 
-    pkt->ifp = fwa->oif;
+	pkt = (struct dn_pkt_tag *)(mtag + 1);
+	/*
+	 * Ok, i can handle the pkt now...
+	 * Build and enqueue packet + parameters.
+	 */
+	pkt->rule = fwa->rule;
+	pkt->dn_dir = dir;
 
-    if (q->head == NULL)
-	q->head = m;
-    else
-	q->tail->m_nextpkt = m;
-    q->tail = m;
-    q->len++;
-    q->len_bytes += len ;
+	pkt->ifp = fwa->oif;
 
-    if ( q->head != m )		/* flow was not idle, we are done */
-	goto done;
-    /*
-     * If we reach this point the flow was previously idle, so we need
-     * to schedule it. This involves different actions for fixed-rate or
-     * WF2Q queues.
-     */
-    if (is_pipe) {
-	/*
-	 * Fixed-rate queue: just insert into the ready_heap.
-	 */
-	dn_key t = 0 ;
-	if (pipe->bandwidth)
-	    t = SET_TICKS(m, q, pipe);
-	q->sched_time = curr_time ;
-	if (t == 0)	/* must process it now */
-	    ready_event(q, &head, &tail);
+	if (q->head == NULL)
+		q->head = m;
 	else
-	    heap_insert(&ready_heap, curr_time + t , q );
-    } else {
+		q->tail->m_nextpkt = m;
+	q->tail = m;
+	q->len++;
+	q->len_bytes += len;
+
+	if (q->head != m)		/* Flow was not idle, we are done. */
+		goto done;
+
+	if (q->q_time < curr_time)
+		q->numbytes = io_fast ? fs->pipe->bandwidth : 0;
+	q->q_time = curr_time;
+
 	/*
-	 * WF2Q. First, compute start time S: if the flow was idle (S=F+1)
-	 * set S to the virtual time V for the controlling pipe, and update
-	 * the sum of weights for the pipe; otherwise, remove flow from
-	 * idle_heap and set S to max(F,V).
-	 * Second, compute finish time F = S + len/weight.
-	 * Third, if pipe was idle, update V=max(S, V).
-	 * Fourth, count one more backlogged flow.
+	 * If we reach this point the flow was previously idle, so we need
+	 * to schedule it. This involves different actions for fixed-rate or
+	 * WF2Q queues.
 	 */
-	if (DN_KEY_GT(q->S, q->F)) { /* means timestamps are invalid */
-	    q->S = pipe->V ;
-	    pipe->sum += fs->weight ; /* add weight of new queue */
+	if (is_pipe) {
+		/* Fixed-rate queue: just insert into the ready_heap. */
+		dn_key t = 0;
+
+		if (pipe->bandwidth && m->m_pkthdr.len * 8 * hz > q->numbytes)
+			t = SET_TICKS(m, q, pipe);
+		q->sched_time = curr_time;
+		if (t == 0)		/* Must process it now. */
+			ready_event(q, &head, &tail);
+		else
+			heap_insert(&ready_heap, curr_time + t , q);
 	} else {
-	    heap_extract(&(pipe->idle_heap), q);
-	    q->S = MAX64(q->F, pipe->V ) ;
-	}
-	q->F = q->S + ( len<<MY_M )/(u_int64_t) fs->weight;
+		/*
+		 * WF2Q. First, compute start time S: if the flow was
+		 * idle (S = F + 1) set S to the virtual time V for the
+		 * controlling pipe, and update the sum of weights for the pipe;
+		 * otherwise, remove flow from idle_heap and set S to max(F,V).
+		 * Second, compute finish time F = S + len / weight.
+		 * Third, if pipe was idle, update V = max(S, V).
+		 * Fourth, count one more backlogged flow.
+		 */
+		if (DN_KEY_GT(q->S, q->F)) { /* Means timestamps are invalid. */
+			q->S = pipe->V;
+			pipe->sum += fs->weight; /* Add weight of new queue. */
+		} else {
+			heap_extract(&(pipe->idle_heap), q);
+			q->S = MAX64(q->F, pipe->V);
+		}
+		q->F = q->S + (len << MY_M) / (uint64_t)fs->weight;
 
-	if (pipe->not_eligible_heap.elements == 0 &&
-		pipe->scheduler_heap.elements == 0)
-	    pipe->V = MAX64 ( q->S, pipe->V );
-	fs->backlogged++ ;
-	/*
-	 * Look at eligibility. A flow is not eligibile if S>V (when
-	 * this happens, it means that there is some other flow already
-	 * scheduled for the same pipe, so the scheduler_heap cannot be
-	 * empty). If the flow is not eligible we just store it in the
-	 * not_eligible_heap. Otherwise, we store in the scheduler_heap
-	 * and possibly invoke ready_event_wfq() right now if there is
-	 * leftover credit.
-	 * Note that for all flows in scheduler_heap (SCH), S_i <= V,
-	 * and for all flows in not_eligible_heap (NEH), S_i > V .
-	 * So when we need to compute max( V, min(S_i) ) forall i in SCH+NEH,
-	 * we only need to look into NEH.
-	 */
-	if (DN_KEY_GT(q->S, pipe->V) ) { /* not eligible */
-	    if (pipe->scheduler_heap.elements == 0)
-		printf("dummynet: ++ ouch! not eligible but empty scheduler!\n");
-	    heap_insert(&(pipe->not_eligible_heap), q->S, q);
-	} else {
-	    heap_insert(&(pipe->scheduler_heap), q->F, q);
-	    if (pipe->numbytes >= 0) { /* pipe is idle */
-		if (pipe->scheduler_heap.elements != 1)
-		    printf("dummynet: OUCH! pipe should have been idle!\n");
-		DPRINTF(("dummynet: waking up pipe %d at %d\n",
-			pipe->pipe_nr, (int)(q->F >> MY_M)));
-		pipe->sched_time = curr_time ;
-		ready_event_wfq(pipe, &head, &tail);
-	    }
+		if (pipe->not_eligible_heap.elements == 0 &&
+		    pipe->scheduler_heap.elements == 0)
+			pipe->V = MAX64(q->S, pipe->V);
+		fs->backlogged++;
+		/*
+		 * Look at eligibility. A flow is not eligible if S>V (when
+		 * this happens, it means that there is some other flow already
+		 * scheduled for the same pipe, so the scheduler_heap cannot be
+		 * empty). If the flow is not eligible we just store it in the
+		 * not_eligible_heap. Otherwise, we store in the scheduler_heap
+		 * and possibly invoke ready_event_wfq() right now if there is
+		 * leftover credit.
+		 * Note that for all flows in scheduler_heap (SCH), S_i <= V,
+		 * and for all flows in not_eligible_heap (NEH), S_i > V.
+		 * So when we need to compute max(V, min(S_i)) forall i in
+		 * SCH+NEH, we only need to look into NEH.
+		 */
+		if (DN_KEY_GT(q->S, pipe->V)) {		/* Not eligible. */
+			if (pipe->scheduler_heap.elements == 0)
+				printf("dummynet: ++ ouch! not eligible but empty scheduler!\n");
+			heap_insert(&(pipe->not_eligible_heap), q->S, q);
+		} else {
+			heap_insert(&(pipe->scheduler_heap), q->F, q);
+			if (pipe->numbytes >= 0) {	 /* Pipe is idle. */
+				if (pipe->scheduler_heap.elements != 1)
+					printf("dummynet: OUCH! pipe should have been idle!\n");
+				DPRINTF(("dummynet: waking up pipe %d at %d\n",
+				    pipe->pipe_nr, (int)(q->F >> MY_M)));
+				pipe->sched_time = curr_time;
+				ready_event_wfq(pipe, &head, &tail);
+			}
+		}
 	}
-    }
 done:
-    DUMMYNET_UNLOCK();
-    if (head != NULL)
-	dummynet_send(head);
-    return 0;
+	if (head == m && dir != DN_TO_IFB_FWD && dir != DN_TO_ETH_DEMUX &&
+	    dir != DN_TO_ETH_OUT) {	/* Fast io. */
+		io_pkt_fast++;
+		if (m->m_nextpkt != NULL)
+			printf("dummynet: fast io: pkt chain detected!\n");
+		head = m->m_nextpkt = NULL;
+	} else
+		*m0 = NULL;		/* Normal io. */
+
+	DUMMYNET_UNLOCK();
+	if (head != NULL)
+		dummynet_send(head);
+	return (0);
 
 dropit:
-    if (q)
-	q->drops++ ;
-    DUMMYNET_UNLOCK();
-    m_freem(m);
-    return ( (fs && (fs->flags_fs & DN_NOERROR)) ? 0 : ENOBUFS);
+	io_pkt_drop++;
+	if (q)
+		q->drops++;
+	DUMMYNET_UNLOCK();
+	m_freem(m);
+	*m0 = NULL;
+	return ((fs && (fs->flags_fs & DN_NOERROR)) ? 0 : ENOBUFS);
 }
 
 /*
@@ -1729,7 +1787,7 @@
 			/* Flush accumulated credit for all queues. */
 			for (i = 0; i <= pipe->fs.rq_size; i++)
 				for (q = pipe->fs.rq[i]; q; q = q->next)
-					q->numbytes = 0;
+					q->numbytes = io_fast ? p->bandwidth : 0;
 
 		pipe->bandwidth = p->bandwidth;
 		pipe->numbytes = 0;		/* just in case... */

==== //depot/projects/dtrace6/src/sys/netinet/ip_dummynet.h#2 (text+ko) ====

@@ -24,7 +24,7 @@

>>> TRUNCATED FOR MAIL (1000 lines) <<<


More information about the p4-projects mailing list