kern/121955: dummynet panics after 6.2

Oleg Bulyzhin oleg at FreeBSD.org
Thu Apr 3 14:30:03 UTC 2008


The following reply was made to PR kern/121955; it has been noted by GNATS.

From: Oleg Bulyzhin <oleg at FreeBSD.org>
To: bug-followup at FreeBSD.org
Cc:  
Subject: Re: kern/121955: dummynet panics after 6.2
Date: Wed, 2 Apr 2008 20:47:47 +0400

 
 
 Please test the attached patch and let me know if it changes anything for you.
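 
 For reference, a rough sketch of how the patch could be applied and a test
 kernel rebuilt (assuming a source tree under /usr/src that matches the
 revisions in the diff; adjust the path and kernel config name for your
 machine):
 
     cd /usr/src
     patch -p0 < dummynet_iofast.diff
     make buildkernel KERNCONF=GENERIC
     make installkernel KERNCONF=GENERIC
     # then reboot into the new kernel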
 
 -- 
 Oleg.
 
 ================================================================
 === Oleg Bulyzhin -- OBUL-RIPN -- OBUL-RIPE -- oleg at rinet.ru ===
 ================================================================
 
 
  [Attachment: dummynet_iofast.diff (text/x-diff)]
 
 Index: sys/netinet/ip_dummynet.h
 ===================================================================
 RCS file: /home/ncvs/src/sys/netinet/ip_dummynet.h,v
 retrieving revision 1.40
 diff -u -r1.40 ip_dummynet.h
 --- sys/netinet/ip_dummynet.h	17 Jun 2007 00:33:34 -0000	1.40
 +++ sys/netinet/ip_dummynet.h	27 Mar 2008 17:19:00 -0000
 @@ -343,7 +343,7 @@
  #ifdef _KERNEL
  typedef	int ip_dn_ctl_t(struct sockopt *); /* raw_ip.c */
  typedef	void ip_dn_ruledel_t(void *); /* ip_fw.c */
 -typedef	int ip_dn_io_t(struct mbuf *m, int dir, struct ip_fw_args *fwa);
 +typedef	int ip_dn_io_t(struct mbuf **m, int dir, struct ip_fw_args *fwa);
  extern	ip_dn_ctl_t *ip_dn_ctl_ptr;
  extern	ip_dn_ruledel_t *ip_dn_ruledel_ptr;
  extern	ip_dn_io_t *ip_dn_io_ptr;
 Index: sys/netinet/ip_dummynet.c
 ===================================================================
 RCS file: /home/ncvs/src/sys/netinet/ip_dummynet.c,v
 retrieving revision 1.110
 diff -u -r1.110 ip_dummynet.c
 --- sys/netinet/ip_dummynet.c	7 Oct 2007 20:44:22 -0000	1.110
 +++ sys/netinet/ip_dummynet.c	27 Mar 2008 17:19:03 -0000
 @@ -56,6 +56,7 @@
   * include files marked with XXX are probably not needed
   */
  
 +#include <sys/limits.h>
  #include <sys/param.h>
  #include <sys/systm.h>
  #include <sys/malloc.h>
 @@ -110,6 +111,11 @@
  /* Adjusted vs non-adjusted curr_time difference (ticks). */
  static long tick_diff;
  
 +static int		io_fast;
 +static unsigned long	io_pkt;
 +static unsigned long	io_pkt_fast;
 +static unsigned long	io_pkt_drop;
 +
  /*
   * Three heaps contain queues and pipes that the scheduler handles:
   *
 @@ -181,6 +187,17 @@
  SYSCTL_LONG(_net_inet_ip_dummynet, OID_AUTO, tick_lost,
      CTLFLAG_RD, &tick_lost, 0,
      "Number of ticks coalesced by dummynet taskqueue.");
 +SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, io_fast,
 +    CTLFLAG_RW, &io_fast, 0, "Enable fast dummynet io.");
 +SYSCTL_ULONG(_net_inet_ip_dummynet, OID_AUTO, io_pkt,
 +    CTLFLAG_RD, &io_pkt, 0,
 +    "Number of packets passed to dummynet.");
 +SYSCTL_ULONG(_net_inet_ip_dummynet, OID_AUTO, io_pkt_fast,
 +    CTLFLAG_RD, &io_pkt_fast, 0,
 +    "Number of packets bypassed dummynet scheduler.");
 +SYSCTL_ULONG(_net_inet_ip_dummynet, OID_AUTO, io_pkt_drop,
 +    CTLFLAG_RD, &io_pkt_drop, 0,
 +    "Number of packets dropped by dummynet.");
  #endif
  
  #ifdef DUMMYNET_DEBUG
 @@ -206,15 +223,15 @@
  #define	DUMMYNET_UNLOCK()	mtx_unlock(&dummynet_mtx)
  #define	DUMMYNET_LOCK_ASSERT()	mtx_assert(&dummynet_mtx, MA_OWNED)
  
 -static int config_pipe(struct dn_pipe *p);
 -static int ip_dn_ctl(struct sockopt *sopt);
 +static int	config_pipe(struct dn_pipe *p);
 +static int	ip_dn_ctl(struct sockopt *sopt);
  
 -static void dummynet(void *);
 -static void dummynet_flush(void);
 -static void dummynet_send(struct mbuf *);
 -void dummynet_drain(void);
 +static void	dummynet(void *);
 +static void	dummynet_flush(void);
 +static void	dummynet_send(struct mbuf *);
 +void		dummynet_drain(void);
  static ip_dn_io_t dummynet_io;
 -static void dn_rule_delete(void *);
 +static void	dn_rule_delete(void *);
  
  /*
   * Heap management functions.
 @@ -483,7 +500,7 @@
  	if ((m = pipe->head) != NULL) {
  		pkt = dn_tag_get(m);
  		/*
 -		 * XXX: Should check errors on heap_insert, by draining the
 +		 * XXX Should check errors on heap_insert, by draining the
  		 * whole pipe p and hoping in the future we are more successful.
  		 */
  		heap_insert(&extract_heap, pkt->output_time, pipe);
 @@ -496,8 +513,8 @@
   * either a pipe (WF2Q) or a flow_queue (per-flow queueing)
   */
  #define SET_TICKS(_m, q, p)	\
 -    ((_m)->m_pkthdr.len*8*hz - (q)->numbytes + p->bandwidth - 1 ) / \
 -	    p->bandwidth ;
 +    ((_m)->m_pkthdr.len * 8 * hz - (q)->numbytes + p->bandwidth - 1) / \
 +    p->bandwidth;
  
  /*
   * extract pkt from queue, compute output time (could be now)
 @@ -533,59 +550,61 @@
  static void
  ready_event(struct dn_flow_queue *q, struct mbuf **head, struct mbuf **tail)
  {
 -    struct mbuf *pkt;
 -    struct dn_pipe *p = q->fs->pipe ;
 -    int p_was_empty ;
 +	struct mbuf *pkt;
 +	struct dn_pipe *p = q->fs->pipe;
 +	int p_was_empty;
  
 -    DUMMYNET_LOCK_ASSERT();
 +	DUMMYNET_LOCK_ASSERT();
  
 -    if (p == NULL) {
 -	printf("dummynet: ready_event- pipe is gone\n");
 -	return ;
 -    }
 -    p_was_empty = (p->head == NULL) ;
 +	if (p == NULL) {
 +		printf("dummynet: ready_event- pipe is gone\n");
 +		return;
 +	}
 +	p_was_empty = (p->head == NULL);
  
 -    /*
 -     * schedule fixed-rate queues linked to this pipe:
 -     * Account for the bw accumulated since last scheduling, then
 -     * drain as many pkts as allowed by q->numbytes and move to
 -     * the delay line (in p) computing output time.
 -     * bandwidth==0 (no limit) means we can drain the whole queue,
 -     * setting len_scaled = 0 does the job.
 -     */
 -    q->numbytes += ( curr_time - q->sched_time ) * p->bandwidth;
 -    while ( (pkt = q->head) != NULL ) {
 -	int len = pkt->m_pkthdr.len;
 -	int len_scaled = p->bandwidth ? len*8*hz : 0 ;
 -	if (len_scaled > q->numbytes )
 -	    break ;
 -	q->numbytes -= len_scaled ;
 -	move_pkt(pkt, q, p, len);
 -    }
 -    /*
 -     * If we have more packets queued, schedule next ready event
 -     * (can only occur when bandwidth != 0, otherwise we would have
 -     * flushed the whole queue in the previous loop).
 -     * To this purpose we record the current time and compute how many
 -     * ticks to go for the finish time of the packet.
 -     */
 -    if ( (pkt = q->head) != NULL ) { /* this implies bandwidth != 0 */
 -	dn_key t = SET_TICKS(pkt, q, p); /* ticks i have to wait */
 -	q->sched_time = curr_time ;
 -	heap_insert(&ready_heap, curr_time + t, (void *)q );
 -	/* XXX should check errors on heap_insert, and drain the whole
 -	 * queue on error hoping next time we are luckier.
 +	/*
 +	 * Schedule fixed-rate queues linked to this pipe:
 +	 * account for the bw accumulated since last scheduling, then
 +	 * drain as many pkts as allowed by q->numbytes and move to
 +	 * the delay line (in p) computing output time.
 +	 * bandwidth==0 (no limit) means we can drain the whole queue,
 +	 * setting len_scaled = 0 does the job.
  	 */
 -    } else {	/* RED needs to know when the queue becomes empty */
 -	q->q_time = curr_time;
 -	q->numbytes = 0;
 -    }
 -    /*
 -     * If the delay line was empty call transmit_event() now.
 -     * Otherwise, the scheduler will take care of it.
 -     */
 -    if (p_was_empty)
 -	transmit_event(p, head, tail);
 +	q->numbytes += (curr_time - q->sched_time) * p->bandwidth;
 +	while ((pkt = q->head) != NULL) {
 +		int len = pkt->m_pkthdr.len;
 +		int len_scaled = p->bandwidth ? len * 8 * hz : 0;
 +
 +		if (len_scaled > q->numbytes)
 +			break;
 +		q->numbytes -= len_scaled;
 +		move_pkt(pkt, q, p, len);
 +	}
 +	/*
 +	 * If we have more packets queued, schedule next ready event
 +	 * (can only occur when bandwidth != 0, otherwise we would have
 +	 * flushed the whole queue in the previous loop).
 +	 * To this purpose we record the current time and compute how many
 +	 * ticks to go for the finish time of the packet.
 +	 */
 +	if ((pkt = q->head) != NULL) {	/* this implies bandwidth != 0 */
 +		dn_key t = SET_TICKS(pkt, q, p); /* ticks i have to wait */
 +
 +		q->sched_time = curr_time;
 +		heap_insert(&ready_heap, curr_time + t, (void *)q);
 +		/*
 +		 * XXX Should check errors on heap_insert, and drain the whole
 +		 * queue on error hoping next time we are luckier.
 +		 */
 +	} else		/* RED needs to know when the queue becomes empty. */
 +		q->q_time = curr_time;
 +
 +	/*
 +	 * If the delay line was empty call transmit_event() now.
 +	 * Otherwise, the scheduler will take care of it.
 +	 */
 +	if (p_was_empty)
 +		transmit_event(p, head, tail);
  }
  
  /*
 @@ -593,123 +612,147 @@
   * the queues at their start time, and enqueue into the delay line.
   * Packets are drained until p->numbytes < 0. As long as
   * len_scaled >= p->numbytes, the packet goes into the delay line
 - * with a deadline p->delay. For the last packet, if p->numbytes<0,
 + * with a deadline p->delay. For the last packet, if p->numbytes < 0,
   * there is an additional delay.
   */
  static void
  ready_event_wfq(struct dn_pipe *p, struct mbuf **head, struct mbuf **tail)
  {
 -    int p_was_empty = (p->head == NULL) ;
 -    struct dn_heap *sch = &(p->scheduler_heap);
 -    struct dn_heap *neh = &(p->not_eligible_heap) ;
 +	int p_was_empty = (p->head == NULL);
 +	struct dn_heap *sch = &(p->scheduler_heap);
 +	struct dn_heap *neh = &(p->not_eligible_heap);
 +	int64_t p_numbytes = p->numbytes;
  
 -    DUMMYNET_LOCK_ASSERT();
 -
 -    if (p->if_name[0] == 0) /* tx clock is simulated */
 -	p->numbytes += ( curr_time - p->sched_time ) * p->bandwidth;
 -    else { /* tx clock is for real, the ifq must be empty or this is a NOP */
 -	if (p->ifp && p->ifp->if_snd.ifq_head != NULL)
 -	    return ;
 -	else {
 -	    DPRINTF(("dummynet: pipe %d ready from %s --\n",
 -		p->pipe_nr, p->if_name));
 -	}
 -    }
 +	DUMMYNET_LOCK_ASSERT();
  
 -    /*
 -     * While we have backlogged traffic AND credit, we need to do
 -     * something on the queue.
 -     */
 -    while ( p->numbytes >=0 && (sch->elements>0 || neh->elements >0) ) {
 -	if (sch->elements > 0) { /* have some eligible pkts to send out */
 -	    struct dn_flow_queue *q = sch->p[0].object ;
 -	    struct mbuf *pkt = q->head;
 -	    struct dn_flow_set *fs = q->fs;
 -	    u_int64_t len = pkt->m_pkthdr.len;
 -	    int len_scaled = p->bandwidth ? len*8*hz : 0 ;
 -
 -	    heap_extract(sch, NULL); /* remove queue from heap */
 -	    p->numbytes -= len_scaled ;
 -	    move_pkt(pkt, q, p, len);
 -
 -	    p->V += (len<<MY_M) / p->sum ; /* update V */
 -	    q->S = q->F ; /* update start time */
 -	    if (q->len == 0) { /* Flow not backlogged any more */
 -		fs->backlogged-- ;
 -		heap_insert(&(p->idle_heap), q->F, q);
 -	    } else { /* still backlogged */
 +	if (p->if_name[0] == 0)		/* tx clock is simulated */
  		/*
 -		 * update F and position in backlogged queue, then
 -		 * put flow in not_eligible_heap (we will fix this later).
 +		 * Since result may not fit into p->numbytes (32bit) we
 +		 * are using 64bit var here.
  		 */
 -		len = (q->head)->m_pkthdr.len;
 -		q->F += (len<<MY_M)/(u_int64_t) fs->weight ;
 -		if (DN_KEY_LEQ(q->S, p->V))
 -		    heap_insert(neh, q->S, q);
 -		else
 -		    heap_insert(sch, q->F, q);
 -	    }
 +		p_numbytes += (curr_time - p->sched_time) * p->bandwidth;
 +	else {	/*
 +		 * tx clock is for real,
 +		 * the ifq must be empty or this is a NOP.
 +		 */
 +		if (p->ifp && p->ifp->if_snd.ifq_head != NULL)
 +			return;
 +		else {
 +			DPRINTF(("dummynet: pipe %d ready from %s --\n",
 +			    p->pipe_nr, p->if_name));
 +		}
  	}
 +
  	/*
 -	 * now compute V = max(V, min(S_i)). Remember that all elements in sch
 -	 * have by definition S_i <= V so if sch is not empty, V is surely
 -	 * the max and we must not update it. Conversely, if sch is empty
 -	 * we only need to look at neh.
 +	 * While we have backlogged traffic AND credit, we need to do
 +	 * something on the queue.
  	 */
 -	if (sch->elements == 0 && neh->elements > 0)
 -	    p->V = MAX64 ( p->V, neh->p[0].key );
 -	/* move from neh to sch any packets that have become eligible */
 -	while (neh->elements > 0 && DN_KEY_LEQ(neh->p[0].key, p->V) ) {
 -	    struct dn_flow_queue *q = neh->p[0].object ;
 -	    heap_extract(neh, NULL);
 -	    heap_insert(sch, q->F, q);
 +	while (p_numbytes >= 0 && (sch->elements > 0 || neh->elements > 0)) {
 +		if (sch->elements > 0) {
 +			/* Have some eligible pkts to send out. */
 +			struct dn_flow_queue *q = sch->p[0].object;
 +			struct mbuf *pkt = q->head;
 +			struct dn_flow_set *fs = q->fs;
 +			uint64_t len = pkt->m_pkthdr.len;
 +			int len_scaled = p->bandwidth ? len * 8 * hz : 0;
 +
 +			heap_extract(sch, NULL); /* Remove queue from heap. */
 +			p_numbytes -= len_scaled;
 +			move_pkt(pkt, q, p, len);
 +
 +			p->V += (len << MY_M) / p->sum;	/* Update V. */
 +			q->S = q->F;			/* Update start time. */
 +			if (q->len == 0) {
 +				/* Flow not backlogged any more. */
 +				fs->backlogged--;
 +				heap_insert(&(p->idle_heap), q->F, q);
 +			} else {
 +				/* Still backlogged. */
 +
 +				/*
 +				 * Update F and position in backlogged queue,
 +				 * then put flow in not_eligible_heap
 +				 * (we will fix this later).
 +				 */
 +				len = (q->head)->m_pkthdr.len;
 +				q->F += (len << MY_M) / (uint64_t)fs->weight;
 +				if (DN_KEY_LEQ(q->S, p->V))
 +					heap_insert(neh, q->S, q);
 +				else
 +					heap_insert(sch, q->F, q);
 +			}
 +		}
 +		/*
 +		 * Now compute V = max(V, min(S_i)). Remember that all elements
 +		 * in sch have by definition S_i <= V so if sch is not empty,
 +		 * V is surely the max and we must not update it. Conversely,
 +		 * if sch is empty we only need to look at neh.
 +		 */
 +		if (sch->elements == 0 && neh->elements > 0)
 +			p->V = MAX64(p->V, neh->p[0].key);
 +		/* Move from neh to sch any packets that have become eligible */
 +		while (neh->elements > 0 && DN_KEY_LEQ(neh->p[0].key, p->V)) {
 +			struct dn_flow_queue *q = neh->p[0].object;
 +			heap_extract(neh, NULL);
 +			heap_insert(sch, q->F, q);
 +		}
 +
 +		if (p->if_name[0] != '\0') { /* Tx clock is from a real thing */
 +			p_numbytes = -1;	/* Mark not ready for I/O. */
 +			break;
 +		}
  	}
 +	if (sch->elements == 0 && neh->elements == 0 && p_numbytes >= 0 &&
 +	    p->idle_heap.elements > 0) {
 +		/*
 +		 * No traffic and no events scheduled.
 +		 * We can get rid of idle-heap.
 +		 */
 +		int i;
  
 -	if (p->if_name[0] != '\0') {/* tx clock is from a real thing */
 -	    p->numbytes = -1 ; /* mark not ready for I/O */
 -	    break ;
 +		for (i = 0; i < p->idle_heap.elements; i++) {
 +			struct dn_flow_queue *q = p->idle_heap.p[i].object;
 +
 +			q->F = 0;
 +			q->S = q->F + 1;
 +		}
 +		p->sum = 0;
 +		p->V = 0;
 +		p->idle_heap.elements = 0;
  	}
 -    }
 -    if (sch->elements == 0 && neh->elements == 0 && p->numbytes >= 0
 -	    && p->idle_heap.elements > 0) {
  	/*
 -	 * no traffic and no events scheduled. We can get rid of idle-heap.
 +	 * If we are getting clocks from dummynet (not a real interface) and
 +	 * If we are under credit, schedule the next ready event.
 +	 * Also fix the delivery time of the last packet.
  	 */
 -	int i ;
 +	if (p->if_name[0]==0 && p_numbytes < 0) { /* This implies bw > 0. */
 +		dn_key t = 0;		/* Number of ticks i have to wait. */
  
 -	for (i = 0 ; i < p->idle_heap.elements ; i++) {
 -	    struct dn_flow_queue *q = p->idle_heap.p[i].object ;
 -
 -	    q->F = 0 ;
 -	    q->S = q->F + 1 ;
 +		if (p->bandwidth > 0)
 +			t = (p->bandwidth - 1 - p_numbytes) / p->bandwidth;
 +		dn_tag_get(p->tail)->output_time += t;
 +		p->sched_time = curr_time;
 +		heap_insert(&wfq_ready_heap, curr_time + t, (void *)p);
 +		/*
 +		 * XXX Should check errors on heap_insert, and drain the whole
 +		 * queue on error hoping next time we are luckier.
 +		 */
  	}
 -	p->sum = 0 ;
 -	p->V = 0 ;
 -	p->idle_heap.elements = 0 ;
 -    }
 -    /*
 -     * If we are getting clocks from dummynet (not a real interface) and
 -     * If we are under credit, schedule the next ready event.
 -     * Also fix the delivery time of the last packet.
 -     */
 -    if (p->if_name[0]==0 && p->numbytes < 0) { /* this implies bandwidth >0 */
 -	dn_key t=0 ; /* number of ticks i have to wait */
  
 -	if (p->bandwidth > 0)
 -	    t = ( p->bandwidth -1 - p->numbytes) / p->bandwidth ;
 -	dn_tag_get(p->tail)->output_time += t ;
 -	p->sched_time = curr_time ;
 -	heap_insert(&wfq_ready_heap, curr_time + t, (void *)p);
 -	/* XXX should check errors on heap_insert, and drain the whole
 -	 * queue on error hoping next time we are luckier.
 +	/* Fit (adjust if necessary) 64bit result into 32bit variable. */
 +	if (p_numbytes > INT_MAX)
 +		p->numbytes = INT_MAX;
 +	else if (p_numbytes < INT_MIN)
 +		p->numbytes = INT_MIN;
 +	else
 +		p->numbytes = p_numbytes;
 +
 +	/*
 +	 * If the delay line was empty call transmit_event() now.
 +	 * Otherwise, the scheduler will take care of it.
  	 */
 -    }
 -    /*
 -     * If the delay line was empty call transmit_event() now.
 -     * Otherwise, the scheduler will take care of it.
 -     */
 -    if (p_was_empty)
 -	transmit_event(p, head, tail);
 +	if (p_was_empty)
 +		transmit_event(p, head, tail);
  }
  
  /*
 @@ -924,29 +967,28 @@
  static struct dn_flow_queue *
  create_queue(struct dn_flow_set *fs, int i)
  {
 -    struct dn_flow_queue *q ;
 +	struct dn_flow_queue *q;
  
 -    if (fs->rq_elements > fs->rq_size * dn_max_ratio &&
 +	if (fs->rq_elements > fs->rq_size * dn_max_ratio &&
  	    expire_queues(fs) == 0) {
 -	/*
 -	 * No way to get room, use or create overflow queue.
 -	 */
 -	i = fs->rq_size ;
 -	if ( fs->rq[i] != NULL )
 -	    return fs->rq[i] ;
 -    }
 -    q = malloc(sizeof(*q), M_DUMMYNET, M_NOWAIT | M_ZERO);
 -    if (q == NULL) {
 -	printf("dummynet: sorry, cannot allocate queue for new flow\n");
 -	return NULL ;
 -    }
 -    q->fs = fs ;
 -    q->hash_slot = i ;
 -    q->next = fs->rq[i] ;
 -    q->S = q->F + 1;   /* hack - mark timestamp as invalid */
 -    fs->rq[i] = q ;
 -    fs->rq_elements++ ;
 -    return q ;
 +		/* No way to get room, use or create overflow queue. */
 +		i = fs->rq_size;
 +		if (fs->rq[i] != NULL)
 +		    return fs->rq[i];
 +	}
 +	q = malloc(sizeof(*q), M_DUMMYNET, M_NOWAIT | M_ZERO);
 +	if (q == NULL) {
 +		printf("dummynet: sorry, cannot allocate queue for new flow\n");
 +		return (NULL);
 +	}
 +	q->fs = fs;
 +	q->hash_slot = i;
 +	q->next = fs->rq[i];
 +	q->S = q->F + 1;	/* hack - mark timestamp as invalid. */
 +	q->numbytes = io_fast ? fs->pipe->bandwidth : 0;
 +	fs->rq[i] = q;
 +	fs->rq_elements++;
 +	return (q);
  }
  
  /*
 @@ -1200,185 +1242,201 @@
   * ifp		the 'ifp' parameter from the caller.
   *		NULL in ip_input, destination interface in ip_output,
   * rule		matching rule, in case of multiple passes
 - *
   */
  static int
 -dummynet_io(struct mbuf *m, int dir, struct ip_fw_args *fwa)
 +dummynet_io(struct mbuf **m0, int dir, struct ip_fw_args *fwa)
  {
 -    struct mbuf *head = NULL, *tail = NULL;
 -    struct dn_pkt_tag *pkt;
 -    struct m_tag *mtag;
 -    struct dn_flow_set *fs = NULL;
 -    struct dn_pipe *pipe ;
 -    u_int64_t len = m->m_pkthdr.len ;
 -    struct dn_flow_queue *q = NULL ;
 -    int is_pipe;
 -    ipfw_insn *cmd = ACTION_PTR(fwa->rule);
 -
 -    KASSERT(m->m_nextpkt == NULL,
 -	("dummynet_io: mbuf queue passed to dummynet"));
 -
 -    if (cmd->opcode == O_LOG)
 -	cmd += F_LEN(cmd);
 -    if (cmd->opcode == O_ALTQ)
 -	cmd += F_LEN(cmd);
 -    if (cmd->opcode == O_TAG)
 -	cmd += F_LEN(cmd);
 -    is_pipe = (cmd->opcode == O_PIPE);
 +	struct mbuf *m = *m0, *head = NULL, *tail = NULL;
 +	struct dn_pkt_tag *pkt;
 +	struct m_tag *mtag;
 +	struct dn_flow_set *fs = NULL;
 +	struct dn_pipe *pipe;
 +	uint64_t len = m->m_pkthdr.len;
 +	struct dn_flow_queue *q = NULL;
 +	int is_pipe;
 +	ipfw_insn *cmd = ACTION_PTR(fwa->rule);
 +
 +	KASSERT(m->m_nextpkt == NULL,
 +	    ("dummynet_io: mbuf queue passed to dummynet"));
 +
 +	if (cmd->opcode == O_LOG)
 +		cmd += F_LEN(cmd);
 +	if (cmd->opcode == O_ALTQ)
 +		cmd += F_LEN(cmd);
 +	if (cmd->opcode == O_TAG)
 +		cmd += F_LEN(cmd);
 +	is_pipe = (cmd->opcode == O_PIPE);
  
 -    DUMMYNET_LOCK();
 -    /*
 -     * This is a dummynet rule, so we expect an O_PIPE or O_QUEUE rule.
 -     *
 -     * XXXGL: probably the pipe->fs and fs->pipe logic here
 -     * below can be simplified.
 -     */
 -    if (is_pipe) {
 -	pipe = locate_pipe(fwa->cookie);
 -	if (pipe != NULL)
 -		fs = &(pipe->fs);
 -    } else
 -	fs = locate_flowset(fwa->cookie);
 +	DUMMYNET_LOCK();
 +	io_pkt++;
 +	/*
 +	 * This is a dummynet rule, so we expect an O_PIPE or O_QUEUE rule.
 +	 *
 +	 * XXXGL: probably the pipe->fs and fs->pipe logic here
 +	 * below can be simplified.
 +	 */
 +	if (is_pipe) {
 +		pipe = locate_pipe(fwa->cookie);
 +		if (pipe != NULL)
 +			fs = &(pipe->fs);
 +	} else
 +		fs = locate_flowset(fwa->cookie);
  
 -    if (fs == NULL)
 -	goto dropit;	/* This queue/pipe does not exist! */
 -    pipe = fs->pipe;
 -    if (pipe == NULL) { /* Must be a queue, try find a matching pipe. */
 -	pipe = locate_pipe(fs->parent_nr);
 -	if (pipe != NULL)
 -	    fs->pipe = pipe;
 -	else {
 -	    printf("dummynet: no pipe %d for queue %d, drop pkt\n",
 -		fs->parent_nr, fs->fs_nr);
 -	    goto dropit ;
 +	if (fs == NULL)
 +		goto dropit;	/* This queue/pipe does not exist! */
 +	pipe = fs->pipe;
 +	if (pipe == NULL) {	/* Must be a queue, try find a matching pipe. */
 +		pipe = locate_pipe(fs->parent_nr);
 +		if (pipe != NULL)
 +			fs->pipe = pipe;
 +		else {
 +			printf("dummynet: no pipe %d for queue %d, drop pkt\n",
 +			    fs->parent_nr, fs->fs_nr);
 +			goto dropit;
 +		}
  	}
 -    }
 -    q = find_queue(fs, &(fwa->f_id));
 -    if ( q == NULL )
 -	goto dropit ;		/* cannot allocate queue		*/
 -    /*
 -     * update statistics, then check reasons to drop pkt
 -     */
 -    q->tot_bytes += len ;
 -    q->tot_pkts++ ;
 -    if ( fs->plr && random() < fs->plr )
 -	goto dropit ;		/* random pkt drop			*/
 -    if ( fs->flags_fs & DN_QSIZE_IS_BYTES) {
 -    	if (q->len_bytes > fs->qsize)
 -	    goto dropit ;	/* queue size overflow			*/
 -    } else {
 -	if (q->len >= fs->qsize)
 -	    goto dropit ;	/* queue count overflow			*/
 -    }
 -    if ( fs->flags_fs & DN_IS_RED && red_drops(fs, q, len) )
 -	goto dropit ;
 -
 -    /* XXX expensive to zero, see if we can remove it*/
 -    mtag = m_tag_get(PACKET_TAG_DUMMYNET,
 -		sizeof(struct dn_pkt_tag), M_NOWAIT|M_ZERO);
 -    if ( mtag == NULL )
 -	goto dropit ;		/* cannot allocate packet header	*/
 -    m_tag_prepend(m, mtag);	/* attach to mbuf chain */
 -
 -    pkt = (struct dn_pkt_tag *)(mtag+1);
 -    /* ok, i can handle the pkt now... */
 -    /* build and enqueue packet + parameters */
 -    pkt->rule = fwa->rule ;
 -    pkt->dn_dir = dir ;
 -
 -    pkt->ifp = fwa->oif;
 +	q = find_queue(fs, &(fwa->f_id));
 +	if (q == NULL)
 +		goto dropit;		/* Cannot allocate queue. */
 +
 +	/* Update statistics, then check reasons to drop pkt. */
 +	q->tot_bytes += len;
 +	q->tot_pkts++;
 +	if (fs->plr && random() < fs->plr)
 +		goto dropit;		/* Random pkt drop. */
 +	if (fs->flags_fs & DN_QSIZE_IS_BYTES) {
 +		if (q->len_bytes > fs->qsize)
 +			goto dropit;	/* Queue size overflow. */
 +	} else {
 +		if (q->len >= fs->qsize)
 +			goto dropit;	/* Queue count overflow. */
 +	}
 +	if (fs->flags_fs & DN_IS_RED && red_drops(fs, q, len))
 +		goto dropit;
  
 -    if (q->head == NULL)
 -	q->head = m;
 -    else
 -	q->tail->m_nextpkt = m;
 -    q->tail = m;
 -    q->len++;
 -    q->len_bytes += len ;
 +	/* XXX expensive to zero, see if we can remove it. */
 +	mtag = m_tag_get(PACKET_TAG_DUMMYNET,
 +	    sizeof(struct dn_pkt_tag), M_NOWAIT | M_ZERO);
 +	if (mtag == NULL)
 +		goto dropit;		/* Cannot allocate packet header. */
 +	m_tag_prepend(m, mtag);		/* Attach to mbuf chain. */
  
 -    if ( q->head != m )		/* flow was not idle, we are done */
 -	goto done;
 -    /*
 -     * If we reach this point the flow was previously idle, so we need
 -     * to schedule it. This involves different actions for fixed-rate or
 -     * WF2Q queues.
 -     */
 -    if (is_pipe) {
 +	pkt = (struct dn_pkt_tag *)(mtag + 1);
  	/*
 -	 * Fixed-rate queue: just insert into the ready_heap.
 +	 * Ok, i can handle the pkt now...
 +	 * Build and enqueue packet + parameters.
  	 */
 -	dn_key t = 0 ;
 -	if (pipe->bandwidth)
 -	    t = SET_TICKS(m, q, pipe);
 -	q->sched_time = curr_time ;
 -	if (t == 0)	/* must process it now */
 -	    ready_event(q, &head, &tail);
 +	pkt->rule = fwa->rule;
 +	pkt->dn_dir = dir;
 +
 +	pkt->ifp = fwa->oif;
 +
 +	if (q->head == NULL)
 +		q->head = m;
  	else
 -	    heap_insert(&ready_heap, curr_time + t , q );
 -    } else {
 -	/*
 -	 * WF2Q. First, compute start time S: if the flow was idle (S=F+1)
 -	 * set S to the virtual time V for the controlling pipe, and update
 -	 * the sum of weights for the pipe; otherwise, remove flow from
 -	 * idle_heap and set S to max(F,V).
 -	 * Second, compute finish time F = S + len/weight.
 -	 * Third, if pipe was idle, update V=max(S, V).
 -	 * Fourth, count one more backlogged flow.
 -	 */
 -	if (DN_KEY_GT(q->S, q->F)) { /* means timestamps are invalid */
 -	    q->S = pipe->V ;
 -	    pipe->sum += fs->weight ; /* add weight of new queue */
 -	} else {
 -	    heap_extract(&(pipe->idle_heap), q);
 -	    q->S = MAX64(q->F, pipe->V ) ;
 -	}
 -	q->F = q->S + ( len<<MY_M )/(u_int64_t) fs->weight;
 +		q->tail->m_nextpkt = m;
 +	q->tail = m;
 +	q->len++;
 +	q->len_bytes += len;
 +
 +	if (q->head != m)		/* Flow was not idle, we are done. */
 +		goto done;
 +
 +	if (q->q_time < curr_time)
 +		q->numbytes = io_fast ? fs->pipe->bandwidth : 0;
 +	q->q_time = curr_time;
  
 -	if (pipe->not_eligible_heap.elements == 0 &&
 -		pipe->scheduler_heap.elements == 0)
 -	    pipe->V = MAX64 ( q->S, pipe->V );
 -	fs->backlogged++ ;
  	/*
 -	 * Look at eligibility. A flow is not eligibile if S>V (when
 -	 * this happens, it means that there is some other flow already
 -	 * scheduled for the same pipe, so the scheduler_heap cannot be
 -	 * empty). If the flow is not eligible we just store it in the
 -	 * not_eligible_heap. Otherwise, we store in the scheduler_heap
 -	 * and possibly invoke ready_event_wfq() right now if there is
 -	 * leftover credit.
 -	 * Note that for all flows in scheduler_heap (SCH), S_i <= V,
 -	 * and for all flows in not_eligible_heap (NEH), S_i > V .
 -	 * So when we need to compute max( V, min(S_i) ) forall i in SCH+NEH,
 -	 * we only need to look into NEH.
 +	 * If we reach this point the flow was previously idle, so we need
 +	 * to schedule it. This involves different actions for fixed-rate or
 +	 * WF2Q queues.
  	 */
 -	if (DN_KEY_GT(q->S, pipe->V) ) { /* not eligible */
 -	    if (pipe->scheduler_heap.elements == 0)
 -		printf("dummynet: ++ ouch! not eligible but empty scheduler!\n");
 -	    heap_insert(&(pipe->not_eligible_heap), q->S, q);
 +	if (is_pipe) {
 +		/* Fixed-rate queue: just insert into the ready_heap. */
 +		dn_key t = 0;
 +
 +		if (pipe->bandwidth && m->m_pkthdr.len * 8 * hz > q->numbytes)
 +			t = SET_TICKS(m, q, pipe);
 +		q->sched_time = curr_time;
 +		if (t == 0)		/* Must process it now. */
 +			ready_event(q, &head, &tail);
 +		else
 +			heap_insert(&ready_heap, curr_time + t , q);
  	} else {
 -	    heap_insert(&(pipe->scheduler_heap), q->F, q);
 -	    if (pipe->numbytes >= 0) { /* pipe is idle */
 -		if (pipe->scheduler_heap.elements != 1)
 -		    printf("dummynet: OUCH! pipe should have been idle!\n");
 -		DPRINTF(("dummynet: waking up pipe %d at %d\n",
 -			pipe->pipe_nr, (int)(q->F >> MY_M)));
 -		pipe->sched_time = curr_time ;
 -		ready_event_wfq(pipe, &head, &tail);
 -	    }
 +		/*
 +		 * WF2Q. First, compute start time S: if the flow was
 +		 * idle (S = F + 1) set S to the virtual time V for the
 +		 * controlling pipe, and update the sum of weights for the pipe;
 +		 * otherwise, remove flow from idle_heap and set S to max(F,V).
 +		 * Second, compute finish time F = S + len / weight.
 +		 * Third, if pipe was idle, update V = max(S, V).
 +		 * Fourth, count one more backlogged flow.
 +		 */
 +		if (DN_KEY_GT(q->S, q->F)) { /* Means timestamps are invalid. */
 +			q->S = pipe->V;
 +			pipe->sum += fs->weight; /* Add weight of new queue. */
 +		} else {
 +			heap_extract(&(pipe->idle_heap), q);
 +			q->S = MAX64(q->F, pipe->V);
 +		}
 +		q->F = q->S + (len << MY_M) / (uint64_t)fs->weight;
 +
 +		if (pipe->not_eligible_heap.elements == 0 &&
 +		    pipe->scheduler_heap.elements == 0)
 +			pipe->V = MAX64(q->S, pipe->V);
 +		fs->backlogged++;
 +		/*
 +		 * Look at eligibility. A flow is not eligibile if S>V (when
 +		 * this happens, it means that there is some other flow already
 +		 * scheduled for the same pipe, so the scheduler_heap cannot be
 +		 * empty). If the flow is not eligible we just store it in the
 +		 * not_eligible_heap. Otherwise, we store in the scheduler_heap
 +		 * and possibly invoke ready_event_wfq() right now if there is
 +		 * leftover credit.
 +		 * Note that for all flows in scheduler_heap (SCH), S_i <= V,
 +		 * and for all flows in not_eligible_heap (NEH), S_i > V.
 +		 * So when we need to compute max(V, min(S_i)) forall i in
 +		 * SCH+NEH, we only need to look into NEH.
 +		 */
 +		if (DN_KEY_GT(q->S, pipe->V)) {		/* Not eligible. */
 +			if (pipe->scheduler_heap.elements == 0)
 +				printf("dummynet: ++ ouch! not eligible but empty scheduler!\n");
 +			heap_insert(&(pipe->not_eligible_heap), q->S, q);
 +		} else {
 +			heap_insert(&(pipe->scheduler_heap), q->F, q);
 +			if (pipe->numbytes >= 0) {	 /* Pipe is idle. */
 +				if (pipe->scheduler_heap.elements != 1)
 +					printf("dummynet: OUCH! pipe should have been idle!\n");
 +				DPRINTF(("dummynet: waking up pipe %d at %d\n",
 +				    pipe->pipe_nr, (int)(q->F >> MY_M)));
 +				pipe->sched_time = curr_time;
 +				ready_event_wfq(pipe, &head, &tail);
 +			}
 +		}
  	}
 -    }
  done:
 -    DUMMYNET_UNLOCK();
 -    if (head != NULL)
 -	dummynet_send(head);
 -    return 0;
 +	if (head == m && dir != DN_TO_IFB_FWD && dir != DN_TO_ETH_DEMUX &&
 +	    dir != DN_TO_ETH_OUT) {	/* Fast io. */
 +		io_pkt_fast++;
 +		if (m->m_nextpkt != NULL)
 +			printf("dummynet: fast io: pkt chain detected!\n");
 +		head = m->m_nextpkt = NULL;
 +	} else
 +		*m0 = NULL;		/* Normal io. */
 +
 +	DUMMYNET_UNLOCK();
 +	if (head != NULL)
 +		dummynet_send(head);
 +	return (0);
  
  dropit:
 -    if (q)
 -	q->drops++ ;
 -    DUMMYNET_UNLOCK();
 -    m_freem(m);
 -    return ( (fs && (fs->flags_fs & DN_NOERROR)) ? 0 : ENOBUFS);
 +	io_pkt_drop++;
 +	if (q)
 +		q->drops++;
 +	DUMMYNET_UNLOCK();
 +	m_freem(m);
 +	*m0 = NULL;
 +	return ((fs && (fs->flags_fs & DN_NOERROR)) ? 0 : ENOBUFS);
  }
  
  /*
 @@ -1696,7 +1754,7 @@
  			/* Flush accumulated credit for all queues. */
  			for (i = 0; i <= pipe->fs.rq_size; i++)
  				for (q = pipe->fs.rq[i]; q; q = q->next)
 -					q->numbytes = 0;
 +					q->numbytes = io_fast ? p->bandwidth : 0;
  
  		pipe->bandwidth = p->bandwidth;
  		pipe->numbytes = 0;		/* just in case... */
 Index: sys/netinet/ip_fw_pfil.c
 ===================================================================
 RCS file: /home/ncvs/src/sys/netinet/ip_fw_pfil.c,v
 retrieving revision 1.25
 diff -u -r1.25 ip_fw_pfil.c
 --- sys/netinet/ip_fw_pfil.c	7 Oct 2007 20:44:23 -0000	1.25
 +++ sys/netinet/ip_fw_pfil.c	27 Mar 2008 17:19:10 -0000
 @@ -104,16 +104,6 @@
  
  	bzero(&args, sizeof(args));
  
 -	dn_tag = m_tag_find(*m0, PACKET_TAG_DUMMYNET, NULL);
 -	if (dn_tag != NULL){
 -		struct dn_pkt_tag *dt;
 -
 -		dt = (struct dn_pkt_tag *)(dn_tag+1);
 -		args.rule = dt->rule;
 -
 -		m_tag_delete(*m0, dn_tag);
 -	}
 -
  	ng_tag = (struct ng_ipfw_tag *)m_tag_locate(*m0, NGM_IPFW_COOKIE, 0,
  	    NULL);
  	if (ng_tag != NULL) {
 @@ -124,6 +114,16 @@
  	}
  
  again:
 +	dn_tag = m_tag_find(*m0, PACKET_TAG_DUMMYNET, NULL);
 +	if (dn_tag != NULL){
 +		struct dn_pkt_tag *dt;
 +
 +		dt = (struct dn_pkt_tag *)(dn_tag+1);
 +		args.rule = dt->rule;
 +
 +		m_tag_delete(*m0, dn_tag);
 +	}
 +
  	args.m = *m0;
  	args.inp = inp;
  	ipfw = ipfw_chk(&args);
 @@ -160,10 +160,11 @@
  		if (!DUMMYNET_LOADED)
  			goto drop;
  		if (mtod(*m0, struct ip *)->ip_v == 4)
 -			ip_dn_io_ptr(*m0, DN_TO_IP_IN, &args);
 +			ip_dn_io_ptr(m0, DN_TO_IP_IN, &args);
  		else if (mtod(*m0, struct ip *)->ip_v == 6)
 -			ip_dn_io_ptr(*m0, DN_TO_IP6_IN, &args);
 -		*m0 = NULL;
 +			ip_dn_io_ptr(m0, DN_TO_IP6_IN, &args);
 +		if (*m0 != NULL)
 +			goto again;
  		return 0;		/* packet consumed */
  
  	case IP_FW_TEE:
 @@ -225,16 +226,6 @@
  
  	bzero(&args, sizeof(args));
  
 -	dn_tag = m_tag_find(*m0, PACKET_TAG_DUMMYNET, NULL);
 -	if (dn_tag != NULL) {
 -		struct dn_pkt_tag *dt;
 -
 -		dt = (struct dn_pkt_tag *)(dn_tag+1);
 -		args.rule = dt->rule;
 -
 -		m_tag_delete(*m0, dn_tag);
 -	}
 -
  	ng_tag = (struct ng_ipfw_tag *)m_tag_locate(*m0, NGM_IPFW_COOKIE, 0,
  	    NULL);
  	if (ng_tag != NULL) {
 @@ -245,6 +236,16 @@
  	}
  
  again:
 +	dn_tag = m_tag_find(*m0, PACKET_TAG_DUMMYNET, NULL);
 +	if (dn_tag != NULL) {
 +		struct dn_pkt_tag *dt;
 +
 +		dt = (struct dn_pkt_tag *)(dn_tag+1);
 +		args.rule = dt->rule;
 +
 +		m_tag_delete(*m0, dn_tag);
 +	}
 +
  	args.m = *m0;
  	args.oif = ifp;
  	args.inp = inp;
 @@ -286,10 +287,11 @@
  		if (!DUMMYNET_LOADED)
  			break;
  		if (mtod(*m0, struct ip *)->ip_v == 4)
 -			ip_dn_io_ptr(*m0, DN_TO_IP_OUT, &args);
 +			ip_dn_io_ptr(m0, DN_TO_IP_OUT, &args);
  		else if (mtod(*m0, struct ip *)->ip_v == 6)
 -			ip_dn_io_ptr(*m0, DN_TO_IP6_OUT, &args);
 -		*m0 = NULL;
 +			ip_dn_io_ptr(m0, DN_TO_IP6_OUT, &args);
 +		if (*m0 != NULL)
 +			goto again;
  		return 0;		/* packet consumed */
  
  		break;
 Index: sys/net/if_bridge.c
 ===================================================================
 RCS file: /home/ncvs/src/sys/net/if_bridge.c,v
 retrieving revision 1.103.2.3
 diff -u -r1.103.2.3 if_bridge.c
 --- sys/net/if_bridge.c	21 Dec 2007 05:29:15 -0000	1.103.2.3
 +++ sys/net/if_bridge.c	27 Mar 2008 17:19:15 -0000
 @@ -2982,7 +2982,7 @@
  			 * packet will return to us via bridge_dummynet().
  			 */
  			args.oif = ifp;
 -			ip_dn_io_ptr(*mp, DN_TO_IFB_FWD, &args);
 +			ip_dn_io_ptr(mp, DN_TO_IFB_FWD, &args);
  			return (error);
  		}
  
 Index: sys/net/if_ethersubr.c
 ===================================================================
 RCS file: /home/ncvs/src/sys/net/if_ethersubr.c,v
 retrieving revision 1.236.2.1
 diff -u -r1.236.2.1 if_ethersubr.c
 --- sys/net/if_ethersubr.c	28 Oct 2007 16:24:16 -0000	1.236.2.1
 +++ sys/net/if_ethersubr.c	27 Mar 2008 17:19:18 -0000
 @@ -491,7 +491,7 @@
  			 */
  			*m0 = NULL ;
  		}
 -		ip_dn_io_ptr(m, dst ? DN_TO_ETH_OUT: DN_TO_ETH_DEMUX, &args);
 +		ip_dn_io_ptr(&m, dst ? DN_TO_ETH_OUT: DN_TO_ETH_DEMUX, &args);
  		return 0;
  	}
  	/*
 Index: sbin/ipfw/ipfw.8
 ===================================================================
 RCS file: /home/ncvs/src/sbin/ipfw/ipfw.8,v
 retrieving revision 1.203.2.1
 diff -u -r1.203.2.1 ipfw.8
 --- sbin/ipfw/ipfw.8	29 Nov 2007 18:42:15 -0000	1.203.2.1
 +++ sbin/ipfw/ipfw.8	27 Mar 2008 17:25:32 -0000
 @@ -1756,6 +1756,16 @@
  TCP connection, or from/to a given host, or entire subnet, or a
  protocol type, etc.
  .Pp
 +There are two modes of dummynet operation: normal and fast.
 +Normal mode tries to emulate a real link: the dummynet scheduler ensures that
 +packets will not leave the pipe faster than they would on a real link with the
 +given bandwidth.
 +Fast mode allows certain packets to bypass the dummynet scheduler (if the flow
 +does not exceed the pipe's bandwidth); it requires fewer CPU cycles per packet
 +(on average), but packet latency can be significantly lower compared to a real
 +link with the same bandwidth. The default is normal mode; fast mode can be
 +enabled by setting the net.inet.ip.dummynet.io_fast sysctl(8) variable to a non-zero value.
 +.Pp
  Packets belonging to the same flow are then passed to either of two
  different objects, which implement the traffic regulation:
  .Bl -hang -offset XXXX
 @@ -2120,6 +2130,14 @@
  This value is used when no
  .Cm buckets
  option is specified when configuring a pipe/queue.
 +.It Em net.inet.ip.dummynet.io_fast : No 0
 +If set to a non-zero value, enables "fast" mode of dummynet operation (see above).
 +.It Em net.inet.ip.dummynet.io_pkt
 +Number of packets passed to dummynet.
 +.It Em net.inet.ip.dummynet.io_pkt_drop
 +Number of packets dropped by dummynet.
 +.It Em net.inet.ip.dummynet.io_pkt_fast
 +Number of packets that bypassed the dummynet scheduler.
  .It Em net.inet.ip.dummynet.max_chain_len : No 16
  Target value for the maximum number of pipes/queues in a hash bucket.
  The product
 
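 For reference, once a kernel with this patch is running, the new mode and
 counters could be exercised roughly as follows (the sysctl names come from the
 patch above; the pipe setup is only a hypothetical example):
 
     # Fast mode is off by default; enable it.
     sysctl net.inet.ip.dummynet.io_fast=1
     # Hypothetical pipe so that traffic actually passes through dummynet.
     ipfw add 100 pipe 1 ip from any to any
     ipfw pipe 1 config bw 1Mbit/s
     # Counters added by the patch.
     sysctl net.inet.ip.dummynet.io_pkt \
         net.inet.ip.dummynet.io_pkt_fast \
         net.inet.ip.dummynet.io_pkt_drop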
 

