svn commit: r202433 - user/luigi/ipfw3-head/sys/netinet/ipfw
Luigi Rizzo
luigi at FreeBSD.org
Sat Jan 16 08:23:09 UTC 2010
Author: luigi
Date: Sat Jan 16 08:23:09 2010
New Revision: 202433
URL: http://svn.freebsd.org/changeset/base/202433
Log:
import initial wf2q+ code ported by Riccardo Panicucci.
Code not working yet.
Modified:
user/luigi/ipfw3-head/sys/netinet/ipfw/dn_sched_wf2q.c
user/luigi/ipfw3-head/sys/netinet/ipfw/ip_dn_io.c
user/luigi/ipfw3-head/sys/netinet/ipfw/ip_dummynet.c
Modified: user/luigi/ipfw3-head/sys/netinet/ipfw/dn_sched_wf2q.c
==============================================================================
--- user/luigi/ipfw3-head/sys/netinet/ipfw/dn_sched_wf2q.c Sat Jan 16 08:12:22 2010 (r202432)
+++ user/luigi/ipfw3-head/sys/netinet/ipfw/dn_sched_wf2q.c Sat Jan 16 08:23:09 2010 (r202433)
@@ -1,5 +1,6 @@
/*
* Copyright (c) 2010 Riccardo Panicucci, Universita` di Pisa
+ * Copyright (c) 2000-2002 Luigi Rizzo, Universita` di Pisa
* All rights reserved
*
* Redistribution and use in source and binary forms, with or without
@@ -24,7 +25,6 @@
* SUCH DAMAGE.
*/
-#ifdef _KERNEL
#include <sys/malloc.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
@@ -35,60 +35,354 @@
#include <netinet/in.h>
#include <netinet/ip_var.h> /* ipfw_rule_ref */
#include <netinet/ip_fw.h> /* flow_id */
-#else
-#include "dn_test.h"
-#endif
#include <netinet/ip_dummynet.h>
#include <netinet/ipfw/dn_heap.h>
#include <netinet/ipfw/ip_dn_private.h>
#include <netinet/ipfw/dn_sched.h>
+#ifndef MAX64
+#define MAX64(x,y) (( (int64_t) ( (y)-(x) )) > 0 ) ? (y) : (x)
+#endif
+
+#ifndef MY_M
+#define MY_M 16 /* shift for fixed point arithmetic */
+#endif
+
+#ifndef isdigit
+#define isdigit(x) ((x) >= '0' && (x) <= '9' )
+#endif
+
+/*
+ * Private information for the scheduler instance:
+ * sch_heap (key is Finish time) returns the next queue to serve
+ * ne_heap (key is Start time) stores not-eligible queues
+ * idle_heap (key=start/finish time) stores idle flows. It must
+ * support extract-from-middle.
+ * A flow is normally only in 1 of the three heaps.
+ * XXX todo: use a more efficient data structure, e.g. a tree sorted
+ * by F with min_subtree(S) in each node
+ */
+struct wf2qp_si {
+ struct dn_heap *sch_heap; /* top extract - key Finish time */
+ struct dn_heap *ne_heap; /* top extract - key Start time */
+ struct dn_heap *idle_heap; /* random extract - key Start=Finish time */
+ dn_key V ; /* virtual time */
+ int sum;
+};
+
+struct wf2qp_queue {
+ struct new_queue g;
+
+ dn_key S,F; /* start time, finish time */
+
+ int heap_pos; /* position (index) of struct in heap */
+// struct wf2qp_queue *next; /* next queue in the bucket */
+};
+
/*
- * This file implements a FIFO scheduler for a single queue.
- * The queue is allocated as part of the scheduler instance,
- * and there is a single flowset is in the template which stores
- * queue-related information.
- * No parameters are used except queue sizes and management policy.
- * Enqueue and dequeue use the default library functions.
+ * This file implements a WF2Q+ scheduler as it has been in dummynet
+ * since 2000.
+ * The scheduler supports per-flow queues and has O(log N) complexity.
*/
+
static int
-fifo_enqueue(struct new_sch_inst *si, struct new_queue *q, struct mbuf *m)
+wf2qp_enqueue(struct new_sch_inst *_si, struct new_queue *q, struct mbuf *m)
{
- return dn_enqueue((struct new_queue *)(si+1), m, 0);
+ struct new_fsk *fs = q->fs;
+ struct wf2qp_si *si = (struct wf2qp_si *)(_si + 1);
+ struct wf2qp_queue *alg_fq;
+ uint64_t len = m->m_pkthdr.len;
+
+ int q_was_idle = 0;
+
+ if (q->mq.head == NULL)
+ q_was_idle = 1;
+
+ if (dn_enqueue(q, m, 0)) /* packet was dropped */
+ return 1;
+
+ if(!q_was_idle)
+ return 0;
+
+ /* If reach this point, queue q was idle */
+ alg_fq = (struct wf2qp_queue *)q;
+
+ if (DN_KEY_LT(alg_fq->F, alg_fq->S)) {
+ /* F<S means timestamps are invalid ->brand new queue. */
+ alg_fq->S = si->V;
+ si->sum += fs->fs.weight; /* Add weight of new queue. */
+ } else { /* if it was idle then it was in the idle heap */
+ heap_extract(si->idle_heap, q);
+ alg_fq->S = MAX64(alg_fq->F, si->V);
+ }
+ // XXX use div64
+ alg_fq->F = alg_fq->S + (len << MY_M) / (uint64_t)fs->fs.weight;
+
+ if (si->ne_heap->elements == 0 && si->sch_heap->elements == 0)
+ si->V = MAX64(alg_fq->S, si->V);
+
+ /*
+ * Look at eligibility. A flow is not eligibile if S>V (when
+ * this happens, it means that there is some other flow already
+ * scheduled for the same pipe, so the sch_heap cannot be
+ * empty). If the flow is not eligible we just store it in the
+ * ne_heap. Otherwise, we store in the sch_heap.
+ * Note that for all flows in sch_heap (SCH), S_i <= V,
+ * and for all flows in ne_heap (NEH), S_i > V.
+ * So when we need to compute max(V, min(S_i)) forall i in
+ * SCH+NEH, we only need to look into NEH.
+ */
+ if (DN_KEY_LT(si->V, alg_fq->S)) {
+ /* S>V means flow Not eligible. */
+ if (si->sch_heap->elements == 0)
+ printf("dummynet: ++ ouch! not eligible but empty scheduler!\n");
+ heap_insert(si->ne_heap, alg_fq->S, q);
+ }
+ else {
+ heap_insert(si->sch_heap, alg_fq->F, q);
+ /* Pipe *must* be idle. */
+#if 0
+ if ((si->sch_heap.elements != 1) && (s->numbytes >= 0))
+ printf("dummynet: OUCH! pipe should have been idle!\n");
+ DPRINTF(("dummynet: waking up pipe %d at %d\n",
+ s->sched_nr, (int)(alg_fq->F >> MY_M)));
+#endif
+ }
+ return 0;
}
static struct mbuf *
-fifo_dequeue(struct new_sch_inst *si)
+wf2qp_dequeue(struct new_sch_inst *_si)
+{
+ /* Access scheduler instance private data */
+ struct wf2qp_si *si = (struct wf2qp_si *)(_si + 1);
+
+ struct mbuf *pkt = NULL;
+ struct new_queue *qq, *q1, *q = NULL;
+
+ struct dn_heap *sch = si->sch_heap;
+ struct dn_heap *neh = si->ne_heap;
+
+ struct wf2qp_queue *alg_fq = NULL;
+
+ uint64_t len;
+
+ /* XXX:
+ * This block in the previous version were done in the dummynet_task()
+ * function. Check if this is the appropriate function to do it.
+ */
+ if (si->idle_heap->elements > 0 &&
+ DN_KEY_LT(HEAP_TOP(si->idle_heap)->key, si->V)) {
+ qq = HEAP_TOP(si->idle_heap)->object;
+ alg_fq = (struct wf2qp_queue *) qq;
+
+ heap_extract(si->idle_heap, NULL);
+ /* Mark timestamp as invalid. */
+ /* XXX why don't delete the queue now? */
+ alg_fq->S = alg_fq->F + 1;
+ si->sum -= qq->fs->fs.weight;
+ }
+
+
+#if 0
+ if (p->if_name[0] == 0) { /* tx clock is simulated */
+ s->numbytes += (curr_time - s->sched_time) * p->bandwidth;
+ }
+ else {
+ /* tx clock is for real, the ifq must be empty or this is a NOP. */
+ if (p->ifp && p->ifp->if_snd.ifq_head != NULL)
+ return NULL;
+ else {
+ DPRINTF(("dummynet: pipe %d ready from %s --\n",
+ p->pipe_nr, p->if_name));
+ }
+ }
+#endif
+ /*
+ * While we have backlogged traffic AND credit, we need to do
+ * something on the queue.
+ * XXX cannot be a 'while --
+ */
+ while ( sch->elements > 0 || neh->elements > 0) {
+ if (sch->elements > 0) {
+ /* Have some eligible pkts to send out. */
+ q = HEAP_TOP(sch)->object;
+ alg_fq = (struct wf2qp_queue *)q;
+
+ pkt = q->mq.head;
+
+ heap_extract(sch, NULL); /* Remove queue from heap. */
+
+ si->V += (pkt->m_pkthdr.len << MY_M) / si->sum; /* Update V. */
+ alg_fq->S = alg_fq->F; /* Update start time. */
+
+ if (q->ni.length == 1) /* Flow not backlogged any more. */
+ heap_insert(si->idle_heap, alg_fq->F, q);
+ else {
+ /* Still backlogged. */
+
+ /*
+ * Update F and position in backlogged queue,
+ * then put flow in ne_heap
+ * (we will fix this later).
+ */
+ /* len = (q->head)->m_pkthdr.len; */
+ len = ((q->mq.head)->m_nextpkt)->m_pkthdr.len;
+ alg_fq->F += (len << MY_M) / (uint64_t)q->fs->fs.weight;
+ if (DN_KEY_LEQ(alg_fq->S, si->V)) {
+ heap_insert(neh, alg_fq->S, q);
+ }
+ else {
+ heap_insert(sch, alg_fq->F, q);
+ }
+ }
+ }
+ /*
+ * Now compute V = max(V, min(S_i)). Remember that all elements
+ * in sch have by definition S_i <= V so if sch is not empty,
+ * V is surely the max and we must not update it. Conversely,
+ * if sch is empty we only need to look at neh.
+ */
+ if (sch->elements == 0 && neh->elements > 0)
+ si->V = MAX64(si->V, HEAP_TOP(neh)->key);
+ /* Move from neh to sch any packets that have become eligible */
+ while (neh->elements > 0 && DN_KEY_LEQ(HEAP_TOP(neh)->key, si->V)) {
+ qq = HEAP_TOP(neh)->object;
+ alg_fq = (struct wf2qp_queue *)qq;
+ heap_extract(neh, NULL);
+ heap_insert(sch, alg_fq->F, qq);
+ }
+#if 0
+ if (p->if_name[0] != '\0') { /* Tx clock is from a real thing */
+ s->numbytes = -1; /* Mark not ready for I/O. */ /* WARNING*/
+ debMes(" %s f\n", __FUNCTION__);
+ break;
+ }
+#endif
+ if (pkt != NULL)
+ return dn_dequeue(q);
+
+ } /* end of while */
+
+ if (sch->elements == 0 && neh->elements == 0 &&
+ si->idle_heap->elements > 0) {
+ /*
+ * No traffic and no events scheduled.
+ * We can get rid of idle-heap.
+ */
+ int i;
+ int elem = si->idle_heap->elements;
+
+ for (i = 0; i < elem; i++) {
+ /* XXX NOTE: before the heap changed, this block extracted
+ * all elements
+ */
+ q1 = HEAP_TOP(si->idle_heap)->object;
+ heap_extract(si->idle_heap, NULL);
+ alg_fq = (struct wf2qp_queue *)q1;
+
+ alg_fq->F = 0;
+ alg_fq->S = alg_fq->F + 1;
+ /* XXX: why don't call delete queue? */
+ }
+ si->sum = 0;
+ si->V = 0;
+// si->idle_heap.elements = 0;
+ }
+ /*
+ * If we are getting clocks from dummynet (not a real interface) and
+ * If we are under credit, schedule the next ready event.
+ * Also fix the delivery time of the last packet.
+ */
+
+#if 0
+ if (p->if_name[0]==0 && s->numbytes < 0)
+ ...
+#endif
+ return NULL;
+}
+
+static int
+wf2qp_new_sched(struct new_sch_inst *_si)
+{
+ struct wf2qp_si *si = (struct wf2qp_si *)(_si + 1);
+ int ofs = offsetof(struct wf2qp_queue, heap_pos);
+
+ /* only idle-heap supports extract from middle */
+ if (heap_init(si->idle_heap, 16, ofs) ||
+ heap_init(si->sch_heap, 16, -1) ||
+ heap_init(si->ne_heap, 16, -1)) {
+ heap_free(si->ne_heap);
+ heap_free(si->sch_heap);
+ heap_free(si->idle_heap);
+ return ENOMEM;
+ }
+ return 0;
+}
+
+static int
+wf2qp_free_sched(struct new_sch_inst *_si)
+{
+ struct wf2qp_si *si = (struct wf2qp_si *)(_si + 1);
+
+ heap_free(si->sch_heap);
+ heap_free(si->ne_heap);
+ heap_free(si->idle_heap);
+
+ return 0;
+}
+
+static int
+wf2qp_new_queue (struct new_queue *q)
{
- return dn_dequeue((struct new_queue *)(si + 1));
+ struct wf2qp_queue *tmp = (struct wf2qp_queue *) q;
+
+ q->ni.oid.subtype = DN_SCHED_WF2QP;
+
+ tmp->S = tmp->F + 1; /* hack - mark timestamp as invalid. */
+
+ return 0;
}
static int
-wf2qp_new_sched(struct new_sch_inst *si)
+wf2qp_free_queue(struct new_queue *q)
{
- /* This scheduler instance contains the queue */
- struct new_queue *q = (struct new_queue *)(si + 1);
+ struct wf2qp_si *si = (struct wf2qp_si *)q->si + 1;
+ struct wf2qp_queue *alg_fq = (struct wf2qp_queue *)q;
+
+ /* If the queue was valid, decrement the sum value
+ * else this was done by dequeue()
+ */
+ if(alg_fq->S != alg_fq->F + 1)
+ si->sum -= q->fs->fs.weight;
- set_oid(&q->ni.oid, DN_QUEUE, sizeof(*q));
- q->si = si;
- q->fs = si->sched->fs;
- return 0;
+ return 0;
}
/*
- * FIFO scheduler descriptor
- * contains the type of the scheduler, the name, the size of extra
- * data structures, and function pointers.
+ * WF2Q+ scheduler descriptor
+ * contains the type of the scheduler, the name, the size of the various
+ * structures and function pointers. If a function is not implemented,
+ * the pointer is initialized to NULL
*/
-static struct dn_sched fifo_desc = {
- .type = DN_SCHED_WF2QP,
- .name = "WF2QP",
-
- .sch_inst_len = sizeof(struct new_queue),
-
- .enqueue = fifo_enqueue,
- .dequeue = fifo_dequeue,
- .new_sched = wf2qp_new_sched,
+static struct dn_sched wf2qp_desc = {
+ .type = DN_SCHED_WF2QP,
+ .name = "WF2Q+",
+ .flags = DN_MULTIQUEUE,
+
+ .sch_inst_len = sizeof(struct wf2qp_si),
+ .queue_len = sizeof(struct wf2qp_queue),
+
+ .enqueue = wf2qp_enqueue,
+ .dequeue = wf2qp_dequeue,
+
+ .new_sched = wf2qp_new_sched,
+ .free_sched = wf2qp_free_sched,
+
+ .new_queue = wf2qp_new_queue,
+ .free_queue = wf2qp_free_queue,
+
};
-DECLARE_DNSCHED_MODULE(dn_wf2qp, &fifo_desc);
+
+DECLARE_DNSCHED_MODULE(dn_wf2qp, &wf2qp_desc);
Modified: user/luigi/ipfw3-head/sys/netinet/ipfw/ip_dn_io.c
==============================================================================
--- user/luigi/ipfw3-head/sys/netinet/ipfw/ip_dn_io.c Sat Jan 16 08:12:22 2010 (r202432)
+++ user/luigi/ipfw3-head/sys/netinet/ipfw/ip_dn_io.c Sat Jan 16 08:23:09 2010 (r202433)
@@ -1,6 +1,5 @@
/*-
- * Copyright (c) 2010 Riccardo Panicucci, Universita` di Pisa
- * Copyright (c) 2010 Luigi Rizzo, Universita` di Pisa
+ * Copyright (c) 2010 Luigi Rizzo, Riccardo Panicucci, Universita` di Pisa
* All rights reserved
*
* Redistribution and use in source and binary forms, with or without
@@ -305,9 +304,6 @@ extra_bits(struct mbuf *m, struct new_sc
return bits;
}
-
-
-
/*
* Send traffic from a scheduler instance due by 'now'.
* Return a pointer to the head of the queue.
@@ -338,7 +334,7 @@ serve_sched(struct mq *q, struct new_sch
while (si->credit >= 0 && (m = s->fp->dequeue(si)) != NULL) {
uint64_t len_scaled;
done++;
- len_scaled = bw == 0 ? 0 : hz *
+ len_scaled = (bw == 0) ? 0 : hz *
(m->m_pkthdr.len * 8 + extra_bits(m, s));
si->credit -= len_scaled;
/* Move packet in the delay line */
@@ -355,7 +351,7 @@ serve_sched(struct mq *q, struct new_sch
} else {
dn_key t;
KASSERT (bw > 0, ("bw=0 and credit<0 ?"));
- t = (bw - 1 - si->credit) / bw;
+ t = div64(bw - 1 - si->credit, bw);
if (m)
dn_tag_get(m)->output_time += t;
si->kflags |= DN_ACTIVE;
@@ -617,17 +613,16 @@ dummynet_io(struct mbuf **m0, int dir, s
}
/* compute the initial allowance */
- {
- struct new_pipe *pipe = &fs->sched->pipe;
- si->credit = dn_cfg.io_fast ? pipe->bandwidth : 0;
- if (pipe->burst) {
- uint64_t burst = (now - si->idle_time) *
- pipe->bandwidth;
- if (burst > pipe->burst)
- burst = pipe->burst;
+ {
+ struct new_pipe *p = &fs->sched->pipe;
+ si->credit = dn_cfg.io_fast ? p->bandwidth : 0;
+ if (p->burst) {
+ uint64_t burst = (now - si->idle_time) * p->bandwidth;
+ if (burst > p->burst)
+ burst = p->burst;
si->credit += burst;
- }
- }
+ }
+ }
/* pass through scheduler and delay line */
m = serve_sched(NULL, si, now);
Modified: user/luigi/ipfw3-head/sys/netinet/ipfw/ip_dummynet.c
==============================================================================
--- user/luigi/ipfw3-head/sys/netinet/ipfw/ip_dummynet.c Sat Jan 16 08:12:22 2010 (r202432)
+++ user/luigi/ipfw3-head/sys/netinet/ipfw/ip_dummynet.c Sat Jan 16 08:23:09 2010 (r202433)
@@ -447,14 +447,9 @@ fsk_destroy_list(struct new_fsk_head *h,
struct new_fsk *fs;
while ((fs = SLIST_FIRST(h))) {
- /* remember if the flowset is dying */
- printf("%s unlink flowset %d\n",
- __FUNCTION__, fs->fs.fs_nr);
SLIST_REMOVE_HEAD(h, sch_chain);
if (fs == int_fs) {
- /* free the internal flowset.
- * XXX maybe do it same as others
- */
+ /* free the internal flowset. */
free(fs, M_DUMMYNET);
dn_cfg.fsk_count--;
continue;
@@ -633,7 +628,6 @@ static void
update_fs(struct new_schk *s)
{
struct new_fsk *fs, *tmp;
- printf("%s XXX chech be implemented\n", __FUNCTION__);
SLIST_FOREACH_SAFE(fs, &dn_cfg.fsu, sch_chain, tmp) {
if (s->sch.sched_nr != fs->fs.fs_nr) {
printf("fs %d still unlinked\n", fs->fs.fs_nr);
@@ -645,10 +639,6 @@ update_fs(struct new_schk *s)
fs->sched = s;
SLIST_INSERT_HEAD(&s->fsk_list, fs, sch_chain);
}
-#if 0 // XXX to be completed
- scan the children of s and see if they still apply.
- scan fsunlinked and link all schedulers to s;
-#endif
}
/*
@@ -825,7 +815,10 @@ again: /* run twice, for wfq and fifo */
printf("sched %d type changed from %s to %s"
" let it drain and reallocate\n",
i, s->fp->name, fp->name);
- /* XXX detach flowsets */
+ /* mark delete so it won't be matched.
+ * XXX todo: make it die when its queues are done.
+ * XXX otherwise do a real delete.
+ */
s->kflags = DN_DELETE;
notify_fs = 1;
goto again;
@@ -879,7 +872,6 @@ config_profile(struct new_profile *pf, s
if (i <= 0 || i >= DN_MAX_ID)
return EINVAL;
/* XXX other sanity checks */
-
DUMMYNET_LOCK();
s = locate_scheduler(i);
@@ -1118,10 +1110,8 @@ dummynet_get(struct sockopt *sopt)
a.flags = to_copy;
a.type = DN_SCH;
dn_ht_scan(dn_cfg.schedhash, copy_data_helper, &a);
-#if 0 // XXX temporarily disable
a.type = DN_FS;
dn_ht_scan(dn_cfg.fshash, copy_data_helper, &a);
-#endif
}
DUMMYNET_UNLOCK();
error = sooptcopyout(sopt, start, buf - start);
More information about the svn-src-user
mailing list