svn commit: r202746 - in user/luigi/ipfw3-head: sbin/ipfw
sys/netinet sys/netinet/ipfw
Luigi Rizzo
luigi at FreeBSD.org
Thu Jan 21 13:00:28 UTC 2010
Author: luigi
Date: Thu Jan 21 13:00:28 2010
New Revision: 202746
URL: http://svn.freebsd.org/changeset/base/202746
Log:
- remove stale data structures
- remove old, incorrect documentation and add a correct version;
- adjust the handling of masks. When both flow_mask and sched_mask are
present, we must do the initial grouping by (flow_mask|sched_mask);
otherwise a queue might end up going to two different schedulers.
TODO: find better names for 'things' -- especially the naming of
queue-related structures is very confusing.
Modified:
user/luigi/ipfw3-head/sbin/ipfw/dummynet.c
user/luigi/ipfw3-head/sys/netinet/ip_dummynet.h
user/luigi/ipfw3-head/sys/netinet/ipfw/dn_sched_wf2q.c
user/luigi/ipfw3-head/sys/netinet/ipfw/ip_dn_io.c
user/luigi/ipfw3-head/sys/netinet/ipfw/ip_dn_private.h
user/luigi/ipfw3-head/sys/netinet/ipfw/ip_dummynet.c
Modified: user/luigi/ipfw3-head/sbin/ipfw/dummynet.c
==============================================================================
--- user/luigi/ipfw3-head/sbin/ipfw/dummynet.c Thu Jan 21 12:18:29 2010 (r202745)
+++ user/luigi/ipfw3-head/sbin/ipfw/dummynet.c Thu Jan 21 13:00:28 2010 (r202746)
@@ -87,12 +87,12 @@ static struct _s_x dummynet_params[] = {
#define O_NEXT(p, len) ((void *)(char *)(p) + len)
static void
-oid_fill(struct dn_id *oid, int len, int type)
+oid_fill(struct dn_id *oid, int len, int type, uintptr_t id)
{
oid->len = len;
oid->type = type;
oid->subtype = 0;
- oid->id = 0;
+ oid->id = id;
}
/* make room in the buffer and move the pointer forward */
@@ -100,7 +100,7 @@ static void *
o_next(struct dn_id **o, int len, int type)
{
struct dn_id *ret = *o;
- oid_fill(ret, len, type);
+ oid_fill(ret, len, type, 0);
*o = O_NEXT(*o, len);
return ret;
}
@@ -379,12 +379,15 @@ list_pipes(struct dn_id *oid, struct dn_
int
ipfw_delete_pipe(int pipe_or_queue, int i)
{
- struct dn_id oid;
- oid_fill(&oid, sizeof(oid), DN_CMD_DELETE);
- oid.subtype = (co.do_pipe == 1) ? DN_PIPE :
+ struct {
+ struct dn_id oid;
+ uint32_t a[1]; /* more if we want a list */
+ } cmd;
+ oid_fill((void *)&cmd, sizeof(cmd), DN_CMD_DELETE, DN_API_VERSION);
+ cmd.oid.subtype = (co.do_pipe == 1) ? DN_PIPE :
( (co.do_pipe == 2) ? DN_FS : DN_SCH);
- oid.id = i;
- i = do_cmd(IP_DUMMYNET3, &oid, oid.len);
+ cmd.a[0] = i;
+ i = do_cmd(IP_DUMMYNET3, &cmd, cmd.oid.len);
if (i) {
i = 1;
warn("rule %u: setsockopt(IP_DUMMYNET_DEL)", i);
@@ -750,13 +753,18 @@ ipfw_config_pipe(int ac, char **av)
struct new_pipe *p = NULL;
struct new_fs *fs = NULL;
struct new_profile *pf = NULL;
- struct new_cmd *cmd = NULL;
struct ipfw_flow_id *mask = NULL;
- int lmax = sizeof(*cmd); /* always present */
+ int lmax;
int _foo = 0, *flags = &_foo;
- /* worst case: 2 schedulers, 1 profile, 1 pipe, 1 flowset */
- lmax += 2*sizeof(*sch) + 2*sizeof(*p) + sizeof(*fs) + sizeof(*pf);
+ /*
+ * allocate space for 1 header,
+ * 1 scheduler, 1 pipe, 1 flowset, 1 profile
+ */
+ lmax = sizeof(struct dn_id); /* command header */
+ lmax += sizeof(struct new_sch) + sizeof(struct new_pipe) +
+ sizeof(struct new_fs) +
+ sizeof(struct new_profile);
av++; ac--;
/* Pipe number */
@@ -769,8 +777,9 @@ ipfw_config_pipe(int ac, char **av)
if (buf == NULL) {
errx(1, "no memory for pipe buffer");
}
- cmd = o_next(&buf, sizeof(*cmd), DN_CMD_CONFIGURE);
- cmd->entries = 0; /* no explicit arguments */
+ /* all commands start with a 'CONFIGURE' and a version */
+ o_next(&buf, sizeof(struct dn_id), DN_CMD_CONFIGURE);
+ base->id = DN_API_VERSION;
switch (co.do_pipe) {
case 1:
@@ -1213,7 +1222,7 @@ void
dummynet_flush(void)
{
struct dn_id oid;
- oid_fill(&oid, sizeof(oid), DN_CMD_FLUSH);
+ oid_fill(&oid, sizeof(oid), DN_CMD_FLUSH, DN_API_VERSION);
do_cmd(IP_DUMMYNET3, &oid, oid.len);
}
@@ -1225,7 +1234,7 @@ dummynet_list(int ac, char *av[], int sh
oid.type = DN_CMD_GET;
oid.len = l;
- oid.id = 0;
+ oid.id = DN_API_VERSION;
switch (co.do_pipe) {
case 1:
oid.subtype = DN_PIPE; /* list pipe */
@@ -1237,7 +1246,6 @@ dummynet_list(int ac, char *av[], int sh
oid.subtype = DN_SCH; /* list sched */
break;
}
- /* XXX we could use oid.id for the filter */
ret = do_cmd(-IP_DUMMYNET3, &oid, (uintptr_t)&l);
// printf("%s returns %d need %d\n", __FUNCTION__, ret, oid.id);
if (ret != 0 || oid.id <= sizeof(oid))
Modified: user/luigi/ipfw3-head/sys/netinet/ip_dummynet.h
==============================================================================
--- user/luigi/ipfw3-head/sys/netinet/ip_dummynet.h Thu Jan 21 12:18:29 2010 (r202745)
+++ user/luigi/ipfw3-head/sys/netinet/ip_dummynet.h Thu Jan 21 13:00:28 2010 (r202746)
@@ -36,14 +36,21 @@
* Setsockopt() and getsockopt() pass a batch of objects, each
* of them starting with a "struct dn_id" which should fully identify
* the object and its relation with others in the sequence.
- * objects in a batch of requests.
- * This struct store in the type field an identifier of the type of object
- * passed (for example a pipe, a scheduler...). The subtype
- * field contains more detail info, if needed.
+ * The first object in each request should have
+ * type= DN_CMD_*, id = DN_API_VERSION.
+ * For other objects, type and subtype specify the object, len indicates
+ * the total length including the header, and 'id' identifies the specific
+ * object.
+ *
+ * Most objects are numbered with an identifier in the range 1..65535.
+ * DN_MAX_ID indicates the first value outside the range.
*/
+#define DN_API_VERSION 12500000
+#define DN_MAX_ID 0x10000
+
struct dn_id {
- uint16_t len; /* total len including this header */
+ uint16_t len; /* total obj len including this header */
uint8_t type;
uint8_t subtype;
uintptr_t id; /* generic id or pointer */
@@ -63,10 +70,10 @@ enum {
DN_QUEUE,
DN_DELAY_LINE,
DN_PROFILE,
- DN_NI, /* new_inst */
+ DN_NI, /* struct new_inst */
//DN_FS_EXT,
//DN_QUEUE_EXT,
- DN_TEXT, /* subtype is the object */
+ DN_TEXT, /* opaque text is the object */
DN_CMD_CONFIGURE, /* objects follow */
DN_CMD_DELETE, /* subtype + list of entries */
DN_CMD_GET, /* subtype + list of entries */
@@ -82,36 +89,12 @@ enum { /* subtype for schedulers, flowse
};
enum { /* user flags */
- DN_HAVE_MASK = 0x0001,
- DN_NOERROR = 0x0002,
- DN_QSIZE_BYTES = 0x0008,
+ DN_HAVE_MASK = 0x0001, /* fs or sched has a mask */
+ DN_NOERROR = 0x0002, /* do not report errors */
+ DN_QSIZE_BYTES = 0x0008, /* queue size is in bytes */
+ DN_HAS_PROFILE = 0x0010, /* a pipe has a profile */
DN_IS_RED = 0x0020,
DN_IS_GENTLE_RED= 0x0040,
-#if 0
-#define DN_HAS_PROFILE 0x0020 /* the pipe has a delay profile. */
-#endif
-};
-
-typedef uint64_t dn_key;
-
-struct new_cmd { /* header for all sockopt */
- struct dn_id oid;
- int entries;
- uint32_t data[0]; /* actually, entries elements */
-};
-
-/* A delay profile is attached to a pipe */
-#define ED_MAX_SAMPLES_NO 1024
-struct new_profile {
- struct dn_id oid;
- /* fields to simulate a delay profile */
-#define ED_MAX_NAME_LEN 32
- char name[ED_MAX_NAME_LEN];
- int pipe_nr;
- int loss_level;
- int bandwidth;
- int samples_no;
- int samples[ED_MAX_SAMPLES_NO]; /* this has actually samples_no slots */
};
/*
@@ -187,77 +170,66 @@ struct new_sch {
};
-/*
- * "queue N" and "pipe N" accept 1<=N<=65535.
- * So valid names are from 1 to DN_MAXID-1
- */
-#define DN_MAX_ID 0x10000
+/* A delay profile is attached to a pipe */
+#define ED_MAX_SAMPLES_NO 1024
+struct new_profile {
+ struct dn_id oid;
+ /* fields to simulate a delay profile */
+#define ED_MAX_NAME_LEN 32
+ char name[ED_MAX_NAME_LEN];
+ int pipe_nr;
+ int loss_level;
+ int bandwidth;
+ int samples_no; /* actual length of samples[] */
+ int samples[ED_MAX_SAMPLES_NO]; /* may be shorter */
+};
-/*
- * The maximum hash table size for queues (unused ?)
- */
-#define DN_MAX_HASH_SIZE 65536
/*
- * Overall structure of dummynet (with WF2Q+):
+ * Overall structure of dummynet
In dummynet, packets are selected with the firewall rules, and passed
-to two different objects: PIPE or QUEUE.
+to two different objects: PIPE or QUEUE (bad name).
-A QUEUE is just a queue with configurable size and queue management
-policy. It is also associated with a mask (to discriminate among
-different flows), a weight (used to give different shares of the
-bandwidth to different flows) and a "pipe", which essentially
-supplies the transmit clock for all queues associated with that
-pipe.
-
-A PIPE emulates a fixed-bandwidth link, whose bandwidth is
-configurable. The "clock" for a pipe can come from either an
-internal timer, or from the transmit interrupt of an interface.
-A pipe is also associated with one (or more, if masks are used)
-queue, where all packets for that pipe are stored.
-
-The bandwidth available on the pipe is shared by the queues
-associated with that pipe (only one in case the packet is sent
-to a PIPE) according to the WF2Q+ scheduling algorithm and the
-configured weights.
-
-In general, incoming packets are stored in the appropriate queue,
-which is then placed into one of a few heaps managed by a scheduler
-to decide when the packet should be extracted.
-The scheduler (a function called dummynet()) is run at every timer
-tick, and grabs queues from the head of the heaps when they are
-ready for processing.
+A QUEUE defines a classifier, which groups packets into flows
+according to a 'mask', puts them into independent queues (one
+per flow) with configurable size and queue management policy,
+and passes flows to a scheduler:
+
+ (flow_mask|sched_mask) sched_mask
+ +---------+ weight Wx +-------------+
+ | |->-[flow]-->--| |-+
+ -->--| QUEUE x | ... | | |
+ | |->-[flow]-->--| SCHEDuler N | |
+ +---------+ | | |
+ ... | +--[LINK N]-->--
+ +---------+ weight Wy | | +--[LINK N]-->--
+ | |->-[flow]-->--| | |
+ -->--| QUEUE y | ... | | |
+ | |->-[flow]-->--| | |
+ +---------+ +-------------+ |
+ +-------------+
+
+Many QUEUE objects can connect to the same scheduler, each
+QUEUE object can have its own set of parameters.
+
+In turn, the SCHEDuler 'forks' multiple instances according
+to a 'sched_mask', each instance manages its own set of queues
+and transmits on a private instance of a configurable LINK.
+
+A PIPE is a simplified version of the above, where there
+is no flow_mask, and each scheduler instance handles a single queue.
There are three data structures definining a pipe and associated queues:
+ dn_pipe, which contains the main configuration parameters related
to delay and bandwidth;
- + dn_flow_set, which contains WF2Q+ configuration, flow
- masks, plr and RED configuration;
- + dn_flow_queue, which is the per-flow queue (containing the packets)
-
-Multiple dn_flow_set can be linked to the same pipe, and multiple
-dn_flow_queue can be linked to the same dn_flow_set.
-All data structures are linked in a linear list which is used for
-housekeeping purposes.
-
-During configuration, we create and initialize the dn_flow_set
-and dn_pipe structures (a dn_pipe also contains a dn_flow_set).
-
-At runtime: packets are sent to the appropriate dn_flow_set (either
-WFQ ones, or the one embedded in the dn_pipe for fixed-rate flows),
-which in turn dispatches them to the appropriate dn_flow_queue
-(created dynamically according to the masks).
-
-The transmit clock for fixed rate flows (ready_event()) selects the
-dn_flow_queue to be used to transmit the next packet. For WF2Q,
-wfq_ready_event() extract a pipe which in turn selects the right
-flow using a number of heaps defined into the pipe itself.
+ + dn_flowset, which contains flow masks, weights and queue
+ parameters;
+ + dn_flow, which contains the queue status (flow id, statistics)
*
*/
-
#endif /* _IP_DUMMYNET_H */
Modified: user/luigi/ipfw3-head/sys/netinet/ipfw/dn_sched_wf2q.c
==============================================================================
--- user/luigi/ipfw3-head/sys/netinet/ipfw/dn_sched_wf2q.c Thu Jan 21 12:18:29 2010 (r202745)
+++ user/luigi/ipfw3-head/sys/netinet/ipfw/dn_sched_wf2q.c Thu Jan 21 13:00:28 2010 (r202746)
@@ -62,13 +62,13 @@ struct wf2qp_si {
struct dn_heap sch_heap; /* top extract - key Finish time */
struct dn_heap ne_heap; /* top extract - key Start time */
struct dn_heap idle_heap; /* random extract - key Start=Finish time */
- dn_key V ; /* virtual time */
+ uint64_t V; /* virtual time */
uint32_t sum; /* sum of weights */
};
struct wf2qp_queue {
- dn_key S,F; /* start time, finish time */
- int heap_pos; /* position (index) of struct in heap */
+ uint64_t S, F; /* start time, finish time */
+ int32_t heap_pos; /* position (index) of struct in heap */
};
/*
Modified: user/luigi/ipfw3-head/sys/netinet/ipfw/ip_dn_io.c
==============================================================================
--- user/luigi/ipfw3-head/sys/netinet/ipfw/ip_dn_io.c Thu Jan 21 12:18:29 2010 (r202745)
+++ user/luigi/ipfw3-head/sys/netinet/ipfw/ip_dn_io.c Thu Jan 21 13:00:28 2010 (r202746)
@@ -66,7 +66,7 @@ __FBSDID("$FreeBSD$");
* We keep a private variable for the simulation time, but we could
* probably use an existing one ("softticks" in sys/kern/kern_timeout.c)
*/
-static dn_key curr_time = 0 ; /* current simulation time */
+static uint64_t curr_time = 0; /* current simulation time */
struct dn_parms dn_cfg = {
.pipe_slot_limit = 100, /* Foot shooting limit for pipe queues. */
@@ -176,7 +176,7 @@ struct dn_pkt_tag {
/* second part, dummynet specific */
int dn_dir; /* action when packet comes out.*/
/* see ip_fw_private.h */
- dn_key output_time; /* when the pkt is due for delivery*/
+ uint64_t output_time; /* when the pkt is due for delivery*/
struct ifnet *ifp; /* interface, for ip_output */
struct _ip6dn_args ip6opt; /* XXX ipv6 options */
};
@@ -279,7 +279,7 @@ drop:
* Runs under scheduler lock.
*/
static void
-transmit_event(struct mq *q, struct delay_line *dline, dn_key now)
+transmit_event(struct mq *q, struct delay_line *dline, uint64_t now)
{
struct mbuf *m;
struct dn_pkt_tag *pkt = NULL;
@@ -327,7 +327,7 @@ extra_bits(struct mbuf *m, struct new_sc
* Return a pointer to the head of the queue.
*/
static struct mbuf *
-serve_sched(struct mq *q, struct new_sch_inst *si, dn_key now)
+serve_sched(struct mq *q, struct new_sch_inst *si, uint64_t now)
{
struct mq def_q;
struct new_schk *s = si->sched;
@@ -367,7 +367,7 @@ serve_sched(struct mq *q, struct new_sch
if (si->credit >= 0) {
si->idle_time = now;
} else {
- dn_key t;
+ uint64_t t;
KASSERT (bw > 0, ("bw=0 and credit<0 ?"));
t = div64(bw - 1 - si->credit, bw);
if (m)
@@ -611,9 +611,8 @@ dummynet_io(struct mbuf **m0, int dir, s
if (si == NULL)
goto dropit;
/*
- * If the support multiple queues, find the right one
+ * If the scheduler supports multiple queues, find the right one
* (otherwise it will be ignored by enqueue).
- * We cannot pass si as an argument :(
*/
if (fs->sched->fp->flags & DN_MULTIQUEUE) {
q = ipdn_q_find(fs, si, &(fwa->f_id));
Modified: user/luigi/ipfw3-head/sys/netinet/ipfw/ip_dn_private.h
==============================================================================
--- user/luigi/ipfw3-head/sys/netinet/ipfw/ip_dn_private.h Thu Jan 21 12:18:29 2010 (r202745)
+++ user/luigi/ipfw3-head/sys/netinet/ipfw/ip_dn_private.h Thu Jan 21 13:00:28 2010 (r202746)
@@ -150,6 +150,9 @@ struct delay_line {
* The kernel side of a flowset. It is linked in a hash table
* of flowsets, and in a list of children of their parent scheduler.
* qht is either the queue or (if HAVE_MASK) a hash table queues.
+ * Note that the mask to use is the (flow_mask|sched_mask), which
+ * changes as we attach/detach schedulers. So we store it here.
+ *
* XXX If we want to add scheduler-specific parameters, we need to
* put them in external storage because the scheduler may not be
* available when the fsk is created.
@@ -158,6 +161,8 @@ struct new_fsk { /* kernel side of a flo
struct new_fs fs;
SLIST_ENTRY(new_fsk) fsk_next; /* hash chain list */
+ struct ipfw_flow_id fsk_mask;
+
/* hash table of queues, or just single queue */
struct dn_ht *_qht;
struct new_schk *sched; /* Sched we are linked to */
@@ -220,8 +225,8 @@ struct new_sch_inst {
int kflags; /* DN_ACTIVE */
int64_t credit; /* bits I can transmit (more or less). */
- dn_key sched_time; /* time pipe was scheduled in ready_heap */
- dn_key idle_time; /* start of scheduler instance idle time */
+ uint64_t sched_time; /* time pipe was scheduled in ready_heap */
+ uint64_t idle_time; /* start of scheduler instance idle time */
};
/* kernel-side flags */
Modified: user/luigi/ipfw3-head/sys/netinet/ipfw/ip_dummynet.c
==============================================================================
--- user/luigi/ipfw3-head/sys/netinet/ipfw/ip_dummynet.c Thu Jan 21 12:18:29 2010 (r202745)
+++ user/luigi/ipfw3-head/sys/netinet/ipfw/ip_dummynet.c Thu Jan 21 13:00:28 2010 (r202746)
@@ -142,6 +142,33 @@ flow_id_mask(struct ipfw_flow_id *mask,
return id;
}
+/* computes an OR of two masks, result in dst and also returned */
+static struct ipfw_flow_id *
+flow_id_or(struct ipfw_flow_id *src, struct ipfw_flow_id *dst)
+{
+ int is_v6 = IS_IP6_FLOW_ID(dst);
+
+ dst->dst_port |= src->dst_port;
+ dst->src_port |= src->src_port;
+ dst->proto |= src->proto;
+ dst->flags = 0; /* we don't care about this one */
+ if (is_v6) {
+#define OR_MASK(_d, _s) \
+ (_d)->__u6_addr.__u6_addr32[0] |= (_s)->__u6_addr.__u6_addr32[0]; \
+ (_d)->__u6_addr.__u6_addr32[1] |= (_s)->__u6_addr.__u6_addr32[1]; \
+ (_d)->__u6_addr.__u6_addr32[2] |= (_s)->__u6_addr.__u6_addr32[2]; \
+ (_d)->__u6_addr.__u6_addr32[3] |= (_s)->__u6_addr.__u6_addr32[3];
+ OR_MASK(&dst->dst_ip6, &src->dst_ip6);
+ OR_MASK(&dst->src_ip6, &src->src_ip6);
+#undef OR_MASK
+ dst->flow_id6 |= src->flow_id6;
+ } else {
+ dst->dst_ip |= src->dst_ip;
+ dst->src_ip |= src->src_ip;
+ }
+ return dst;
+}
+
/* XXX we may want a better hash function */
static uint32_t
flow_id_hash(struct ipfw_flow_id *id)
@@ -338,7 +365,7 @@ ipdn_q_find(struct new_fsk *fs, struct n
return NULL;
}
masked_id = *id;
- flow_id_mask(&fs->fs.flow_mask, &masked_id);
+ flow_id_mask(&fs->fsk_mask, &masked_id);
return dn_ht_find(fs->_qht, (uintptr_t)&masked_id,
DNHT_INSERT, &template);
} else {
@@ -839,6 +866,10 @@ fsk_attach(struct new_fsk *fs, struct ne
SLIST_INSERT_HEAD(&s->fsk_list, fs, sch_chain);
if (s->fp->new_fsk)
s->fp->new_fsk(fs);
+ /* XXX compute fsk_mask */
+ fs->fsk_mask = fs->fs.flow_mask;
+ if (fs->sched->sch.flags & DN_HAVE_MASK)
+ flow_id_or(&fs->fsk_mask, &fs->sched->sch.sched_mask);
if (!fs->_qht)
return;
D("XXX TODO requeue from fs %d to sch %d",
@@ -1221,7 +1252,8 @@ dummynet_flush(void)
}
/*
- * Main handler for configuration. Rules of the game:
+ * Main handler for configuration. We are guaranteed to be called
+ * with an oid which is at least a dn_id.
* - the first object is the command (config, delete, flush, ...)
* - config_pipe must be issued after the corresponding config_sched
* - parameters (DN_TXT) for an object must preceed the object
@@ -1234,10 +1266,13 @@ do_config(void *p, int l)
int err = 0, err2 = 0;
struct dn_id *arg = NULL;
- /* XXX TODO require the first block to be a 'CONFIGURE'
- * or at least carry with a version number
- */
- for (o = p; l >= sizeof(*o); o = next) {
+ o = p;
+ if (o->id != DN_API_VERSION) {
+ D("invalid api version got %d need %d",
+ o->id, DN_API_VERSION);
+ return EINVAL;
+ }
+ for (; l >= sizeof(*o); o = next) {
struct dn_id *prev = arg;
if (o->len < sizeof(*o) || l < o->len) {
D("bad len o->len %d len %d", o->len, l);
@@ -1451,7 +1486,7 @@ ip_dn_ctl(struct sockopt *sopt)
break;
}
l = sopt->sopt_valsize;
- if (l < 0 || l > 12000) {
+ if (l < sizeof(struct dn_id) || l > 12000) {
D("argument len %d invalid", l);
break;
}
More information about the svn-src-user
mailing list