svn commit: r204324 - user/luigi/ipfw3-head/sys/netinet/ipfw

Luigi Rizzo luigi at FreeBSD.org
Thu Feb 25 16:40:08 UTC 2010


Author: luigi
Date: Thu Feb 25 16:40:08 2010
New Revision: 204324
URL: http://svn.freebsd.org/changeset/base/204324

Log:
  add code to drain idle queues and schedulers
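
  The new dn_drain_scheduler()/dn_drain_queue() hooks run from
  dummynet_task() once per tick and reclaim idle scheduler instances
  and queues incrementally: each call visits a single hash bucket,
  advancing a per-object cursor, so the per-tick cost stays bounded
  regardless of table size. A minimal standalone sketch of that
  pattern (types and names here are illustrative, not the commit's):

	#include <stdlib.h>

	#define NBUCKETS 16

	struct entry {
		struct entry *next;
		int idle;		/* nonzero once safe to reclaim */
	};

	static struct entry *buckets[NBUCKETS];
	static int drain_cursor;	/* plays the role of dn_cfg.drain_fs */

	/* One tick's worth of work: free the idle entries in one bucket. */
	static void
	drain_one_bucket(void)
	{
		struct entry **pp = &buckets[drain_cursor % NBUCKETS];

		while (*pp != NULL) {
			struct entry *e = *pp;

			if (e->idle) {
				*pp = e->next;	/* unlink, cf. DNHT_SCAN_DEL */
				free(e);
			} else
				pp = &e->next;
		}
		drain_cursor++;		/* next tick, next bucket */
	}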

Modified:
  user/luigi/ipfw3-head/sys/netinet/ipfw/ip_dn_io.c
  user/luigi/ipfw3-head/sys/netinet/ipfw/ip_dummynet.c

Modified: user/luigi/ipfw3-head/sys/netinet/ipfw/ip_dn_io.c
==============================================================================
--- user/luigi/ipfw3-head/sys/netinet/ipfw/ip_dn_io.c	Thu Feb 25 16:39:28 2010	(r204323)
+++ user/luigi/ipfw3-head/sys/netinet/ipfw/ip_dn_io.c	Thu Feb 25 16:40:08 2010	(r204324)
@@ -540,6 +540,9 @@ dummynet_task(void *context, int pending
 			transmit_event(&q, (struct delay_line *)p, dn_cfg.curr_time);
 		}
 	}
+	dn_drain_scheduler();
+	dn_drain_queue();
+
 	DN_BH_WUNLOCK();
 	dn_reschedule();
 	if (q.head != NULL)
@@ -741,7 +744,11 @@ dummynet_io(struct mbuf **m0, int dir, s
 	m = serve_sched(NULL, si, dn_cfg.curr_time);
 
 	/* optimization -- pass it back to ipfw for immediate send */
-	if (dn_cfg.io_fast && m == *m0 && (dir & PROTO_LAYER2) == 0 ) {
+	/* XXX Don't call dummynet_send() if the scheduler returns the
+	 *     packet just enqueued; doing so would cause a lock order
+	 *     reversal.
+	 */
+	if (/*dn_cfg.io_fast &&*/ m == *m0 && (dir & PROTO_LAYER2) == 0 ) {
 		/* fast io */
 		io_pkt_fast++;
 		if (m->m_nextpkt != NULL) {

Modified: user/luigi/ipfw3-head/sys/netinet/ipfw/ip_dummynet.c
==============================================================================
--- user/luigi/ipfw3-head/sys/netinet/ipfw/ip_dummynet.c	Thu Feb 25 16:39:28 2010	(r204323)
+++ user/luigi/ipfw3-head/sys/netinet/ipfw/ip_dummynet.c	Thu Feb 25 16:40:08 2010	(r204324)
@@ -76,16 +76,6 @@ static struct callout dn_timeout;
 static struct task	dn_task;
 static struct taskqueue	*dn_tq = NULL;
 
-/*
- * XXX max_qlen is used as a temporary measure to store the
- * max size of 'struct dn_queue' plus scheduler-specific extensions.
- * This is used to determine how much space is needed on a
- * getsockopt() to copy queues up.
- * Eventually this h should go away as we only want to copy the
- * basic dn_queue.
- */
-static int max_qlen = 0;
-
 static void
 dummynet(void * __unused unused)
 {
@@ -309,6 +299,7 @@ q_new(uintptr_t key, int flags, void *ar
 		q->ni.fid = *(struct ipfw_flow_id *)key;
 	q->fs = fs;
 	q->_si = template->_si;
+	q->_si->q_count++;
 
 	if (fs->sched->fp->new_queue)
 		fs->sched->fp->new_queue(q);
@@ -330,6 +321,7 @@ dn_delete_queue(struct dn_queue *q, int 
 	/* notify the parent scheduler that the queue is going away */
 	if (fs && fs->sched->fp->free_queue)
 		fs->sched->fp->free_queue(q);
+	q->_si->q_count--;
 	q->_si = NULL;
 	if (flags & DN_DESTROY) {
 		if (q->mq.head)
@@ -576,6 +568,7 @@ fsk_new(uintptr_t key, int flags, void *
 	if (fs) {
 		set_oid(&fs->fs.oid, DN_FS, sizeof(fs->fs));
 		dn_cfg.fsk_count++;
+		fs->drain_bucket = 0;
 		SLIST_INSERT_HEAD(&dn_cfg.fsu, fs, sch_chain);
 	}
 	return fs;
@@ -707,6 +700,7 @@ schk_new(uintptr_t key, int flags, void 
 	SLIST_INIT(&s->fsk_list);
 	/* initialize the hash table or create the single instance */
 	s->fp = a->fp;	/* si_new needs this */
+	s->drain_bucket = 0;
 	if (s->sch.flags & DN_HAVE_MASK) {
 		s->siht = dn_ht_init(NULL, s->sch.buckets,
 			offsetof(struct dn_sch_inst, si_next),
@@ -796,19 +790,43 @@ copy_obj(char **start, char *end, void *
 	if (o->type == DN_LINK) {
 		/* Adjust burst parameter for link */
 		struct dn_link *l = (struct dn_link *)*start;
+		/* XXX marta: check what is meant here, div64 or what */
 		l->burst =  div64(l->burst, 8 * hz);
 	}
 	*start += o->len;
 	return 0;
 }
 
+/* Specific function to copy a queue.
+ * It copies only the common part of a queue and correctly sets
+ * the length.
+ */
+static int
+copy_obj_q(char **start, char *end, void *_o, const char *msg, int i)
+{
+	struct dn_id *o = _o;
+	int have = end - *start;
+	int len = sizeof(struct dn_queue);
+
+	if (have < len || o->len == 0 || o->type != DN_QUEUE) {
+		D("ERROR type %d %s %d have %d need %d",
+			o->type, msg, i, have, len);
+		return 1;
+	}
+	ND("type %d %s %d len %d", o->type, msg, i, len);
+	bcopy(_o, *start, len);
+	((struct dn_id*)(*start))->len = len;
+	*start += len;
+	return 0;
+}
+
 static int
 copy_q_cb(void *obj, void *arg)
 {
 	struct dn_queue *q = obj;
 	struct copy_args *a = arg;
 	struct dn_flow *ni = (struct dn_flow *)(*a->start);
-        if (copy_obj(a->start, a->end, &q->ni, "queue", -1))
+        if (copy_obj_q(a->start, a->end, &q->ni, "queue", -1))
                 return DNHT_SCAN_END;
         ni->oid.type = DN_FLOW; /* override the DN_QUEUE */
         ni->oid.id = si_hash((uintptr_t)&ni->fid, 0, NULL);
@@ -1679,8 +1697,6 @@ compute_space(struct dn_id *cmd, int *to
 	 * - ipfw queue show
 	 *   (NF * dn_fs) all flowset
 	 *   (NQ * dn_queue) all queues
-	 * I use 'max_qlen' instead of sizeof(dn_queue) because
-	 *   a queue can be of variable size, so use the max queue size.
 	 */
 	switch (cmd->subtype) {
 	default:
@@ -1690,7 +1706,7 @@ compute_space(struct dn_id *cmd, int *to
 		x = DN_C_LINK | DN_C_SCH | DN_C_FLOW;
 		need += dn_cfg.schk_count *
 			(sizeof(struct dn_fs) + profile_size) / 2;
-		need += dn_cfg.si_count * max_qlen;
+		need += dn_cfg.si_count * sizeof(struct dn_queue);
 		need += dn_cfg.fsk_count * sizeof(uint32_t);
 		break;
 	case DN_SCH:	/* sched show */
@@ -1719,7 +1735,7 @@ compute_space(struct dn_id *cmd, int *to
 	}
 	/* XXX queue space might be variable */
 	if (x & DN_C_QUEUE)
-		need += dn_cfg.queue_count * max_qlen;
+		need += dn_cfg.queue_count * sizeof(struct dn_queue);
 	if (x & DN_C_FLOW)
 		need += dn_cfg.si_count * (sizeof(struct dn_flow));
 	return need;
@@ -1836,6 +1852,105 @@ dummynet_get(struct sockopt *sopt, void 
 	return error;
 }
 
+/* Callback called on a scheduler instance to delete it if idle */
+static int
+drain_scheduler_cb(void *_si, void *arg)
+{
+	struct dn_sch_inst *si = _si;
+
+	if ((si->kflags & DN_ACTIVE) || si->dline.mq.head != NULL)
+		return 0;
+
+	if (si->sched->fp->flags & DN_MULTIQUEUE) {
+		if (si->q_count == 0)
+			return si_destroy(si, NULL);
+		else
+			return 0;
+	} else { /* !DN_MULTIQUEUE: the single queue follows si in memory */
+		if ((si+1)->ni.length == 0)
+			return si_destroy(si, NULL);
+		else
+			return 0;
+	}
+	return 0; /* unreachable */
+}
+
+/* Callback called on a scheduler to scan its instances for draining */
+static int
+drain_scheduler_sch_cb(void *_s, void *arg)
+{
+	struct dn_schk *s = _s;
+
+	if (s->sch.flags & DN_HAVE_MASK) {
+		dn_ht_scan_bucket(s->siht, &s->drain_bucket,
+				drain_scheduler_cb, NULL);
+		s->drain_bucket++;
+	} else {
+		if (s->siht) {
+			if (drain_scheduler_cb(s->siht, NULL) == DNHT_SCAN_DEL)
+				s->siht = NULL;
+		}
+	}
+	return 0;
+}
+
+/* Called every tick; scans one bucket of schedulers, draining idle instances */
+void
+dn_drain_scheduler(void)
+{
+	dn_ht_scan_bucket(dn_cfg.schedhash, &dn_cfg.drain_sch,
+			   drain_scheduler_sch_cb, NULL);
+	dn_cfg.drain_sch++;
+}
+
+/* Callback called on a queue to delete it if it is idle */
+static int
+drain_queue_cb(void *_q, void *arg)
+{
+	struct dn_queue *q = _q;
+
+	if (q->ni.length == 0) {
+		dn_delete_queue(q, DN_DESTROY);
+		return DNHT_SCAN_DEL; /* queue is deleted */
+	}
+
+	return 0; /* queue isn't deleted */
+}
+
+/* Callback called on a flowset to scan its queues for draining */
+static int
+drain_queue_fs_cb(void *_fs, void *arg)
+{
+	struct dn_fsk *fs = _fs;
+
+	if (fs->fs.flags & DN_QHT_HASH) {
+		/* Flowset has a hash table for queues */
+		dn_ht_scan_bucket(fs->qht, &fs->drain_bucket,
+				drain_queue_cb, NULL);
+		fs->drain_bucket++;
+	} else {
+		/*
+		 * No hash table for this flowset; clear the pointer
+		 * if the queue is deleted.
+		 */
+		if (fs->qht) {
+			if (drain_queue_cb(fs->qht, NULL) == DNHT_SCAN_DEL)
+				fs->qht = NULL;
+		}
+	}
+	return 0;
+}
+
+/* Called every tick; scans one bucket of flowsets, draining idle queues */
+void
+dn_drain_queue(void)
+{
+	/* scan one bucket of the flowset hash */
+	dn_ht_scan_bucket(dn_cfg.fshash, &dn_cfg.drain_fs,
+			   drain_queue_fs_cb, NULL);
+	dn_cfg.drain_fs++;
+}
+
 /*
  * Handler for the various dummynet socket options
  */
@@ -1930,6 +2045,10 @@ ip_dn_init(void)
 		offsetof(struct dn_fsk, fsk_next),
 		fsk_hash, fsk_match, fsk_new);
 
+	/* bucket indices used by the drain routines */
+	dn_cfg.drain_fs = 0;
+	dn_cfg.drain_sch = 0;
+
 	heap_init(&dn_cfg.evheap, 16, offsetof(struct dn_id, id));
 	SLIST_INIT(&dn_cfg.fsu);
 	SLIST_INIT(&dn_cfg.schedlist);
@@ -2000,17 +2119,11 @@ static int
 load_dn_sched(struct dn_alg *d)
 {
 	struct dn_alg *s;
-	int q_len = 0;
 
 	if (d == NULL)
 		return 1; /* error */
 	ip_dn_init();	/* just in case, we need the lock */
 
-	/* check the max queue lenght */
-	q_len = sizeof(struct dn_queue) + d->q_datalen;
-	if (max_qlen <= q_len) {
-		max_qlen = q_len;
-	}
 	/* Check that mandatory funcs exists */
 	if (d->enqueue == NULL || d->dequeue == NULL) {
 		D("missing enqueue or dequeue for %s", d->name);

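A note on the (si+1) access in drain_scheduler_cb(): for schedulers
without DN_MULTIQUEUE the instance and its only queue appear to live
in a single allocation, with the queue placed immediately after the
dn_sch_inst, so stepping one element past the instance lands on the
queue. A hypothetical miniature of that layout (simplified types, not
the branch's own):

	#include <stdlib.h>

	struct inst  { int active; };	/* stands in for dn_sch_inst */
	struct queue { int length; };	/* stands in for dn_queue */

	/* One calloc() holds the instance with the queue right behind
	 * it, which is what makes (si + 1) point at the queue. */
	static struct inst *
	inst_new(void)
	{
		return calloc(1, sizeof(struct inst) + sizeof(struct queue));
	}

	static struct queue *
	inst_queue(struct inst *si)
	{
		return (struct queue *)(si + 1);
	}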
