PERFORCE change 126768 for review

Marko Zec zec at FreeBSD.org
Mon Sep 24 05:16:46 PDT 2007


http://perforce.freebsd.org/chv.cgi?CH=126768

Change 126768 by zec at zec_tpx32 on 2007/09/24 12:16:00

	Refactor the locking scheme in ng_pipe so that recursing on
	a mutex no longer occurs, which is basically accomplished
	by releasing the ng_pipe_giant lock before propagating a
	mbuf / call to a downstream netgraph hook.  All ng_pipe
	state is still protected by a single mutex.
	
	Hold the ng_pipe_giant mutex during rcvmsg processing and
	during hook removal.
	
	In polling loop, restart iteration over active hooks if
	a hook was removed from this list while the loop was running,
	which is signalled by bumping a generation id.  We don't
	need to abort / restart the current loop if a node was
	added to the list during the iteration, given that the hooks
	added to the active list are inserted at list head, i.e. past
	the current iterator position.
	
	Attempt to more correctly clean up per-hook state on ng_pipe
	node removal.
	
	Staticize symbols that don't need to be globally visible.
	
	Remove an "__inline" modifier to the ip_hash() function,
	given that the compiler should be smart enough to do
	inlining automatically.
	
	Misc. whitespace / indentation cleanups, and fix a few
	comments.
	
	Suggested by: julian@, kmacy@

Affected files ...

.. //depot/projects/vimage/src/sys/netgraph/ng_pipe.c#4 edit

Differences ...

==== //depot/projects/vimage/src/sys/netgraph/ng_pipe.c#4 (text+ko) ====

@@ -82,11 +82,11 @@
 
 /* Per hook info */
 struct hookinfo {
-	hook_p	hook;
-	int			noqueue;
-	LIST_ENTRY(hookinfo)	hook_le;	/* all active instances */
-	TAILQ_HEAD(, ngp_fifo)	fifo_head;	/* this hooks's FIFO queues */
+	hook_p			hook;
+	int			noqueue;	/* bypass any processing */
+	TAILQ_HEAD(, ngp_fifo)	fifo_head;	/* FIFO queues */
 	TAILQ_HEAD(, ngp_hdr)	qout_head;	/* delay queue head */
+	LIST_ENTRY(hookinfo)	active_le;	/* active hooks */
 	struct timeval		qin_utime;
 	struct ng_pipe_hookcfg	cfg;
 	struct ng_pipe_hookrun	run;
@@ -95,16 +95,15 @@
 };
 
 /* Per node info */
-struct privdata {
+struct node_priv {
 	node_p			node;
-	LIST_ENTRY(privdata)	node_le;
 	u_int64_t		delay;
 	u_int32_t		overhead;
 	u_int32_t		header_offset;
 	struct hookinfo		lower;
 	struct hookinfo		upper;
 };
-typedef struct privdata *priv_p;
+typedef struct node_priv *priv_p;
 
 /* Macro for calculating the virtual time for packet dequeueing in WFQ */
 #define FIFO_VTIME_SORT(plen)						\
@@ -131,21 +130,19 @@
 static void	parse_cfg(struct ng_pipe_hookcfg *, struct ng_pipe_hookcfg *,
 			struct hookinfo *, priv_p);
 static void	pipe_dequeue(struct hookinfo *, struct timeval *);
-static void	pipe_scheduler(void);
+static void	pipe_scheduler(void *);
 static void	pipe_poll(void);
 static int	ngp_modevent(module_t, int, void *);
 
-/* linked list of all "pipe" nodes */
-LIST_HEAD(pipe_node_head, privdata) node_head;
-
 /* linked list of active "pipe" hooks */
-LIST_HEAD(pipe_hook_head, hookinfo) hook_head;
+static LIST_HEAD(, hookinfo) active_head;
+static int active_gen_id = 0;
 
 /* timeout handle for pipe_scheduler */
-struct callout_handle	ds_handle;
+static struct callout polling_timer;
 
 /* zone for storing ngp_hdr-s */
-uma_zone_t ngp_zone;
+static uma_zone_t ngp_zone;
 
 /* Netgraph methods */
 static ng_constructor_t	ngp_constructor;
@@ -264,22 +261,11 @@
 {
 	priv_p priv;
 
-	if (LIST_EMPTY(&node_head)) {
-		ngp_zone = uma_zcreate("ng_pipe", max(sizeof(struct ngp_hdr),
-		    sizeof (struct ngp_fifo)), NULL, NULL, NULL, NULL,
-		    UMA_ALIGN_PTR, 0);
-		if (ngp_zone == NULL)
-		    panic("ng_pipe: couldn't allocate descriptor zone");
-	}
-
 	MALLOC(priv, priv_p, sizeof(*priv), M_NG_PIPE, M_ZERO | M_NOWAIT);
 	if (priv == NULL)
 		return (ENOMEM);
 	NG_NODE_SET_PRIVATE(node, priv);
 
-	/* Add new node to the list of all pipe nodes */
-	LIST_INSERT_HEAD(&node_head, priv, node_le);
-
 	return (0);
 }
 
@@ -318,8 +304,13 @@
 	const priv_p priv = NG_NODE_PRIVATE(node);
 	struct ng_mesg *resp = NULL;
 	struct ng_mesg *msg;
+	struct ng_pipe_stats *stats;
+	struct ng_pipe_run *run;
+	struct ng_pipe_cfg *cfg;
 	int error = 0;
 
+	mtx_lock(&ng_pipe_giant);
+
 	NGI_GET_MSG(item, msg);
 	switch (msg->header.typecookie) {
 	case NGM_PIPE_COOKIE:
@@ -327,12 +318,9 @@
 		case NGM_PIPE_GET_STATS:
 		case NGM_PIPE_CLR_STATS:
 		case NGM_PIPE_GETCLR_STATS:
-                    {
-			struct ng_pipe_stats *stats;
-
-                        if (msg->header.cmd != NGM_PIPE_CLR_STATS) {
-                                NG_MKRESPONSE(resp, msg,
-                                    sizeof(*stats), M_NOWAIT);
+			if (msg->header.cmd != NGM_PIPE_CLR_STATS) {
+				NG_MKRESPONSE(resp, msg,
+				    sizeof(*stats), M_NOWAIT);
 				if (resp == NULL) {
 					error = ENOMEM;
 					break;
@@ -342,19 +330,15 @@
 				    sizeof(stats->downstream));
 				bcopy(&priv->lower.stats, &stats->upstream,
 				    sizeof(stats->upstream));
-                        }
-                        if (msg->header.cmd != NGM_PIPE_GET_STATS) {
+			}
+			if (msg->header.cmd != NGM_PIPE_GET_STATS) {
 				bzero(&priv->upper.stats,
 				    sizeof(priv->upper.stats));
 				bzero(&priv->lower.stats,
 				    sizeof(priv->lower.stats));
 			}
-                        break;
-		    }
+			break;
 		case NGM_PIPE_GET_RUN:
-                    {
-			struct ng_pipe_run *run;
-
 			NG_MKRESPONSE(resp, msg, sizeof(*run), M_NOWAIT);
 			if (resp == NULL) {
 				error = ENOMEM;
@@ -365,12 +349,8 @@
 				sizeof(run->downstream));
 			bcopy(&priv->lower.run, &run->upstream,
 				sizeof(run->upstream));
-                        break;
-		    }
+			break;
 		case NGM_PIPE_GET_CFG:
-                    {
-			struct ng_pipe_cfg *cfg;
-
 			NG_MKRESPONSE(resp, msg, sizeof(*cfg), M_NOWAIT);
 			if (resp == NULL) {
 				error = ENOMEM;
@@ -391,12 +371,8 @@
 				cfg->downstream.bandwidth = 0;
 			} else
 				cfg->bandwidth = 0;
-                        break;
-		    }
+			break;
 		case NGM_PIPE_SET_CFG:
-                    {
-			struct ng_pipe_cfg *cfg;
-
 			cfg = (struct ng_pipe_cfg *)msg->data;
 			if (msg->header.arglen != sizeof(*cfg)) {
 				error = EINVAL;
@@ -437,9 +413,7 @@
 				  &priv->upper, priv);
 			parse_cfg(&priv->lower.cfg, &cfg->upstream,
 				  &priv->lower, priv);
-                        break;
-		    }
-
+			break;
 		default:
 			error = EINVAL;
 			break;
@@ -451,6 +425,9 @@
 	}
 	NG_RESPOND_MSG(error, node, item, resp);
 	NG_FREE_MSG(msg);
+
+	mtx_unlock(&ng_pipe_giant);
+
 	return (error);
 }
 
@@ -493,7 +470,7 @@
 				p = (p*(p0&0xffff)>>48) + \
 				    (p*((p0>>16)&0xffff)>>32) + \
 				    (p*(p0>>32)>>16);
-        	}
+		}
 	}
 
 	if (new->qin_size_limit == -1)
@@ -563,7 +540,7 @@
  * NIH sindrome, so probably it would be wise to look around what other
  * folks have found out to be a good and efficient IP hash function...
  */
-__inline static int ip_hash(struct mbuf *m, int offset)
+static int ip_hash(struct mbuf *m, int offset)
 {
 	u_int64_t i;
 	struct ip *ip = (struct ip *)(mtod(m, u_char *) + offset);
@@ -605,15 +582,13 @@
 			dest = &priv->upper;
 		else
 			dest = &priv->lower;
-                NG_FWD_ITEM_HOOK(error, item, dest->hook);
+		NG_FWD_ITEM_HOOK(error, item, dest->hook);
 		return error;
 	}
 
+	mtx_lock(&ng_pipe_giant);
 	microuptime(now);
 
-	/* XXX argh... must... not... lock... everything... */
-	mtx_lock(&ng_pipe_giant);
-
 	/*
 	 * Attach us to the list of active ng_pipes if this was an empty
 	 * one before, and also update the queue service deadline time.
@@ -626,7 +601,7 @@
 			when->tv_usec = now->tv_usec;
 		}
 		if (hinfo->run.qout_frames == 0)
-			LIST_INSERT_HEAD(&hook_head, hinfo, hook_le);
+			LIST_INSERT_HEAD(&active_head, hinfo, active_le);
 	}
 
 	/* Populate the packet header */
@@ -724,10 +699,13 @@
 		hinfo->stats.in_disc_frames++;
 	}
 
-	/* Try to start the dequeuing process immediately */
+	/*
+	 * Try to start the dequeuing process immediately.  We must
+	 * hold the ng_pipe_giant lock here and pipe_dequeue() will
+	 * release it
+	 */
 	pipe_dequeue(hinfo, now);
 
-	mtx_unlock(&ng_pipe_giant);
 	return (0);
 }
 
@@ -737,13 +715,17 @@
  *  1) Try to extract the frame from the inbound (bandwidth) queue;
  *  2) In accordance to BER specified, discard the frame randomly;
  *  3) If the frame survives BER, prepend it with delay info and move it
- *     to outbound (delay) queue, or send directly to the outbound hook;
- *  4) Loop to 2) until bandwidth limit is reached, or inbound queue is
- *     flushed completely;
- *  5) Extract the first frame from the outbound queue, if it's time has come.
- *     Send this frame to the outbound hook;
- *  6) Loop to 6) until outbound queue is flushed completely, or the next
- *     frame in the queue is not scheduled to be dequeued yet
+ *     to outbound (delay) queue;
+ *  4) Loop to 2) until bandwidth quota for this timeslice is reached, or
+ *     inbound queue is flushed completely;
+ *  5) Extract the first frame from the outbound queue, if it's time has
+ *     come.  Queue the frame for transmission on the outbound hook;
+ *  6) Loop to 5) until outbound queue is flushed completely, or the next
+ *     frame in the queue is not scheduled to be dequeued yet;
+ *  7) Transimit all frames queued in 5)
+ *
+ * Note: the caller must hold the ng_pipe_giant lock; this function
+ * returns with the lock released.
  */
 static void
 pipe_dequeue(struct hookinfo *hinfo, struct timeval *now) {
@@ -753,6 +735,8 @@
 	struct ngp_fifo *ngp_f, *ngp_f1;
 	struct ngp_hdr *ngp_h;
 	struct timeval *when;
+	struct mbuf *q_head = NULL;
+	struct mbuf *q_tail = NULL;
 	struct mbuf *m;
 	int error = 0;
 
@@ -877,15 +861,32 @@
 		hinfo->run.qout_frames--;
 		hinfo->run.qout_octets -= m->m_pkthdr.len;
 
-		/* Dequeue/send the packet */
+		/* Dequeue the packet from qout */
 		TAILQ_REMOVE(&hinfo->qout_head, ngp_h, ngp_link);
 		uma_zfree(ngp_zone, ngp_h);
-		NG_SEND_DATA(error, dest->hook, m, meta); /* XXX is this safe?*/
+
+		/* Enqueue locally for sending downstream */
+		if (q_head == NULL)
+			q_head = m;
+		if (q_tail)
+			q_tail->m_nextpkt = m;
+		q_tail = m;
+		m->m_nextpkt = NULL;
 	}
 
 	/* If both queues are empty detach us from the list of active queues */
-	if (hinfo->run.qin_frames + hinfo->run.qout_frames == 0)
-		LIST_REMOVE(hinfo, hook_le);
+	if (hinfo->run.qin_frames + hinfo->run.qout_frames == 0) {
+		LIST_REMOVE(hinfo, active_le);
+		active_gen_id++;
+	}
+
+	mtx_unlock(&ng_pipe_giant);
+
+	while ((m = q_head) != NULL) {
+		q_head = m->m_nextpkt;
+		m->m_nextpkt = NULL;
+		NG_SEND_DATA(error, dest->hook, m, meta);
+	}
 }
 
 
@@ -894,12 +895,12 @@
  * for queued frames by calling pipe_dequeue().
  */
 static void
-pipe_scheduler(void)
+pipe_scheduler(void *arg)
 {
 	pipe_poll();
 
 	/* Reschedule  */
-	ds_handle = timeout((timeout_t *) &pipe_scheduler, NULL, 1);
+	callout_reset(&polling_timer, 1, &pipe_scheduler, NULL);
 }
 
 
@@ -913,13 +914,23 @@
 {
 	struct hookinfo *hinfo;
 	struct timeval now;
+	int old_gen_id = active_gen_id;
 	
+	mtx_lock(&ng_pipe_giant);
 	microuptime(&now);
-	mtx_lock(&ng_pipe_giant);
-	LIST_FOREACH(hinfo, &hook_head, hook_le) {
+	LIST_FOREACH(hinfo, &active_head, active_le) {
 		CURVNET_SET(NG_HOOK_NODE(hinfo->hook)->nd_vnet);
 		pipe_dequeue(hinfo, &now);
 		CURVNET_RESTORE();
+		mtx_lock(&ng_pipe_giant);
+		if (old_gen_id != active_gen_id) {
+			/* the list was updated; restart traversing */
+			hinfo = LIST_FIRST(&active_head);
+			if (hinfo == NULL)
+				break;
+			old_gen_id = active_gen_id;
+			continue;
+		}
 	}
 	mtx_unlock(&ng_pipe_giant);
 }
@@ -940,10 +951,13 @@
 
 	if (priv->lower.hook && priv->upper.hook)
 		ng_bypass(priv->lower.hook, priv->upper.hook);
-	LIST_REMOVE(priv, node_le);
+	else {
+		if (priv->upper.hook != NULL)
+			ng_rmhook_self(priv->upper.hook);
+		if (priv->lower.hook != NULL)
+			ng_rmhook_self(priv->lower.hook);
+	}
 	FREE(priv, M_NG_PIPE);
-	if (LIST_EMPTY(&node_head))
-		uma_zdestroy(ngp_zone);
 	return (0);
 }
 
@@ -959,6 +973,8 @@
 	struct ngp_hdr *ngp_h;
 	int removed = 0;
 
+	mtx_lock(&ng_pipe_giant);
+
 	KASSERT(hinfo != NULL, ("%s: null info", __FUNCTION__));
 	hinfo->hook = NULL;
 
@@ -986,49 +1002,48 @@
 	 * Both queues should be empty by now, so detach us from
 	 * the list of active queues
 	 */
-	if (removed)
-		LIST_REMOVE(hinfo, hook_le);
+	if (removed) {
+		LIST_REMOVE(hinfo, active_le);
+		active_gen_id++;
+	}
 	if (hinfo->run.qin_frames + hinfo->run.qout_frames != removed)
 		printf("Mismatch: queued=%d but removed=%d !?!",
-			hinfo->run.qin_frames + hinfo->run.qout_frames,
-			removed);
+		    hinfo->run.qin_frames + hinfo->run.qout_frames, removed);
 
 	/* Release the packet loss probability table (BER) */
 	if (hinfo->ber_p)
 		FREE(hinfo->ber_p, M_NG_PIPE);
 
+	mtx_unlock(&ng_pipe_giant);
+
 	return (0);
 }
 
 static int
 ngp_modevent(module_t mod, int type, void *unused)
 {
-	priv_p priv;
 	int error = 0;
 
 	switch (type) {
 	case MOD_LOAD:
-		if (ngp_zone)
-			error = EEXIST; 
-		else {
-			mtx_init(&ng_pipe_giant, "ng_pipe_giant", NULL,
-			    MTX_DEF | MTX_RECURSE);
-			LIST_INIT(&node_head);
-			LIST_INIT(&hook_head);
-			ds_handle = timeout((timeout_t *) &pipe_scheduler,
-			    NULL, 1);
-		}
+		ngp_zone = uma_zcreate("ng_pipe", max(sizeof(struct ngp_hdr),
+		    sizeof (struct ngp_fifo)), NULL, NULL, NULL, NULL,
+		    UMA_ALIGN_PTR, 0);
+		if (ngp_zone == NULL)
+			panic("ng_pipe: couldn't allocate descriptor zone");
+
+		mtx_init(&ng_pipe_giant, "ng_pipe_giant", NULL, MTX_DEF);
+		LIST_INIT(&active_head);
+		callout_init(&polling_timer, CALLOUT_MPSAFE);
+		callout_reset(&polling_timer, 1, &pipe_scheduler, NULL);
 		break;
 	case MOD_UNLOAD:
-		LIST_FOREACH(priv, &node_head, node_le)
-			error = EBUSY;
-			
-		if (error == 0)
-			mtx_destroy(&ng_pipe_giant);
-			untimeout((timeout_t *) &pipe_scheduler, NULL,
-			    ds_handle);
+		callout_drain(&polling_timer);
+		uma_zdestroy(ngp_zone);
+		mtx_destroy(&ng_pipe_giant);
 		break;
 	default:
+		error = EOPNOTSUPP;
 		break;
 	}
 


More information about the p4-projects mailing list