cvs commit: src/sys/net netisr.c

Robert Watson rwatson at FreeBSD.org
Wed Oct 1 15:14:27 PDT 2003


On Wed, 1 Oct 2003, Robert Watson wrote:

>   Enable net.isr.enable by default, causing "delivery to completion"
>   (direct dispatch) in interrupt threads when the netisr in question
>   isn't already active.  If a netisr is already active, or direct
>   dispatch is already in progress, we queue the packet for later
>   delivery.  Previously, this option was disabled by default.  I have
>   measured 20%+ performance improvements in IP packet forwarding with
>   this enabled.
>   
>   Please report any problems ASAP, especially relating to stack depth or
>   out-of-order packet processing.
>   
>   Discussed with: jlemon, peter
>   Sponsored by:   DARPA, Network Associates Laboratories

FYI: In my e-mail communication with Jonathan, we concluded this was safe
to commit.  Since then, two concerns have come up: 

(1) Potential lock order interactions.  I believe these are OK given the
    current model of dropping the driver lock before entering the
    interface input routines (see the sketch after these two items).
    However, since the ground doesn't appear to be firm here quite yet...

(2) Potential out-of-order delivery if a packet is delayed and ends up in
    the netisr queue, and is then bypassed by a following packet that is
    delivered first by virtue of direct dispatch; this can have negative
    consequences for TCP in lossy situations if the timing is poor.

    I've attached two patches: one simply defers the dispatch (queueing
    the packet) if there's the potential for reordering; the other
    drains the netisr queue before processing the new mbuf.  Both have
    potential downsides, and neither is tested (I'll do that when I get
    home this evening), but I'm virtually in transit as I write this :-).
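
For reference, here's a rough sketch of the driver-side pattern I'm
assuming in (1) -- hypothetical driver, invented names (foo_rxdequeue,
FOO_LOCK/FOO_UNLOCK), untested:

	/* foo_rxeof(): hypothetical driver receive path */
	while ((m = foo_rxdequeue(sc)) != NULL) {
		FOO_UNLOCK(sc);			/* drop driver lock first */
		(*ifp->if_input)(ifp, m);	/* may reach netisr_dispatch()
						   and acquire netisr_mtx */
		FOO_LOCK(sc);
	}

The point is that netisr_mtx is never acquired while a driver lock is
held, so direct dispatch shouldn't introduce a new lock order.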

I'm going to back out this change for now, but would like to merge it
again once we're sure any potential issues have been resolved.  In the
meantime, I think it would be very useful if people interested in network
performance could run with net.isr.enable enabled, and pick zero or one of
the patches to apply.  This evening I'll run lossy TCP performance
measurements with a packet sniffer and timing tweaks, and see what I bump
into.  I'm hopeful we can enable this by default shortly, as the
performance benefit is substantial.
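
If you do test, turning net.isr.enable on and watching the counters
looks like this ("directed" counts direct dispatches, "deferred" counts
packets that fell back to the queue -- names from the sysctls in
netisr.c):

	sysctl net.isr.enable=1
	sysctl net.isr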

Robert N M Watson             FreeBSD Core Team, TrustedBSD Projects
robert at fledge.watson.org      Network Associates Laboratories

-------------- next part --------------
Index: netisr.c
===================================================================
RCS file: /home/ncvs/src/sys/net/netisr.c,v
retrieving revision 1.4
diff -u -r1.4 netisr.c
--- netisr.c	1 Oct 2003 21:31:09 -0000	1.4
+++ netisr.c	1 Oct 2003 22:11:04 -0000
@@ -118,8 +118,6 @@
     &isrstat.isrs_directed, 0, "");
 SYSCTL_INT(_net_isr, OID_AUTO, deferred, CTLFLAG_RD, 
     &isrstat.isrs_deferred, 0, "");
-SYSCTL_INT(_net_isr, OID_AUTO, bypassed, CTLFLAG_RD, 
-    &isrstat.isrs_bypassed, 0, "");
 SYSCTL_INT(_net_isr, OID_AUTO, queued, CTLFLAG_RD, 
     &isrstat.isrs_queued, 0, "");
 SYSCTL_INT(_net_isr, OID_AUTO, swi_count, CTLFLAG_RD, 
@@ -141,7 +139,8 @@
 netisr_dispatch(int num, struct mbuf *m)
 {
 	struct netisr *ni;
-	
+	int defer;
+
 	isrstat.isrs_count++;
 	KASSERT(!(num < 0 || num >= (sizeof(netisrs)/sizeof(*netisrs))),
 	    ("bad isr %d", num));
@@ -151,7 +150,6 @@
 		return;
 	}
 	if (netisr_enable && mtx_trylock(&netisr_mtx)) {
-		isrstat.isrs_directed++;
 		/*
 		 * One slight problem here is that packets might bypass
 		 * each other in the stack, if an earlier one happened
@@ -162,12 +160,20 @@
 		 *	b. fallback to queueing the packet,
 		 *	c. sweep the issue under the rug and ignore it.
 		 *
-		 * Currently, we do c), and keep a rough event counter.
+		 * Currently, we do b), at the cost of having grabbed a
+		 * mutex one additional time.
 		 */
-		if (_IF_QLEN(ni->ni_queue) > 0)
-			isrstat.isrs_bypassed++;
-		ni->ni_handler(m);
+		defer = 0;
+		if (_IF_QLEN(ni->ni_queue) > 0) {
+			isrstat.isrs_deferred++;
+			defer = 1;
+		} else {
+			isrstat.isrs_directed++;
+			ni->ni_handler(m);
+		}
 		mtx_unlock(&netisr_mtx);
+		if (defer && IF_HANDOFF(ni->ni_queue, m, NULL))
+			schednetisr(num);
 	} else {
 		isrstat.isrs_deferred++;
 		if (IF_HANDOFF(ni->ni_queue, m, NULL))
-------------- next part --------------
/*-
 * Copyright (c) 2001,2002,2003 Jonathan Lemon <jlemon at FreeBSD.org>
 * Copyright (c) 1997, Stefan Esser <se at freebsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: src/sys/net/netisr.c,v 1.4 2003/10/01 21:31:09 rwatson Exp $
 */

#include <sys/param.h>
#include <sys/bus.h>
#include <sys/rtprio.h>
#include <sys/systm.h>
#include <sys/interrupt.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/proc.h>
#include <sys/random.h>
#include <sys/resourcevar.h>
#include <sys/sysctl.h>
#include <sys/unistd.h>
#include <machine/atomic.h>
#include <machine/cpu.h>
#include <machine/stdarg.h>

#include <sys/mbuf.h>
#include <sys/socket.h>

#include <net/if.h>
#include <net/if_types.h>
#include <net/if_var.h>
#include <net/netisr.h>

volatile unsigned int	netisr;	/* scheduling bits for network */

struct netisr {
	netisr_t	*ni_handler;
	struct ifqueue	*ni_queue;
} netisrs[32];

static struct mtx netisr_mtx;
static void *net_ih;

static void	swi_processqueue(struct netisr *ni);

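/*
 * Compatibility entry point: schedule the network software interrupt,
 * as the old setsoftnet() did.
 */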
void
legacy_setsoftnet(void)
{
	swi_sched(net_ih, 0);
}

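/*
 * Attach a protocol's handler and (optional) input queue to a netisr
 * slot; e.g., IP registers roughly as
 * netisr_register(NETISR_IP, ip_input, &ipintrq).
 */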
void
netisr_register(int num, netisr_t *handler, struct ifqueue *inq)
{
	
	KASSERT(!(num < 0 || num >= (sizeof(netisrs)/sizeof(*netisrs))),
	    ("bad isr %d", num));
	netisrs[num].ni_handler = handler;
	netisrs[num].ni_queue = inq;
}

void
netisr_unregister(int num)
{
	struct netisr *ni;
	int s;
	
	KASSERT(!(num < 0 || num >= (sizeof(netisrs)/sizeof(*netisrs))),
	    ("bad isr %d", num));
	ni = &netisrs[num];
	ni->ni_handler = NULL;
	if (ni->ni_queue != NULL) {
		s = splimp();
		IF_DRAIN(ni->ni_queue);
		splx(s);
	}
}

struct isrstat {
	int	isrs_count;			/* dispatch count */
	int	isrs_directed;			/* ...successfully dispatched */
	int	isrs_deferred;			/* ...queued instead */
	int	isrs_queued;			/* intentionally queueued */
	int	isrs_swi_count;			/* swi_net handlers called */
};
static struct isrstat isrstat;

SYSCTL_NODE(_net, OID_AUTO, isr, CTLFLAG_RW, 0, "netisr counters");

static int	netisr_enable = 1;
SYSCTL_INT(_net_isr, OID_AUTO, enable, CTLFLAG_RW, 
    &netisr_enable, 0, "enable direct dispatch");

SYSCTL_INT(_net_isr, OID_AUTO, count, CTLFLAG_RD,
    &isrstat.isrs_count, 0, "");
SYSCTL_INT(_net_isr, OID_AUTO, directed, CTLFLAG_RD, 
    &isrstat.isrs_directed, 0, "");
SYSCTL_INT(_net_isr, OID_AUTO, deferred, CTLFLAG_RD, 
    &isrstat.isrs_deferred, 0, "");
SYSCTL_INT(_net_isr, OID_AUTO, queued, CTLFLAG_RD, 
    &isrstat.isrs_queued, 0, "");
SYSCTL_INT(_net_isr, OID_AUTO, swi_count, CTLFLAG_RD, 
    &isrstat.isrs_swi_count, 0, "");

/*
 * Call the netisr directly instead of queueing the packet, if possible.
 *
 * Ideally, the permissibility of calling the routine would be determined
 * by checking if splnet() was asserted at the time the device interrupt
 * occurred; if so, this indicates that someone is in the network stack.
 *
 * However, bus_setup_intr uses INTR_TYPE_NET, which sets splnet before
 * calling the interrupt handler, so the previous mask is unavailable.
 * Approximate this by checking intr_nesting_level instead; if any SWI
 * handlers are running, the packet is queued instead.
 */
void
netisr_dispatch(int num, struct mbuf *m)
{
	struct netisr *ni;
	
	isrstat.isrs_count++;
	KASSERT(!(num < 0 || num >= (sizeof(netisrs)/sizeof(*netisrs))),
	    ("bad isr %d", num));
	ni = &netisrs[num];
	if (ni->ni_queue == NULL) {
		m_freem(m);
		return;
	}
	if (netisr_enable && mtx_trylock(&netisr_mtx)) {
		isrstat.isrs_directed++;
		/*
		 * One slight problem here is that packets might bypass
		 * each other in the stack, if an earlier one happened
		 * to get stuck in the queue.
		 *
		 * we can either:
		 *	a. drain the queue before handling this packet,
		 *	b. fallback to queueing the packet,
		 *	c. sweep the issue under the rug and ignore it.
		 *
		 * Currently, we do a).  Previously we did c).
		 */
		swi_processqueue(ni);
		ni->ni_handler(m);
		mtx_unlock(&netisr_mtx);
	} else {
		isrstat.isrs_deferred++;
		if (IF_HANDOFF(ni->ni_queue, m, NULL))
			schednetisr(num);
	}
}

/*
 * Same as above, but always queue.
 * This is either used in places where we are not confident that
 * direct dispatch is possible, or where queueing is required.
 */
int
netisr_queue(int num, struct mbuf *m)
{
	struct netisr *ni;
	
	KASSERT(!(num < 0 || num >= (sizeof(netisrs)/sizeof(*netisrs))),
	    ("bad isr %d", num));
	ni = &netisrs[num];
	if (ni->ni_queue == NULL) {
		m_freem(m);
		return (1);
	}
	isrstat.isrs_queued++;
	if (!IF_HANDOFF(ni->ni_queue, m, NULL))
		return (0);
	schednetisr(num);
	return (1);
}

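/*
 * Drain a netisr's input queue, handing each queued mbuf to the
 * registered handler.  Called from the software interrupt, and from
 * netisr_dispatch() before direct dispatch so that previously queued
 * packets cannot be bypassed by a newly arrived one.
 */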
static void
swi_processqueue(struct netisr *ni)
{
	struct mbuf *m;

	for (;;) {
		IF_DEQUEUE(ni->ni_queue, m);
		if (m == NULL)
			break;
		ni->ni_handler(m);
	}
}

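/*
 * Network software interrupt handler: collect the scheduling bits and,
 * for each scheduled netisr, drain its queue (or call the handler with
 * a NULL mbuf if it has no queue).  With DEVICE_POLLING the loop
 * repeats until no further bits are set.
 */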
static void
swi_net(void *dummy)
{
	struct netisr *ni;
	u_int bits;
	int i;
#ifdef DEVICE_POLLING
	const int polling = 1;
#else
	const int polling = 0;
#endif

	mtx_lock(&netisr_mtx);
	do {
		bits = atomic_readandclear_int(&netisr);
		if (bits == 0)
			break;
		while ((i = ffs(bits)) != 0) {
			isrstat.isrs_swi_count++;
			i--;
			bits &= ~(1 << i);
			ni = &netisrs[i];
			if (ni->ni_handler == NULL) {
				printf("swi_net: unregistered isr %d.\n", i);
				continue;
			}
			if (ni->ni_queue == NULL)
				ni->ni_handler(NULL);
			else
				swi_processqueue(ni);
		}
	} while (polling);
	mtx_unlock(&netisr_mtx);
}

static void
start_netisr(void *dummy)
{

	mtx_init(&netisr_mtx, "netisr lock", NULL, MTX_DEF);
	if (swi_add(NULL, "net", swi_net, NULL, SWI_NET, 0, &net_ih))
		panic("start_netisr");
}
SYSINIT(start_netisr, SI_SUB_SOFTINTR, SI_ORDER_FIRST, start_netisr, NULL)

