ports/net/click anyone ?

Luigi Rizzo rizzo at iet.unipi.it
Fri Jul 1 17:24:31 UTC 2011


[Cc to ports at freebsd,org, but please followup at net at freebsd.org]

anyone interested in taking over maintainership of ports/net/click ?
We have 1.5.0 in the tree, which is old and partly broken.
1.8.0 builds almost without problems, and the two patches i am
attaching give huge speedups for the userland version, especially
if used together with netmap. To get an idea of what you can get
on a single core i7-870 CPU with the stock version and with
these patches:

					1.8.0		With patches
    InfiniteSource -> Discard		515Kpps		18.56Mpps
    InfiniteSource -> Queue -> Discard	500Kpps		13.41Mpps

					pcap		netmap
    FromDevice->Queue->ToDevice		420Kpps		3.97 Mpps

--- Details ----

Click userland performance was never a priority given the high cost
(until now) of packet I/O. But once packet i/o has become quite fast,
it turns out that there are to other big offenders:
- the C++ memory allocator is quite expensive, and replacing it with
  thread-local freelists (Packet objects and data buffers can be made
  all with the same size) gives huge savings -- 100ns per packet or more
  even on a fast machine;

- everytime an element wants a timestamp, it calls a syscall (gettimeofday()
  or similar) which consumes another 400-800ns per call. There are many
  elements (e.g. InfiniteSource, Counter, etc.) which timestamp packets.

Attached there are a couple of patches which address these problems:

- patch-pcap	makes FromDevice and ToDevice use libpcap properly,
		supporting I/O in bursts to amortize the syscall overhead;

- patch-more
   + introduces a NOTS option for InfiniteSource to remove timestamps.
     This gives a 10x performance improvement in simple apps using InfiniteSource

   + replaces the allocator for Packet and data buffers with local freelists;
     not thread safe, but this is easy to introduce. This gives another 1.5-2x
     speed improvement after the 10x gained removing timestamps;

   + enables BURST operation in Discard, giving another 2x speed improvement

Using netmap instead of pcap is another big win, as you can see the forwarding
performance of a simple FromDevice->Queue->ToDevice chain goes up by 10x

You can find netmap at http://info.iet.unipi.it/~luigi/netmap/

cheers
luigi

v
-------------- next part --------------
diff -ur ../../work/click-1.8.0/elements/userlevel/fromdevice.cc ./elements/userlevel/fromdevice.cc
--- ../../work/click-1.8.0/elements/userlevel/fromdevice.cc	2010-02-28 21:03:33.000000000 +0100
+++ ./elements/userlevel/fromdevice.cc	2011-07-01 10:42:54.000000000 +0200
@@ -73,6 +73,7 @@
 FromDevice::configure(Vector<String> &conf, ErrorHandler *errh)
 {
     bool promisc = false, outbound = false, sniffer = true;
+    _burst = 1;
     _snaplen = 2046;
     _headroom = Packet::default_headroom;
     _headroom += (4 - (_headroom + 2) % 4) % 4; // default 4/2 alignment
@@ -89,9 +90,11 @@
 		     "BPF_FILTER", 0, cpString, &bpf_filter,
 		     "OUTBOUND", 0, cpBool, &outbound,
 		     "HEADROOM", 0, cpUnsigned, &_headroom,
+		     "BURST", 0, cpUnsigned, &_burst,
 		     "ENCAP", cpkC, &has_encap, cpWord, &encap_type,
 		     cpEnd) < 0)
 	return -1;
+    click_chatter("FromDevice(%s): burst: %d", _ifname.c_str(), _burst);
     if (_snaplen > 8190 || _snaplen < 14)
 	return errh->error("SNAPLEN out of range");
     if (_headroom > 8190)
@@ -361,6 +364,8 @@
 	    p->set_packet_type_anno(Packet::MULTICAST);
     }
 
+if (p->data() == 0 || p->length() < 16 || p->length() > 1536)
+	click_chatter("send pkt %p len %d", p->data(), p->length());
     // set annotations
     p->set_timestamp_anno(Timestamp::make_usec(pkthdr->ts.tv_sec, pkthdr->ts.tv_usec));
     p->set_mac_header(p->data());
@@ -381,10 +386,11 @@
 #if FROMDEVICE_PCAP
     if (_capture == CAPTURE_PCAP) {
 	// Read and push() at most one packet.
-	int r = pcap_dispatch(_pcap, 1, FromDevice_get_packet, (u_char *) this);
-	if (r > 0)
-	    _pcap_task.reschedule();
-	else if (r < 0 && ++_pcap_complaints < 5)
+	int r = pcap_dispatch(_pcap, _burst, FromDevice_get_packet, (u_char *) this);
+	if (r > 0) {
+	    _count += r;
+	    _pcap_task.fast_reschedule();
+	} else if (r < 0 && ++_pcap_complaints < 5)
 	    ErrorHandler::default_handler()->error("%{element}: %s", this, pcap_geterr(_pcap));
     }
 #endif
@@ -421,12 +427,13 @@
 FromDevice::run_task(Task *)
 {
     // Read and push() at most one packet.
-    int r = pcap_dispatch(_pcap, 1, FromDevice_get_packet, (u_char *) this);
-    if (r > 0)
-	_pcap_task.fast_reschedule();
-    else if (r < 0 && ++_pcap_complaints < 5)
+    int r = pcap_dispatch(_pcap, _burst, FromDevice_get_packet, (u_char *) this);
+    if (r > 0) {
+	_count += r;
+	_pcap_task.fast_reschedule();	// was fast_reschedule
+    } else if (r < 0 && ++_pcap_complaints < 5)
 	ErrorHandler::default_handler()->error("%{element}: %s", this, pcap_geterr(_pcap));
-    return r > 0;
+    return r == (int)_burst;
 }
 #endif
 
diff -ur ../../work/click-1.8.0/elements/userlevel/fromdevice.hh ./elements/userlevel/fromdevice.hh
--- ../../work/click-1.8.0/elements/userlevel/fromdevice.hh	2010-02-28 21:06:16.000000000 +0100
+++ ./elements/userlevel/fromdevice.hh	2011-06-29 23:36:10.000000000 +0200
@@ -160,6 +160,7 @@
 
     void selected(int fd);
 #if FROMDEVICE_PCAP
+    inline pcap_t *pcap() const		{ return _pcap; }
     bool run_task(Task *);
 #endif
 
@@ -202,6 +203,8 @@
     unsigned _headroom;
     enum { CAPTURE_PCAP, CAPTURE_LINUX };
     int _capture;
+
+    unsigned _burst;
 #if FROMDEVICE_PCAP
     String _bpf_filter;
 #endif
diff -ur ../../work/click-1.8.0/elements/userlevel/todevice.cc ./elements/userlevel/todevice.cc
--- ../../work/click-1.8.0/elements/userlevel/todevice.cc	2009-09-03 22:36:30.000000000 +0200
+++ ./elements/userlevel/todevice.cc	2011-07-01 10:43:53.000000000 +0200
@@ -58,7 +58,7 @@
 ToDevice::ToDevice()
   : _task(this), _timer(&_task), _fd(-1), _my_fd(false),
     _q(0),
-    _pulls(0)
+    _pulls(0), _count(0)
 {
 }
 
@@ -70,13 +70,16 @@
 ToDevice::configure(Vector<String> &conf, ErrorHandler *errh)
 {
 
+  _burst = 1;
   if (cp_va_kparse(conf, this, errh,
 		   "DEVNAME", cpkP+cpkM, cpString, &_ifname,
+		   "BURST", 0, cpUnsigned, &_burst,
 		   "DEBUG", 0, cpBool, &_debug,
 		   cpEnd) < 0)
     return -1;
   if (!_ifname)
     return errh->error("interface not set");
+  click_chatter("ToDevice %s burst %d\n", _ifname.c_str(), _burst);
   return 0;
 }
 
@@ -85,6 +88,9 @@
 {
   _timer.initialize(this);
   _fd = -1;
+#if TODEVICE_PCAP
+  _pcap = 0;
+#endif
 
 #if TODEVICE_BSD_DEV_BPF
 
@@ -117,6 +123,9 @@
     FromDevice *fdev = (FromDevice *)e->cast("FromDevice");
     if (fdev && fdev->ifname() == _ifname && fdev->fd() >= 0) {
       _fd = fdev->fd();
+# if TODEVICE_PCAP
+      _pcap = fdev->pcap();
+# endif
       _my_fd = false;
     }
   }
@@ -170,14 +179,21 @@
 bool
 ToDevice::run_task(Task *)
 {
-    Packet *p = _q;
+    unsigned count;
+    Packet *p = _q;	// previously saved packet
     _q = 0;
-    if (!p) {
-	p = input(0).pull();
-	_pulls++;
-    }
+    for (count = 0; count < _burst; count++) {
+	if (!p) {
+	    p = input(0).pull();
+	    _pulls++;
+	    if (!p) {
+		if (!_signal)
+		    return false;
+		else
+		    break;
+	    }
+	}
 
-    if (p) {
 	int retval;
 	const char *syscall;
 
@@ -187,6 +203,9 @@
 #elif TODEVICE_SEND
 	retval = send(_fd, p->data(), p->length(), 0);
 	syscall = "send";
+#elif TODEVICE_INJECT
+	retval = ((uint32_t) pcap_inject(_pcap, p->data(), p->length()) == p->length() ? 0 : -1);
+	syscall = "pcap_inject";
 #else
 	retval = 0;
 #endif
@@ -194,6 +213,7 @@
 	if (retval >= 0) {
 	    _backoff = 0;
 	    checked_output_push(0, p);
+	    _count++;
 
 	} else if (errno == ENOBUFS || errno == EAGAIN) {
 	    assert(!_q);
@@ -215,15 +235,15 @@
 	    return false;
 
 	} else {
-	    click_chatter("ToDevice(%s) %s: %s", _ifname.c_str(), syscall, strerror(errno));
+	    click_chatter("ToDevice(%s) count %d pkt %p p %p len 0x%x errno %d %s: %s",
+		_ifname.c_str(), count, p, p->data(), p->length(), errno, syscall, strerror(errno));
 	    checked_output_push(1, p);
 	}
-    }
+	p = 0;
+    } // end for
 
-    if (!p && !_signal)
-	return false;
     _task.fast_reschedule();
-    return p != 0;
+    return true; // count == _burst;
 }
 
 void
@@ -234,7 +254,7 @@
 }
 
 
-enum {H_DEBUG, H_SIGNAL, H_PULLS, H_Q};
+enum {H_DEBUG, H_SIGNAL, H_PULLS, H_Q, H_COUNT};
 
 String
 ToDevice::read_param(Element *e, void *thunk)
@@ -249,6 +269,8 @@
       return String(td->_pulls);
   case H_Q:
       return String((bool) td->_q);
+  case H_COUNT:
+      return String(td->_count);
   default:
       return String();
   }
diff -ur ../../work/click-1.8.0/elements/userlevel/todevice.hh ./elements/userlevel/todevice.hh
--- ../../work/click-1.8.0/elements/userlevel/todevice.hh	2009-09-03 22:36:30.000000000 +0200
+++ ./elements/userlevel/todevice.hh	2011-06-30 17:15:52.000000000 +0200
@@ -60,7 +60,10 @@
 extern "C" {
 # include <pcap.h>
 }
-# if defined(__FreeBSD__) || defined(__OpenBSD__) || defined(__APPLE__) || defined(__NetBSD__)
+# if defined(__FreeBSD__)
+#  define TODEVICE_PCAP 1
+#  define TODEVICE_INJECT 1
+# elif defined(__OpenBSD__) || defined(__APPLE__) || defined(__NetBSD__)
 #  define TODEVICE_BSD_DEV_BPF 1
 #  define TODEVICE_WRITE 1
 # elif defined(__sun)
@@ -100,6 +103,10 @@
   String _ifname;
   int _fd;
   bool _my_fd;
+  unsigned _burst;
+#ifdef TODEVICE_PCAP
+  pcap_t *_pcap;
+#endif
   NotifierSignal _signal;
 
 
@@ -108,6 +115,7 @@
   bool _debug;
   bool _backoff;
   int _pulls;
+  unsigned _count;
 };
 
 CLICK_ENDDECLS
-------------- next part --------------
diff -ur ../../work/click-1.8.0/elements/standard/discard.cc ./elements/standard/discard.cc
--- ../../work/click-1.8.0/elements/standard/discard.cc	2009-12-18 18:44:25.000000000 +0100
+++ ./elements/standard/discard.cc	2011-06-29 23:36:10.000000000 +0200
@@ -23,7 +23,7 @@
 CLICK_DECLS
 
 Discard::Discard()
-    : _task(this), _count(0), _active(true)
+    : _task(this), _count(0), _active(true), _burst(1)
 {
 }
 
@@ -36,8 +36,11 @@
 {
     if (cp_va_kparse(conf, this, errh,
 		     "ACTIVE", 0, cpBool, &_active,
+		     "BURST", 0, cpInteger, &_burst,
 		     cpEnd) < 0)
 	return -1;
+    if (_burst < 1 || _burst > 1000)
+	return errh->error("BURST must be 1..1000");
     if (!_active && input_is_push(0))
 	return errh->error("ACTIVE is meaningless in push context");
     return 0;
@@ -63,12 +66,17 @@
 bool
 Discard::run_task(Task *)
 {
-    Packet *p = input(0).pull();
-    if (p) {
-	_count++;
-	p->kill();
-    } else if (!_signal || !_active)
-	return false;
+    Packet *p = 0;
+    int i;
+    for (i = _burst; i > 0; i--) {
+	p = input(0).pull();
+	if (p) {
+	    _count++;
+	    p->kill();
+	} else if (!_signal || !_active)
+	    return false;
+	if (!p) break;
+    }
     _task.fast_reschedule();
     return p != 0;
 }
diff -ur ../../work/click-1.8.0/elements/standard/discard.hh ./elements/standard/discard.hh
--- ../../work/click-1.8.0/elements/standard/discard.hh	2009-12-18 18:44:25.000000000 +0100
+++ ./elements/standard/discard.hh	2011-06-29 23:36:10.000000000 +0200
@@ -76,6 +76,8 @@
 
     bool _active;
 
+    int _burst;
+
     enum { h_reset_counts = 0, h_active = 1 };
     static int write_handler(const String &, Element *, void *, ErrorHandler *);
 
diff -ur ../../work/click-1.8.0/elements/standard/infinitesource.cc ./elements/standard/infinitesource.cc
--- ../../work/click-1.8.0/elements/standard/infinitesource.cc	2009-12-02 19:47:56.000000000 +0100
+++ ./elements/standard/infinitesource.cc	2011-06-29 23:36:10.000000000 +0200
@@ -28,7 +28,7 @@
 CLICK_DECLS
 
 InfiniteSource::InfiniteSource()
-  : _packet(0), _task(this)
+  : _packet(0), _task(this), _nots(false)
 {
 }
 
@@ -63,6 +63,7 @@
 		   "BURST", cpkP, cpInteger, &burstsize,
 		   "ACTIVE", cpkP, cpBool, &active,
 		   "LENGTH", 0, cpInteger, &datasize,
+		   "NOTS", 0, cpBool, &_nots,
 		   "DATASIZE", 0, cpInteger, &datasize, // deprecated
 		   "STOP", 0, cpBool, &stop,
 		   cpEnd) < 0)
@@ -110,7 +111,8 @@
 	n = (_count > _limit ? 0 : _limit - _count);
     for (int i = 0; i < n; i++) {
 	Packet *p = _packet->clone();
-	p->timestamp_anno().assign_now();
+	if (!_nots)
+	    p->timestamp_anno().assign_now();
 	output(0).push(p);
     }
     _count += n;
@@ -137,7 +139,8 @@
     }
     _count++;
     Packet *p = _packet->clone();
-    p->timestamp_anno().assign_now();
+    if (!_nots)
+        p->timestamp_anno().assign_now();
     return p;
 }
 
diff -ur ../../work/click-1.8.0/elements/standard/infinitesource.hh ./elements/standard/infinitesource.hh
--- ../../work/click-1.8.0/elements/standard/infinitesource.hh	2009-09-03 22:36:30.000000000 +0200
+++ ./elements/standard/infinitesource.hh	2011-06-29 23:36:10.000000000 +0200
@@ -119,6 +119,7 @@
   int _datasize;
   bool _active;
   bool _stop;
+  bool _nots;
   Task _task;
   String _data;
   NotifierSignal _nonfull_signal;
diff -ur ../../work/click-1.8.0/include/click/packet.hh ./include/click/packet.hh
--- ../../work/click-1.8.0/include/click/packet.hh	2010-02-28 18:22:28.000000000 +0100
+++ ./include/click/packet.hh	2011-06-30 00:37:00.000000000 +0200
@@ -669,6 +669,30 @@
 # endif
 #endif
 
+    static void *_p_freelist;
+    static int _p_freecount, _p_limit;
+    inline void *operator new(size_t len)	{
+	if (!_p_freelist) {
+	    //fprintf(stderr, "must allocate object\n");
+	    return malloc(len);
+	} else {
+	    Packet *p = (Packet *)_p_freelist;
+	    _p_freelist = *(void **)p;
+	    _p_freecount--;
+	    return p;
+	}
+    }
+
+    inline void operator delete(void *p)                   {
+	if (_p_freecount >= _p_limit) {
+	    free(p);
+	} else {
+	    *(void **)p = _p_freelist;
+	    _p_freelist = p;
+	    _p_freecount++;
+	}
+    }
+
     inline Packet();
     Packet(const Packet &);
     ~Packet();
diff -ur ../../work/click-1.8.0/lib/packet.cc ./lib/packet.cc
--- ../../work/click-1.8.0/lib/packet.cc	2010-02-28 18:22:28.000000000 +0100
+++ ./lib/packet.cc	2011-06-30 16:45:05.000000000 +0200
@@ -178,6 +178,15 @@
  * Avoid writing buggy code like this!  Use WritablePacket selectively, and
  * try to avoid calling WritablePacket::clone() when possible. */
 
+void *Packet::_p_freelist = 0;
+int Packet::_p_freecount = 0;
+int Packet::_p_limit = 10000;
+
+static void *_pb_freelist = 0;
+static int _pb_freecount = 0;
+static int _pb_limit = 10000;
+static int _pb_alloc = 0;
+
 Packet::~Packet()
 {
     // This is a convenient place to put static assertions.
@@ -197,13 +206,21 @@
 #if CLICK_LINUXMODULE
     panic("Packet destructor");
 #else
-    if (_data_packet)
+    if (_data_packet) // XXX cloned
 	_data_packet->kill();
 # if CLICK_USERLEVEL
     else if (_head && _destructor)
 	_destructor(_head, _end - _head);
-    else
-	delete[] _head;
+    else {
+	//fprintf(stderr, "free buffer %p\n", _head);
+	if (_pb_freecount > _pb_limit) {
+		free(_head);
+	} else {
+		*(void **)_head = _pb_freelist;
+		_pb_freelist = _head;
+		_pb_freecount++;
+	}
+    }
 # elif CLICK_BSDMODULE
     if (_m)
 	m_freem(_m);
@@ -217,6 +234,7 @@
 inline WritablePacket *
 Packet::make(int, int, int)
 {
+
     return static_cast<WritablePacket *>(new Packet(6, 6, 6));
 }
 
@@ -229,7 +247,19 @@
     n = min_buffer_length;
   }
 #if CLICK_USERLEVEL
-  unsigned char *d = new unsigned char[n];
+  unsigned char *d;
+  if (n > 2047) {
+	// fprintf(stderr, "alloc size %d count %d\n", n, _pb_alloc++);
+	d = (unsigned char *)malloc(n);
+  } else if (!_pb_freelist) {
+	// if (_pb_alloc % 10000 == 0)
+	// fprintf(stderr, "cur alloc size %d count %d\n", n, _pb_alloc++);
+	d = (unsigned char *)malloc(2048);
+  } else {
+	d = (unsigned char *)_pb_freelist;
+	_pb_freelist = *(char **)_pb_freelist;
+	_pb_freecount--;
+  }
   if (!d)
     return false;
   _head = d;
@@ -494,8 +524,14 @@
 # if CLICK_USERLEVEL
     else if (_destructor)
 	_destructor(old_head, old_end - old_head);
-    else
-	delete[] old_head;
+    else {
+	if (_pb_freecount > _pb_limit) {
+		free(old_head);
+	} else {
+		*(unsigned char **)_pb_freecount = old_head;
+		_pb_freecount++;
+	}
+    }
     _destructor = 0;
 # elif CLICK_BSDMODULE
     else


More information about the freebsd-net mailing list