svn commit: r243453 - projects/counters/sys/netinet

Gleb Smirnoff glebius at FreeBSD.org
Fri Nov 23 14:00:27 UTC 2012


Author: glebius
Date: Fri Nov 23 14:00:26 2012
New Revision: 243453
URL: http://svnweb.freebsd.org/changeset/base/243453

Log:
    Collect IP statistics in per-cpu 64-bit counters. This way we shoot
  two hares with one shot:
  
    - Parallel threads no longer invalidate the cache lines where
      old struct ipstat resided.
    - Parallel non-atomic writes no longer lose statistics.
  
    The old 'struct ipstat' is kept only as the interface to userland,
  with all of its fields converted to uint64_t. Yes, this breaks the ABI
  on 32-bit arches, but now the statistics will not overflow.
    The old 'struct ipstat' was exported as SYSCTL_STRUCT(... CTLFLAG_RW ...),
  so it could be not only zeroed, but also filled in with fake values. This
  is no longer possible - any attempt to write to the statistics zeroes them,
  without accepting userland-supplied info.

Modified:
  projects/counters/sys/netinet/ip_input.c
  projects/counters/sys/netinet/ip_var.h

Modified: projects/counters/sys/netinet/ip_input.c
==============================================================================
--- projects/counters/sys/netinet/ip_input.c	Fri Nov 23 13:55:38 2012	(r243452)
+++ projects/counters/sys/netinet/ip_input.c	Fri Nov 23 14:00:26 2012	(r243453)
@@ -153,11 +153,6 @@ VNET_DEFINE(struct in_ifaddrhead, in_ifa
 VNET_DEFINE(struct in_ifaddrhashhead *, in_ifaddrhashtbl); /* inet addr hash table  */
 VNET_DEFINE(u_long, in_ifaddrhmask);		/* mask for hash table */
 
-VNET_DEFINE(struct ipstat, ipstat);
-SYSCTL_VNET_STRUCT(_net_inet_ip, IPCTL_STATS, stats, CTLFLAG_RW,
-    &VNET_NAME(ipstat), ipstat,
-    "IP statistics (struct ipstat, netinet/ip_var.h)");
-
 static VNET_DEFINE(uma_zone_t, ipq_zone);
 static VNET_DEFINE(TAILQ_HEAD(ipqhead, ipq), ipq[IPREASS_NHASH]);
 static struct mtx ipqlock;
@@ -213,6 +208,175 @@ SYSCTL_VNET_INT(_net_inet_ip, OID_AUTO, 
 static void	ip_freef(struct ipqhead *, struct ipq *);
 
 /*
+ * ipstat
+ * XXXGL: more words here.
+ */
+VNET_DEFINE(struct ipstat_p, ipstatp);
+
+static void
+ipstat_zero()
+{
+	counter_u64_zero(V_ipstatp.ips_total);
+	counter_u64_zero(V_ipstatp.ips_badsum);
+	counter_u64_zero(V_ipstatp.ips_tooshort);
+	counter_u64_zero(V_ipstatp.ips_toosmall);
+	counter_u64_zero(V_ipstatp.ips_badhlen);
+	counter_u64_zero(V_ipstatp.ips_badlen);
+	counter_u64_zero(V_ipstatp.ips_fragments);
+	counter_u64_zero(V_ipstatp.ips_fragdropped);
+	counter_u64_zero(V_ipstatp.ips_fragtimeout);
+	counter_u64_zero(V_ipstatp.ips_forward);
+	counter_u64_zero(V_ipstatp.ips_fastforward);
+	counter_u64_zero(V_ipstatp.ips_cantforward);
+	counter_u64_zero(V_ipstatp.ips_redirectsent);
+	counter_u64_zero(V_ipstatp.ips_noproto);
+	counter_u64_zero(V_ipstatp.ips_delivered);
+	counter_u64_zero(V_ipstatp.ips_localout);
+	counter_u64_zero(V_ipstatp.ips_odropped);
+	counter_u64_zero(V_ipstatp.ips_reassembled);
+	counter_u64_zero(V_ipstatp.ips_fragmented);
+	counter_u64_zero(V_ipstatp.ips_ofragments);
+	counter_u64_zero(V_ipstatp.ips_cantfrag);
+	counter_u64_zero(V_ipstatp.ips_badoptions);
+	counter_u64_zero(V_ipstatp.ips_noroute);
+	counter_u64_zero(V_ipstatp.ips_badvers);
+	counter_u64_zero(V_ipstatp.ips_rawout);
+	counter_u64_zero(V_ipstatp.ips_toolong);
+	counter_u64_zero(V_ipstatp.ips_notmember);
+	counter_u64_zero(V_ipstatp.ips_nogif);
+	counter_u64_zero(V_ipstatp.ips_badaddr);
+}
+
+static void
+vnet_ipstatp_init(const void *unused)
+{
+
+	V_ipstatp.ips_total = counter_u64_alloc(M_WAITOK);
+	V_ipstatp.ips_badsum = counter_u64_alloc(M_WAITOK);
+	V_ipstatp.ips_tooshort = counter_u64_alloc(M_WAITOK);
+	V_ipstatp.ips_toosmall = counter_u64_alloc(M_WAITOK);
+	V_ipstatp.ips_badhlen = counter_u64_alloc(M_WAITOK);
+	V_ipstatp.ips_badlen = counter_u64_alloc(M_WAITOK);
+	V_ipstatp.ips_fragments = counter_u64_alloc(M_WAITOK);
+	V_ipstatp.ips_fragdropped = counter_u64_alloc(M_WAITOK);
+	V_ipstatp.ips_fragtimeout = counter_u64_alloc(M_WAITOK);
+	V_ipstatp.ips_forward = counter_u64_alloc(M_WAITOK);
+	V_ipstatp.ips_fastforward = counter_u64_alloc(M_WAITOK);
+	V_ipstatp.ips_cantforward = counter_u64_alloc(M_WAITOK);
+	V_ipstatp.ips_redirectsent = counter_u64_alloc(M_WAITOK);
+	V_ipstatp.ips_noproto = counter_u64_alloc(M_WAITOK);
+	V_ipstatp.ips_delivered = counter_u64_alloc(M_WAITOK);
+	V_ipstatp.ips_localout = counter_u64_alloc(M_WAITOK);
+	V_ipstatp.ips_odropped = counter_u64_alloc(M_WAITOK);
+	V_ipstatp.ips_reassembled = counter_u64_alloc(M_WAITOK);
+	V_ipstatp.ips_fragmented = counter_u64_alloc(M_WAITOK);
+	V_ipstatp.ips_ofragments = counter_u64_alloc(M_WAITOK);
+	V_ipstatp.ips_cantfrag = counter_u64_alloc(M_WAITOK);
+	V_ipstatp.ips_badoptions = counter_u64_alloc(M_WAITOK);
+	V_ipstatp.ips_noroute = counter_u64_alloc(M_WAITOK);
+	V_ipstatp.ips_badvers = counter_u64_alloc(M_WAITOK);
+	V_ipstatp.ips_rawout = counter_u64_alloc(M_WAITOK);
+	V_ipstatp.ips_toolong = counter_u64_alloc(M_WAITOK);
+	V_ipstatp.ips_notmember = counter_u64_alloc(M_WAITOK);
+	V_ipstatp.ips_nogif = counter_u64_alloc(M_WAITOK);
+	V_ipstatp.ips_badaddr = counter_u64_alloc(M_WAITOK);
+
+	ipstat_zero();
+}
+VNET_SYSINIT(vnet_ipstatp_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
+            vnet_ipstatp_init, NULL);
+
+#ifdef VIMAGE
+static void
+vnet_ipstatp_uninit(const void *unused)
+{
+
+	counter_u64_free(V_ipstatp.ips_total);
+	counter_u64_free(V_ipstatp.ips_badsum);
+	counter_u64_free(V_ipstatp.ips_tooshort);
+	counter_u64_free(V_ipstatp.ips_toosmall);
+	counter_u64_free(V_ipstatp.ips_badhlen);
+	counter_u64_free(V_ipstatp.ips_badlen);
+	counter_u64_free(V_ipstatp.ips_fragments);
+	counter_u64_free(V_ipstatp.ips_fragdropped);
+	counter_u64_free(V_ipstatp.ips_fragtimeout);
+	counter_u64_free(V_ipstatp.ips_forward);
+	counter_u64_free(V_ipstatp.ips_fastforward);
+	counter_u64_free(V_ipstatp.ips_cantforward);
+	counter_u64_free(V_ipstatp.ips_redirectsent);
+	counter_u64_free(V_ipstatp.ips_noproto);
+	counter_u64_free(V_ipstatp.ips_delivered);
+	counter_u64_free(V_ipstatp.ips_localout);
+	counter_u64_free(V_ipstatp.ips_odropped);
+	counter_u64_free(V_ipstatp.ips_reassembled);
+	counter_u64_free(V_ipstatp.ips_fragmented);
+	counter_u64_free(V_ipstatp.ips_ofragments);
+	counter_u64_free(V_ipstatp.ips_cantfrag);
+	counter_u64_free(V_ipstatp.ips_badoptions);
+	counter_u64_free(V_ipstatp.ips_noroute);
+	counter_u64_free(V_ipstatp.ips_badvers);
+	counter_u64_free(V_ipstatp.ips_rawout);
+	counter_u64_free(V_ipstatp.ips_toolong);
+	counter_u64_free(V_ipstatp.ips_notmember);
+	counter_u64_free(V_ipstatp.ips_nogif);
+	counter_u64_free(V_ipstatp.ips_badaddr);
+}
+VNET_SYSUNINIT(vnet_ipstatp_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
+            vnet_ipstatp_uninit, NULL);
+#endif /* VIMAGE */
+
+static int
+ipstat_sysctl(SYSCTL_HANDLER_ARGS)
+{
+	struct ipstat ipstat;
+
+	ipstat.ips_total = counter_u64_fetch(V_ipstatp.ips_total);
+	ipstat.ips_badsum = counter_u64_fetch(V_ipstatp.ips_badsum);
+	ipstat.ips_tooshort = counter_u64_fetch(V_ipstatp.ips_tooshort);
+	ipstat.ips_toosmall = counter_u64_fetch(V_ipstatp.ips_toosmall);
+	ipstat.ips_badhlen = counter_u64_fetch(V_ipstatp.ips_badhlen);
+	ipstat.ips_badlen = counter_u64_fetch(V_ipstatp.ips_badlen);
+	ipstat.ips_fragments = counter_u64_fetch(V_ipstatp.ips_fragments);
+	ipstat.ips_fragdropped = counter_u64_fetch(V_ipstatp.ips_fragdropped);
+	ipstat.ips_fragtimeout = counter_u64_fetch(V_ipstatp.ips_fragtimeout);
+	ipstat.ips_forward = counter_u64_fetch(V_ipstatp.ips_forward);
+	ipstat.ips_fastforward = counter_u64_fetch(V_ipstatp.ips_fastforward);
+	ipstat.ips_cantforward = counter_u64_fetch(V_ipstatp.ips_cantforward);
+	ipstat.ips_redirectsent = counter_u64_fetch(V_ipstatp.ips_redirectsent);
+	ipstat.ips_noproto = counter_u64_fetch(V_ipstatp.ips_noproto);
+	ipstat.ips_delivered = counter_u64_fetch(V_ipstatp.ips_delivered);
+	ipstat.ips_localout = counter_u64_fetch(V_ipstatp.ips_localout);
+	ipstat.ips_odropped = counter_u64_fetch(V_ipstatp.ips_odropped);
+	ipstat.ips_reassembled = counter_u64_fetch(V_ipstatp.ips_reassembled);
+	ipstat.ips_fragmented = counter_u64_fetch(V_ipstatp.ips_fragmented);
+	ipstat.ips_ofragments = counter_u64_fetch(V_ipstatp.ips_ofragments);
+	ipstat.ips_cantfrag = counter_u64_fetch(V_ipstatp.ips_cantfrag);
+	ipstat.ips_badoptions = counter_u64_fetch(V_ipstatp.ips_badoptions);
+	ipstat.ips_noroute = counter_u64_fetch(V_ipstatp.ips_noroute);
+	ipstat.ips_badvers = counter_u64_fetch(V_ipstatp.ips_badvers);
+	ipstat.ips_rawout = counter_u64_fetch(V_ipstatp.ips_rawout);
+	ipstat.ips_toolong = counter_u64_fetch(V_ipstatp.ips_toolong);
+	ipstat.ips_notmember = counter_u64_fetch(V_ipstatp.ips_notmember);
+	ipstat.ips_nogif = counter_u64_fetch(V_ipstatp.ips_nogif);
+	ipstat.ips_badaddr = counter_u64_fetch(V_ipstatp.ips_badaddr);
+
+	/*
+	 * Old interface allowed to rewrite 'struct ipstat', and netstat(1)
+	 * used it to zero the structure. To keep compatibility with old
+	 * netstat(1) we will zero out statistics on every write attempt,
+	 * however we no longer support writing arbitrary fake values to
+	 * the statistics.
+	 */
+	if (req->newptr)
+		ipstat_zero();
+
+	return (SYSCTL_OUT(req, &ipstat, sizeof(ipstat)));
+}
+
+SYSCTL_VNET_PROC(_net_inet_ip, IPCTL_STATS, stats, CTLTYPE_OPAQUE | CTLFLAG_RW,
+    NULL, 0, ipstat_sysctl, "I",
+    "IP statistics (struct ipstat, netinet/ip_var.h)");
+/*
  * Kernel module interface for updating ipstat.  The argument is an index
  * into ipstat treated as an array of u_long.  While this encodes the general
  * layout of ipstat into the caller, it doesn't encode its location, so that
@@ -223,14 +387,14 @@ void
 kmod_ipstat_inc(int statnum)
 {
 
-	(*((u_long *)&V_ipstat + statnum))++;
+	counter_u64_inc((counter_u64_t )&V_ipstatp + statnum, 1);
 }
 
 void
 kmod_ipstat_dec(int statnum)
 {
 
-	(*((u_long *)&V_ipstat + statnum))--;
+	counter_u64_dec((counter_u64_t )&V_ipstatp + statnum, 1);
 }
 
 static int

Modified: projects/counters/sys/netinet/ip_var.h
==============================================================================
--- projects/counters/sys/netinet/ip_var.h	Fri Nov 23 13:55:38 2012	(r243452)
+++ projects/counters/sys/netinet/ip_var.h	Fri Nov 23 14:00:26 2012	(r243453)
@@ -97,47 +97,83 @@ struct ip_moptions {
 };
 
 struct	ipstat {
-	u_long	ips_total;		/* total packets received */
-	u_long	ips_badsum;		/* checksum bad */
-	u_long	ips_tooshort;		/* packet too short */
-	u_long	ips_toosmall;		/* not enough data */
-	u_long	ips_badhlen;		/* ip header length < data size */
-	u_long	ips_badlen;		/* ip length < ip header length */
-	u_long	ips_fragments;		/* fragments received */
-	u_long	ips_fragdropped;	/* frags dropped (dups, out of space) */
-	u_long	ips_fragtimeout;	/* fragments timed out */
-	u_long	ips_forward;		/* packets forwarded */
-	u_long	ips_fastforward;	/* packets fast forwarded */
-	u_long	ips_cantforward;	/* packets rcvd for unreachable dest */
-	u_long	ips_redirectsent;	/* packets forwarded on same net */
-	u_long	ips_noproto;		/* unknown or unsupported protocol */
-	u_long	ips_delivered;		/* datagrams delivered to upper level*/
-	u_long	ips_localout;		/* total ip packets generated here */
-	u_long	ips_odropped;		/* lost packets due to nobufs, etc. */
-	u_long	ips_reassembled;	/* total packets reassembled ok */
-	u_long	ips_fragmented;		/* datagrams successfully fragmented */
-	u_long	ips_ofragments;		/* output fragments created */
-	u_long	ips_cantfrag;		/* don't fragment flag was set, etc. */
-	u_long	ips_badoptions;		/* error in option processing */
-	u_long	ips_noroute;		/* packets discarded due to no route */
-	u_long	ips_badvers;		/* ip version != 4 */
-	u_long	ips_rawout;		/* total raw ip packets generated */
-	u_long	ips_toolong;		/* ip length > max ip packet size */
-	u_long	ips_notmember;		/* multicasts for unregistered grps */
-	u_long	ips_nogif;		/* no match gif found */
-	u_long	ips_badaddr;		/* invalid address on header */
+	uint64_t ips_total;		/* total packets received */
+	uint64_t ips_badsum;		/* checksum bad */
+	uint64_t ips_tooshort;		/* packet too short */
+	uint64_t ips_toosmall;		/* not enough data */
+	uint64_t ips_badhlen;		/* ip header length < data size */
+	uint64_t ips_badlen;		/* ip length < ip header length */
+	uint64_t ips_fragments;		/* fragments received */
+	uint64_t ips_fragdropped;	/* frags dropped (dups, out of space) */
+	uint64_t ips_fragtimeout;	/* fragments timed out */
+	uint64_t ips_forward;		/* packets forwarded */
+	uint64_t ips_fastforward;	/* packets fast forwarded */
+	uint64_t ips_cantforward;	/* packets rcvd for unreachable dest */
+	uint64_t ips_redirectsent;	/* packets forwarded on same net */
+	uint64_t ips_noproto;		/* unknown or unsupported protocol */
+	uint64_t ips_delivered;		/* datagrams delivered to upper level*/
+	uint64_t ips_localout;		/* total ip packets generated here */
+	uint64_t ips_odropped;		/* lost packets due to nobufs, etc. */
+	uint64_t ips_reassembled;	/* total packets reassembled ok */
+	uint64_t ips_fragmented;	/* datagrams successfully fragmented */
+	uint64_t ips_ofragments;	/* output fragments created */
+	uint64_t ips_cantfrag;		/* don't fragment flag was set, etc. */
+	uint64_t ips_badoptions;		/* error in option processing */
+	uint64_t ips_noroute;		/* packets discarded due to no route */
+	uint64_t ips_badvers;		/* ip version != 4 */
+	uint64_t ips_rawout;		/* total raw ip packets generated */
+	uint64_t ips_toolong;		/* ip length > max ip packet size */
+	uint64_t ips_notmember;		/* multicasts for unregistered grps */
+	uint64_t ips_nogif;		/* no match gif found */
+	uint64_t ips_badaddr;		/* invalid address on header */
 };
 
 #ifdef _KERNEL
 
+#include <sys/counter.h>
 #include <net/vnet.h>
 
+/* Should match 'struct ipstat' above. */
+struct ipstat_p {
+	counter_u64_t ips_total;
+	counter_u64_t ips_badsum;
+	counter_u64_t ips_tooshort;
+	counter_u64_t ips_toosmall;
+	counter_u64_t ips_badhlen;
+	counter_u64_t ips_badlen;
+	counter_u64_t ips_fragments;
+	counter_u64_t ips_fragdropped;
+	counter_u64_t ips_fragtimeout;
+	counter_u64_t ips_forward;
+	counter_u64_t ips_fastforward;
+	counter_u64_t ips_cantforward;
+	counter_u64_t ips_redirectsent;
+	counter_u64_t ips_noproto;
+	counter_u64_t ips_delivered;
+	counter_u64_t ips_localout;
+	counter_u64_t ips_odropped;
+	counter_u64_t ips_reassembled;
+	counter_u64_t ips_fragmented;
+	counter_u64_t ips_ofragments;
+	counter_u64_t ips_cantfrag;
+	counter_u64_t ips_badoptions;
+	counter_u64_t ips_noroute;
+	counter_u64_t ips_badvers;
+	counter_u64_t ips_rawout;
+	counter_u64_t ips_toolong;
+	counter_u64_t ips_notmember;
+	counter_u64_t ips_nogif;
+	counter_u64_t ips_badaddr;
+};
+VNET_DECLARE(struct ipstat_p, ipstatp);
+#define	V_ipstatp VNET(ipstatp)
+
 /*
  * In-kernel consumers can use these accessor macros directly to update
  * stats.
  */
-#define	IPSTAT_ADD(name, val)	V_ipstat.name += (val)
-#define	IPSTAT_SUB(name, val)	V_ipstat.name -= (val)
+#define	IPSTAT_ADD(name, val)	counter_u64_inc(V_ipstatp.name, (val))
+#define	IPSTAT_SUB(name, val)	counter_u64_dec(V_ipstatp.name, (val))
 #define	IPSTAT_INC(name)	IPSTAT_ADD(name, 1)
 #define	IPSTAT_DEC(name)	IPSTAT_SUB(name, 1)
 
@@ -146,10 +182,10 @@ struct	ipstat {
  */
 void	kmod_ipstat_inc(int statnum);
 #define	KMOD_IPSTAT_INC(name)						\
-	kmod_ipstat_inc(offsetof(struct ipstat, name) / sizeof(u_long))
+	kmod_ipstat_inc(offsetof(struct ipstat_p, name) / sizeof(counter_u64_t))
 void	kmod_ipstat_dec(int statnum);
 #define	KMOD_IPSTAT_DEC(name)						\
-	kmod_ipstat_dec(offsetof(struct ipstat, name) / sizeof(u_long))
+	kmod_ipstat_dec(offsetof(struct ipstat_p, name) / sizeof(counter_u64_t))
 
 /* flags passed to ip_output as last parameter */
 #define	IP_FORWARDING		0x1		/* most of ip header exists */
@@ -176,7 +212,6 @@ struct inpcb;
 struct route;
 struct sockopt;
 
-VNET_DECLARE(struct ipstat, ipstat);
 VNET_DECLARE(u_short, ip_id);			/* ip packet ctr, for ids */
 VNET_DECLARE(int, ip_defttl);			/* default IP ttl */
 VNET_DECLARE(int, ipforwarding);		/* ip forwarding */
@@ -192,7 +227,6 @@ VNET_DECLARE(int, rsvp_on);
 VNET_DECLARE(int, drop_redirect);
 extern struct	pr_usrreqs rip_usrreqs;
 
-#define	V_ipstat		VNET(ipstat)
 #define	V_ip_id			VNET(ip_id)
 #define	V_ip_defttl		VNET(ip_defttl)
 #define	V_ipforwarding		VNET(ipforwarding)


More information about the svn-src-projects mailing list