svn commit: r312379 - in head: lib/libc/sys sbin/ifconfig sys/conf sys/kern sys/modules/if_lagg sys/modules/if_vlan sys/net sys/netinet sys/netinet6 sys/sys

Gleb Smirnoff glebius at FreeBSD.org
Wed Jan 18 17:31:40 UTC 2017


  Hi!

  I'm quite disappointed by this checkin. This is an interesting experimental
feature, but to my knowledge there was no production testing of the feature
that would prove that the idea actually works. To my knowledge the code isn't
used anywhere right now; it was just tested to work as a concept. At the same
time it introduces yet another socket-to-interface layering violation, bloats
the ifnet structure, and makes other intrusive changes.

Please correct me if I am wrong.

On Wed, Jan 18, 2017 at 01:31:17PM +0000, Hans Petter Selasky wrote:
H> Author: hselasky
H> Date: Wed Jan 18 13:31:17 2017
H> New Revision: 312379
H> URL: https://svnweb.freebsd.org/changeset/base/312379
H> 
H> Log:
H>   Implement kernel support for hardware rate limited sockets.
H>   
H>   - Add RATELIMIT kernel configuration keyword which must be set to
H>   enable the new functionality.
H>   
H>   - Add support for hardware driven, Receive Side Scaling, RSS aware, rate
H>   limited sendqueues and expose the functionality through the already
H>   established SO_MAX_PACING_RATE setsockopt(). The API supports rates in
H>   the range from 1 to 4Gbytes/s which are suitable for regular TCP and
H>   UDP streams. The setsockopt(2) manual page has been updated.
H>   
H>   - Add rate limit function callback API to "struct ifnet" which supports
H>   the following operations: if_snd_tag_alloc(), if_snd_tag_modify(),
H>   if_snd_tag_query() and if_snd_tag_free().
H>   
H>   - Add support to ifconfig to view, set and clear the IFCAP_TXRTLMT
H>   flag, which tells if a network driver supports rate limiting or not.
H>   
H>   - This patch also adds support for rate limiting through VLAN and LAGG
H>   intermediate network devices.
H>   
H>   - How rate limiting works:
H>   
H>   1) The userspace application calls setsockopt() after accepting or
H>   making a new connection to set the rate which is then stored in the
H>   socket structure in the kernel. Later on when packets are transmitted
H>   a check is made in the transmit path for rate changes. A rate change
H>   implies a non-blocking ifp->if_snd_tag_alloc() call will be made to the
H>   destination network interface, which then sets up a custom sendqueue
H>   with the given rate limitation parameter. A "struct m_snd_tag" pointer is
H>   returned which serves as a "snd_tag" hint in the m_pkthdr for the
H>   subsequently transmitted mbufs.
H>   
H>   2) When the network driver sees the "m->m_pkthdr.snd_tag" different
H>   from NULL, it will move the packets into a designated rate limited sendqueue
H>   given by the snd_tag pointer. It is up to the individual drivers how the rate
H>   limited traffic will be rate limited.
H>   
H>   3) Route changes are detected by the NIC drivers in the ifp->if_transmit()
H>   routine when the ifnet pointer in the incoming snd_tag mismatches the
H>   one of the network interface. The network adapter frees the mbuf and
H>   returns EAGAIN which causes the ip_output() to release and clear the send
H>   tag. Upon the next ip_output() call, allocation of a new "snd_tag" will be attempted.
H>   
H>   4) When the PCB is detached the custom sendqueue will be released by a
H>   non-blocking ifp->if_snd_tag_free() call to the currently bound network
H>   interface.
H>   
H>   Reviewed by:		wblock (manpages), adrian, gallatin, scottl (network)
H>   Differential Revision:	https://reviews.freebsd.org/D3687
H>   Sponsored by:		Mellanox Technologies
H>   MFC after:		3 months
H> 
H> Modified:
H>   head/lib/libc/sys/getsockopt.2
H>   head/sbin/ifconfig/ifconfig.8
H>   head/sbin/ifconfig/ifconfig.c
H>   head/sys/conf/NOTES
H>   head/sys/conf/config.mk
H>   head/sys/conf/kern.opts.mk
H>   head/sys/conf/options
H>   head/sys/kern/uipc_socket.c
H>   head/sys/modules/if_lagg/Makefile
H>   head/sys/modules/if_vlan/Makefile
H>   head/sys/net/ieee8023ad_lacp.c
H>   head/sys/net/ieee8023ad_lacp.h
H>   head/sys/net/if.h
H>   head/sys/net/if_dead.c
H>   head/sys/net/if_lagg.c
H>   head/sys/net/if_var.h
H>   head/sys/net/if_vlan.c
H>   head/sys/netinet/in_pcb.c
H>   head/sys/netinet/in_pcb.h
H>   head/sys/netinet/ip_output.c
H>   head/sys/netinet6/ip6_output.c
H>   head/sys/sys/mbuf.h
H>   head/sys/sys/socket.h
H>   head/sys/sys/socketvar.h
H> 
H> Modified: head/lib/libc/sys/getsockopt.2
H> ==============================================================================
H> --- head/lib/libc/sys/getsockopt.2	Wed Jan 18 13:27:24 2017	(r312378)
H> +++ head/lib/libc/sys/getsockopt.2	Wed Jan 18 13:31:17 2017	(r312379)
H> @@ -28,7 +28,7 @@
H>  .\"     @(#)getsockopt.2	8.4 (Berkeley) 5/2/95
H>  .\" $FreeBSD$
H>  .\"
H> -.Dd April 5, 2013
H> +.Dd January 18, 2017
H>  .Dt GETSOCKOPT 2
H>  .Os
H>  .Sh NAME
H> @@ -188,6 +188,7 @@ The following options are recognized in
H>  .It Dv SO_LISTENINCQLEN Ta "get incomplete queue length of the socket (get only)"
H>  .It Dv SO_USER_COOKIE Ta "set the 'so_user_cookie' value for the socket (uint32_t, set only)"
H>  .It Dv SO_TS_CLOCK Ta "set specific format of timestamp returned by SO_TIMESTAMP"
H> +.It Dv SO_MAX_PACING_RATE "set the maximum transmit rate in bytes per second for the socket"
H>  .El
H>  .Pp
H>  .Dv SO_DEBUG
H> @@ -515,6 +516,10 @@ returns the maximal number of queued con
H>  returns the number of unaccepted complete connections.
H>  .Dv SO_LISTENINCQLEN
H>  returns the number of unaccepted incomplete connections.
H> +.Pp
H> +.Dv SO_MAX_PACING_RATE
H> +instruct the socket and underlying network adapter layers to limit the
H> +transfer rate to the given unsigned 32-bit value in bytes per second.
H>  .Sh RETURN VALUES
H>  .Rv -std
H>  .Sh ERRORS
H> 
H> Modified: head/sbin/ifconfig/ifconfig.8
H> ==============================================================================
H> --- head/sbin/ifconfig/ifconfig.8	Wed Jan 18 13:27:24 2017	(r312378)
H> +++ head/sbin/ifconfig/ifconfig.8	Wed Jan 18 13:31:17 2017	(r312379)
H> @@ -28,7 +28,7 @@
H>  .\"     From: @(#)ifconfig.8	8.3 (Berkeley) 1/5/94
H>  .\" $FreeBSD$
H>  .\"
H> -.Dd September 17, 2016
H> +.Dd January 18, 2017
H>  .Dt IFCONFIG 8
H>  .Os
H>  .Sh NAME
H> @@ -460,6 +460,8 @@ this directive is used to select between
H>  and 802.11g
H>  .Pq Cm 11g
H>  operating modes.
H> +.It Cm txrtlmt
H> +Set if the driver supports TX rate limiting.
H>  .It Cm inst Ar minst , Cm instance Ar minst
H>  Set the media instance to
H>  .Ar minst .
H> 
H> Modified: head/sbin/ifconfig/ifconfig.c
H> ==============================================================================
H> --- head/sbin/ifconfig/ifconfig.c	Wed Jan 18 13:27:24 2017	(r312378)
H> +++ head/sbin/ifconfig/ifconfig.c	Wed Jan 18 13:31:17 2017	(r312379)
H> @@ -1145,7 +1145,7 @@ unsetifdescr(const char *val, int value,
H>  "\020\1RXCSUM\2TXCSUM\3NETCONS\4VLAN_MTU\5VLAN_HWTAGGING\6JUMBO_MTU\7POLLING" \
H>  "\10VLAN_HWCSUM\11TSO4\12TSO6\13LRO\14WOL_UCAST\15WOL_MCAST\16WOL_MAGIC" \
H>  "\17TOE4\20TOE6\21VLAN_HWFILTER\23VLAN_HWTSO\24LINKSTATE\25NETMAP" \
H> -"\26RXCSUM_IPV6\27TXCSUM_IPV6"
H> +"\26RXCSUM_IPV6\27TXCSUM_IPV6\31TXRTLMT"
H>  
H>  /*
H>   * Print the status of the interface.  If an address family was
H> @@ -1453,6 +1453,8 @@ static struct cmd basic_cmds[] = {
H>  	DEF_CMD("-wol_mcast",	-IFCAP_WOL_MCAST,	setifcap),
H>  	DEF_CMD("wol_magic",	IFCAP_WOL_MAGIC,	setifcap),
H>  	DEF_CMD("-wol_magic",	-IFCAP_WOL_MAGIC,	setifcap),
H> +	DEF_CMD("txrtlmt",	IFCAP_TXRTLMT,	setifcap),
H> +	DEF_CMD("-txrtlmt",	-IFCAP_TXRTLMT,	setifcap),
H>  	DEF_CMD("normal",	-IFF_LINK0,	setifflags),
H>  	DEF_CMD("compress",	IFF_LINK0,	setifflags),
H>  	DEF_CMD("noicmp",	IFF_LINK1,	setifflags),
H> 
H> Modified: head/sys/conf/NOTES
H> ==============================================================================
H> --- head/sys/conf/NOTES	Wed Jan 18 13:27:24 2017	(r312378)
H> +++ head/sys/conf/NOTES	Wed Jan 18 13:31:17 2017	(r312379)
H> @@ -619,6 +619,8 @@ options 	HWPMC_HOOKS		# Other necessary 
H>  options 	INET			#Internet communications protocols
H>  options 	INET6			#IPv6 communications protocols
H>  
H> +options		RATELIMIT		# TX rate limiting support
H> +
H>  options 	ROUTETABLES=2		# allocated fibs up to 65536. default is 1.
H>  					# but that would be a bad idea as they are large.
H>  
H> 
H> Modified: head/sys/conf/config.mk
H> ==============================================================================
H> --- head/sys/conf/config.mk	Wed Jan 18 13:27:24 2017	(r312378)
H> +++ head/sys/conf/config.mk	Wed Jan 18 13:31:17 2017	(r312379)
H> @@ -19,6 +19,10 @@ opt_inet.h:
H>  opt_inet6.h:
H>  	@echo "#define INET6 1" > ${.TARGET}
H>  .endif
H> +.if ${MK_RATELIMIT} != "no"
H> +opt_ratelimit.h:
H> +	@echo "#define RATELIMIT 1" > ${.TARGET}
H> +.endif
H>  .if ${MK_EISA} != "no"
H>  opt_eisa.h:
H>  	@echo "#define DEV_EISA 1" > ${.TARGET}
H> 
H> Modified: head/sys/conf/kern.opts.mk
H> ==============================================================================
H> --- head/sys/conf/kern.opts.mk	Wed Jan 18 13:27:24 2017	(r312378)
H> +++ head/sys/conf/kern.opts.mk	Wed Jan 18 13:31:17 2017	(r312379)
H> @@ -48,6 +48,7 @@ __DEFAULT_NO_OPTIONS = \
H>      EXTRA_TCP_STACKS \
H>      NAND \
H>      OFED \
H> +    RATELIMIT \
H>      REPRODUCIBLE_BUILD
H>  
H>  # Some options are totally broken on some architectures. We disable
H> 
H> Modified: head/sys/conf/options
H> ==============================================================================
H> --- head/sys/conf/options	Wed Jan 18 13:27:24 2017	(r312378)
H> +++ head/sys/conf/options	Wed Jan 18 13:31:17 2017	(r312379)
H> @@ -412,6 +412,7 @@ BOOTP_NFSV3		opt_bootp.h
H>  BOOTP_WIRED_TO		opt_bootp.h
H>  DEVICE_POLLING
H>  DUMMYNET		opt_ipdn.h
H> +RATELIMIT		opt_ratelimit.h
H>  INET			opt_inet.h
H>  INET6			opt_inet6.h
H>  IPDIVERT
H> 
H> Modified: head/sys/kern/uipc_socket.c
H> ==============================================================================
H> --- head/sys/kern/uipc_socket.c	Wed Jan 18 13:27:24 2017	(r312378)
H> +++ head/sys/kern/uipc_socket.c	Wed Jan 18 13:31:17 2017	(r312379)
H> @@ -2699,6 +2699,14 @@ sosetopt(struct socket *so, struct socko
H>  			so->so_ts_clock = optval;
H>  			break;
H>  
H> +		case SO_MAX_PACING_RATE:
H> +			error = sooptcopyin(sopt, &val32, sizeof(val32),
H> +			    sizeof(val32));
H> +			if (error)
H> +				goto bad;
H> +			so->so_max_pacing_rate = val32;
H> +			break;
H> +
H>  		default:
H>  			if (V_socket_hhh[HHOOK_SOCKET_OPT]->hhh_nhooks > 0)
H>  				error = hhook_run_socket(so, sopt,
H> @@ -2890,6 +2898,10 @@ integer:
H>  			optval = so->so_ts_clock;
H>  			goto integer;
H>  
H> +		case SO_MAX_PACING_RATE:
H> +			optval = so->so_max_pacing_rate;
H> +			goto integer;
H> +
H>  		default:
H>  			if (V_socket_hhh[HHOOK_SOCKET_OPT]->hhh_nhooks > 0)
H>  				error = hhook_run_socket(so, sopt,
H> 
H> Modified: head/sys/modules/if_lagg/Makefile
H> ==============================================================================
H> --- head/sys/modules/if_lagg/Makefile	Wed Jan 18 13:27:24 2017	(r312378)
H> +++ head/sys/modules/if_lagg/Makefile	Wed Jan 18 13:31:17 2017	(r312379)
H> @@ -2,6 +2,6 @@
H>  
H>  .PATH:	${.CURDIR}/../../net
H>  KMOD=	if_lagg
H> -SRCS=	if_lagg.c ieee8023ad_lacp.c opt_inet.h opt_inet6.h
H> +SRCS=	if_lagg.c ieee8023ad_lacp.c opt_inet.h opt_inet6.h opt_ratelimit.h
H>  
H>  .include <bsd.kmod.mk>
H> 
H> Modified: head/sys/modules/if_vlan/Makefile
H> ==============================================================================
H> --- head/sys/modules/if_vlan/Makefile	Wed Jan 18 13:27:24 2017	(r312378)
H> +++ head/sys/modules/if_vlan/Makefile	Wed Jan 18 13:31:17 2017	(r312379)
H> @@ -4,6 +4,6 @@
H>  
H>  KMOD=	if_vlan
H>  SRCS=	if_vlan.c
H> -SRCS+=	opt_inet.h opt_vlan.h
H> +SRCS+=	opt_inet.h opt_vlan.h opt_ratelimit.h
H>  
H>  .include <bsd.kmod.mk>
H> 
H> Modified: head/sys/net/ieee8023ad_lacp.c
H> ==============================================================================
H> --- head/sys/net/ieee8023ad_lacp.c	Wed Jan 18 13:27:24 2017	(r312378)
H> +++ head/sys/net/ieee8023ad_lacp.c	Wed Jan 18 13:31:17 2017	(r312379)
H> @@ -30,6 +30,8 @@
H>  #include <sys/cdefs.h>
H>  __FBSDID("$FreeBSD$");
H>  
H> +#include "opt_ratelimit.h"
H> +
H>  #include <sys/param.h>
H>  #include <sys/callout.h>
H>  #include <sys/eventhandler.h>
H> @@ -853,6 +855,35 @@ lacp_select_tx_port(struct lagg_softc *s
H>  
H>  	return (lp->lp_lagg);
H>  }
H> +
H> +#ifdef RATELIMIT
H> +struct lagg_port *
H> +lacp_select_tx_port_by_hash(struct lagg_softc *sc, uint32_t flowid)
H> +{
H> +	struct lacp_softc *lsc = LACP_SOFTC(sc);
H> +	struct lacp_portmap *pm;
H> +	struct lacp_port *lp;
H> +	uint32_t hash;
H> +
H> +	if (__predict_false(lsc->lsc_suppress_distributing)) {
H> +		LACP_DPRINTF((NULL, "%s: waiting transit\n", __func__));
H> +		return (NULL);
H> +	}
H> +
H> +	pm = &lsc->lsc_pmap[lsc->lsc_activemap];
H> +	if (pm->pm_count == 0) {
H> +		LACP_DPRINTF((NULL, "%s: no active aggregator\n", __func__));
H> +		return (NULL);
H> +	}
H> +
H> +	hash = flowid >> sc->flowid_shift;
H> +	hash %= pm->pm_count;
H> +	lp = pm->pm_map[hash];
H> +
H> +	return (lp->lp_lagg);
H> +}
H> +#endif
H> +
H>  /*
H>   * lacp_suppress_distributing: drop transmit packets for a while
H>   * to preserve packet ordering.
H> 
H> Modified: head/sys/net/ieee8023ad_lacp.h
H> ==============================================================================
H> --- head/sys/net/ieee8023ad_lacp.h	Wed Jan 18 13:27:24 2017	(r312378)
H> +++ head/sys/net/ieee8023ad_lacp.h	Wed Jan 18 13:31:17 2017	(r312379)
H> @@ -284,6 +284,9 @@ struct lacp_softc {
H>  
H>  struct mbuf	*lacp_input(struct lagg_port *, struct mbuf *);
H>  struct lagg_port *lacp_select_tx_port(struct lagg_softc *, struct mbuf *);
H> +#ifdef RATELIMIT
H> +struct lagg_port *lacp_select_tx_port_by_hash(struct lagg_softc *, uint32_t);
H> +#endif
H>  void		lacp_attach(struct lagg_softc *);
H>  void		lacp_detach(void *);
H>  void		lacp_init(struct lagg_softc *);
H> 
H> Modified: head/sys/net/if.h
H> ==============================================================================
H> --- head/sys/net/if.h	Wed Jan 18 13:27:24 2017	(r312378)
H> +++ head/sys/net/if.h	Wed Jan 18 13:31:17 2017	(r312379)
H> @@ -239,6 +239,7 @@ struct if_data {
H>  #define	IFCAP_RXCSUM_IPV6	0x200000  /* can offload checksum on IPv6 RX */
H>  #define	IFCAP_TXCSUM_IPV6	0x400000  /* can offload checksum on IPv6 TX */
H>  #define	IFCAP_HWSTATS		0x800000 /* manages counters internally */
H> +#define	IFCAP_TXRTLMT		0x1000000 /* hardware supports TX rate limiting */
H>  
H>  #define IFCAP_HWCSUM_IPV6	(IFCAP_RXCSUM_IPV6 | IFCAP_TXCSUM_IPV6)
H>  
H> 
H> Modified: head/sys/net/if_dead.c
H> ==============================================================================
H> --- head/sys/net/if_dead.c	Wed Jan 18 13:27:24 2017	(r312378)
H> +++ head/sys/net/if_dead.c	Wed Jan 18 13:31:17 2017	(r312379)
H> @@ -100,6 +100,30 @@ ifdead_get_counter(struct ifnet *ifp, if
H>  	return (0);
H>  }
H>  
H> +static int
H> +ifdead_snd_tag_alloc(struct ifnet *ifp, union if_snd_tag_alloc_params *params,
H> +    struct m_snd_tag **ppmt)
H> +{
H> +	return (EOPNOTSUPP);
H> +}
H> +
H> +static int
H> +ifdead_snd_tag_modify(struct m_snd_tag *pmt, union if_snd_tag_modify_params *params)
H> +{
H> +	return (EOPNOTSUPP);
H> +}
H> +
H> +static int
H> +ifdead_snd_tag_query(struct m_snd_tag *pmt, union if_snd_tag_query_params *params)
H> +{
H> +	return (EOPNOTSUPP);
H> +}
H> +
H> +static void
H> +ifdead_snd_tag_free(struct m_snd_tag *pmt)
H> +{
H> +}
H> +
H>  void
H>  if_dead(struct ifnet *ifp)
H>  {
H> @@ -112,4 +136,8 @@ if_dead(struct ifnet *ifp)
H>  	ifp->if_qflush = ifdead_qflush;
H>  	ifp->if_transmit = ifdead_transmit;
H>  	ifp->if_get_counter = ifdead_get_counter;
H> +	ifp->if_snd_tag_alloc = ifdead_snd_tag_alloc;
H> +	ifp->if_snd_tag_modify = ifdead_snd_tag_modify;
H> +	ifp->if_snd_tag_query = ifdead_snd_tag_query;
H> +	ifp->if_snd_tag_free = ifdead_snd_tag_free;
H>  }
H> 
H> Modified: head/sys/net/if_lagg.c
H> ==============================================================================
H> --- head/sys/net/if_lagg.c	Wed Jan 18 13:27:24 2017	(r312378)
H> +++ head/sys/net/if_lagg.c	Wed Jan 18 13:31:17 2017	(r312379)
H> @@ -23,6 +23,7 @@ __FBSDID("$FreeBSD$");
H>  
H>  #include "opt_inet.h"
H>  #include "opt_inet6.h"
H> +#include "opt_ratelimit.h"
H>  
H>  #include <sys/param.h>
H>  #include <sys/kernel.h>
H> @@ -118,6 +119,11 @@ static void	lagg_port2req(struct lagg_po
H>  static void	lagg_init(void *);
H>  static void	lagg_stop(struct lagg_softc *);
H>  static int	lagg_ioctl(struct ifnet *, u_long, caddr_t);
H> +#ifdef RATELIMIT
H> +static int	lagg_snd_tag_alloc(struct ifnet *,
H> +		    union if_snd_tag_alloc_params *,
H> +		    struct m_snd_tag **);
H> +#endif
H>  static int	lagg_ether_setmulti(struct lagg_softc *);
H>  static int	lagg_ether_cmdmulti(struct lagg_port *, int);
H>  static	int	lagg_setflag(struct lagg_port *, int, int,
H> @@ -503,7 +509,12 @@ lagg_clone_create(struct if_clone *ifc, 
H>  	ifp->if_ioctl = lagg_ioctl;
H>  	ifp->if_get_counter = lagg_get_counter;
H>  	ifp->if_flags = IFF_SIMPLEX | IFF_BROADCAST | IFF_MULTICAST;
H> +#ifdef RATELIMIT
H> +	ifp->if_snd_tag_alloc = lagg_snd_tag_alloc;
H> +	ifp->if_capenable = ifp->if_capabilities = IFCAP_HWSTATS | IFCAP_TXRTLMT;
H> +#else
H>  	ifp->if_capenable = ifp->if_capabilities = IFCAP_HWSTATS;
H> +#endif
H>  
H>  	/*
H>  	 * Attach as an ordinary ethernet device, children will be attached
H> @@ -1549,6 +1560,52 @@ lagg_ioctl(struct ifnet *ifp, u_long cmd
H>  	return (error);
H>  }
H>  
H> +#ifdef RATELIMIT
H> +static int
H> +lagg_snd_tag_alloc(struct ifnet *ifp,
H> +    union if_snd_tag_alloc_params *params,
H> +    struct m_snd_tag **ppmt)
H> +{
H> +	struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
H> +	struct lagg_port *lp;
H> +	struct lagg_lb *lb;
H> +	uint32_t p;
H> +
H> +	switch (sc->sc_proto) {
H> +	case LAGG_PROTO_FAILOVER:
H> +		lp = lagg_link_active(sc, sc->sc_primary);
H> +		break;
H> +	case LAGG_PROTO_LOADBALANCE:
H> +		if ((sc->sc_opts & LAGG_OPT_USE_FLOWID) == 0 ||
H> +		    params->hdr.flowtype == M_HASHTYPE_NONE)
H> +			return (EOPNOTSUPP);
H> +		p = params->hdr.flowid >> sc->flowid_shift;
H> +		p %= sc->sc_count;
H> +		lb = (struct lagg_lb *)sc->sc_psc;
H> +		lp = lb->lb_ports[p];
H> +		lp = lagg_link_active(sc, lp);
H> +		break;
H> +	case LAGG_PROTO_LACP:
H> +		if ((sc->sc_opts & LAGG_OPT_USE_FLOWID) == 0 ||
H> +		    params->hdr.flowtype == M_HASHTYPE_NONE)
H> +			return (EOPNOTSUPP);
H> +		lp = lacp_select_tx_port_by_hash(sc, params->hdr.flowid);
H> +		break;
H> +	default:
H> +		return (EOPNOTSUPP);
H> +	}
H> +	if (lp == NULL)
H> +		return (EOPNOTSUPP);
H> +	ifp = lp->lp_ifp;
H> +	if (ifp == NULL || ifp->if_snd_tag_alloc == NULL ||
H> +	    (ifp->if_capenable & IFCAP_TXRTLMT) == 0)
H> +		return (EOPNOTSUPP);
H> +
H> +	/* forward allocation request */
H> +	return (ifp->if_snd_tag_alloc(ifp, params, ppmt));
H> +}
H> +#endif
H> +
H>  static int
H>  lagg_ether_setmulti(struct lagg_softc *sc)
H>  {
H> 
H> Modified: head/sys/net/if_var.h
H> ==============================================================================
H> --- head/sys/net/if_var.h	Wed Jan 18 13:27:24 2017	(r312378)
H> +++ head/sys/net/if_var.h	Wed Jan 18 13:31:17 2017	(r312379)
H> @@ -175,6 +175,49 @@ struct if_encap_req {
H>  
H>  #define	IFENCAP_FLAG_BROADCAST	0x02	/* Destination is broadcast */
H>  
H> +/*
H> + * Network interface send tag support. The storage of "struct
H> + * m_snd_tag" comes from the network driver and it is free to allocate
H> + * as much additional space as it wants for its own use.
H> + */
H> +struct m_snd_tag;
H> +
H> +#define	IF_SND_TAG_TYPE_RATE_LIMIT 0
H> +#define	IF_SND_TAG_TYPE_MAX 1
H> +
H> +struct if_snd_tag_alloc_header {
H> +	uint32_t type;		/* send tag type, see IF_SND_TAG_XXX */
H> +	uint32_t flowid;	/* mbuf hash value */
H> +	uint32_t flowtype;	/* mbuf hash type */
H> +};
H> +
H> +struct if_snd_tag_alloc_rate_limit {
H> +	struct if_snd_tag_alloc_header hdr;
H> +	uint64_t max_rate;	/* in bytes/s */
H> +};
H> +
H> +struct if_snd_tag_rate_limit_params {
H> +	uint64_t max_rate;	/* in bytes/s */
H> +};
H> +
H> +union if_snd_tag_alloc_params {
H> +	struct if_snd_tag_alloc_header hdr;
H> +	struct if_snd_tag_alloc_rate_limit rate_limit;
H> +};
H> +
H> +union if_snd_tag_modify_params {
H> +	struct if_snd_tag_rate_limit_params rate_limit;
H> +};
H> +
H> +union if_snd_tag_query_params {
H> +	struct if_snd_tag_rate_limit_params rate_limit;
H> +};
H> +
H> +typedef int (if_snd_tag_alloc_t)(struct ifnet *, union if_snd_tag_alloc_params *,
H> +    struct m_snd_tag **);
H> +typedef int (if_snd_tag_modify_t)(struct m_snd_tag *, union if_snd_tag_modify_params *);
H> +typedef int (if_snd_tag_query_t)(struct m_snd_tag *, union if_snd_tag_query_params *);
H> +typedef void (if_snd_tag_free_t)(struct m_snd_tag *);
H>  
H>  /*
H>   * Structure defining a network interface.
H> @@ -304,12 +347,19 @@ struct ifnet {
H>  	u_int	if_hw_tsomaxsegsize;	/* TSO maximum segment size in bytes */
H>  
H>  	/*
H> +	 * Network adapter send tag support:
H> +	 */
H> +	if_snd_tag_alloc_t *if_snd_tag_alloc;
H> +	if_snd_tag_modify_t *if_snd_tag_modify;
H> +	if_snd_tag_query_t *if_snd_tag_query;
H> +	if_snd_tag_free_t *if_snd_tag_free;
H> +
H> +	/*
H>  	 * Spare fields to be added before branching a stable branch, so
H>  	 * that structure can be enhanced without changing the kernel
H>  	 * binary interface.
H>  	 */
H> -	void	*if_pspare[4];		/* packet pacing / general use */
H> -	int	if_ispare[4];		/* packet pacing / general use */
H> +	int	if_ispare[4];		/* general use */
H>  };
H>  
H>  /* for compatibility with other BSDs */
H> 
H> Modified: head/sys/net/if_vlan.c
H> ==============================================================================
H> --- head/sys/net/if_vlan.c	Wed Jan 18 13:27:24 2017	(r312378)
H> +++ head/sys/net/if_vlan.c	Wed Jan 18 13:31:17 2017	(r312379)
H> @@ -46,6 +46,7 @@ __FBSDID("$FreeBSD$");
H>  
H>  #include "opt_inet.h"
H>  #include "opt_vlan.h"
H> +#include "opt_ratelimit.h"
H>  
H>  #include <sys/param.h>
H>  #include <sys/eventhandler.h>
H> @@ -212,6 +213,10 @@ static	void trunk_destroy(struct ifvlant
H>  static	void vlan_init(void *foo);
H>  static	void vlan_input(struct ifnet *ifp, struct mbuf *m);
H>  static	int vlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr);
H> +#ifdef RATELIMIT
H> +static	int vlan_snd_tag_alloc(struct ifnet *,
H> +    union if_snd_tag_alloc_params *, struct m_snd_tag **);
H> +#endif
H>  static	void vlan_qflush(struct ifnet *ifp);
H>  static	int vlan_setflag(struct ifnet *ifp, int flag, int status,
H>      int (*func)(struct ifnet *, int));
H> @@ -971,6 +976,9 @@ vlan_clone_create(struct if_clone *ifc, 
H>  	ifp->if_transmit = vlan_transmit;
H>  	ifp->if_qflush = vlan_qflush;
H>  	ifp->if_ioctl = vlan_ioctl;
H> +#ifdef RATELIMIT
H> +	ifp->if_snd_tag_alloc = vlan_snd_tag_alloc;
H> +#endif
H>  	ifp->if_flags = VLAN_IFFLAGS;
H>  	ether_ifattach(ifp, eaddr);
H>  	/* Now undo some of the damage... */
H> @@ -1591,6 +1599,15 @@ vlan_capabilities(struct ifvlan *ifv)
H>  		TOEDEV(ifp) = TOEDEV(p);
H>  		ifp->if_capenable |= p->if_capenable & IFCAP_TOE;
H>  	}
H> +
H> +#ifdef RATELIMIT
H> +	/*
H> +	 * If the parent interface supports ratelimiting, so does the
H> +	 * VLAN interface.
H> +	 */
H> +	ifp->if_capabilities |= (p->if_capabilities & IFCAP_TXRTLMT);
H> +	ifp->if_capenable |= (p->if_capenable & IFCAP_TXRTLMT);
H> +#endif
H>  }
H>  
H>  static void
H> @@ -1801,3 +1818,19 @@ vlan_ioctl(struct ifnet *ifp, u_long cmd
H>  
H>  	return (error);
H>  }
H> +
H> +#ifdef RATELIMIT
H> +static int
H> +vlan_snd_tag_alloc(struct ifnet *ifp,
H> +    union if_snd_tag_alloc_params *params,
H> +    struct m_snd_tag **ppmt)
H> +{
H> +
H> +	/* get trunk device */
H> +	ifp = vlan_trunkdev(ifp);
H> +	if (ifp == NULL || (ifp->if_capenable & IFCAP_TXRTLMT) == 0)
H> +		return (EOPNOTSUPP);
H> +	/* forward allocation request */
H> +	return (ifp->if_snd_tag_alloc(ifp, params, ppmt));
H> +}
H> +#endif
H> 
H> Modified: head/sys/netinet/in_pcb.c
H> ==============================================================================
H> --- head/sys/netinet/in_pcb.c	Wed Jan 18 13:27:24 2017	(r312378)
H> +++ head/sys/netinet/in_pcb.c	Wed Jan 18 13:31:17 2017	(r312379)
H> @@ -42,6 +42,7 @@ __FBSDID("$FreeBSD$");
H>  #include "opt_ipsec.h"
H>  #include "opt_inet.h"
H>  #include "opt_inet6.h"
H> +#include "opt_ratelimit.h"
H>  #include "opt_pcbgroup.h"
H>  #include "opt_rss.h"
H>  
H> @@ -57,6 +58,7 @@ __FBSDID("$FreeBSD$");
H>  #include <sys/rmlock.h>
H>  #include <sys/socket.h>
H>  #include <sys/socketvar.h>
H> +#include <sys/sockio.h>
H>  #include <sys/priv.h>
H>  #include <sys/proc.h>
H>  #include <sys/refcount.h>
H> @@ -1140,6 +1142,10 @@ in_pcbdetach(struct inpcb *inp)
H>  
H>  	KASSERT(inp->inp_socket != NULL, ("%s: inp_socket == NULL", __func__));
H>  
H> +#ifdef RATELIMIT
H> +	if (inp->inp_snd_tag != NULL)
H> +		in_pcbdetach_txrtlmt(inp);
H> +#endif
H>  	inp->inp_socket->so_pcb = NULL;
H>  	inp->inp_socket = NULL;
H>  }
H> @@ -2677,3 +2683,253 @@ DB_SHOW_COMMAND(inpcb, db_show_inpcb)
H>  	db_print_inpcb(inp, "inpcb", 0);
H>  }
H>  #endif /* DDB */
H> +
H> +#ifdef RATELIMIT
H> +/*
H> + * Modify TX rate limit based on the existing "inp->inp_snd_tag",
H> + * if any.
H> + */
H> +int
H> +in_pcbmodify_txrtlmt(struct inpcb *inp, uint32_t max_pacing_rate)
H> +{
H> +	union if_snd_tag_modify_params params = {
H> +		.rate_limit.max_rate = max_pacing_rate,
H> +	};
H> +	struct m_snd_tag *mst;
H> +	struct ifnet *ifp;
H> +	int error;
H> +
H> +	mst = inp->inp_snd_tag;
H> +	if (mst == NULL)
H> +		return (EINVAL);
H> +
H> +	ifp = mst->ifp;
H> +	if (ifp == NULL)
H> +		return (EINVAL);
H> +
H> +	if (ifp->if_snd_tag_modify == NULL) {
H> +		error = EOPNOTSUPP;
H> +	} else {
H> +		error = ifp->if_snd_tag_modify(mst, &params);
H> +	}
H> +	return (error);
H> +}
H> +
H> +/*
H> + * Query existing TX rate limit based on the existing
H> + * "inp->inp_snd_tag", if any.
H> + */
H> +int
H> +in_pcbquery_txrtlmt(struct inpcb *inp, uint32_t *p_max_pacing_rate)
H> +{
H> +	union if_snd_tag_query_params params = { };
H> +	struct m_snd_tag *mst;
H> +	struct ifnet *ifp;
H> +	int error;
H> +
H> +	mst = inp->inp_snd_tag;
H> +	if (mst == NULL)
H> +		return (EINVAL);
H> +
H> +	ifp = mst->ifp;
H> +	if (ifp == NULL)
H> +		return (EINVAL);
H> +
H> +	if (ifp->if_snd_tag_query == NULL) {
H> +		error = EOPNOTSUPP;
H> +	} else {
H> +		error = ifp->if_snd_tag_query(mst, &params);
H> +		if (error == 0 &&  p_max_pacing_rate != NULL)
H> +			*p_max_pacing_rate = params.rate_limit.max_rate;
H> +	}
H> +	return (error);
H> +}
H> +
H> +/*
H> + * Allocate a new TX rate limit send tag from the network interface
H> + * given by the "ifp" argument and save it in "inp->inp_snd_tag":
H> + */
H> +int
H> +in_pcbattach_txrtlmt(struct inpcb *inp, struct ifnet *ifp,
H> +    uint32_t flowtype, uint32_t flowid, uint32_t max_pacing_rate)
H> +{
H> +	union if_snd_tag_alloc_params params = {
H> +		.rate_limit.hdr.type = IF_SND_TAG_TYPE_RATE_LIMIT,
H> +		.rate_limit.hdr.flowid = flowid,
H> +		.rate_limit.hdr.flowtype = flowtype,
H> +		.rate_limit.max_rate = max_pacing_rate,
H> +	};
H> +	int error;
H> +
H> +	INP_WLOCK_ASSERT(inp);
H> +
H> +	if (inp->inp_snd_tag != NULL)
H> +		return (EINVAL);
H> +
H> +	if (ifp->if_snd_tag_alloc == NULL) {
H> +		error = EOPNOTSUPP;
H> +	} else {
H> +		error = ifp->if_snd_tag_alloc(ifp, &params, &inp->inp_snd_tag);
H> +
H> +		/*
H> +		 * At success increment the refcount on
H> +		 * the send tag's network interface:
H> +		 */
H> +		if (error == 0)
H> +			if_ref(inp->inp_snd_tag->ifp);
H> +	}
H> +	return (error);
H> +}
H> +
H> +/*
H> + * Free an existing TX rate limit tag based on the "inp->inp_snd_tag",
H> + * if any:
H> + */
H> +void
H> +in_pcbdetach_txrtlmt(struct inpcb *inp)
H> +{
H> +	struct m_snd_tag *mst;
H> +	struct ifnet *ifp;
H> +
H> +	INP_WLOCK_ASSERT(inp);
H> +
H> +	mst = inp->inp_snd_tag;
H> +	inp->inp_snd_tag = NULL;
H> +
H> +	if (mst == NULL)
H> +		return;
H> +
H> +	ifp = mst->ifp;
H> +	if (ifp == NULL)
H> +		return;
H> +
H> +	/*
H> +	 * If the device was detached while we still had reference(s)
H> +	 * on the ifp, we assume if_snd_tag_free() was replaced with
H> +	 * stubs.
H> +	 */
H> +	ifp->if_snd_tag_free(mst);
H> +
H> +	/* release reference count on network interface */
H> +	if_rele(ifp);
H> +}
H> +
H> +/*
H> + * This function should be called when the INP_RATE_LIMIT_CHANGED flag
H> + * is set in the fast path and will attach/detach/modify the TX rate
H> + * limit send tag based on the socket's so_max_pacing_rate value.
H> + */
H> +void
H> +in_pcboutput_txrtlmt(struct inpcb *inp, struct ifnet *ifp, struct mbuf *mb)
H> +{
H> +	struct socket *socket;
H> +	uint32_t max_pacing_rate;
H> +	bool did_upgrade;
H> +	int error;
H> +
H> +	if (inp == NULL)
H> +		return;
H> +
H> +	socket = inp->inp_socket;
H> +	if (socket == NULL)
H> +		return;
H> +
H> +	if (!INP_WLOCKED(inp)) {
H> +		/*
H> +		 * NOTE: If the write locking fails, we need to bail
H> +		 * out and use the non-ratelimited ring for the
H> +		 * transmit until there is a new chance to get the
H> +		 * write lock.
H> +		 */
H> +		if (!INP_TRY_UPGRADE(inp))
H> +			return;
H> +		did_upgrade = 1;
H> +	} else {
H> +		did_upgrade = 0;
H> +	}
H> +
H> +	/*
H> +	 * NOTE: The so_max_pacing_rate value is read unlocked,
H> +	 * because atomic updates are not required since the variable
H> +	 * is checked at every mbuf we send. It is assumed that the
H> +	 * variable read itself will be atomic.
H> +	 */
H> +	max_pacing_rate = socket->so_max_pacing_rate;
H> +
H> +	/*
H> +	 * NOTE: When attaching to a network interface a reference is
H> +	 * made to ensure the network interface doesn't go away until
H> +	 * all ratelimit connections are gone. The network interface
H> +	 * pointers compared below represent valid network interfaces,
H> +	 * except when comparing towards NULL.
H> +	 */
H> +	if (max_pacing_rate == 0 && inp->inp_snd_tag == NULL) {
H> +		error = 0;
H> +	} else if (!(ifp->if_capenable & IFCAP_TXRTLMT)) {
H> +		if (inp->inp_snd_tag != NULL)
H> +			in_pcbdetach_txrtlmt(inp);
H> +		error = 0;
H> +	} else if (inp->inp_snd_tag == NULL) {
H> +		/*
H> +		 * In order to utilize packet pacing with RSS, we need
H> +		 * to wait until there is a valid RSS hash before we
H> +		 * can proceed:
H> +		 */
H> +		if (M_HASHTYPE_GET(mb) == M_HASHTYPE_NONE) {
H> +			error = EAGAIN;
H> +		} else {
H> +			error = in_pcbattach_txrtlmt(inp, ifp, M_HASHTYPE_GET(mb),
H> +			    mb->m_pkthdr.flowid, max_pacing_rate);
H> +		}
H> +	} else {
H> +		error = in_pcbmodify_txrtlmt(inp, max_pacing_rate);
H> +	}
H> +	if (error == 0 || error == EOPNOTSUPP)
H> +		inp->inp_flags2 &= ~INP_RATE_LIMIT_CHANGED;
H> +	if (did_upgrade)
H> +		INP_DOWNGRADE(inp);
H> +}
H> +
H> +/*
H> + * Track route changes for TX rate limiting.
H> + */
H> +void
H> +in_pcboutput_eagain(struct inpcb *inp)
H> +{
H> +	struct socket *socket;
H> +	bool did_upgrade;
H> +
H> +	if (inp == NULL)
H> +		return;
H> +
H> +	socket = inp->inp_socket;
H> +	if (socket == NULL)
H> +		return;
H> +
H> +	if (inp->inp_snd_tag == NULL)
H> +		return;
H> +
H> +	if (!INP_WLOCKED(inp)) {
H> +		/*
H> +		 * NOTE: If the write locking fails, we need to bail
H> +		 * out and use the non-ratelimited ring for the
H> +		 * transmit until there is a new chance to get the
H> +		 * write lock.
H> +		 */
H> +		if (!INP_TRY_UPGRADE(inp))
H> +			return;
H> +		did_upgrade = 1;
H> +	} else {
H> +		did_upgrade = 0;
H> +	}
H> +
H> +	/* detach rate limiting */
H> +	in_pcbdetach_txrtlmt(inp);
H> +
H> +	/* make sure new mbuf send tag allocation is made */
H> +	inp->inp_flags2 |= INP_RATE_LIMIT_CHANGED;
H> +
H> +	if (did_upgrade)
H> +		INP_DOWNGRADE(inp);
H> +}
H> +#endif /* RATELIMIT */
H> 
H> Modified: head/sys/netinet/in_pcb.h
H> ==============================================================================
H> --- head/sys/netinet/in_pcb.h	Wed Jan 18 13:27:24 2017	(r312378)
H> +++ head/sys/netinet/in_pcb.h	Wed Jan 18 13:31:17 2017	(r312379)
H> @@ -181,6 +181,7 @@ struct	icmp6_filter;
H>   * read-lock usage during modification, this model can be applied to other
H>   * protocols (especially SCTP).
H>   */
H> +struct m_snd_tag;
H>  struct inpcb {
H>  	LIST_ENTRY(inpcb) inp_hash;	/* (h/i) hash list */
H>  	LIST_ENTRY(inpcb) inp_pcbgrouphash;	/* (g/i) hash list */
H> @@ -202,11 +203,11 @@ struct inpcb {
H>  	u_char	inp_ip_minttl;		/* (i) minimum TTL or drop */
H>  	uint32_t inp_flowid;		/* (x) flow id / queue id */
H>  	u_int	inp_refcount;		/* (i) refcount */
H> -	void	*inp_pspare[5];		/* (x) packet pacing / general use */
H> +	struct m_snd_tag *inp_snd_tag;	/* (i) send tag for outgoing mbufs */
H> +	void	*inp_pspare[4];		/* (x) general use */
H>  	uint32_t inp_flowtype;		/* (x) M_HASHTYPE value */
H>  	uint32_t inp_rss_listen_bucket;	/* (x) overridden RSS listen bucket */
H> -	u_int	inp_ispare[4];		/* (x) packet pacing / user cookie /
H> -					 *     general use */
H> +	u_int	inp_ispare[4];		/* (x) user cookie / general use */
H>  
H>  	/* Local and foreign ports, local and foreign addr. */
H>  	struct	in_conninfo inp_inc;	/* (i) list for PCB's local port */
H> @@ -616,6 +617,7 @@ short	inp_so_options(const struct inpcb 
H>  #define	INP_RSS_BUCKET_SET	0x00000080 /* IP_RSS_LISTEN_BUCKET is set */
H>  #define	INP_RECVFLOWID		0x00000100 /* populate recv datagram with flow info */
H>  #define	INP_RECVRSSBUCKETID	0x00000200 /* populate recv datagram with bucket id */
H> +#define	INP_RATE_LIMIT_CHANGED	0x00000400 /* rate limit needs attention */
H>  
H>  /*
H>   * Flags passed to in_pcblookup*() functions.
H> @@ -736,6 +738,14 @@ int	in_getsockaddr(struct socket *so, st
H>  struct sockaddr *
H>  	in_sockaddr(in_port_t port, struct in_addr *addr);
H>  void	in_pcbsosetlabel(struct socket *so);
H> +#ifdef RATELIMIT
H> +int	in_pcbattach_txrtlmt(struct inpcb *, struct ifnet *, uint32_t, uint32_t, uint32_t);
H> +void	in_pcbdetach_txrtlmt(struct inpcb *);
H> +int	in_pcbmodify_txrtlmt(struct inpcb *, uint32_t);
H> +int	in_pcbquery_txrtlmt(struct inpcb *, uint32_t *);
H> +void	in_pcboutput_txrtlmt(struct inpcb *, struct ifnet *, struct mbuf *);
H> +void	in_pcboutput_eagain(struct inpcb *);
H> +#endif
H>  #endif /* _KERNEL */
H>  
H>  #endif /* !_NETINET_IN_PCB_H_ */
H> 
H> Modified: head/sys/netinet/ip_output.c
H> ==============================================================================
H> --- head/sys/netinet/ip_output.c	Wed Jan 18 13:27:24 2017	(r312378)
H> +++ head/sys/netinet/ip_output.c	Wed Jan 18 13:31:17 2017	(r312379)
H> @@ -33,6 +33,7 @@
H>  __FBSDID("$FreeBSD$");
H>  
H>  #include "opt_inet.h"
H> +#include "opt_ratelimit.h"
H>  #include "opt_ipsec.h"
H>  #include "opt_mbuf_stress_test.h"
H>  #include "opt_mpath.h"
H> @@ -661,8 +662,23 @@ sendit:
H>  		 */
H>  		m_clrprotoflags(m);
H>  		IP_PROBE(send, NULL, NULL, ip, ifp, ip, NULL);
H> +#ifdef RATELIMIT
H> +		if (inp != NULL) {
H> +			if (inp->inp_flags2 & INP_RATE_LIMIT_CHANGED)
H> +				in_pcboutput_txrtlmt(inp, ifp, m);
H> +			/* stamp send tag on mbuf */
H> +			m->m_pkthdr.snd_tag = inp->inp_snd_tag;
H> +		} else {
H> +			m->m_pkthdr.snd_tag = NULL;
H> +		}
H> +#endif
H>  		error = (*ifp->if_output)(ifp, m,
H>  		    (const struct sockaddr *)gw, ro);
H> +#ifdef RATELIMIT
H> +		/* check for route change */
H> +		if (error == EAGAIN)
H> +			in_pcboutput_eagain(inp);
H> +#endif
H>  		goto done;
H>  	}
H>  
H> @@ -698,8 +714,23 @@ sendit:
H>  
H>  			IP_PROBE(send, NULL, NULL, mtod(m, struct ip *), ifp,
H>  			    mtod(m, struct ip *), NULL);
H> +#ifdef RATELIMIT
H> +			if (inp != NULL) {
H> +				if (inp->inp_flags2 & INP_RATE_LIMIT_CHANGED)
H> +					in_pcboutput_txrtlmt(inp, ifp, m);
H> +				/* stamp send tag on mbuf */
H> +				m->m_pkthdr.snd_tag = inp->inp_snd_tag;
H> +			} else {
H> +				m->m_pkthdr.snd_tag = NULL;
H> +			}
H> +#endif
H>  			error = (*ifp->if_output)(ifp, m,
H>  			    (const struct sockaddr *)gw, ro);
H> +#ifdef RATELIMIT
H> +			/* check for route change */
H> +			if (error == EAGAIN)
H> +				in_pcboutput_eagain(inp);
H> +#endif
H>  		} else
H>  			m_freem(m);
H>  	}
H> @@ -974,6 +1005,16 @@ ip_ctloutput(struct socket *so, struct s
H>  				INP_WUNLOCK(inp);
H>  				error = 0;
H>  				break;
H> +			case SO_MAX_PACING_RATE:
H> +#ifdef RATELIMIT
H> +				INP_WLOCK(inp);
H> +				inp->inp_flags2 |= INP_RATE_LIMIT_CHANGED;
H> +				INP_WUNLOCK(inp);
H> +				error = 0;
H> +#else
H> +				error = EOPNOTSUPP;
H> +#endif
H> +				break;
H>  			default:
H>  				break;
H>  			}
H> 
H> Modified: head/sys/netinet6/ip6_output.c
H> ==============================================================================
H> --- head/sys/netinet6/ip6_output.c	Wed Jan 18 13:27:24 2017	(r312378)
H> +++ head/sys/netinet6/ip6_output.c	Wed Jan 18 13:31:17 2017	(r312379)
H> @@ -65,6 +65,7 @@ __FBSDID("$FreeBSD$");
H>  
H>  #include "opt_inet.h"
H>  #include "opt_inet6.h"
H> +#include "opt_ratelimit.h"
H>  #include "opt_ipsec.h"
H>  #include "opt_sctp.h"
H>  #include "opt_route.h"
H> @@ -954,8 +955,23 @@ passout:
H>  			    m->m_pkthdr.len);
H>  			ifa_free(&ia6->ia_ifa);
H>  		}
H> +#ifdef RATELIMIT
H> +		if (inp != NULL) {
H> +			if (inp->inp_flags2 & INP_RATE_LIMIT_CHANGED)
H> +				in_pcboutput_txrtlmt(inp, ifp, m);
H> +			/* stamp send tag on mbuf */
H> +			m->m_pkthdr.snd_tag = inp->inp_snd_tag;
H> +		} else {
H> +			m->m_pkthdr.snd_tag = NULL;
H> +		}
H> +#endif
H>  		error = nd6_output_ifp(ifp, origifp, m, dst,
H>  		    (struct route *)ro);
H> +#ifdef RATELIMIT
H> +		/* check for route change */
H> +		if (error == EAGAIN)
H> +			in_pcboutput_eagain(inp);
H> +#endif
H>  		goto done;
H>  	}
H>  
H> @@ -1054,8 +1070,23 @@ sendorfree:
H>  				counter_u64_add(ia->ia_ifa.ifa_obytes,
H>  				    m->m_pkthdr.len);
H>  			}
H> +#ifdef RATELIMIT
H> +			if (inp != NULL) {
H> +				if (inp->inp_flags2 & INP_RATE_LIMIT_CHANGED)
H> +					in_pcboutput_txrtlmt(inp, ifp, m);
H> +				/* stamp send tag on mbuf */
H> +				m->m_pkthdr.snd_tag = inp->inp_snd_tag;
H> +			} else {
H> +				m->m_pkthdr.snd_tag = NULL;
H> +			}
H> +#endif
H>  			error = nd6_output_ifp(ifp, origifp, m, dst,
H>  			    (struct route *)ro);
H> +#ifdef RATELIMIT
H> +			/* check for route change */
H> +			if (error == EAGAIN)
H> +				in_pcboutput_eagain(inp);
H> +#endif
H>  		} else
H>  			m_freem(m);
H>  	}
H> @@ -1441,6 +1472,16 @@ ip6_ctloutput(struct socket *so, struct 
H>  				INP_WUNLOCK(in6p);
H>  				error = 0;
H>  				break;
H> +			case SO_MAX_PACING_RATE:
H> +#ifdef RATELIMIT
H> +				INP_WLOCK(in6p);
H> +				in6p->inp_flags2 |= INP_RATE_LIMIT_CHANGED;
H> +				INP_WUNLOCK(in6p);
H> +				error = 0;
H> +#else
H> +				error = EOPNOTSUPP;
H> +#endif
H> +				break;
H>  			default:
H>  				break;
H>  			}
H> 
H> Modified: head/sys/sys/mbuf.h
H> ==============================================================================
H> --- head/sys/sys/mbuf.h	Wed Jan 18 13:27:24 2017	(r312378)
H> +++ head/sys/sys/mbuf.h	Wed Jan 18 13:31:17 2017	(r312379)
H> @@ -130,6 +130,14 @@ struct m_tag {
H>  };
H>  
H>  /*
H> + * Static network interface owned tag.
H> + * Allocated through ifp->if_snd_tag_alloc().
H> + */
H> +struct m_snd_tag {
H> +	struct ifnet *ifp;		/* network interface tag belongs to */
H> +};
H> +
H> +/*
H>   * Record/packet header in first mbuf of chain; valid only if M_PKTHDR is set.
H>   * Size ILP32: 48
H>   *	 LP64: 56
H> 
H> *** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
H> 

-- 
Totus tuus, Glebius.


More information about the svn-src-all mailing list