svn commit: r355859 - in head/sys/netinet: . tcp_stacks

Randall Stewart rrs at FreeBSD.org
Tue Dec 17 16:08:09 UTC 2019


Author: rrs
Date: Tue Dec 17 16:08:07 2019
New Revision: 355859
URL: https://svnweb.freebsd.org/changeset/base/355859

Log:
  This commit is a bit of a rearranging of deck chairs. It
  gets both rack and bbr ready for the completion of the stats(3)
  framework in FreeBSD. For now, if you don't have both NETFLIX_STATS
  and STATS defined, the stacks disable stats support. As soon as the
  rest of the stats framework lands, we can remove that restriction
  and simply use STATS where defined.
  
  Sponsored by:	Netflix Inc.
  Differential Revision:	https://reviews.freebsd.org/D22479
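
What "use STATS when defined" means in practice: rack_bbr_common.h now
couples the two kernel options, undefining STATS unless NETFLIX_STATS is
also present, so every consumer can gate on STATS alone.  A minimal
sketch of the guard and one consumer, using only names that appear in
the diff below:

    /* rack_bbr_common.h: couple STATS to NETFLIX_STATS for now. */
    #ifndef NETFLIX_STATS
    #undef  STATS
    #endif

    /* Consumers then test the single STATS option, e.g. in bbr.c: */
    #ifdef STATS
    stats_voi_update_abs_u32(tp->t_stats, VOI_TCP_RTT, imax(0, rtt_ticks));
    #endif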

Modified:
  head/sys/netinet/tcp_stacks/bbr.c
  head/sys/netinet/tcp_stacks/rack.c
  head/sys/netinet/tcp_stacks/rack_bbr_common.c
  head/sys/netinet/tcp_stacks/rack_bbr_common.h
  head/sys/netinet/tcp_stacks/sack_filter.c
  head/sys/netinet/tcp_stacks/tcp_bbr.h
  head/sys/netinet/tcp_stacks/tcp_rack.h
  head/sys/netinet/tcp_subr.c
  head/sys/netinet/tcp_var.h

Modified: head/sys/netinet/tcp_stacks/bbr.c
==============================================================================
--- head/sys/netinet/tcp_stacks/bbr.c	Tue Dec 17 15:56:48 2019	(r355858)
+++ head/sys/netinet/tcp_stacks/bbr.c	Tue Dec 17 16:08:07 2019	(r355859)
@@ -1,5 +1,5 @@
 /*-
- * Copyright (c) 2016-2019
+ * Copyright (c) 2016-9
  *	Netflix Inc.
  *      All rights reserved.
  *
@@ -42,6 +42,7 @@ __FBSDID("$FreeBSD$");
 #include "opt_ratelimit.h"
 #include "opt_kern_tls.h"
 #include <sys/param.h>
+#include <sys/arb.h>
 #include <sys/module.h>
 #include <sys/kernel.h>
 #ifdef TCP_HHOOK
@@ -57,9 +58,9 @@ __FBSDID("$FreeBSD$");
 #endif
 #include <sys/sysctl.h>
 #include <sys/systm.h>
+#ifdef STATS
 #include <sys/qmath.h>
 #include <sys/tree.h>
-#ifdef NETFLIX_STATS
 #include <sys/stats.h> /* Must come after qmath.h and tree.h */
 #endif
 #include <sys/refcount.h>
@@ -161,9 +162,8 @@ static int32_t bbr_num_pktepo_for_del_limit = BBR_NUM_
 static int32_t bbr_hardware_pacing_limit = 8000;
 static int32_t bbr_quanta = 3;	/* How much extra quanta do we get? */
 static int32_t bbr_no_retran = 0;
-static int32_t bbr_tcp_map_entries_limit = 1500;
-static int32_t bbr_tcp_map_split_limit = 256;
 
+
 static int32_t bbr_error_base_paceout = 10000; /* usec to pace */
 static int32_t bbr_max_net_error_cnt = 10;
 /* Should the following be dynamic too -- loss wise */
@@ -3381,8 +3381,8 @@ bbr_alloc(struct tcp_bbr *bbr)
 static struct bbr_sendmap *
 bbr_alloc_full_limit(struct tcp_bbr *bbr)
 {
-	if ((bbr_tcp_map_entries_limit > 0) &&
-	    (bbr->r_ctl.rc_num_maps_alloced >= bbr_tcp_map_entries_limit)) {
+	if ((V_tcp_map_entries_limit > 0) &&
+	    (bbr->r_ctl.rc_num_maps_alloced >= V_tcp_map_entries_limit)) {
 		BBR_STAT_INC(bbr_alloc_limited);
 		if (!bbr->alloc_limit_reported) {
 			bbr->alloc_limit_reported = 1;
@@ -3402,8 +3402,8 @@ bbr_alloc_limit(struct tcp_bbr *bbr, uint8_t limit_typ
 
 	if (limit_type) {
 		/* currently there is only one limit type */
-		if (bbr_tcp_map_split_limit > 0 &&
-		    bbr->r_ctl.rc_num_split_allocs >= bbr_tcp_map_split_limit) {
+		if (V_tcp_map_split_limit > 0 &&
+		    bbr->r_ctl.rc_num_split_allocs >= V_tcp_map_split_limit) {
 			BBR_STAT_INC(bbr_split_limited);
 			if (!bbr->alloc_limit_reported) {
 				bbr->alloc_limit_reported = 1;
@@ -3685,7 +3685,7 @@ bbr_ack_received(struct tcpcb *tp, struct tcp_bbr *bbr
 	uint32_t cwnd, target_cwnd, saved_bytes, maxseg;
 	int32_t meth;
 
-#ifdef NETFLIX_STATS
+#ifdef STATS
 	if ((tp->t_flags & TF_GPUTINPROG) &&
 	    SEQ_GEQ(th->th_ack, tp->gput_ack)) {
 		/*
@@ -6510,7 +6510,7 @@ tcp_bbr_xmit_timer_commit(struct tcp_bbr *bbr, struct 
 	}
 	TCPSTAT_INC(tcps_rttupdated);
 	tp->t_rttupdated++;
-#ifdef NETFLIX_STATS
+#ifdef STATS
 	stats_voi_update_abs_u32(tp->t_stats, VOI_TCP_RTT, imax(0, rtt_ticks));
 #endif
 	/*
@@ -8490,6 +8490,7 @@ dodata:				/* XXX */
 					return (0);
 				}
 			}
+
 #endif
 			if (DELAY_ACK(tp, bbr, nsegs) || tfo_syn) {
 				bbr->bbr_segs_rcvd += max(1, nsegs);
@@ -8698,6 +8699,7 @@ bbr_do_fastnewdata(struct mbuf *m, struct tcphdr *th, 
 	 * reassembly queue and we have enough buffer space to take it.
 	 */
 	nsegs = max(1, m->m_pkthdr.lro_nsegs);
+
 #ifdef NETFLIX_SB_LIMITS
 	if (so->so_rcv.sb_shlim) {
 		mcnt = m_memcnt(m);
@@ -8746,6 +8748,7 @@ bbr_do_fastnewdata(struct mbuf *m, struct tcphdr *th, 
 			    newsize, so, NULL))
 				so->so_rcv.sb_flags &= ~SB_AUTOSIZE;
 		m_adj(m, drop_hdrlen);	/* delayed header drop */
+
 #ifdef NETFLIX_SB_LIMITS
 		appended =
 #endif
@@ -11561,7 +11564,7 @@ bbr_do_segment_nounlock(struct mbuf *m, struct tcphdr 
 	 * the scale is zero.
 	 */
 	tiwin = th->th_win << tp->snd_scale;
-#ifdef NETFLIX_STATS
+#ifdef STATS
 	stats_voi_update_abs_ulong(tp->t_stats, VOI_TCP_FRWIN, tiwin);
 #endif
 	/*
@@ -11960,7 +11963,7 @@ bbr_do_send_accounting(struct tcpcb *tp, struct tcp_bb
 	if ((tp->t_flags & TF_FORCEDATA) && len == 1) {
 		/* Window probe */
 		TCPSTAT_INC(tcps_sndprobe);
-#ifdef NETFLIX_STATS
+#ifdef STATS
 		stats_voi_update_abs_u32(tp->t_stats,
 		    VOI_TCP_RETXPB, len);
 #endif
@@ -11981,7 +11984,7 @@ bbr_do_send_accounting(struct tcpcb *tp, struct tcp_bb
 			tp->t_sndrexmitpack++;
 			TCPSTAT_INC(tcps_sndrexmitpack);
 			TCPSTAT_ADD(tcps_sndrexmitbyte, len);
-#ifdef NETFLIX_STATS
+#ifdef STATS
 			stats_voi_update_abs_u32(tp->t_stats, VOI_TCP_RETXPB,
 			    len);
 #endif
@@ -12017,7 +12020,7 @@ bbr_do_send_accounting(struct tcpcb *tp, struct tcp_bb
 		/* Place in 17's the total sent */
 		counter_u64_add(bbr_state_resend[17], len);
 		counter_u64_add(bbr_state_lost[17], len);
-#ifdef NETFLIX_STATS
+#ifdef STATS
 		stats_voi_update_abs_u64(tp->t_stats, VOI_TCP_TXPB,
 		    len);
 #endif
@@ -12517,8 +12520,8 @@ recheck_resend:
 	 * as long as we are not retransmiting.
 	 */
 	if ((rsm == NULL) &&
-	    (bbr_tcp_map_entries_limit > 0) &&
-	    (bbr->r_ctl.rc_num_maps_alloced >= bbr_tcp_map_entries_limit)) {
+	    (V_tcp_map_entries_limit > 0) &&
+	    (bbr->r_ctl.rc_num_maps_alloced >= V_tcp_map_entries_limit)) {
 		BBR_STAT_INC(bbr_alloc_limited);
 		if (!bbr->alloc_limit_reported) {
 			bbr->alloc_limit_reported = 1;
@@ -13256,7 +13259,6 @@ send:
 			SOCKBUF_UNLOCK(&so->so_snd);
 			return (EHOSTUNREACH);
 		}
-
 		hdrlen += sizeof(struct udphdr);
 	}
 #endif
@@ -14276,7 +14278,7 @@ nomore:
 			bbr_start_hpts_timer(bbr, tp, cts, 11, slot, 0);
 			return (error);
 		}
-#ifdef NETFLIX_STATS
+#ifdef STATS
 	} else if (((tp->t_flags & TF_GPUTINPROG) == 0) &&
 		    len &&
 		    (rsm == NULL) &&
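
The other substantive change in bbr.c (mirrored in rack.c) is the switch
from stack-local map limits to the VNET-virtualized ones introduced in
tcp_var.h and tcp_subr.c below, so both stacks share one pair of
per-VNET tunables.  A sketch of the pattern, assembled from names in
this commit:

    /* tcp_var.h: declare the virtualized limit and its accessor. */
    VNET_DECLARE(uint32_t, tcp_map_entries_limit);
    #define V_tcp_map_entries_limit  VNET(tcp_map_entries_limit)

    /* A stack checks the shared limit before allocating a sendmap
     * entry; a value of 0 means unlimited. */
    if ((V_tcp_map_entries_limit > 0) &&
        (bbr->r_ctl.rc_num_maps_alloced >= V_tcp_map_entries_limit)) {
            BBR_STAT_INC(bbr_alloc_limited);
            return (NULL);
    }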

Modified: head/sys/netinet/tcp_stacks/rack.c
==============================================================================
--- head/sys/netinet/tcp_stacks/rack.c	Tue Dec 17 15:56:48 2019	(r355858)
+++ head/sys/netinet/tcp_stacks/rack.c	Tue Dec 17 16:08:07 2019	(r355859)
@@ -1,5 +1,5 @@
 /*-
- * Copyright (c) 2016-2019 Netflix, Inc.
+ * Copyright (c) 2016-9 Netflix, Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -34,6 +34,7 @@ __FBSDID("$FreeBSD$");
 #include "opt_ratelimit.h"
 #include "opt_kern_tls.h"
 #include <sys/param.h>
+#include <sys/arb.h>
 #include <sys/module.h>
 #include <sys/kernel.h>
 #ifdef TCP_HHOOK
@@ -52,7 +53,7 @@ __FBSDID("$FreeBSD$");
 #endif
 #include <sys/sysctl.h>
 #include <sys/systm.h>
-#ifdef NETFLIX_STATS
+#ifdef STATS
 #include <sys/qmath.h>
 #include <sys/tree.h>
 #include <sys/stats.h> /* Must come after qmath.h and tree.h */
@@ -187,21 +188,6 @@ static int32_t rack_persist_max = 1000;	/* 1 Second */
 static int32_t rack_sack_not_required = 0;	/* set to one to allow non-sack to use rack */
 static int32_t rack_hw_tls_max_seg = 0; /* 0 means use hw-tls single segment */
 
-/*  Sack attack detection thresholds and such */
-static int32_t tcp_force_detection = 0;
-
-#ifdef NETFLIX_EXP_DETECTION
-static int32_t tcp_sack_to_ack_thresh = 700;	/* 70 % */
-static int32_t tcp_sack_to_move_thresh = 600;	/* 60 % */
-static int32_t tcp_restoral_thresh = 650;	/* 65 % (sack:2:ack -5%) */
-static int32_t tcp_attack_on_turns_on_logging = 0;
-static int32_t tcp_map_minimum = 500;
-#endif
-static int32_t tcp_sad_decay_val = 800;
-static int32_t tcp_sad_pacing_interval = 2000;
-static int32_t tcp_sad_low_pps = 100;
-
-
 /*
  * Currently regular tcp has a rto_min of 30ms
  * the backoff goes 12 times so that ends up
@@ -226,10 +212,7 @@ static int32_t rack_always_send_oldest = 0;
 static int32_t rack_use_sack_filter = 1;
 static int32_t rack_tlp_threshold_use = TLP_USE_TWO_ONE;
 static int32_t rack_per_of_gp = 50;
-static int32_t rack_tcp_map_entries_limit = 1500;
-static int32_t rack_tcp_map_split_limit = 256;
 
-
 /* Rack specific counters */
 counter_u64_t rack_badfr;
 counter_u64_t rack_badfr_bytes;
@@ -1577,9 +1560,9 @@ rack_alloc(struct tcp_rack *rack)
 static struct rack_sendmap *
 rack_alloc_full_limit(struct tcp_rack *rack)
 {
-	if ((rack_tcp_map_entries_limit > 0) &&
+	if ((V_tcp_map_entries_limit > 0) &&
 	    (rack->do_detection == 0) &&
-	    (rack->r_ctl.rc_num_maps_alloced >= rack_tcp_map_entries_limit)) {
+	    (rack->r_ctl.rc_num_maps_alloced >= V_tcp_map_entries_limit)) {
 		counter_u64_add(rack_to_alloc_limited, 1);
 		if (!rack->alloc_limit_reported) {
 			rack->alloc_limit_reported = 1;
@@ -1598,9 +1581,9 @@ rack_alloc_limit(struct tcp_rack *rack, uint8_t limit_
 
 	if (limit_type) {
 		/* currently there is only one limit type */
-		if (rack_tcp_map_split_limit > 0 &&
+		if (V_tcp_map_split_limit > 0 &&
 		    (rack->do_detection == 0) &&
-		    rack->r_ctl.rc_num_split_allocs >= rack_tcp_map_split_limit) {
+		    rack->r_ctl.rc_num_split_allocs >= V_tcp_map_split_limit) {
 			counter_u64_add(rack_split_limited, 1);
 			if (!rack->alloc_limit_reported) {
 				rack->alloc_limit_reported = 1;
@@ -1648,7 +1631,7 @@ static void
 rack_ack_received(struct tcpcb *tp, struct tcp_rack *rack, struct tcphdr *th, uint16_t nsegs,
     uint16_t type, int32_t recovery)
 {
-#ifdef NETFLIX_STATS
+#ifdef STATS
 	int32_t gput;
 #endif
 
@@ -1671,7 +1654,7 @@ rack_ack_received(struct tcpcb *tp, struct tcp_rack *r
 		tp->ccv->flags &= ~CCF_CWND_LIMITED;
 
 	if (type == CC_ACK) {
-#ifdef NETFLIX_STATS
+#ifdef STATS
 		stats_voi_update_abs_s32(tp->t_stats, VOI_TCP_CALCFRWINDIFF,
 		    ((int32_t) tp->snd_cwnd) - tp->snd_wnd);
 		if ((tp->t_flags & TF_GPUTINPROG) &&
@@ -1725,7 +1708,7 @@ rack_ack_received(struct tcpcb *tp, struct tcp_rack *r
 		tp->ccv->curack = th->th_ack;
 		CC_ALGO(tp)->ack_received(tp->ccv, type);
 	}
-#ifdef NETFLIX_STATS
+#ifdef STATS
 	stats_voi_update_abs_ulong(tp->t_stats, VOI_TCP_LCWIN, tp->snd_cwnd);
 #endif
 	if (rack->r_ctl.rc_rack_largest_cwnd < tp->snd_cwnd) {
@@ -2436,6 +2419,7 @@ rack_start_hpts_timer(struct tcp_rack *rack, struct tc
 			slot = 1;
 	}
 	hpts_timeout = rack_timer_start(tp, rack, cts, sup_rack);
+#ifdef NETFLIX_EXP_DETECTION
 	if (rack->sack_attack_disable &&
 	    (slot < USEC_TO_MSEC(tcp_sad_pacing_interval))) {
 		/*
@@ -2450,6 +2434,7 @@ rack_start_hpts_timer(struct tcp_rack *rack, struct tc
 		 */
 		slot = USEC_TO_MSEC(tcp_sad_pacing_interval);
 	}
+#endif
 	if (tp->t_flags & TF_DELACK) {
 		delayed_ack = TICKS_2_MSEC(tcp_delacktime);
 		rack->r_ctl.rc_hpts_flags |= PACE_TMR_DELACK;
@@ -3776,7 +3761,8 @@ refind:
 			seq_out = rack_update_entry(tp, rack, nrsm, ts, &len);
 			if (len == 0) {
 				return;
-			}
+			} else if (len > 0)
+				goto refind;
 		}
 	}
 	/*
@@ -3912,7 +3898,7 @@ tcp_rack_xmit_timer_commit(struct tcp_rack *rack, stru
 	TCPSTAT_INC(tcps_rttupdated);
 	rack_log_rtt_upd(tp, rack, rtt, o_srtt, o_var);
 	tp->t_rttupdated++;
-#ifdef NETFLIX_STATS
+#ifdef STATS
 	stats_voi_update_abs_u32(tp->t_stats, VOI_TCP_RTT, imax(0, rtt));
 #endif
 	tp->t_rxtshift = 0;
@@ -4620,6 +4606,7 @@ rack_peer_reneges(struct tcp_rack *rack, struct rack_s
 static void
 rack_do_decay(struct tcp_rack *rack)
 {
+#ifdef NETFLIX_EXP_DETECTION
 	struct timeval res;
 
 #define	timersub(tvp, uvp, vvp)						\
@@ -4670,6 +4657,7 @@ rack_do_decay(struct tcp_rack *rack)
 		rack->r_ctl.sack_noextra_move = ctf_decay_count(rack->r_ctl.sack_noextra_move,
 								tcp_sad_decay_val);
 	}
+#endif	
 }
 
 static void
@@ -7406,9 +7394,11 @@ rack_init(struct tcpcb *tp)
 	rack->r_ctl.rc_last_time_decay = rack->r_ctl.rc_last_ack;
 	rack->r_ctl.rc_tlp_rxt_last_time = tcp_ts_getticks();
 	/* Do we force on detection? */
+#ifdef NETFLIX_EXP_DETECTION
 	if (tcp_force_detection)
 		rack->do_detection = 1;
 	else
+#endif
 		rack->do_detection = 0;
 	if (tp->snd_una != tp->snd_max) {
 		/* Create a send map for the current outstanding data */
@@ -7701,7 +7691,7 @@ rack_do_segment_nounlock(struct mbuf *m, struct tcphdr
 	 * the scale is zero.
 	 */
 	tiwin = th->th_win << tp->snd_scale;
-#ifdef NETFLIX_STATS
+#ifdef STATS
 	stats_voi_update_abs_ulong(tp->t_stats, VOI_TCP_FRWIN, tiwin);
 #endif
 	if (tiwin > rack->r_ctl.rc_high_rwnd)
@@ -8390,8 +8380,8 @@ again:
 	 */
 	if ((rsm == NULL) &&
 	    (rack->do_detection == 0) &&
-	    (rack_tcp_map_entries_limit > 0) &&
-	    (rack->r_ctl.rc_num_maps_alloced >= rack_tcp_map_entries_limit)) {
+	    (V_tcp_map_entries_limit > 0) &&
+	    (rack->r_ctl.rc_num_maps_alloced >= V_tcp_map_entries_limit)) {
 		counter_u64_add(rack_to_alloc_limited, 1);
 		if (!rack->alloc_limit_reported) {
 			rack->alloc_limit_reported = 1;
@@ -9318,7 +9308,7 @@ send:
 		}
 		if ((tp->t_flags & TF_FORCEDATA) && len == 1) {
 			TCPSTAT_INC(tcps_sndprobe);
-#ifdef NETFLIX_STATS
+#ifdef STATS
 			if (SEQ_LT(tp->snd_nxt, tp->snd_max))
 				stats_voi_update_abs_u32(tp->t_stats,
 				    VOI_TCP_RETXPB, len);
@@ -9339,14 +9329,14 @@ send:
 				TCPSTAT_INC(tcps_sndrexmitpack);
 				TCPSTAT_ADD(tcps_sndrexmitbyte, len);
 			}
-#ifdef NETFLIX_STATS
+#ifdef STATS
 			stats_voi_update_abs_u32(tp->t_stats, VOI_TCP_RETXPB,
 			    len);
 #endif
 		} else {
 			TCPSTAT_INC(tcps_sndpack);
 			TCPSTAT_ADD(tcps_sndbyte, len);
-#ifdef NETFLIX_STATS
+#ifdef STATS
 			stats_voi_update_abs_u64(tp->t_stats, VOI_TCP_TXPB,
 			    len);
 #endif
@@ -9929,7 +9919,7 @@ out:
 				tp->t_rtseq = startseq;
 				TCPSTAT_INC(tcps_segstimed);
 			}
-#ifdef NETFLIX_STATS
+#ifdef STATS
 			if (!(tp->t_flags & TF_GPUTINPROG) && len) {
 				tp->t_flags |= TF_GPUTINPROG;
 				tp->gput_seq = startseq;
@@ -10142,7 +10132,7 @@ rack_set_sockopt(struct socket *so, struct sockopt *so
 	rack = (struct tcp_rack *)tp->t_fb_ptr;
 	switch (sopt->sopt_name) {
 	case TCP_RACK_DO_DETECTION:
-		RACK_OPTS_INC(tcp_rack_no_sack);
+		RACK_OPTS_INC(tcp_rack_do_detection);
 		if (optval == 0)
 			rack->do_detection = 0;
 		else
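
Two behavioral fixes also ride along in rack.c: rack_update_entry() now
loops (goto refind) when bytes remain after updating an entry, and
TCP_RACK_DO_DETECTION bumps its own counter instead of tcp_rack_no_sack.
The SACK-attack knobs themselves move to tcp_subr.c under
NETFLIX_EXP_DETECTION, so every remaining consumer in rack.c is wrapped
the same way and a build without the option still links.  The guard
pattern, as in rack_start_hpts_timer() above:

    #ifdef NETFLIX_EXP_DETECTION
    /* Pace a classified attacker no faster than the configured
     * minimum interval (tcp_sad_pacing_interval is in usec). */
    if (rack->sack_attack_disable &&
        (slot < USEC_TO_MSEC(tcp_sad_pacing_interval)))
            slot = USEC_TO_MSEC(tcp_sad_pacing_interval);
    #endif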

Modified: head/sys/netinet/tcp_stacks/rack_bbr_common.c
==============================================================================
--- head/sys/netinet/tcp_stacks/rack_bbr_common.c	Tue Dec 17 15:56:48 2019	(r355858)
+++ head/sys/netinet/tcp_stacks/rack_bbr_common.c	Tue Dec 17 16:08:07 2019	(r355859)
@@ -1,5 +1,5 @@
 /*-
- * Copyright (c) 2016-2018
+ * Copyright (c) 2016-9
  *	Netflix Inc.
  *      All rights reserved.
  *
@@ -42,6 +42,7 @@ __FBSDID("$FreeBSD$");
 #include "opt_ratelimit.h"
 #include "opt_kern_tls.h"
 #include <sys/param.h>
+#include <sys/arb.h>
 #include <sys/module.h>
 #include <sys/kernel.h>
 #ifdef TCP_HHOOK
@@ -133,8 +134,6 @@ __FBSDID("$FreeBSD$");
  * Common TCP Functions - These are shared by borth
  * rack and BBR.
  */
-
-
 #ifdef KERN_TLS
 uint32_t
 ctf_get_opt_tls_size(struct socket *so, uint32_t rwnd)

Modified: head/sys/netinet/tcp_stacks/rack_bbr_common.h
==============================================================================
--- head/sys/netinet/tcp_stacks/rack_bbr_common.h	Tue Dec 17 15:56:48 2019	(r355858)
+++ head/sys/netinet/tcp_stacks/rack_bbr_common.h	Tue Dec 17 16:08:07 2019	(r355859)
@@ -1,5 +1,5 @@
-#ifndef __pacer_timer_h__
-#define __pacer_timer_h__
+#ifndef __rack_bbr_common_h__
+#define __rack_bbr_common_h__
 /*-
  * Copyright (c) 2017-9 Netflix, Inc.
  *
@@ -26,6 +26,12 @@
  *
  * __FBSDID("$FreeBSD$");
  */
+
+/* XXXLAS: Couple STATS to NETFLIX_STATS until stats(3) is fully upstreamed. */
+#ifndef	NETFLIX_STATS
+#undef	STATS
+#endif
+
 /* Common defines and such used by both RACK and BBR */
 /* Special values for mss accounting array */
 #define TCP_MSS_ACCT_JUSTRET 0
@@ -46,7 +52,24 @@
 #define PROGRESS_CLEAR  3
 #define PROGRESS_START  4
 
+/* codes for just-return */
+#define CTF_JR_SENT_DATA    0
+#define CTF_JR_CWND_LIMITED 1
+#define CTF_JR_RWND_LIMITED 2
+#define CTF_JR_APP_LIMITED  3
+#define CTF_JR_ASSESSING    4
+#define CTF_JR_PERSISTS     5
+#define CTF_JR_PRR	    6
 
+/* Compat. */
+#define BBR_JR_SENT_DATA CTF_JR_SENT_DATA
+#define BBR_JR_CWND_LIMITED CTF_JR_CWND_LIMITED
+#define BBR_JR_RWND_LIMITED CTF_JR_RWND_LIMITED
+#define BBR_JR_APP_LIMITED CTF_JR_APP_LIMITED
+#define BBR_JR_ASSESSING CTF_JR_ASSESSING
+#define BBR_JR_PERSISTS CTF_JR_PERSISTS
+#define BBR_JR_PRR CTF_JR_PRR
+
 /* RTT sample methods */
 #define USE_RTT_HIGH 0
 #define USE_RTT_LOW  1
@@ -58,6 +81,13 @@
 #define MS_IN_USEC 1000
 #define USEC_TO_MSEC(x) (x / MS_IN_USEC)
 #define TCP_TS_OVERHEAD 12		/* Overhead of having Timestamps on */
+
+/* Bits per second in bytes per second */
+#define FORTY_EIGHT_MBPS 6000000 /* 48 megabits in bytes */
+#define THIRTY_MBPS 3750000 /* 30 megabits in bytes */
+#define TWENTY_THREE_MBPS 2896000
+#define FIVETWELVE_MBPS 64000000 /* 512 megabits in bytes */
+#define ONE_POINT_TWO_MEG 150000 /* 1.2 megabits in bytes */
 
 #ifdef _KERNEL
 /* We have only 7 bits in rack so assert its true */
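
The header work here is consolidation: the include guard gets a name
matching the file, and the just-return accounting codes plus the Mbps
constants move in from tcp_bbr.h so both stacks can use them.  The old
BBR_JR_* names are kept as aliases so existing bbr.c callers compile
unchanged; illustratively (this assertion is not part of the commit):

    /* Old and new spellings are interchangeable by construction. */
    _Static_assert(BBR_JR_CWND_LIMITED == CTF_JR_CWND_LIMITED,
        "BBR_JR_* compat aliases must track CTF_JR_*");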

Modified: head/sys/netinet/tcp_stacks/sack_filter.c
==============================================================================
--- head/sys/netinet/tcp_stacks/sack_filter.c	Tue Dec 17 15:56:48 2019	(r355858)
+++ head/sys/netinet/tcp_stacks/sack_filter.c	Tue Dec 17 16:08:07 2019	(r355859)
@@ -25,11 +25,16 @@
  */
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
+#ifndef _KERNEL
+#define _WANT_TCPCB 1
+#endif
 #include <sys/types.h>
 #include <sys/queue.h>
 #include <sys/socket.h>
+#ifdef _KERNEL
 #include <sys/mbuf.h>
 #include <sys/sockopt.h>
+#endif
 #include <netinet/tcp.h>
 #include <netinet/tcp_var.h>
 #include <netinet/tcp_seq.h>
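
The sack_filter.c hunk is about compiling the file outside the kernel:
without _KERNEL, _WANT_TCPCB asks tcp_var.h to expose struct tcpcb to
userland and the kernel-only headers are skipped.  Presumably this keeps
a standalone userland build of the SACK filter working; annotated:

    #ifndef _KERNEL
    #define _WANT_TCPCB 1       /* userland: expose struct tcpcb */
    #endif
    ...
    #ifdef _KERNEL
    #include <sys/mbuf.h>       /* kernel-only headers, skipped in
                                 * a userland build */
    #include <sys/sockopt.h>
    #endif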

Modified: head/sys/netinet/tcp_stacks/tcp_bbr.h
==============================================================================
--- head/sys/netinet/tcp_stacks/tcp_bbr.h	Tue Dec 17 15:56:48 2019	(r355858)
+++ head/sys/netinet/tcp_stacks/tcp_bbr.h	Tue Dec 17 16:08:07 2019	(r355859)
@@ -128,12 +128,6 @@ TAILQ_HEAD(bbr_head, bbr_sendmap);
 					 * an clear to start measuring */
 #define BBR_RED_BW_PE_NOEARLY_OUT 7	/* Set pkt epoch judged that we do not
 					 * get out of jail early */
-/* codes for just-return */
-#define BBR_JR_SENT_DATA    0
-#define BBR_JR_CWND_LIMITED 1
-#define BBR_JR_RWND_LIMITED 2
-#define BBR_JR_APP_LIMITED  3
-#define BBR_JR_ASSESSING 4
 /* For calculating a rate */
 #define BBR_CALC_BW 	1
 #define BBR_CALC_LOSS  	2
@@ -384,13 +378,6 @@ struct bbr_log_sysctl_out {
 #define BBR_REDUCE_AT_FR 5
 
 #define BBR_BIG_LOG_SIZE 300000
-
-/* Bits per second in bytes per second */
-#define FORTY_EIGHT_MBPS 6000000 /* 48 megabits in bytes */
-#define THIRTY_MBPS 3750000 /* 30 megabits in bytes */
-#define TWENTY_THREE_MBPS 2896000
-#define FIVETWELVE_MBPS 64000000 /* 512 megabits in bytes */
-#define ONE_POINT_TWO_MEG 150000 /* 1.2 megabits in bytes */
 
 struct bbr_stats {
 	uint64_t bbr_badfr;		/* 0 */

Modified: head/sys/netinet/tcp_stacks/tcp_rack.h
==============================================================================
--- head/sys/netinet/tcp_stacks/tcp_rack.h	Tue Dec 17 15:56:48 2019	(r355858)
+++ head/sys/netinet/tcp_stacks/tcp_rack.h	Tue Dec 17 16:08:07 2019	(r355859)
@@ -137,7 +137,7 @@ struct rack_opts_stats {
 	uint64_t tcp_rack_min_pace_seg;
 	uint64_t tcp_rack_min_pace;
 	uint64_t tcp_rack_cheat;
-	uint64_t tcp_rack_no_sack;
+	uint64_t tcp_rack_do_detection;
 };
 
 #define TLP_USE_ID	1	/* Internet draft behavior */

Modified: head/sys/netinet/tcp_subr.c
==============================================================================
--- head/sys/netinet/tcp_subr.c	Tue Dec 17 15:56:48 2019	(r355858)
+++ head/sys/netinet/tcp_subr.c	Tue Dec 17 16:08:07 2019	(r355859)
@@ -138,6 +138,58 @@ VNET_DEFINE(int, tcp_mssdflt) = TCP_MSS;
 VNET_DEFINE(int, tcp_v6mssdflt) = TCP6_MSS;
 #endif
 
+#ifdef NETFLIX_EXP_DETECTION
+/*  Sack attack detection thresholds and such */
+SYSCTL_NODE(_net_inet_tcp, OID_AUTO, sack_attack, CTLFLAG_RW, 0,
+    "Sack Attack detection thresholds");
+int32_t tcp_force_detection = 0;
+SYSCTL_INT(_net_inet_tcp_sack_attack, OID_AUTO, force_detection,
+    CTLFLAG_RW,
+    &tcp_force_detection, 0,
+    "Do we force detection even if the INP has it off?");
+int32_t tcp_sack_to_ack_thresh = 700;	/* 70 % */
+SYSCTL_INT(_net_inet_tcp_sack_attack, OID_AUTO, sack_to_ack_thresh,
+    CTLFLAG_RW,
+    &tcp_sack_to_ack_thresh, 700,
+    "Percentage of sacks to acks we must see above (10.1 percent is 101)?");
+int32_t tcp_sack_to_move_thresh = 600;	/* 60 % */
+SYSCTL_INT(_net_inet_tcp_sack_attack, OID_AUTO, move_thresh,
+    CTLFLAG_RW,
+    &tcp_sack_to_move_thresh, 600,
+    "Percentage of sack moves we must see above (10.1 percent is 101)");
+int32_t tcp_restoral_thresh = 650;	/* 65 % (sack:2:ack -5%) */
+SYSCTL_INT(_net_inet_tcp_sack_attack, OID_AUTO, restore_thresh,
+    CTLFLAG_RW,
+    &tcp_restoral_thresh, 550,
+    "Percentage of sack to ack percentage we must see below to restore(10.1 percent is 101)");
+int32_t tcp_sad_decay_val = 800;
+SYSCTL_INT(_net_inet_tcp_sack_attack, OID_AUTO, decay_per,
+    CTLFLAG_RW,
+    &tcp_sad_decay_val, 800,
+    "The decay percentage (10.1 percent equals 101 )");
+int32_t tcp_map_minimum = 500;
+SYSCTL_INT(_net_inet_tcp_sack_attack, OID_AUTO, nummaps,
+    CTLFLAG_RW,
+    &tcp_map_minimum, 500,
+    "Number of Map enteries before we start detection");
+int32_t tcp_attack_on_turns_on_logging = 0;
+SYSCTL_INT(_net_inet_tcp_sack_attack, OID_AUTO, attacks_logged,
+    CTLFLAG_RW,
+    &tcp_attack_on_turns_on_logging, 0,
+   "When we have a positive hit on attack, do we turn on logging?");
+int32_t tcp_sad_pacing_interval = 2000;
+SYSCTL_INT(_net_inet_tcp_sack_attack, OID_AUTO, sad_pacing_int,
+    CTLFLAG_RW,
+    &tcp_sad_pacing_interval, 2000,
+    "What is the minimum pacing interval for a classified attacker?");
+
+int32_t tcp_sad_low_pps = 100;
+SYSCTL_INT(_net_inet_tcp_sack_attack, OID_AUTO, sad_low_pps,
+    CTLFLAG_RW,
+    &tcp_sad_low_pps, 100,
+    "What is the input pps that below which we do not decay?");
+#endif
+
 struct rwlock tcp_function_lock;
 
 static int
@@ -240,6 +292,34 @@ SYSCTL_INT(_net_inet_tcp, OID_AUTO, soreceive_stream, 
 
 VNET_DEFINE(uma_zone_t, sack_hole_zone);
 #define	V_sack_hole_zone		VNET(sack_hole_zone)
+VNET_DEFINE(uint32_t, tcp_map_entries_limit) = 0;	/* unlimited */
+static int
+sysctl_net_inet_tcp_map_limit_check(SYSCTL_HANDLER_ARGS)
+{
+	int error;
+	uint32_t new;
+
+	new = V_tcp_map_entries_limit;
+	error = sysctl_handle_int(oidp, &new, 0, req);
+	if (error == 0 && req->newptr) {
+		/* only allow "0" and value > minimum */
+		if (new > 0 && new < TCP_MIN_MAP_ENTRIES_LIMIT)
+			error = EINVAL;
+		else
+			V_tcp_map_entries_limit = new;
+	}
+	return (error);
+}
+SYSCTL_PROC(_net_inet_tcp, OID_AUTO, map_limit,
+    CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RW,
+    &VNET_NAME(tcp_map_entries_limit), 0,
+    &sysctl_net_inet_tcp_map_limit_check, "IU",
+    "Total sendmap entries limit");
+
+VNET_DEFINE(uint32_t, tcp_map_split_limit) = 0;	/* unlimited */
+SYSCTL_UINT(_net_inet_tcp, OID_AUTO, split_limit, CTLFLAG_VNET | CTLFLAG_RW,
+     &VNET_NAME(tcp_map_split_limit), 0,
+    "Total sendmap split entries limit");
 
 #ifdef TCP_HHOOK
 VNET_DEFINE(struct hhook_head *, tcp_hhh[HHOOK_TCP_LAST+1]);
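
Two details of the new tunables are worth calling out.  First, the
defaults change: the old stack-local limits (1500 map entries, 256
splits) become 0, i.e. unlimited, per VNET.  Second, map_limit is a
SYSCTL_PROC rather than a plain SYSCTL_UINT because its handler rejects
nonzero values below TCP_MIN_MAP_ENTRIES_LIMIT (128).  A hypothetical
userland sketch of setting it, assuming the OID names above:

    #include <sys/types.h>
    #include <sys/sysctl.h>
    #include <err.h>

    int
    main(void)
    {
            uint32_t lim = 1500;    /* the old bbr/rack default */

            /* Values 1..127 fail with EINVAL; 0 means unlimited. */
            if (sysctlbyname("net.inet.tcp.map_limit", NULL, NULL,
                &lim, sizeof(lim)) == -1)
                    err(1, "net.inet.tcp.map_limit");
            return (0);
    }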

Modified: head/sys/netinet/tcp_var.h
==============================================================================
--- head/sys/netinet/tcp_var.h	Tue Dec 17 15:56:48 2019	(r355858)
+++ head/sys/netinet/tcp_var.h	Tue Dec 17 16:08:07 2019	(r355859)
@@ -235,6 +235,9 @@ struct tcptemp {
 	struct	tcphdr tt_t;
 };
 
+/* Minimum map entries limit value, if set */
+#define TCP_MIN_MAP_ENTRIES_LIMIT	128
+
 /* 
  * TODO: We yet need to brave plowing in
  * to tcp_input() and the pru_usrreq() block.
@@ -790,6 +793,8 @@ VNET_DECLARE(int, tcp_ecn_maxretries);
 VNET_DECLARE(int, tcp_initcwnd_segments);
 VNET_DECLARE(int, tcp_insecure_rst);
 VNET_DECLARE(int, tcp_insecure_syn);
+VNET_DECLARE(uint32_t, tcp_map_entries_limit);
+VNET_DECLARE(uint32_t, tcp_map_split_limit);
 VNET_DECLARE(int, tcp_minmss);
 VNET_DECLARE(int, tcp_mssdflt);
 #ifdef STATS
@@ -830,6 +835,8 @@ VNET_DECLARE(struct inpcbinfo, tcbinfo);
 #define	V_tcp_initcwnd_segments		VNET(tcp_initcwnd_segments)
 #define	V_tcp_insecure_rst		VNET(tcp_insecure_rst)
 #define	V_tcp_insecure_syn		VNET(tcp_insecure_syn)
+#define	V_tcp_map_entries_limit		VNET(tcp_map_entries_limit)
+#define	V_tcp_map_split_limit		VNET(tcp_map_split_limit)
 #define	V_tcp_minmss			VNET(tcp_minmss)
 #define	V_tcp_mssdflt			VNET(tcp_mssdflt)
 #ifdef STATS
@@ -845,7 +852,6 @@ VNET_DECLARE(struct inpcbinfo, tcbinfo);
 #define	V_tcp_udp_tunneling_overhead	VNET(tcp_udp_tunneling_overhead)
 #define	V_tcp_udp_tunneling_port	VNET(tcp_udp_tunneling_port)
 
-
 #ifdef TCP_HHOOK
 VNET_DECLARE(struct hhook_head *, tcp_hhh[HHOOK_TCP_LAST + 1]);
 #define	V_tcp_hhh		VNET(tcp_hhh)
@@ -914,6 +920,19 @@ extern counter_u64_t tcp_inp_lro_compressed;
 extern counter_u64_t tcp_inp_lro_single_push;
 extern counter_u64_t tcp_inp_lro_locks_taken;
 extern counter_u64_t tcp_inp_lro_sack_wake;
+
+#ifdef NETFLIX_EXP_DETECTION
+/* Various SACK attack thresholds */
+extern int32_t tcp_force_detection;
+extern int32_t tcp_sack_to_ack_thresh;
+extern int32_t tcp_sack_to_move_thresh;
+extern int32_t tcp_restoral_thresh;
+extern int32_t tcp_sad_decay_val;
+extern int32_t tcp_sad_pacing_interval;
+extern int32_t tcp_sad_low_pps;
+extern int32_t tcp_map_minimum;
+extern int32_t tcp_attack_on_turns_on_logging;
+#endif
 
 uint32_t tcp_maxmtu(struct in_conninfo *, struct tcp_ifcap *);
 uint32_t tcp_maxmtu6(struct in_conninfo *, struct tcp_ifcap *);

