svn commit: r293284 - in head/sys: dev/cxgb/ulp/tom dev/cxgbe/tom netinet

Gleb Smirnoff glebius at FreeBSD.org
Thu Jan 7 00:14:44 UTC 2016


Author: glebius
Date: Thu Jan  7 00:14:42 2016
New Revision: 293284
URL: https://svnweb.freebsd.org/changeset/base/293284

Log:
  Historically we have had two fields in tcpcb to describe the sender MSS:
  t_maxopd and t_maxseg. This dualism emerged with T/TCP, but was not
  properly cleaned up after T/TCP removal. After all the permutations over
  the years, the result is that t_maxopd stores the minimum of the
  peer-offered MSS and the MTU reduced by the minimum protocol header, and
  t_maxseg stores (t_maxopd - TCPOLEN_TSTAMP_APPA) if timestamps are in
  action, or is equal to t_maxopd otherwise. That's a very rough estimate
  of MSS reduced by options length. Throughout the code it was used in
  places where precision was not important, such as cwnd or ssthresh
  calculations.
  
  With this change:
  
  - t_maxopd goes away.
  - t_maxseg now stores MSS not adjusted by options.
  - a new function, tcp_maxseg(), is provided that calculates MSS reduced by
    options length. The function gives a better estimate, since it takes
    SACK state into account as well.
  
  Reviewed by:	jtl
  Differential Revision:	https://reviews.freebsd.org/D3593

Modified:
  head/sys/dev/cxgb/ulp/tom/cxgb_cpl_io.c
  head/sys/dev/cxgbe/tom/t4_cpl_io.c
  head/sys/netinet/tcp_input.c
  head/sys/netinet/tcp_output.c
  head/sys/netinet/tcp_subr.c
  head/sys/netinet/tcp_timer.c
  head/sys/netinet/tcp_usrreq.c
  head/sys/netinet/tcp_var.h

Modified: head/sys/dev/cxgb/ulp/tom/cxgb_cpl_io.c
==============================================================================
--- head/sys/dev/cxgb/ulp/tom/cxgb_cpl_io.c	Wed Jan  6 22:02:08 2016	(r293283)
+++ head/sys/dev/cxgb/ulp/tom/cxgb_cpl_io.c	Thu Jan  7 00:14:42 2016	(r293284)
@@ -1536,14 +1536,13 @@ assign_rxopt(struct tcpcb *tp, uint16_t 
 	struct toepcb *toep = tp->t_toe;
 	struct adapter *sc = toep->tp_tod->tod_softc;
 
-	tp->t_maxseg = tp->t_maxopd = sc->params.mtus[G_TCPOPT_MSS(tcpopt)] - 40;
+	tp->t_maxseg = sc->params.mtus[G_TCPOPT_MSS(tcpopt)] - 40;
 
 	if (G_TCPOPT_TSTAMP(tcpopt)) {
 		tp->t_flags |= TF_RCVD_TSTMP;
 		tp->t_flags |= TF_REQ_TSTMP;	/* forcibly set */
 		tp->ts_recent = 0;		/* XXX */
 		tp->ts_recent_age = tcp_ts_getticks();
-		tp->t_maxseg -= TCPOLEN_TSTAMP_APPA;
 	}
 
 	if (G_TCPOPT_SACK(tcpopt))

Modified: head/sys/dev/cxgbe/tom/t4_cpl_io.c
==============================================================================
--- head/sys/dev/cxgbe/tom/t4_cpl_io.c	Wed Jan  6 22:02:08 2016	(r293283)
+++ head/sys/dev/cxgbe/tom/t4_cpl_io.c	Thu Jan  7 00:14:42 2016	(r293284)
@@ -221,7 +221,7 @@ assign_rxopt(struct tcpcb *tp, unsigned 
 		n = sizeof(struct ip6_hdr) + sizeof(struct tcphdr);
 	else
 		n = sizeof(struct ip) + sizeof(struct tcphdr);
-	tp->t_maxseg = tp->t_maxopd = sc->params.mtus[G_TCPOPT_MSS(opt)] - n;
+	tp->t_maxseg = sc->params.mtus[G_TCPOPT_MSS(opt)] - n;
 
 	CTR4(KTR_CXGBE, "%s: tid %d, mtu_idx %u (%u)", __func__, toep->tid,
 	    G_TCPOPT_MSS(opt), sc->params.mtus[G_TCPOPT_MSS(opt)]);
@@ -230,7 +230,6 @@ assign_rxopt(struct tcpcb *tp, unsigned 
 		tp->t_flags |= TF_RCVD_TSTMP;	/* timestamps ok */
 		tp->ts_recent = 0;		/* hmmm */
 		tp->ts_recent_age = tcp_ts_getticks();
-		tp->t_maxseg -= TCPOLEN_TSTAMP_APPA;
 	}
 
 	if (G_TCPOPT_SACK(opt))

Modified: head/sys/netinet/tcp_input.c
==============================================================================
--- head/sys/netinet/tcp_input.c	Wed Jan  6 22:02:08 2016	(r293283)
+++ head/sys/netinet/tcp_input.c	Thu Jan  7 00:14:42 2016	(r293284)
@@ -290,7 +290,7 @@ cc_ack_received(struct tcpcb *tp, struct
 	if (type == CC_ACK) {
 		if (tp->snd_cwnd > tp->snd_ssthresh) {
 			tp->t_bytes_acked += min(tp->ccv->bytes_this_ack,
-			     V_tcp_abc_l_var * tp->t_maxseg);
+			     V_tcp_abc_l_var * tcp_maxseg(tp));
 			if (tp->t_bytes_acked >= tp->snd_cwnd) {
 				tp->t_bytes_acked -= tp->snd_cwnd;
 				tp->ccv->flags |= CCF_ABC_SENTAWND;
@@ -313,11 +313,13 @@ cc_conn_init(struct tcpcb *tp)
 {
 	struct hc_metrics_lite metrics;
 	struct inpcb *inp = tp->t_inpcb;
+	u_int maxseg;
 	int rtt;
 
 	INP_WLOCK_ASSERT(tp->t_inpcb);
 
 	tcp_hc_get(&inp->inp_inc, &metrics);
+	maxseg = tcp_maxseg(tp);
 
 	if (tp->t_srtt == 0 && (rtt = metrics.rmx_rtt)) {
 		tp->t_srtt = rtt;
@@ -342,7 +344,7 @@ cc_conn_init(struct tcpcb *tp)
 		 * the slow start threshhold, but set the
 		 * threshold to no less than 2*mss.
 		 */
-		tp->snd_ssthresh = max(2 * tp->t_maxseg, metrics.rmx_ssthresh);
+		tp->snd_ssthresh = max(2 * maxseg, metrics.rmx_ssthresh);
 		TCPSTAT_INC(tcps_usedssthresh);
 	}
 
@@ -359,21 +361,20 @@ cc_conn_init(struct tcpcb *tp)
 	 * requiring us to be cautious.
 	 */
 	if (tp->snd_cwnd == 1)
-		tp->snd_cwnd = tp->t_maxseg;		/* SYN(-ACK) lost */
+		tp->snd_cwnd = maxseg;		/* SYN(-ACK) lost */
 	else if (V_tcp_initcwnd_segments)
-		tp->snd_cwnd = min(V_tcp_initcwnd_segments * tp->t_maxseg,
-		    max(2 * tp->t_maxseg, V_tcp_initcwnd_segments * 1460));
+		tp->snd_cwnd = min(V_tcp_initcwnd_segments * maxseg,
+		    max(2 * maxseg, V_tcp_initcwnd_segments * 1460));
 	else if (V_tcp_do_rfc3390)
-		tp->snd_cwnd = min(4 * tp->t_maxseg,
-		    max(2 * tp->t_maxseg, 4380));
+		tp->snd_cwnd = min(4 * maxseg, max(2 * maxseg, 4380));
 	else {
 		/* Per RFC5681 Section 3.1 */
-		if (tp->t_maxseg > 2190)
-			tp->snd_cwnd = 2 * tp->t_maxseg;
-		else if (tp->t_maxseg > 1095)
-			tp->snd_cwnd = 3 * tp->t_maxseg;
+		if (maxseg > 2190)
+			tp->snd_cwnd = 2 * maxseg;
+		else if (maxseg > 1095)
+			tp->snd_cwnd = 3 * maxseg;
 		else
-			tp->snd_cwnd = 4 * tp->t_maxseg;
+			tp->snd_cwnd = 4 * maxseg;
 	}
 
 	if (CC_ALGO(tp)->conn_init != NULL)
@@ -383,6 +384,8 @@ cc_conn_init(struct tcpcb *tp)
 void inline
 cc_cong_signal(struct tcpcb *tp, struct tcphdr *th, uint32_t type)
 {
+	u_int maxseg;
+
 	INP_WLOCK_ASSERT(tp->t_inpcb);
 
 	switch(type) {
@@ -402,12 +405,13 @@ cc_cong_signal(struct tcpcb *tp, struct 
 		}
 		break;
 	case CC_RTO:
+		maxseg = tcp_maxseg(tp);
 		tp->t_dupacks = 0;
 		tp->t_bytes_acked = 0;
 		EXIT_RECOVERY(tp->t_flags);
 		tp->snd_ssthresh = max(2, min(tp->snd_wnd, tp->snd_cwnd) / 2 /
-		    tp->t_maxseg) * tp->t_maxseg;
-		tp->snd_cwnd = tp->t_maxseg;
+		    maxseg) * maxseg;
+		tp->snd_cwnd = maxseg;
 		break;
 	case CC_RTO_ERR:
 		TCPSTAT_INC(tcps_sndrexmitbad);
@@ -469,13 +473,11 @@ tcp_signature_verify_input(struct mbuf *
  *	  the ack that opens up a 0-sized window.
  *	- LRO wasn't used for this segment. We make sure by checking that the
  *	  segment size is not larger than the MSS.
- *	- Delayed acks are enabled or this is a half-synchronized T/TCP
- *	  connection.
  */
 #define DELAY_ACK(tp, tlen)						\
 	((!tcp_timer_active(tp, TT_DELACK) &&				\
 	    (tp->t_flags & TF_RXWIN0SENT) == 0) &&			\
-	    (tlen <= tp->t_maxopd) &&					\
+	    (tlen <= tp->t_maxseg) &&					\
 	    (V_tcp_delack_enabled || (tp->t_flags & TF_NEEDSYN)))
 
 static void inline
@@ -2481,6 +2483,9 @@ tcp_do_segment(struct mbuf *m, struct tc
 		hhook_run_tcp_est_in(tp, th, &to);
 
 		if (SEQ_LEQ(th->th_ack, tp->snd_una)) {
+			u_int maxseg;
+
+			maxseg = tcp_maxseg(tp);
 			if (tlen == 0 &&
 			    (tiwin == tp->snd_wnd ||
 			    (tp->t_flags & TF_SACK_PERMIT))) {
@@ -2560,12 +2565,12 @@ tcp_do_segment(struct mbuf *m, struct tc
 								tp->sackhint.sack_bytes_rexmit;
 
 						if (awnd < tp->snd_ssthresh) {
-							tp->snd_cwnd += tp->t_maxseg;
+							tp->snd_cwnd += maxseg;
 							if (tp->snd_cwnd > tp->snd_ssthresh)
 								tp->snd_cwnd = tp->snd_ssthresh;
 						}
 					} else
-						tp->snd_cwnd += tp->t_maxseg;
+						tp->snd_cwnd += maxseg;
 					(void) tp->t_fb->tfb_tcp_output(tp);
 					goto drop;
 				} else if (tp->t_dupacks == tcprexmtthresh) {
@@ -2599,18 +2604,18 @@ tcp_do_segment(struct mbuf *m, struct tc
 						TCPSTAT_INC(
 						    tcps_sack_recovery_episode);
 						tp->sack_newdata = tp->snd_nxt;
-						tp->snd_cwnd = tp->t_maxseg;
+						tp->snd_cwnd = maxseg;
 						(void) tp->t_fb->tfb_tcp_output(tp);
 						goto drop;
 					}
 					tp->snd_nxt = th->th_ack;
-					tp->snd_cwnd = tp->t_maxseg;
+					tp->snd_cwnd = maxseg;
 					(void) tp->t_fb->tfb_tcp_output(tp);
 					KASSERT(tp->snd_limited <= 2,
 					    ("%s: tp->snd_limited too big",
 					    __func__));
 					tp->snd_cwnd = tp->snd_ssthresh +
-					     tp->t_maxseg *
+					     maxseg *
 					     (tp->t_dupacks - tp->snd_limited);
 					if (SEQ_GT(onxt, tp->snd_nxt))
 						tp->snd_nxt = onxt;
@@ -2641,7 +2646,7 @@ tcp_do_segment(struct mbuf *m, struct tc
 					tp->snd_cwnd =
 					    (tp->snd_nxt - tp->snd_una) +
 					    (tp->t_dupacks - tp->snd_limited) *
-					    tp->t_maxseg;
+					    maxseg;
 					/*
 					 * Only call tcp_output when there
 					 * is new data available to be sent.
@@ -2654,10 +2659,10 @@ tcp_do_segment(struct mbuf *m, struct tc
 					if (avail > 0)
 						(void) tp->t_fb->tfb_tcp_output(tp);
 					sent = tp->snd_max - oldsndmax;
-					if (sent > tp->t_maxseg) {
+					if (sent > maxseg) {
 						KASSERT((tp->t_dupacks == 2 &&
 						    tp->snd_limited == 0) ||
-						   (sent == tp->t_maxseg + 1 &&
+						   (sent == maxseg + 1 &&
 						    tp->t_flags & TF_SENTFIN),
 						    ("%s: sent too much",
 						    __func__));
@@ -3510,11 +3515,9 @@ tcp_xmit_timer(struct tcpcb *tp, int rtt
  * While looking at the routing entry, we also initialize other path-dependent
  * parameters from pre-set or cached values in the routing entry.
  *
- * Also take into account the space needed for options that we
- * send regularly.  Make maxseg shorter by that amount to assure
- * that we can send maxseg amount of data even when the options
- * are present.  Store the upper limit of the length of options plus
- * data in maxopd.
+ * NOTE that resulting t_maxseg doesn't include space for TCP options or
+ * IP options, e.g. IPSEC data, since length of this data may vary, and
+ * thus it is calculated for every segment separately in tcp_output().
  *
  * NOTE that this routine is only called when we process an incoming
  * segment, or an ICMP need fragmentation datagram. Outgoing SYN/ACK MSS
@@ -3528,7 +3531,6 @@ tcp_mss_update(struct tcpcb *tp, int off
 	u_long maxmtu = 0;
 	struct inpcb *inp = tp->t_inpcb;
 	struct hc_metrics_lite metrics;
-	int origoffer;
 #ifdef INET6
 	int isipv6 = ((inp->inp_vflag & INP_IPV6) != 0) ? 1 : 0;
 	size_t min_protoh = isipv6 ?
@@ -3544,13 +3546,12 @@ tcp_mss_update(struct tcpcb *tp, int off
 		KASSERT(offer == -1, ("%s: conflict", __func__));
 		offer = mtuoffer - min_protoh;
 	}
-	origoffer = offer;
 
 	/* Initialize. */
 #ifdef INET6
 	if (isipv6) {
 		maxmtu = tcp_maxmtu6(&inp->inp_inc, cap);
-		tp->t_maxopd = tp->t_maxseg = V_tcp_v6mssdflt;
+		tp->t_maxseg = V_tcp_v6mssdflt;
 	}
 #endif
 #if defined(INET) && defined(INET6)
@@ -3559,7 +3560,7 @@ tcp_mss_update(struct tcpcb *tp, int off
 #ifdef INET
 	{
 		maxmtu = tcp_maxmtu(&inp->inp_inc, cap);
-		tp->t_maxopd = tp->t_maxseg = V_tcp_mssdflt;
+		tp->t_maxseg = V_tcp_mssdflt;
 	}
 #endif
 
@@ -3583,9 +3584,9 @@ tcp_mss_update(struct tcpcb *tp, int off
 			/*
 			 * Offer == 0 means that there was no MSS on the SYN
 			 * segment, in this case we use tcp_mssdflt as
-			 * already assigned to t_maxopd above.
+			 * already assigned to t_maxseg above.
 			 */
-			offer = tp->t_maxopd;
+			offer = tp->t_maxseg;
 			break;
 
 		case -1:
@@ -3657,31 +3658,15 @@ tcp_mss_update(struct tcpcb *tp, int off
 	mss = min(mss, offer);
 
 	/*
-	 * Sanity check: make sure that maxopd will be large
+	 * Sanity check: make sure that maxseg will be large
 	 * enough to allow some data on segments even if the
 	 * all the option space is used (40bytes).  Otherwise
 	 * funny things may happen in tcp_output.
+	 *
+	 * XXXGL: shouldn't we reserve space for IP/IPv6 options?
 	 */
 	mss = max(mss, 64);
 
-	/*
-	 * maxopd stores the maximum length of data AND options
-	 * in a segment; maxseg is the amount of data in a normal
-	 * segment.  We need to store this value (maxopd) apart
-	 * from maxseg, because now every segment carries options
-	 * and thus we normally have somewhat less data in segments.
-	 */
-	tp->t_maxopd = mss;
-
-	/*
-	 * origoffer==-1 indicates that no segments were received yet.
-	 * In this case we just guess.
-	 */
-	if ((tp->t_flags & (TF_REQ_TSTMP|TF_NOOPT)) == TF_REQ_TSTMP &&
-	    (origoffer == -1 ||
-	     (tp->t_flags & TF_RCVD_TSTMP) == TF_RCVD_TSTMP))
-		mss -= TCPOLEN_TSTAMP_APPA;
-
 	tp->t_maxseg = mss;
 }
 
@@ -3804,7 +3789,8 @@ void
 tcp_newreno_partial_ack(struct tcpcb *tp, struct tcphdr *th)
 {
 	tcp_seq onxt = tp->snd_nxt;
-	u_long  ocwnd = tp->snd_cwnd;
+	u_long ocwnd = tp->snd_cwnd;
+	u_int maxseg = tcp_maxseg(tp);
 
 	INP_WLOCK_ASSERT(tp->t_inpcb);
 
@@ -3815,7 +3801,7 @@ tcp_newreno_partial_ack(struct tcpcb *tp
 	 * Set snd_cwnd to one segment beyond acknowledged offset.
 	 * (tp->snd_una has not yet been updated when this function is called.)
 	 */
-	tp->snd_cwnd = tp->t_maxseg + BYTES_THIS_ACK(tp, th);
+	tp->snd_cwnd = maxseg + BYTES_THIS_ACK(tp, th);
 	tp->t_flags |= TF_ACKNOW;
 	(void) tp->t_fb->tfb_tcp_output(tp);
 	tp->snd_cwnd = ocwnd;
@@ -3829,7 +3815,7 @@ tcp_newreno_partial_ack(struct tcpcb *tp
 		tp->snd_cwnd -= BYTES_THIS_ACK(tp, th);
 	else
 		tp->snd_cwnd = 0;
-	tp->snd_cwnd += tp->t_maxseg;
+	tp->snd_cwnd += maxseg;
 }
 
 int

Modified: head/sys/netinet/tcp_output.c
==============================================================================
--- head/sys/netinet/tcp_output.c	Wed Jan  6 22:02:08 2016	(r293283)
+++ head/sys/netinet/tcp_output.c	Thu Jan  7 00:14:42 2016	(r293284)
@@ -830,11 +830,11 @@ send:
 
 	/*
 	 * Adjust data length if insertion of options will
-	 * bump the packet length beyond the t_maxopd length.
+	 * bump the packet length beyond the t_maxseg length.
 	 * Clear the FIN bit because we cut off the tail of
 	 * the segment.
 	 */
-	if (len + optlen + ipoptlen > tp->t_maxopd) {
+	if (len + optlen + ipoptlen > tp->t_maxseg) {
 		flags &= ~TH_FIN;
 
 		if (tso) {
@@ -937,7 +937,7 @@ send:
 			 * fractional unless the send sockbuf can be
 			 * emptied:
 			 */
-			max_len = (tp->t_maxopd - optlen);
+			max_len = (tp->t_maxseg - optlen);
 			if ((off + len) < sbavail(&so->so_snd)) {
 				moff = len % max_len;
 				if (moff != 0) {
@@ -967,7 +967,7 @@ send:
 				sendalot = 1;
 
 		} else {
-			len = tp->t_maxopd - optlen - ipoptlen;
+			len = tp->t_maxseg - optlen - ipoptlen;
 			sendalot = 1;
 		}
 	} else
@@ -1277,10 +1277,10 @@ send:
 	 * The TCP pseudo header checksum is always provided.
 	 */
 	if (tso) {
-		KASSERT(len > tp->t_maxopd - optlen,
+		KASSERT(len > tp->t_maxseg - optlen,
 		    ("%s: len <= tso_segsz", __func__));
 		m->m_pkthdr.csum_flags |= CSUM_TSO;
-		m->m_pkthdr.tso_segsz = tp->t_maxopd - optlen;
+		m->m_pkthdr.tso_segsz = tp->t_maxseg - optlen;
 	}
 
 #ifdef IPSEC
@@ -1348,7 +1348,7 @@ send:
 		 */
 		ip6->ip6_plen = htons(m->m_pkthdr.len - sizeof(*ip6));
 
-		if (V_path_mtu_discovery && tp->t_maxopd > V_tcp_minmss)
+		if (V_path_mtu_discovery && tp->t_maxseg > V_tcp_minmss)
 			tp->t_flags2 |= TF2_PLPMTU_PMTUD;
 		else
 			tp->t_flags2 &= ~TF2_PLPMTU_PMTUD;
@@ -1394,7 +1394,7 @@ send:
 	 *
 	 * NB: Don't set DF on small MTU/MSS to have a safe fallback.
 	 */
-	if (V_path_mtu_discovery && tp->t_maxopd > V_tcp_minmss) {
+	if (V_path_mtu_discovery && tp->t_maxseg > V_tcp_minmss) {
 		ip->ip_off |= htons(IP_DF);
 		tp->t_flags2 |= TF2_PLPMTU_PMTUD;
 	} else {

Modified: head/sys/netinet/tcp_subr.c
==============================================================================
--- head/sys/netinet/tcp_subr.c	Wed Jan  6 22:02:08 2016	(r293283)
+++ head/sys/netinet/tcp_subr.c	Thu Jan  7 00:14:42 2016	(r293284)
@@ -1087,7 +1087,7 @@ tcp_newtcpcb(struct inpcb *inp)
 #endif
 	tp->t_timers = &tm->tt;
 	/*	LIST_INIT(&tp->t_segq); */	/* XXX covered by M_ZERO */
-	tp->t_maxseg = tp->t_maxopd =
+	tp->t_maxseg =
 #ifdef INET6
 		isipv6 ? V_tcp_v6mssdflt :
 #endif /* INET6 */
@@ -1901,7 +1901,7 @@ tcp_ctlinput(int cmd, struct sockaddr *s
 					 * Only process the offered MTU if it
 					 * is smaller than the current one.
 					 */
-					if (mtu < tp->t_maxopd +
+					if (mtu < tp->t_maxseg +
 					    sizeof(struct tcpiphdr)) {
 						bzero(&inc, sizeof(inc));
 						inc.inc_faddr = faddr;
@@ -2283,6 +2283,59 @@ tcp_maxmtu6(struct in_conninfo *inc, str
 }
 #endif /* INET6 */
 
+/*
+ * Calculate effective SMSS per RFC5681 definition for a given TCP
+ * connection at its current state, taking into account SACK and etc.
+ */
+u_int
+tcp_maxseg(const struct tcpcb *tp)
+{
+	u_int optlen;
+
+	if (tp->t_flags & TF_NOOPT)
+		return (tp->t_maxseg);
+
+	/*
+	 * Here we have a simplified code from tcp_addoptions(),
+	 * without a proper loop, and having most of paddings hardcoded.
+	 * We might make mistakes with padding here in some edge cases,
+	 * but this is harmless, since result of tcp_maxseg() is used
+	 * only in cwnd and ssthresh estimations.
+	 */
+#define	PAD(len)	((((len) / 4) + !!((len) % 4)) * 4)
+	if (TCPS_HAVEESTABLISHED(tp->t_state)) {
+		if (tp->t_flags & TF_RCVD_TSTMP)
+			optlen = TCPOLEN_TSTAMP_APPA;
+		else
+			optlen = 0;
+#ifdef TCP_SIGNATURE
+		if (tp->t_flags & TF_SIGNATURE)
+			optlen += PAD(TCPOLEN_SIGNATURE);
+#endif
+		if ((tp->t_flags & TF_SACK_PERMIT) && tp->rcv_numsacks > 0) {
+			optlen += TCPOLEN_SACKHDR;
+			optlen += tp->rcv_numsacks * TCPOLEN_SACK;
+			optlen = PAD(optlen);
+		}
+	} else {
+		if (tp->t_flags & TF_REQ_TSTMP)
+			optlen = TCPOLEN_TSTAMP_APPA;
+		else
+			optlen = PAD(TCPOLEN_MAXSEG);
+		if (tp->t_flags & TF_REQ_SCALE)
+			optlen += PAD(TCPOLEN_WINDOW);
+#ifdef TCP_SIGNATURE
+		if (tp->t_flags & TF_SIGNATURE)
+			optlen += PAD(TCPOLEN_SIGNATURE);
+#endif
+		if (tp->t_flags & TF_SACK_PERMIT)
+			optlen += PAD(TCPOLEN_SACK_PERMITTED);
+	}
+#undef PAD
+	optlen = min(optlen, TCP_MAXOLEN);
+	return (tp->t_maxseg - optlen);
+}
+
 #ifdef IPSEC
 /* compute ESP/AH header size for TCP, including outer IP header. */
 size_t

Modified: head/sys/netinet/tcp_timer.c
==============================================================================
--- head/sys/netinet/tcp_timer.c	Wed Jan  6 22:02:08 2016	(r293283)
+++ head/sys/netinet/tcp_timer.c	Thu Jan  7 00:14:42 2016	(r293284)
@@ -660,7 +660,6 @@ tcp_timer_rexmt(void * xtp)
 	 */
 	if (V_tcp_pmtud_blackhole_detect && (((tp->t_state == TCPS_ESTABLISHED))
 	    || (tp->t_state == TCPS_FIN_WAIT_1))) {
-		int optlen;
 #ifdef INET6
 		int isipv6;
 #endif
@@ -684,8 +683,7 @@ tcp_timer_rexmt(void * xtp)
 			tp->t_flags2 |= TF2_PLPMTU_BLACKHOLE;
 
 			/* Keep track of previous MSS. */
-			optlen = tp->t_maxopd - tp->t_maxseg;
-			tp->t_pmtud_saved_maxopd = tp->t_maxopd;
+			tp->t_pmtud_saved_maxseg = tp->t_maxseg;
 
 			/* 
 			 * Reduce the MSS to blackhole value or to the default
@@ -694,13 +692,13 @@ tcp_timer_rexmt(void * xtp)
 #ifdef INET6
 			isipv6 = (tp->t_inpcb->inp_vflag & INP_IPV6) ? 1 : 0;
 			if (isipv6 &&
-			    tp->t_maxopd > V_tcp_v6pmtud_blackhole_mss) {
+			    tp->t_maxseg > V_tcp_v6pmtud_blackhole_mss) {
 				/* Use the sysctl tuneable blackhole MSS. */
-				tp->t_maxopd = V_tcp_v6pmtud_blackhole_mss;
+				tp->t_maxseg = V_tcp_v6pmtud_blackhole_mss;
 				V_tcp_pmtud_blackhole_activated++;
 			} else if (isipv6) {
 				/* Use the default MSS. */
-				tp->t_maxopd = V_tcp_v6mssdflt;
+				tp->t_maxseg = V_tcp_v6mssdflt;
 				/*
 				 * Disable Path MTU Discovery when we switch to
 				 * minmss.
@@ -713,13 +711,13 @@ tcp_timer_rexmt(void * xtp)
 			else
 #endif
 #ifdef INET
-			if (tp->t_maxopd > V_tcp_pmtud_blackhole_mss) {
+			if (tp->t_maxseg > V_tcp_pmtud_blackhole_mss) {
 				/* Use the sysctl tuneable blackhole MSS. */
-				tp->t_maxopd = V_tcp_pmtud_blackhole_mss;
+				tp->t_maxseg = V_tcp_pmtud_blackhole_mss;
 				V_tcp_pmtud_blackhole_activated++;
 			} else {
 				/* Use the default MSS. */
-				tp->t_maxopd = V_tcp_mssdflt;
+				tp->t_maxseg = V_tcp_mssdflt;
 				/*
 				 * Disable Path MTU Discovery when we switch to
 				 * minmss.
@@ -728,7 +726,6 @@ tcp_timer_rexmt(void * xtp)
 				V_tcp_pmtud_blackhole_activated_min_mss++;
 			}
 #endif
-			tp->t_maxseg = tp->t_maxopd - optlen;
 			/*
 			 * Reset the slow-start flight size
 			 * as it may depend on the new MSS.
@@ -748,9 +745,7 @@ tcp_timer_rexmt(void * xtp)
 			    (tp->t_rxtshift > 6)) {
 				tp->t_flags2 |= TF2_PLPMTU_PMTUD;
 				tp->t_flags2 &= ~TF2_PLPMTU_BLACKHOLE;
-				optlen = tp->t_maxopd - tp->t_maxseg;
-				tp->t_maxopd = tp->t_pmtud_saved_maxopd;
-				tp->t_maxseg = tp->t_maxopd - optlen;
+				tp->t_maxseg = tp->t_pmtud_saved_maxseg;
 				V_tcp_pmtud_blackhole_failed++;
 				/*
 				 * Reset the slow-start flight size as it

Modified: head/sys/netinet/tcp_usrreq.c
==============================================================================
--- head/sys/netinet/tcp_usrreq.c	Wed Jan  6 22:02:08 2016	(r293283)
+++ head/sys/netinet/tcp_usrreq.c	Thu Jan  7 00:14:42 2016	(r293284)
@@ -904,8 +904,7 @@ tcp_usr_send(struct socket *so, int flag
 			/*
 			 * Do implied connect if not yet connected,
 			 * initialize window to default value, and
-			 * initialize maxseg/maxopd using peer's cached
-			 * MSS.
+			 * initialize maxseg using peer's cached MSS.
 			 */
 #ifdef INET6
 			if (isipv6)
@@ -964,8 +963,7 @@ tcp_usr_send(struct socket *so, int flag
 			/*
 			 * Do implied connect if not yet connected,
 			 * initialize window to default value, and
-			 * initialize maxseg/maxopd using peer's cached
-			 * MSS.
+			 * initialize maxseg using peer's cached MSS.
 			 */
 #ifdef INET6
 			if (isipv6)
@@ -2208,8 +2206,8 @@ db_print_tcpcb(struct tcpcb *tp, const c
 	    "0x%08x\n", tp->snd_ssthresh, tp->snd_recover);
 
 	db_print_indent(indent);
-	db_printf("t_maxopd: %u   t_rcvtime: %u   t_startime: %u\n",
-	    tp->t_maxopd, tp->t_rcvtime, tp->t_starttime);
+	db_printf("t_rcvtime: %u   t_startime: %u\n",
+	    tp->t_rcvtime, tp->t_starttime);
 
 	db_print_indent(indent);
 	db_printf("t_rttime: %u   t_rtsq: 0x%08x\n",

Modified: head/sys/netinet/tcp_var.h
==============================================================================
--- head/sys/netinet/tcp_var.h	Wed Jan  6 22:02:08 2016	(r293283)
+++ head/sys/netinet/tcp_var.h	Thu Jan  7 00:14:42 2016	(r293284)
@@ -180,8 +180,6 @@ struct tcpcb {
 	u_long	snd_spare2;		/* unused */
 	tcp_seq	snd_recover;		/* for use in NewReno Fast Recovery */
 
-	u_int	t_maxopd;		/* mss plus options */
-
 	u_int	t_rcvtime;		/* inactivity time */
 	u_int	t_starttime;		/* time connection was established */
 	u_int	t_rtttime;		/* RTT measurement start time */
@@ -192,6 +190,7 @@ struct tcpcb {
 
 	int	t_rxtcur;		/* current retransmit value (ticks) */
 	u_int	t_maxseg;		/* maximum segment size */
+	u_int	t_pmtud_saved_maxseg;	/* pre-blackhole MSS */
 	int	t_srtt;			/* smoothed round-trip time */
 	int	t_rttvar;		/* variance in round-trip time */
 
@@ -251,7 +250,6 @@ struct tcpcb {
 	u_int	t_tsomax;		/* TSO total burst length limit in bytes */
 	u_int	t_tsomaxsegcount;	/* TSO maximum segment count */
 	u_int	t_tsomaxsegsize;	/* TSO maximum segment size in bytes */
-	u_int	t_pmtud_saved_maxopd;	/* pre-blackhole MSS */
 	u_int	t_flags2;		/* More tcpcb flags storage */
 #if defined(_KERNEL) && defined(TCP_RFC7413)
 	uint32_t t_ispare[6];		/* 5 UTO, 1 TBD */
@@ -775,6 +773,7 @@ int tcp_default_ctloutput(struct socket 
 
 u_long	 tcp_maxmtu(struct in_conninfo *, struct tcp_ifcap *);
 u_long	 tcp_maxmtu6(struct in_conninfo *, struct tcp_ifcap *);
+u_int	 tcp_maxseg(const struct tcpcb *);
 void	 tcp_mss_update(struct tcpcb *, int, int, struct hc_metrics_lite *,
 	    struct tcp_ifcap *);
 void	 tcp_mss(struct tcpcb *, int);


More information about the svn-src-all mailing list