svn commit: r232945 - in stable/9: share/man/man4 sys/i386/conf sys/netinet sys/sys

Gleb Smirnoff glebius at FreeBSD.org
Tue Mar 13 20:37:58 UTC 2012


Author: glebius
Date: Tue Mar 13 20:37:57 2012
New Revision: 232945
URL: http://svn.freebsd.org/changeset/base/232945

Log:
  Merge 231025 from head:
    Add new socket options: TCP_KEEPINIT, TCP_KEEPIDLE, TCP_KEEPINTVL and
    TCP_KEEPCNT, which allow control of the initial timeout, idle time, idle
    re-send interval and idle send count on a per-socket basis.
  
    Reviewed by:  andre, bz, lstewart

Modified:
  stable/9/share/man/man4/tcp.4
  stable/9/sys/netinet/tcp.h
  stable/9/sys/netinet/tcp_input.c
  stable/9/sys/netinet/tcp_syncache.c
  stable/9/sys/netinet/tcp_timer.c
  stable/9/sys/netinet/tcp_timer.h
  stable/9/sys/netinet/tcp_usrreq.c
  stable/9/sys/netinet/tcp_var.h
  stable/9/sys/sys/param.h
Directory Properties:
  stable/9/share/man/   (props changed)
  stable/9/share/man/man4/   (props changed)
  stable/9/share/man/man5/   (props changed)
  stable/9/share/man/man7/   (props changed)
  stable/9/share/man/man9/   (props changed)
  stable/9/sys/   (props changed)
  stable/9/sys/amd64/include/xen/   (props changed)
  stable/9/sys/boot/   (props changed)
  stable/9/sys/boot/i386/efi/   (props changed)
  stable/9/sys/boot/ia64/efi/   (props changed)
  stable/9/sys/boot/ia64/ski/   (props changed)
  stable/9/sys/boot/powerpc/boot1.chrp/   (props changed)
  stable/9/sys/boot/powerpc/ofw/   (props changed)
  stable/9/sys/cddl/contrib/opensolaris/   (props changed)
  stable/9/sys/conf/   (props changed)
  stable/9/sys/contrib/dev/acpica/   (props changed)
  stable/9/sys/contrib/octeon-sdk/   (props changed)
  stable/9/sys/contrib/pf/   (props changed)
  stable/9/sys/contrib/x86emu/   (props changed)
  stable/9/sys/i386/conf/XENHVM   (props changed)

Modified: stable/9/share/man/man4/tcp.4
==============================================================================
--- stable/9/share/man/man4/tcp.4	Tue Mar 13 20:37:08 2012	(r232944)
+++ stable/9/share/man/man4/tcp.4	Tue Mar 13 20:37:57 2012	(r232945)
@@ -38,7 +38,7 @@
 .\"     From: @(#)tcp.4	8.1 (Berkeley) 6/5/93
 .\" $FreeBSD$
 .\"
-.Dd September 15, 2011
+.Dd February 5, 2012
 .Dt TCP 4
 .Os
 .Sh NAME
@@ -146,6 +146,65 @@ connection.
 See
 .Xr mod_cc 4
 for details.
+.It Dv TCP_KEEPINIT
+This write-only 
+.Xr setsockopt 2
+option accepts a per-socket timeout argument of
+.Vt "u_int"
+in seconds, for new, non-established
+.Tn TCP
+connections.
+For the global default in milliseconds see
+.Va keepinit
+in the
+.Sx MIB Variables
+section further down.
+.It Dv TCP_KEEPIDLE
+This write-only 
+.Xr setsockopt 2
+option accepts an argument of
+.Vt "u_int"
+for the amount of time, in seconds, that the connection must be idle
+before keepalive probes (if enabled) are sent for the connection of this
+socket.
+If set on a listening socket, the value is inherited by the newly created
+socket upon
+.Xr accept 2 .
+For the global default in milliseconds see
+.Va keepidle
+in the
+.Sx MIB Variables
+section further down.
+.It Dv TCP_KEEPINTVL
+This write-only 
+.Xr setsockopt 2
+option accepts an argument of
+.Vt "u_int"
+to set the per-socket interval, in seconds, between keepalive probes sent
+to a peer.
+If set on a listening socket, the value is inherited by the newly created
+socket upon
+.Xr accept 2 .
+For the global default in milliseconds see
+.Va keepintvl
+in the
+.Sx MIB Variables
+section further down.
+.It Dv TCP_KEEPCNT
+This write-only 
+.Xr setsockopt 2
+option accepts an argument of
+.Vt "u_int"
+and allows a per-socket tuning of the number of probes sent, with no response,
+before the connection will be dropped.
+If set on a listening socket, the value is inherited by the newly created
+socket upon
+.Xr accept 2 .
+For the global default see
+.Va keepcnt
+in the
+.Sx MIB Variables
+section further down.
 .It Dv TCP_NODELAY
 Under most circumstances,
 .Tn TCP
@@ -304,17 +363,21 @@ The Maximum Segment Lifetime, in millise
 Timeout, in milliseconds, for new, non-established
 .Tn TCP
 connections.
+The default is 75000 msec.
 .It Va keepidle
 Amount of time, in milliseconds, that the connection must be idle
 before keepalive probes (if enabled) are sent.
+The default is 7200000 msec (2 hours).
 .It Va keepintvl
 The interval, in milliseconds, between keepalive probes sent to remote
 machines, when no response is received on a
 .Va keepidle
 probe.
-After
-.Dv TCPTV_KEEPCNT
-(default 8) probes are sent, with no response, the connection is dropped.
+The default is 75000 msec.
+.It Va keepcnt
+Number of probes sent, with no response, before a connection
+is dropped.
+The default is 8 packets.
 .It Va always_keepalive
 Assume that
 .Dv SO_KEEPALIVE

Modified: stable/9/sys/netinet/tcp.h
==============================================================================
--- stable/9/sys/netinet/tcp.h	Tue Mar 13 20:37:08 2012	(r232944)
+++ stable/9/sys/netinet/tcp.h	Tue Mar 13 20:37:57 2012	(r232945)
@@ -159,6 +159,10 @@ struct tcphdr {
 #define TCP_MD5SIG	0x10	/* use MD5 digests (RFC2385) */
 #define	TCP_INFO	0x20	/* retrieve tcp_info structure */
 #define	TCP_CONGESTION	0x40	/* get/set congestion control algorithm */
+#define	TCP_KEEPINIT	0x80	/* N, time to establish connection */
+#define	TCP_KEEPIDLE	0x100	/* L,N,X start keepalives after this period */
+#define	TCP_KEEPINTVL	0x200	/* L,N interval between keepalives */
+#define	TCP_KEEPCNT	0x400	/* L,N number of keepalives before close */
 
 #define	TCP_CA_NAME_MAX	16	/* max congestion control name length */
 

Modified: stable/9/sys/netinet/tcp_input.c
==============================================================================
--- stable/9/sys/netinet/tcp_input.c	Tue Mar 13 20:37:08 2012	(r232944)
+++ stable/9/sys/netinet/tcp_input.c	Tue Mar 13 20:37:57 2012	(r232945)
@@ -1477,7 +1477,7 @@ tcp_do_segment(struct mbuf *m, struct tc
 	 */
 	tp->t_rcvtime = ticks;
 	if (TCPS_HAVEESTABLISHED(tp->t_state))
-		tcp_timer_activate(tp, TT_KEEP, tcp_keepidle);
+		tcp_timer_activate(tp, TT_KEEP, TP_KEEPIDLE(tp));
 
 	/*
 	 * Unscale the window into a 32-bit value.
@@ -1920,7 +1920,8 @@ tcp_do_segment(struct mbuf *m, struct tc
 			} else {
 				tp->t_state = TCPS_ESTABLISHED;
 				cc_conn_init(tp);
-				tcp_timer_activate(tp, TT_KEEP, tcp_keepidle);
+				tcp_timer_activate(tp, TT_KEEP,
+				    TP_KEEPIDLE(tp));
 			}
 		} else {
 			/*
@@ -2324,7 +2325,7 @@ tcp_do_segment(struct mbuf *m, struct tc
 		} else {
 			tp->t_state = TCPS_ESTABLISHED;
 			cc_conn_init(tp);
-			tcp_timer_activate(tp, TT_KEEP, tcp_keepidle);
+			tcp_timer_activate(tp, TT_KEEP, TP_KEEPIDLE(tp));
 		}
 		/*
 		 * If segment contains data or ACK, will call tcp_reass()
@@ -2661,12 +2662,11 @@ process_ACK:
 				 * compressed state.
 				 */
 				if (so->so_rcv.sb_state & SBS_CANTRCVMORE) {
-					int timeout;
-
 					soisdisconnected(so);
-					timeout = (tcp_fast_finwait2_recycle) ? 
-						tcp_finwait2_timeout : tcp_maxidle;
-					tcp_timer_activate(tp, TT_2MSL, timeout);
+					tcp_timer_activate(tp, TT_2MSL,
+					    (tcp_fast_finwait2_recycle ?
+					    tcp_finwait2_timeout :
+					    TP_MAXIDLE(tp)));
 				}
 				tp->t_state = TCPS_FIN_WAIT_2;
 			}

Modified: stable/9/sys/netinet/tcp_syncache.c
==============================================================================
--- stable/9/sys/netinet/tcp_syncache.c	Tue Mar 13 20:37:08 2012	(r232944)
+++ stable/9/sys/netinet/tcp_syncache.c	Tue Mar 13 20:37:57 2012	(r232945)
@@ -844,7 +844,15 @@ syncache_socket(struct syncache *sc, str
 	 */
 	if (sc->sc_rxmits > 1)
 		tp->snd_cwnd = tp->t_maxseg;
-	tcp_timer_activate(tp, TT_KEEP, tcp_keepinit);
+
+	/*
+	 * Copy and activate timers.
+	 */
+	tp->t_keepinit = sototcpcb(lso)->t_keepinit;
+	tp->t_keepidle = sototcpcb(lso)->t_keepidle;
+	tp->t_keepintvl = sototcpcb(lso)->t_keepintvl;
+	tp->t_keepcnt = sototcpcb(lso)->t_keepcnt;
+	tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp));
 
 	INP_WUNLOCK(inp);
 

Modified: stable/9/sys/netinet/tcp_timer.c
==============================================================================
--- stable/9/sys/netinet/tcp_timer.c	Tue Mar 13 20:37:08 2012	(r232944)
+++ stable/9/sys/netinet/tcp_timer.c	Tue Mar 13 20:37:57 2012	(r232945)
@@ -111,12 +111,12 @@ int    tcp_finwait2_timeout;
 SYSCTL_PROC(_net_inet_tcp, OID_AUTO, finwait2_timeout, CTLTYPE_INT|CTLFLAG_RW,
     &tcp_finwait2_timeout, 0, sysctl_msec_to_ticks, "I", "FIN-WAIT2 timeout");
 
+int	tcp_keepcnt = TCPTV_KEEPCNT;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, keepcnt, CTLFLAG_RW, &tcp_keepcnt, 0,
+    "Number of keepalive probes to send");
 
-static int	tcp_keepcnt = TCPTV_KEEPCNT;
 	/* max idle probes */
 int	tcp_maxpersistidle;
-	/* max idle time in persist */
-int	tcp_maxidle;
 
 static int	per_cpu_timers = 0;
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, per_cpu_timers, CTLFLAG_RW,
@@ -138,7 +138,6 @@ tcp_slowtimo(void)
 	VNET_LIST_RLOCK_NOSLEEP();
 	VNET_FOREACH(vnet_iter) {
 		CURVNET_SET(vnet_iter);
-		tcp_maxidle = tcp_keepcnt * tcp_keepintvl;
 		INP_INFO_WLOCK(&V_tcbinfo);
 		(void) tcp_tw_2msl_scan(0);
 		INP_INFO_WUNLOCK(&V_tcbinfo);
@@ -255,9 +254,9 @@ tcp_timer_2msl(void *xtp)
 		tp = tcp_close(tp);             
 	} else {
 		if (tp->t_state != TCPS_TIME_WAIT &&
-		   ticks - tp->t_rcvtime <= tcp_maxidle)
-		       callout_reset_on(&tp->t_timers->tt_2msl, tcp_keepintvl,
-			   tcp_timer_2msl, tp, INP_CPU(inp));
+		   ticks - tp->t_rcvtime <= TP_MAXIDLE(tp))
+		       callout_reset_on(&tp->t_timers->tt_2msl,
+			   TP_KEEPINTVL(tp), tcp_timer_2msl, tp, INP_CPU(inp));
 	       else
 		       tp = tcp_close(tp);
        }
@@ -318,7 +317,7 @@ tcp_timer_keep(void *xtp)
 		goto dropit;
 	if ((always_keepalive || inp->inp_socket->so_options & SO_KEEPALIVE) &&
 	    tp->t_state <= TCPS_CLOSING) {
-		if (ticks - tp->t_rcvtime >= tcp_keepidle + tcp_maxidle)
+		if (ticks - tp->t_rcvtime >= TP_KEEPIDLE(tp) + TP_MAXIDLE(tp))
 			goto dropit;
 		/*
 		 * Send a packet designed to force a response
@@ -340,9 +339,11 @@ tcp_timer_keep(void *xtp)
 				    tp->rcv_nxt, tp->snd_una - 1, 0);
 			free(t_template, M_TEMP);
 		}
-		callout_reset_on(&tp->t_timers->tt_keep, tcp_keepintvl, tcp_timer_keep, tp, INP_CPU(inp));
+		callout_reset_on(&tp->t_timers->tt_keep, TP_KEEPINTVL(tp),
+		    tcp_timer_keep, tp, INP_CPU(inp));
 	} else
-		callout_reset_on(&tp->t_timers->tt_keep, tcp_keepidle, tcp_timer_keep, tp, INP_CPU(inp));
+		callout_reset_on(&tp->t_timers->tt_keep, TP_KEEPIDLE(tp),
+		    tcp_timer_keep, tp, INP_CPU(inp));
 
 #ifdef TCPDEBUG
 	if (inp->inp_socket->so_options & SO_DEBUG)

Modified: stable/9/sys/netinet/tcp_timer.h
==============================================================================
--- stable/9/sys/netinet/tcp_timer.h	Tue Mar 13 20:37:08 2012	(r232944)
+++ stable/9/sys/netinet/tcp_timer.h	Tue Mar 13 20:37:57 2012	(r232945)
@@ -153,10 +153,16 @@ struct tcp_timer {
 #define TT_KEEP		0x08
 #define TT_2MSL		0x10
 
+#define	TP_KEEPINIT(tp)	((tp)->t_keepinit ? (tp)->t_keepinit : tcp_keepinit)
+#define	TP_KEEPIDLE(tp)	((tp)->t_keepidle ? (tp)->t_keepidle : tcp_keepidle)
+#define	TP_KEEPINTVL(tp) ((tp)->t_keepintvl ? (tp)->t_keepintvl : tcp_keepintvl)
+#define	TP_KEEPCNT(tp)	((tp)->t_keepcnt ? (tp)->t_keepcnt : tcp_keepcnt)
+#define	TP_MAXIDLE(tp)	(TP_KEEPCNT(tp) * TP_KEEPINTVL(tp))
+
 extern int tcp_keepinit;		/* time to establish connection */
 extern int tcp_keepidle;		/* time before keepalive probes begin */
 extern int tcp_keepintvl;		/* time between keepalive probes */
-extern int tcp_maxidle;			/* time to drop after starting probes */
+extern int tcp_keepcnt;			/* number of keepalives */
 extern int tcp_delacktime;		/* time before sending a delayed ACK */
 extern int tcp_maxpersistidle;
 extern int tcp_rexmit_min;

Modified: stable/9/sys/netinet/tcp_usrreq.c
==============================================================================
--- stable/9/sys/netinet/tcp_usrreq.c	Tue Mar 13 20:37:08 2012	(r232944)
+++ stable/9/sys/netinet/tcp_usrreq.c	Tue Mar 13 20:37:57 2012	(r232945)
@@ -45,6 +45,7 @@ __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/systm.h>
+#include <sys/limits.h>
 #include <sys/malloc.h>
 #include <sys/kernel.h>
 #include <sys/sysctl.h>
@@ -1118,7 +1119,7 @@ tcp_connect(struct tcpcb *tp, struct soc
 	soisconnecting(so);
 	TCPSTAT_INC(tcps_connattempt);
 	tp->t_state = TCPS_SYN_SENT;
-	tcp_timer_activate(tp, TT_KEEP, tcp_keepinit);
+	tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp));
 	tp->iss = tcp_new_isn(tp);
 	tcp_sendseqinit(tp);
 
@@ -1191,7 +1192,7 @@ tcp6_connect(struct tcpcb *tp, struct so
 	soisconnecting(so);
 	TCPSTAT_INC(tcps_connattempt);
 	tp->t_state = TCPS_SYN_SENT;
-	tcp_timer_activate(tp, TT_KEEP, tcp_keepinit);
+	tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp));
 	tp->iss = tcp_new_isn(tp);
 	tcp_sendseqinit(tp);
 
@@ -1272,6 +1273,7 @@ int
 tcp_ctloutput(struct socket *so, struct sockopt *sopt)
 {
 	int	error, opt, optval;
+	u_int	ui;
 	struct	inpcb *inp;
 	struct	tcpcb *tp;
 	struct	tcp_info ti;
@@ -1439,6 +1441,59 @@ tcp_ctloutput(struct socket *so, struct 
 			INP_WUNLOCK(inp);
 			break;
 
+		case TCP_KEEPIDLE:
+		case TCP_KEEPINTVL:
+		case TCP_KEEPCNT:
+		case TCP_KEEPINIT:
+			INP_WUNLOCK(inp);
+			error = sooptcopyin(sopt, &ui, sizeof(ui), sizeof(ui));
+			if (error)
+				return (error);
+
+			if (ui > (UINT_MAX / hz)) {
+				error = EINVAL;
+				break;
+			}
+			ui *= hz;
+
+			INP_WLOCK_RECHECK(inp);
+			switch (sopt->sopt_name) {
+			case TCP_KEEPIDLE:
+				tp->t_keepidle = ui;
+				/*
+				 * XXX: better check current remaining
+				 * timeout and "merge" it with new value.
+				 */
+				if ((tp->t_state > TCPS_LISTEN) &&
+				    (tp->t_state <= TCPS_CLOSING))
+					tcp_timer_activate(tp, TT_KEEP,
+					    TP_KEEPIDLE(tp));
+				break;
+			case TCP_KEEPINTVL:
+				tp->t_keepintvl = ui;
+				if ((tp->t_state == TCPS_FIN_WAIT_2) &&
+				    (TP_MAXIDLE(tp) > 0))
+					tcp_timer_activate(tp, TT_2MSL,
+					    TP_MAXIDLE(tp));
+				break;
+			case TCP_KEEPCNT:
+				tp->t_keepcnt = ui;
+				if ((tp->t_state == TCPS_FIN_WAIT_2) &&
+				    (TP_MAXIDLE(tp) > 0))
+					tcp_timer_activate(tp, TT_2MSL,
+					    TP_MAXIDLE(tp));
+				break;
+			case TCP_KEEPINIT:
+				tp->t_keepinit = ui;
+				if (tp->t_state == TCPS_SYN_RECEIVED ||
+				    tp->t_state == TCPS_SYN_SENT)
+					tcp_timer_activate(tp, TT_KEEP,
+					    TP_KEEPINIT(tp));
+				break;
+			}
+			INP_WUNLOCK(inp);
+			break;
+
 		default:
 			INP_WUNLOCK(inp);
 			error = ENOPROTOOPT;
@@ -1648,7 +1703,7 @@ tcp_usrclosed(struct tcpcb *tp)
 			int timeout;
 
 			timeout = (tcp_fast_finwait2_recycle) ? 
-			    tcp_finwait2_timeout : tcp_maxidle;
+			    tcp_finwait2_timeout : TP_MAXIDLE(tp);
 			tcp_timer_activate(tp, TT_2MSL, timeout);
 		}
 	}

Modified: stable/9/sys/netinet/tcp_var.h
==============================================================================
--- stable/9/sys/netinet/tcp_var.h	Tue Mar 13 20:37:08 2012	(r232944)
+++ stable/9/sys/netinet/tcp_var.h	Tue Mar 13 20:37:57 2012	(r232945)
@@ -203,7 +203,12 @@ struct tcpcb {
 	struct cc_var	*ccv;		/* congestion control specific vars */
 	struct osd	*osd;		/* storage for Khelp module data */
 
-	uint32_t t_ispare[12];		/* 4 keep timers, 5 UTO, 3 TBD */
+	u_int	t_keepinit;		/* time to establish connection */
+	u_int	t_keepidle;		/* time before keepalive probes begin */
+	u_int	t_keepintvl;		/* interval between keepalives */
+	u_int	t_keepcnt;		/* number of keepalives before close */
+
+	uint32_t t_ispare[8];		/* 5 UTO, 3 TBD */
 	void	*t_pspare2[4];		/* 4 TBD */
 	uint64_t _pad[6];		/* 6 TBD (1-2 CC/RTT?) */
 };

Modified: stable/9/sys/sys/param.h
==============================================================================
--- stable/9/sys/sys/param.h	Tue Mar 13 20:37:08 2012	(r232944)
+++ stable/9/sys/sys/param.h	Tue Mar 13 20:37:57 2012	(r232945)
@@ -58,7 +58,7 @@
  *		in the range 5 to 9.
  */
 #undef __FreeBSD_version
-#define __FreeBSD_version 900504	/* Master, propagated to newvers */
+#define __FreeBSD_version 900505	/* Master, propagated to newvers */
 
 /*
  * __FreeBSD_kernel__ indicates that this system uses the kernel of FreeBSD,


More information about the svn-src-stable-9 mailing list