svn commit: r333041 - head/sys/netinet

Randall Stewart rrs at FreeBSD.org
Thu Apr 26 21:41:17 UTC 2018


Author: rrs
Date: Thu Apr 26 21:41:16 2018
New Revision: 333041
URL: https://svnweb.freebsd.org/changeset/base/333041

Log:
  This change re-arranges the fields within the tcp-pcb so that
  they are more in order of cache line use as one passes
  through the tcp_input/output paths (the most likely, non-error
  path). This improves cache line utilization so that the tcp stack
  runs a bit more efficiently.
  
  Sponsored by:	Netflix Inc.
  Differential Revision:	https://reviews.freebsd.org/D15136

Modified:
  head/sys/netinet/tcp_var.h

Modified: head/sys/netinet/tcp_var.h
==============================================================================
--- head/sys/netinet/tcp_var.h	Thu Apr 26 21:40:05 2018	(r333040)
+++ head/sys/netinet/tcp_var.h	Thu Apr 26 21:41:16 2018	(r333041)
@@ -83,125 +83,123 @@ STAILQ_HEAD(tcp_log_stailq, tcp_log_mem);
 
 /*
  * Tcp control block, one per tcp; fields:
- * Organized for 16 byte cacheline efficiency.
+ * Organized for 64 byte cacheline efficiency based
+ * on common tcp_input/tcp_output processing.
  */
 struct tcpcb {
-	struct	tsegqe_head t_segq;	/* segment reassembly queue */
-	int	t_segqlen;		/* segment reassembly queue length */
-	int	t_dupacks;		/* consecutive dup acks recd */
-
-	struct mbuf      *t_in_pkt;	/* head of the input packet queue for the tcp_hpts system */
-	struct mbuf	 *t_tail_pkt;	/* tail of the input packet queue for the tcp_hpts system */
-	struct tcp_timer *t_timers;	/* All the TCP timers in one struct */
-
+	/* Cache line 1 */
 	struct	inpcb *t_inpcb;		/* back pointer to internet pcb */
-	int	t_state;		/* state of this connection */
+	struct tcp_function_block *t_fb;/* TCP function call block */
+	void	*t_fb_ptr;		/* Pointer to t_fb specific data */
+	uint32_t t_maxseg:24,		/* maximum segment size */
+		t_logstate:8;		/* State of "black box" logging */
+	uint32_t t_state:4,		/* state of this connection */
+		bits_spare : 24;
 	u_int	t_flags;
-
-	struct	vnet *t_vnet;		/* back pointer to parent vnet */
-
 	tcp_seq	snd_una;		/* sent but unacknowledged */
 	tcp_seq	snd_max;		/* highest sequence number sent;
 					 * used to recognize retransmits
 					 */
 	tcp_seq	snd_nxt;		/* send next */
 	tcp_seq	snd_up;			/* send urgent pointer */
-
-	tcp_seq	snd_wl1;		/* window update seg seq number */
-	tcp_seq	snd_wl2;		/* window update seg ack number */
-	tcp_seq	iss;			/* initial send sequence number */
-	tcp_seq	irs;			/* initial receive sequence number */
-
+	uint32_t  snd_wnd;		/* send window */
+	uint32_t  snd_cwnd;		/* congestion-controlled window */
+	uint32_t cl1_spare; 		/* Spare to round out CL 1 */
+	/* Cache line 2 */
+	u_int32_t  ts_offset;		/* our timestamp offset */
+	u_int32_t	rfbuf_ts;	/* recv buffer autoscaling timestamp */
+	int	rcv_numsacks;		/* # distinct sack blks present */
+	u_int	t_tsomax;		/* TSO total burst length limit in bytes */
+	u_int	t_tsomaxsegcount;	/* TSO maximum segment count */
+	u_int	t_tsomaxsegsize;	/* TSO maximum segment size in bytes */
 	tcp_seq	rcv_nxt;		/* receive next */
 	tcp_seq	rcv_adv;		/* advertised window */
 	uint32_t  rcv_wnd;		/* receive window */
+	u_int	t_flags2;		/* More tcpcb flags storage */
+	int	t_srtt;			/* smoothed round-trip time */
+	int	t_rttvar;		/* variance in round-trip time */
+	u_int32_t  ts_recent;		/* timestamp echo data */
+	u_char	snd_scale;		/* window scaling for send window */
+	u_char	rcv_scale;		/* window scaling for recv window */
+	u_char	snd_limited;		/* segments limited transmitted */
+	u_char	request_r_scale;	/* pending window scaling */
+	tcp_seq	last_ack_sent;
+	u_int	t_rcvtime;		/* inactivity time */
+	/* Cache line 3 */
 	tcp_seq	rcv_up;			/* receive urgent pointer */
-
-	uint32_t  snd_wnd;		/* send window */
-	uint32_t  snd_cwnd;		/* congestion-controlled window */
+	int	t_segqlen;		/* segment reassembly queue length */
+	struct	tsegqe_head t_segq;	/* segment reassembly queue */
+	struct mbuf      *t_in_pkt;
+	struct mbuf	 *t_tail_pkt;
+	struct tcp_timer *t_timers;	/* All the TCP timers in one struct */
+	struct	vnet *t_vnet;		/* back pointer to parent vnet */
 	uint32_t  snd_ssthresh;		/* snd_cwnd size threshold for
 					 * for slow start exponential to
 					 * linear switch
 					 */
+	tcp_seq	snd_wl1;		/* window update seg seq number */
+	/* Cache line 4 */
+	tcp_seq	snd_wl2;		/* window update seg ack number */
+
+	tcp_seq	irs;			/* initial receive sequence number */
+	tcp_seq	iss;		        /* initial send sequence number */
+	u_int   t_acktime;
+	u_int	ts_recent_age;		/* when last updated */
 	tcp_seq	snd_recover;		/* for use in NewReno Fast Recovery */
+	uint16_t cl4_spare;		/* Spare to adjust CL 4 */
+	char	t_oobflags;		/* have some */
+	char	t_iobc;			/* input character */
+	int	t_rxtcur;		/* current retransmit value (ticks) */
 
-	u_int	t_rcvtime;		/* inactivity time */
-	u_int	t_starttime;		/* time connection was established */
+	int	t_rxtshift;		/* log(2) of rexmt exp. backoff */
 	u_int	t_rtttime;		/* RTT measurement start time */
+
 	tcp_seq	t_rtseq;		/* sequence number being timed */
+	u_int	t_starttime;		/* time connection was established */
 
-	int	t_rxtcur;		/* current retransmit value (ticks) */
-	u_int	t_maxseg;		/* maximum segment size */
 	u_int	t_pmtud_saved_maxseg;	/* pre-blackhole MSS */
-	int	t_srtt;			/* smoothed round-trip time */
-	int	t_rttvar;		/* variance in round-trip time */
-
-	int	t_rxtshift;		/* log(2) of rexmt exp. backoff */
 	u_int	t_rttmin;		/* minimum rtt allowed */
+
 	u_int	t_rttbest;		/* best rtt we've seen */
-	u_long	t_rttupdated;		/* number of times rtt sampled */
-	uint32_t  max_sndwnd;		/* largest window peer has offered */
 
 	int	t_softerror;		/* possible error not yet reported */
-/* out-of-band data */
-	char	t_oobflags;		/* have some */
-	char	t_iobc;			/* input character */
-/* RFC 1323 variables */
-	u_char	snd_scale;		/* window scaling for send window */
-	u_char	rcv_scale;		/* window scaling for recv window */
-	u_char	request_r_scale;	/* pending window scaling */
-	u_int32_t  ts_recent;		/* timestamp echo data */
-	u_int	ts_recent_age;		/* when last updated */
-	u_int32_t  ts_offset;		/* our timestamp offset */
-
-	tcp_seq	last_ack_sent;
-/* experimental */
+	uint32_t  max_sndwnd;		/* largest window peer has offered */
+	/* Cache line 5 */
 	uint32_t  snd_cwnd_prev;	/* cwnd prior to retransmit */
 	uint32_t  snd_ssthresh_prev;	/* ssthresh prior to retransmit */
 	tcp_seq	snd_recover_prev;	/* snd_recover prior to retransmit */
 	int	t_sndzerowin;		/* zero-window updates sent */
-	u_int	t_badrxtwin;		/* window for retransmit recovery */
-	u_char	snd_limited;		/* segments limited transmitted */
-/* SACK related state */
+	u_long	t_rttupdated;		/* number of times rtt sampled */
 	int	snd_numholes;		/* number of holes seen by sender */
+	u_int	t_badrxtwin;		/* window for retransmit recovery */
 	TAILQ_HEAD(sackhole_head, sackhole) snd_holes;
 					/* SACK scoreboard (sorted) */
 	tcp_seq	snd_fack;		/* last seq number(+1) sack'd by rcv'r*/
-	int	rcv_numsacks;		/* # distinct sack blks present */
-	struct sackblk sackblks[MAX_SACK_BLKS]; /* seq nos. of sack blocks */
 	tcp_seq sack_newdata;		/* New data xmitted in this recovery
 					   episode starts at this seq number */
+	struct sackblk sackblks[MAX_SACK_BLKS]; /* seq nos. of sack blocks */
 	struct sackhint	sackhint;	/* SACK scoreboard hint */
 	int	t_rttlow;		/* smallest observerved RTT */
-	u_int32_t	rfbuf_ts;	/* recv buffer autoscaling timestamp */
 	int	rfbuf_cnt;		/* recv buffer autoscaling byte count */
 	struct toedev	*tod;		/* toedev handling this connection */
 	int	t_sndrexmitpack;	/* retransmit packets sent */
 	int	t_rcvoopack;		/* out-of-order packets received */
 	void	*t_toe;			/* TOE pcb pointer */
-	int	t_bytes_acked;		/* # bytes acked during current RTT */
 	struct cc_algo	*cc_algo;	/* congestion control algorithm */
 	struct cc_var	*ccv;		/* congestion control specific vars */
 	struct osd	*osd;		/* storage for Khelp module data */
-
+	int	t_bytes_acked;		/* # bytes acked during current RTT */
 	u_int	t_keepinit;		/* time to establish connection */
 	u_int	t_keepidle;		/* time before keepalive probes begin */
 	u_int	t_keepintvl;		/* interval between keepalives */
 	u_int	t_keepcnt;		/* number of keepalives before close */
-
-	u_int	t_tsomax;		/* TSO total burst length limit in bytes */
-	u_int	t_tsomaxsegcount;	/* TSO maximum segment count */
-	u_int	t_tsomaxsegsize;	/* TSO maximum segment size in bytes */
-	u_int	t_flags2;		/* More tcpcb flags storage */
-	int	t_logstate;		/* State of "black box" logging */
-	struct tcp_log_stailq t_logs;	/* Log buffer */
+	int	t_dupacks;		/* consecutive dup acks recd */
 	int	t_lognum;		/* Number of log entries */
-	uint32_t t_logsn;		/* Log "serial number" */
+	struct tcp_log_stailq t_logs;	/* Log buffer */
 	struct tcp_log_id_node *t_lin;
 	struct tcp_log_id_bucket *t_lib;
 	const char *t_output_caller;	/* Function that called tcp_output */
-	struct tcp_function_block *t_fb;/* TCP function call block */
-	void	*t_fb_ptr;		/* Pointer to t_fb specific data */
+	uint32_t t_logsn;		/* Log "serial number" */
 	uint8_t t_tfo_client_cookie_len; /* TCP Fast Open client cookie length */
 	unsigned int *t_tfo_pending;	/* TCP Fast Open server pending counter */
 	union {


More information about the svn-src-all mailing list