TCP and syncache question

Wed Nov 19 12:01:11 PST 2008

On 17 Nov 2008, at 22:40, Andre Oppermann wrote:
> This is a bit more complicated because of interactions with  
> tcp_input()
> where syncache_expand() is called from.
>
> The old code (as of December 2002) behaved slightly different.  It  
> would
> not remove the syncache entry when (SND.UNA == SEG.ACK) but send a  
> RST.
> The (RCV.NXT =< SEG.SEQ+SEG.LEN-1 < RCV.NXT+RCV.WND) test wasn't  
> done at
> all.  Instead a socket was opened whenever (SND.UNA == SEG.ACK)  
> succeeded.
> This gave way to the "LAND" DoS attack which was mostly fixed with a  
> test
> for (RCV.IRS < SEG.SEQ).
>
> See the attached patch for fixed version of syncache_expand().  This  
> patch
> is untested though.  My development machine is currently down.   
> Harti, Rui
> and Bjoern, please have a look at the patch and review it.
>
> -- 
> Andre
>
> --- tcp_input.c.1.390	Mon Nov 17 21:33:25 2008
> +++ tcp_input.c.1.390.mod	Mon Nov 17 21:35:22 2008
> @@ -642,10 +642,13 @@ findpcb:
> 			if (!syncache_expand(&inc, &to, th, &so, m)) {
> 				/*
> 				 * No syncache entry or ACK was not
> -				 * for our SYN/ACK.  Send a RST.
> +				 * for our SYN/ACK.  Send a RST or
> +				 * an ACK for re-synchronization.
> 				 * NB: syncache did its own logging
> 				 * of the failure cause.
> 				 */
> +				if (so == 1)
> +					goto dropunlock;
> 				rstreason = BANDLIM_RST_OPENPORT;
> 				goto dropwithreset;
> 			}
> --- tcp_syncache.c.1.160	Mon Nov 17 16:49:01 2008
> +++ tcp_syncache.c.1.160.mod	Mon Nov 17 23:35:39 2008
> @@ -817,59 +817,47 @@ syncache_expand(struct in_conninfo *inc,
> 	INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
> 	KASSERT((th->th_flags & (TH_RST|TH_ACK|TH_SYN)) == TH_ACK,
> 	    ("%s: can handle only ACK", __func__));
> +	*lsop = NULL;
>
> 	sc = syncache_lookup(inc, &sch);	/* returns locked sch */
> 	SCH_LOCK_ASSERT(sch);
> 	if (sc == NULL) {
> 		/*
> 		 * There is no syncache entry, so see if this ACK is
> -		 * a returning syncookie.  To do this, first:
> -		 *  A. See if this socket has had a syncache entry dropped in
> -		 *     the past.  We don't want to accept a bogus syncookie
> -		 *     if we've never received a SYN.
> -		 *  B. check that the syncookie is valid.  If it is, then
> -		 *     cobble up a fake syncache entry, and return.
> +		 * a returning syncookie.  If the syncookie is valid,
> +		 * cobble up a fake syncache entry and create a socket.
> +		 *
> +		 * NB: Syncache head is locked for the syncookie access.
> 		 */
> 		if (!tcp_syncookies) {
> -			SCH_UNLOCK(sch);
> 			if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
> 				log(LOG_DEBUG, "%s; %s: Spurious ACK, "
> 				    "segment rejected (syncookies disabled)\n",
> 				    s, __func__);
> -			goto failed;
> +			goto sendrst;
> 		}
> 		bzero(&scs, sizeof(scs));
> 		sc = syncookie_lookup(inc, sch, &scs, to, th, *lsop);
> -		SCH_UNLOCK(sch);
> 		if (sc == NULL) {
> 			if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
> 				log(LOG_DEBUG, "%s; %s: Segment failed "
> 				    "SYNCOOKIE authentication, segment rejected "
> 				    "(probably spoofed)\n", s, __func__);
> -			goto failed;
> +			goto sendrst;
> 		}
> -	} else {
> -		/* Pull out the entry to unlock the bucket row. */
> -		TAILQ_REMOVE(&sch->sch_bucket, sc, sc_hash);
> -		sch->sch_length--;
> -		V_tcp_syncache.cache_count--;
> -		SCH_UNLOCK(sch);
> +		goto expand;		/* fully validated through syncookie */
> 	}
> +	SCH_LOCK_ASSERT(sch);

Why do you need this assert?

>
>
> 	/*
> 	 * Segment validation:
> -	 * ACK must match our initial sequence number + 1 (the SYN|ACK).
> -	 */
> -	if (th->th_ack != sc->sc_iss + 1 && !TOEPCB_ISSET(sc)) {
> -		if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
> -			log(LOG_DEBUG, "%s; %s: ACK %u != ISS+1 %u, segment "
> -			    "rejected\n", s, __func__, th->th_ack, sc->sc_iss);
> -		goto failed;
> -	}
> -
> -	/*
> +	 *
> 	 * The SEQ must fall in the window starting at the received
> 	 * initial receive sequence number + 1 (the SYN).
> +	 * If not the segment may be from an earlier connection.  We send
> +	 * an ACK to re-synchronize the connection and keep the syncache
> +	 * entry without ajusting its timers.
> +	 * See RFC793 page 69, first check sequence number [SYN_RECEIVED].
> 	 */
> 	if ((SEQ_LEQ(th->th_seq, sc->sc_irs) ||
> 	    SEQ_GT(th->th_seq, sc->sc_irs + sc->sc_wnd)) &&
> @@ -877,14 +865,41 @@ syncache_expand(struct in_conninfo *inc,
> 		if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
> 			log(LOG_DEBUG, "%s; %s: SEQ %u != IRS+1 %u, segment "
> 			    "rejected\n", s, __func__, th->th_seq, sc->sc_irs);
> -		goto failed;
> +		(void) syncache_respond(sc);
> +		*lsop = 1;			/* prevent RST */
> +		goto sendrstkeep;
> 	}
>
> +	/*
> +	 * ACK must match our initial sequence number + 1 (the SYN|ACK).
> +	 * If not the segment may be from an earlier connection.  We send
> +	 * a RST but keep the syncache entry without ajusting its timers.
> +	 * See RFC793 page 72, fifth check the ACK field, [SYN_RECEIVED].
> +	 */
> +	if (th->th_ack != sc->sc_iss + 1 && !TOEPCB_ISSET(sc)) {
> +		if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
> +			log(LOG_DEBUG, "%s; %s: ACK %u != ISS+1 %u, segment "
> +			    "rejected\n", s, __func__, th->th_ack, sc->sc_iss);
> +		goto sendrstkeep;
> +	}
> +
> +	/*
> +	 * Remove the entry to unlock the bucket row.
> +	 * Tests from now on are fatal and remove the syncache entry.
> +	 */
> +	TAILQ_REMOVE(&sch->sch_bucket, sc, sc_hash);
> +	sch->sch_length--;
> +	V_tcp_syncache.cache_count--;
> +
> +	/*
> +	 * If timestamps were not negotiated they must not show up later.
> +	 * See RFC1312bis, section 1.3, second paragraph
> +	 */
> 	if (!(sc->sc_flags & SCF_TIMESTAMP) && (to->to_flags & TOF_TS)) {
> 		if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
> 			log(LOG_DEBUG, "%s; %s: Timestamp not expected, "
> 			    "segment rejected\n", s, __func__);
> -		goto failed;
> +		goto sendrst;
> 	}
> 	/*
> 	 * If timestamps were negotiated the reflected timestamp
> @@ -896,9 +911,11 @@ syncache_expand(struct in_conninfo *inc,
> 			log(LOG_DEBUG, "%s; %s: TSECR %u != TS %u, "
> 			    "segment rejected\n",
> 			    s, __func__, to->to_tsecr, sc->sc_ts);
> -		goto failed;
> +		goto sendrst;
> 	}
>
> +expand:
> +	SCH_UNLOCK(sch);
> 	*lsop = syncache_socket(sc, *lsop, m);
>
> 	if (*lsop == NULL)
> @@ -906,16 +923,18 @@ syncache_expand(struct in_conninfo *inc,
> 	else
> 		V_tcpstat.tcps_sc_completed++;
>
> -/* how do we find the inp for the new socket? */
> 	if (sc != &scs)
> 		syncache_free(sc);
> 	return (1);
> -failed:
> -	if (sc != NULL && sc != &scs)
> +
> +sendrst:
> +	if (sc != &scs)
> 		syncache_free(sc);
> +sendrstkeep:
> +	SCH_LOCK_ASSERT(sch);
> +	SCH_UNLOCK(sch);

Why do we need an assert before an unlock?

>
> 	if (s != NULL)
> 		free(s, M_TCPLOG);
> -	*lsop = NULL;
> 	return (0);
> }
>

This was probably out of scope:

> @@ -1322,6 +1341,8 @@ syncache_respond(struct syncache *sc)
> 		 *
> 		 *	1) path_mtu_discovery is disabled
> 		 *	2) the SCF_UNREACH flag has been set
> +		 *
> +		 * XXXAO: The route lookup comment doesn't make sense.
> 		 */
> 		if (V_path_mtu_discovery && ((sc->sc_flags & SCF_UNREACH) == 0))
> 		       ip->ip_off |= IP_DF;

I won't be able to test this any time soon, so I can't really comment  
on the rest, but it *looks* okay.

Regards,
--
Rui Paulo