git: 4747500deaaa - main - tcp: A better fix for the previously attempted fix of the ack-war issue with tcp.

Kevin Bowling kevin.bowling at kev009.com
Fri Jun 4 17:25:53 UTC 2021


Will you MFC this?

On Fri, Jun 4, 2021 at 2:29 AM Randall Stewart <rrs at freebsd.org> wrote:
>
> The branch main has been updated by rrs:
>
> URL: https://cgit.FreeBSD.org/src/commit/?id=4747500deaaa7765ba1c0413197c23ddba4faf49
>
> commit 4747500deaaa7765ba1c0413197c23ddba4faf49
> Author:     Randall Stewart <rrs at FreeBSD.org>
> AuthorDate: 2021-06-04 09:26:43 +0000
> Commit:     Randall Stewart <rrs at FreeBSD.org>
> CommitDate: 2021-06-04 09:26:43 +0000
>
>     tcp: A better fix for the previously attempted fix of the ack-war issue with tcp.
>
>     So it turns out that my fix before was not correct. It ended with us failing
>     some of the "improved" SYN tests, since we are not in the correct states.
>     With more digging I have figured out the root of the problem is that when
>     we receive a SYN|FIN the reassembly code made it so we create a segq entry
>     to hold the FIN. In the established state where we were not in order this
>     would be correct i.e. a 0 len with a FIN would need to be accepted. But
>     if you are in a front state we need to strip the FIN so we correctly handle
>     the ACK but ignore the FIN. This gets us into the proper states
>     and avoids the previous ack war.
>
>     I back out some of the previous changes but then add a new change
>     here in tcp_reass() that fixes the root cause of the issue. We still
>     leave the rack panic fixes in place however.
>
>     Reviewed by: mtuexen
>     Sponsored by: Netflix Inc
>     Differential Revision:  https://reviews.freebsd.org/D30627
> ---
>  sys/netinet/tcp_input.c       | 12 +++---------
>  sys/netinet/tcp_reass.c       | 14 ++++++++++++++
>  sys/netinet/tcp_stacks/bbr.c  | 12 +++---------
>  sys/netinet/tcp_stacks/rack.c | 13 ++++---------
>  sys/netinet/tcp_usrreq.c      | 16 ----------------
>  5 files changed, 24 insertions(+), 43 deletions(-)
>
> diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c
> index 1dab2e511d95..e71a11bdef05 100644
> --- a/sys/netinet/tcp_input.c
> +++ b/sys/netinet/tcp_input.c
> @@ -3191,15 +3191,9 @@ dodata:                                                  /* XXX */
>                          * when trimming from the head.
>                          */
>                         tcp_seq temp = save_start;
> -                       if (tlen || (th->th_seq != tp->rcv_nxt)) {
> -                               /*
> -                                * We add the th_seq != rcv_nxt to
> -                                * catch the case of a stand alone out
> -                                * of order FIN.
> -                                */
> -                               thflags = tcp_reass(tp, th, &temp, &tlen, m);
> -                               tp->t_flags |= TF_ACKNOW;
> -                       }
> +
> +                       thflags = tcp_reass(tp, th, &temp, &tlen, m);
> +                       tp->t_flags |= TF_ACKNOW;
>                 }
>                 if ((tp->t_flags & TF_SACK_PERMIT) &&
>                     (save_tlen > 0) &&
> diff --git a/sys/netinet/tcp_reass.c b/sys/netinet/tcp_reass.c
> index 8e24e7412473..5b9255da3acf 100644
> --- a/sys/netinet/tcp_reass.c
> +++ b/sys/netinet/tcp_reass.c
> @@ -571,6 +571,7 @@ tcp_reass(struct tcpcb *tp, struct tcphdr *th, tcp_seq *seq_start,
>                  * the rcv_nxt <-> rcv_wnd but thats
>                  * already done for us by the caller.
>                  */
> +strip_fin:
>  #ifdef TCP_REASS_COUNTERS
>                 counter_u64_add(tcp_zero_input, 1);
>  #endif
> @@ -579,6 +580,19 @@ tcp_reass(struct tcpcb *tp, struct tcphdr *th, tcp_seq *seq_start,
>                 tcp_reass_log_dump(tp);
>  #endif
>                 return (0);
> +       } else if ((*tlenp == 0) &&
> +                  (th->th_flags & TH_FIN) &&
> +                  !TCPS_HAVEESTABLISHED(tp->t_state)) {
> +               /*
> +                * We have not established, and we
> +                * have a FIN and no data. Lets treat
> +                * this as the same as if the FIN were
> +                * not present. We don't want to save
> +                * the FIN bit in a reassembly buffer
> +                * we want to get established first before
> +                * we do that (the peer will retransmit).
> +                */
> +               goto strip_fin;
>         }
>         /*
>          * Will it fit?
> diff --git a/sys/netinet/tcp_stacks/bbr.c b/sys/netinet/tcp_stacks/bbr.c
> index 5b97c3d7800f..f6388c39cad3 100644
> --- a/sys/netinet/tcp_stacks/bbr.c
> +++ b/sys/netinet/tcp_stacks/bbr.c
> @@ -8320,15 +8320,9 @@ bbr_process_data(struct mbuf *m, struct tcphdr *th, struct socket *so,
>                          * trimming from the head.
>                          */
>                         tcp_seq temp = save_start;
> -                       if (tlen || (th->th_seq != tp->rcv_nxt)) {
> -                               /*
> -                                * We add the th_seq != rcv_nxt to
> -                                * catch the case of a stand alone out
> -                                * of order FIN.
> -                                */
> -                               thflags = tcp_reass(tp, th, &temp, &tlen, m);
> -                               tp->t_flags |= TF_ACKNOW;
> -                       }
> +
> +                       thflags = tcp_reass(tp, th, &temp, &tlen, m);
> +                       tp->t_flags |= TF_ACKNOW;
>                 }
>                 if ((tp->t_flags & TF_SACK_PERMIT) &&
>                     (save_tlen > 0) &&
> diff --git a/sys/netinet/tcp_stacks/rack.c b/sys/netinet/tcp_stacks/rack.c
> index 1db09e30d968..98b8ff836ca5 100644
> --- a/sys/netinet/tcp_stacks/rack.c
> +++ b/sys/netinet/tcp_stacks/rack.c
> @@ -10235,15 +10235,10 @@ rack_process_data(struct mbuf *m, struct tcphdr *th, struct socket *so,
>                          * trimming from the head.
>                          */
>                         tcp_seq temp = save_start;
> -                       if (tlen || (th->th_seq != tp->rcv_nxt)) {
> -                               /*
> -                                * We add the th_seq != rcv_nxt to
> -                                * catch the case of a stand alone out
> -                                * of order FIN.
> -                                */
> -                               thflags = tcp_reass(tp, th, &temp, &tlen, m);
> -                               tp->t_flags |= TF_ACKNOW;
> -                       }
> +
> +                       thflags = tcp_reass(tp, th, &temp, &tlen, m);
> +                       tp->t_flags |= TF_ACKNOW;
> +
>                 }
>                 if ((tp->t_flags & TF_SACK_PERMIT) &&
>                     (save_tlen > 0) &&
> diff --git a/sys/netinet/tcp_usrreq.c b/sys/netinet/tcp_usrreq.c
> index 7f1b698408e5..b1fc584f93b2 100644
> --- a/sys/netinet/tcp_usrreq.c
> +++ b/sys/netinet/tcp_usrreq.c
> @@ -2647,22 +2647,6 @@ tcp_usrclosed(struct tcpcb *tp)
>                 tcp_state_change(tp, TCPS_LAST_ACK);
>                 break;
>         }
> -       if ((tp->t_state == TCPS_LAST_ACK) &&
> -           (tp->t_flags & TF_SENTFIN)) {
> -               /*
> -                * If we have reached LAST_ACK, and
> -                * we sent a FIN (e.g. via MSG_EOR), then
> -                * we really should move to either FIN_WAIT_1
> -                * or FIN_WAIT_2 depending on snd_max/snd_una.
> -                */
> -               if (tp->snd_una == tp->snd_max) {
> -                       /* The FIN is acked */
> -                       tcp_state_change(tp, TCPS_FIN_WAIT_2);
> -               } else {
> -                       /* The FIN is still outstanding */
> -                       tcp_state_change(tp, TCPS_FIN_WAIT_1);
> -               }
> -       }
>         if (tp->t_state >= TCPS_FIN_WAIT_2) {
>                 soisdisconnected(tp->t_inpcb->inp_socket);
>                 /* Prevent the connection hanging in FIN_WAIT_2 forever. */
> _______________________________________________
> dev-commits-src-main at freebsd.org mailing list
> https://lists.freebsd.org/mailman/listinfo/dev-commits-src-main
> To unsubscribe, send any mail to "dev-commits-src-main-unsubscribe at freebsd.org"


More information about the dev-commits-src-all mailing list