git: ca1a7e1021a9 - main - tcp: TCP_LRO getting bad checksums and sending it in to TCP incorrectly.

Randall Stewart rrs at FreeBSD.org
Tue Jul 13 16:47:12 UTC 2021


The branch main has been updated by rrs:

URL: https://cgit.FreeBSD.org/src/commit/?id=ca1a7e1021a9d38cca0eb52dd9f018aad74b1960

commit ca1a7e1021a9d38cca0eb52dd9f018aad74b1960
Author:     Randall Stewart <rrs at FreeBSD.org>
AuthorDate: 2021-07-13 16:45:15 +0000
Commit:     Randall Stewart <rrs at FreeBSD.org>
CommitDate: 2021-07-13 16:45:15 +0000

    tcp: TCP_LRO getting bad checksums and sending it in to TCP incorrectly.
    
    In reviewing tcp_lro.c we have a possibility that some drives may send a mbuf into
    LRO without making sure that the checksum passes. Some drivers actually are
    aware of this and do not call lro when the csum failed, others do not do this and
    thus could end up sending data up that we think has a checksum passing when
    it does not.
    
    This change will fix that situation by properly verifying that the mbuf
    has the correct markings (CSUM VALID bits as well as csum in mbuf header
    is set to 0xffff).
    
    Reviewed by: tuexen, hselasky, gallatin
    Sponsored by: Netflix Inc.
    Differential Revision: https://reviews.freebsd.org/D31155
---
 sys/netinet/tcp_lro.c  | 30 ++++++++++++++++++++++++++----
 sys/netinet/tcp_subr.c |  1 +
 sys/netinet/tcp_var.h  |  1 +
 3 files changed, 28 insertions(+), 4 deletions(-)

diff --git a/sys/netinet/tcp_lro.c b/sys/netinet/tcp_lro.c
index 4e1480f2c002..23e64b29b296 100644
--- a/sys/netinet/tcp_lro.c
+++ b/sys/netinet/tcp_lro.c
@@ -101,6 +101,7 @@ counter_u64_t tcp_extra_mbuf;
 counter_u64_t tcp_would_have_but;
 counter_u64_t tcp_comp_total;
 counter_u64_t tcp_uncomp_total;
+counter_u64_t tcp_bad_csums;
 
 static unsigned	tcp_lro_entries = TCP_LRO_ENTRIES;
 SYSCTL_UINT(_net_inet_tcp_lro, OID_AUTO, entries,
@@ -128,6 +129,8 @@ SYSCTL_COUNTER_U64(_net_inet_tcp_lro, OID_AUTO, with_m_ackcmp, CTLFLAG_RD,
     &tcp_comp_total, "Number of mbufs queued with M_ACKCMP flags set");
 SYSCTL_COUNTER_U64(_net_inet_tcp_lro, OID_AUTO, without_m_ackcmp, CTLFLAG_RD,
     &tcp_uncomp_total, "Number of mbufs queued without M_ACKCMP");
+SYSCTL_COUNTER_U64(_net_inet_tcp_lro, OID_AUTO, lro_badcsum, CTLFLAG_RD,
+    &tcp_bad_csums, "Number of packets that the common code saw with bad csums");
 
 void
 tcp_lro_reg_mbufq(void)
@@ -1740,7 +1743,17 @@ tcp_lro_rx_common(struct lro_ctrl *lc, struct mbuf *m, uint32_t csum, bool use_h
 	if (__predict_false(V_ip6_forwarding != 0))
 		return (TCP_LRO_CANNOT);
 #endif
-
+	if (((m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) !=
+	     ((CSUM_DATA_VALID | CSUM_PSEUDO_HDR))) || 
+	    (m->m_pkthdr.csum_data != 0xffff)) {
+		/* 
+		 * The checksum either did not have hardware offload
+		 * or it was a bad checksum. We can't LRO such
+		 * a packet.
+		 */
+		counter_u64_add(tcp_bad_csums, 1);
+		return (TCP_LRO_CANNOT);
+	}
 	/* We expect a contiguous header [eh, ip, tcp]. */
 	pa = tcp_lro_parser(m, &po, &pi, true);
 	if (__predict_false(pa == NULL))
@@ -1858,9 +1871,19 @@ tcp_lro_rx(struct lro_ctrl *lc, struct mbuf *m, uint32_t csum)
 {
 	int error;
 
+	if (((m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) !=
+	     ((CSUM_DATA_VALID | CSUM_PSEUDO_HDR))) || 
+	    (m->m_pkthdr.csum_data != 0xffff)) {
+		/* 
+		 * The checksum either did not have hardware offload
+		 * or it was a bad checksum. We can't LRO such
+		 * a packet.
+		 */
+		counter_u64_add(tcp_bad_csums, 1);
+		return (TCP_LRO_CANNOT);
+	}
 	/* get current time */
 	binuptime(&lc->lro_last_queue_time);
-
 	CURVNET_SET(lc->ifp->if_vnet);
 	error = tcp_lro_rx_common(lc, m, csum, true);
 	CURVNET_RESTORE();
@@ -1880,8 +1903,7 @@ tcp_lro_queue_mbuf(struct lro_ctrl *lc, struct mbuf *mb)
 	}
 
 	/* check if packet is not LRO capable */
-	if (__predict_false(mb->m_pkthdr.csum_flags == 0 ||
-	    (lc->ifp->if_capenable & IFCAP_LRO) == 0)) {
+	if (__predict_false((lc->ifp->if_capenable & IFCAP_LRO) == 0)) {
 		/* input packet to network layer */
 		(*lc->ifp->if_input) (lc->ifp, mb);
 		return;
diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c
index fbd84e763c0f..697ae7d3270b 100644
--- a/sys/netinet/tcp_subr.c
+++ b/sys/netinet/tcp_subr.c
@@ -1520,6 +1520,7 @@ tcp_init(void)
 	tcp_would_have_but = counter_u64_alloc(M_WAITOK);
 	tcp_comp_total = counter_u64_alloc(M_WAITOK);
 	tcp_uncomp_total = counter_u64_alloc(M_WAITOK);
+	tcp_bad_csums = counter_u64_alloc(M_WAITOK);
 #ifdef TCPPCAP
 	tcp_pcap_init();
 #endif
diff --git a/sys/netinet/tcp_var.h b/sys/netinet/tcp_var.h
index 3f72a821e71f..8cfd2c5417c2 100644
--- a/sys/netinet/tcp_var.h
+++ b/sys/netinet/tcp_var.h
@@ -1022,6 +1022,7 @@ extern counter_u64_t tcp_extra_mbuf;
 extern counter_u64_t tcp_would_have_but;
 extern counter_u64_t tcp_comp_total;
 extern counter_u64_t tcp_uncomp_total;
+extern counter_u64_t tcp_bad_csums;
 
 #ifdef NETFLIX_EXP_DETECTION
 /* Various SACK attack thresholds */


More information about the dev-commits-src-main mailing list