git: bef079254f3d - main - arm64: Clamp segment sizes properly in bounce_bus_dmamap_load_buffer()

From: Mark Johnston <markj@FreeBSD.org>
Date: Thu, 15 Aug 2024 14:21:55 UTC
The branch main has been updated by markj:

URL: https://cgit.FreeBSD.org/src/commit/?id=bef079254f3dcf443a61b4c2605cc6e2dcaf043f

commit bef079254f3dcf443a61b4c2605cc6e2dcaf043f
Author:     Mark Johnston <markj@FreeBSD.org>
AuthorDate: 2024-08-15 14:17:40 +0000
Commit:     Mark Johnston <markj@FreeBSD.org>
CommitDate: 2024-08-15 14:19:22 +0000

    arm64: Clamp segment sizes properly in bounce_bus_dmamap_load_buffer()
    
    Commit 099b59515499 ("Improve loading of multipage aligned buffers.")
    modified bounce_bus_dmamap_load_buffer() with the assumption that busdma
    memory allocations are physically contiguous, which is not always true:
    bounce_bus_dmamem_alloc() will allocate memory with
    kmem_alloc_attr_domainset() in some cases, and this function is not
    guaranteed to return contiguous memory.
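
    As a minimal sketch of the broken assumption (illustrative only, using
    the standard pmap_kextract() and KASSERT() primitives): for a two-page
    allocation returned by kmem_alloc_attr_domainset(), the pages are
    virtually adjacent, but nothing guarantees physical adjacency, so an
    assertion like the following can legitimately fail:

            /*
             * Illustrative only: with page-by-page allocation, the
             * second virtual page need not occupy the physical page
             * following the first one.
             */
            vm_offset_t va = (vm_offset_t)*vaddr;
            KASSERT(pmap_kextract(va + PAGE_SIZE) ==
                pmap_kextract(va) + PAGE_SIZE,
                ("busdma buffer is not physically contiguous"));

    A DMA segment computed from the first page's physical address must
    therefore stop at the page boundary rather than run into whatever
    physical page happens to follow.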
    
    The damage seems to have been mitigated for most consumers by clamping
    the segment size to maxsegsz, but that clamp was removed in commit
    a77e1f0f81df ("busdma: better handling of small segment bouncing"); in
    practice, it seems busdma memory is often allocated with maxsegsz ==
    PAGE_SIZE.  In particular, after commit a77e1f0f81df I see occasional
    random kernel memory corruption when benchmarking TCP through mlx5
    interfaces.
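
    For a page-aligned busdma buffer, clamping every segment to
    maxsegsz == PAGE_SIZE keeps each segment within a single page, so no
    segment can span a physical discontinuity.  A sketch of the removed
    clamp, paraphrased from the pre-a77e1f0f81df load path:

            /*
             * Paraphrased: with maxsegsz == PAGE_SIZE and a
             * page-aligned buffer, this clamp kept each segment
             * inside one page, masking the contiguity bug.
             */
            sgsize = MIN(sgsize, dmat->common.maxsegsz);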
    
    Fix the problem by using separate flags for contiguous and
    non-contiguous busdma memory allocations, and use the contiguity flag
    to decide whether segments must be clamped at page boundaries.
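
    Because BF_KMEM_ALLOC is kept as the union of the two new bits,
    existing "was this memory allocated by busdma at all" tests continue
    to work unmodified.  A sketch of that pattern, modeled on
    bounce_bus_dmamem_free():

            /*
             * Either allocation path sets a bit covered by
             * BF_KMEM_ALLOC, so the free routine can still tell
             * malloc(9)-backed memory from kmem-backed memory.
             */
            if ((dmat->bounce_flags & BF_KMEM_ALLOC) != 0)
                    kmem_free(vaddr, dmat->alloc_size);
            else
                    free(vaddr, M_DEVBUF);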
    
    Fixes:  099b59515499 ("Improve loading of multipage aligned buffers.")
    Fixes:  a77e1f0f81df ("busdma: better handling of small segment bouncing")
    Sponsored by:   Klara, Inc.
    Sponsored by:   Stormshield
    Differential Revision:  https://reviews.freebsd.org/D46238
---
 sys/arm64/arm64/busdma_bounce.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/sys/arm64/arm64/busdma_bounce.c b/sys/arm64/arm64/busdma_bounce.c
index da605d4e21f5..abfd5c195857 100644
--- a/sys/arm64/arm64/busdma_bounce.c
+++ b/sys/arm64/arm64/busdma_bounce.c
@@ -63,7 +63,9 @@
 enum {
 	BF_COULD_BOUNCE		= 0x01,
 	BF_MIN_ALLOC_COMP	= 0x02,
-	BF_KMEM_ALLOC		= 0x04,
+	BF_KMEM_ALLOC_PAGES	= 0x04,
+	BF_KMEM_ALLOC_CONTIG	= 0x08,
+	BF_KMEM_ALLOC		= BF_KMEM_ALLOC_PAGES | BF_KMEM_ALLOC_CONTIG,
 	BF_COHERENT		= 0x10,
 };
 
@@ -580,14 +582,14 @@ bounce_bus_dmamem_alloc(bus_dma_tag_t dmat, void** vaddr, int flags,
 		*vaddr = kmem_alloc_attr_domainset(
 		    DOMAINSET_PREF(dmat->common.domain), dmat->alloc_size,
 		    mflags, 0ul, dmat->common.lowaddr, attr);
-		dmat->bounce_flags |= BF_KMEM_ALLOC;
+		dmat->bounce_flags |= BF_KMEM_ALLOC_PAGES;
 	} else {
 		*vaddr = kmem_alloc_contig_domainset(
 		    DOMAINSET_PREF(dmat->common.domain), dmat->alloc_size,
 		    mflags, 0ul, dmat->common.lowaddr,
 		    dmat->alloc_alignment != 0 ? dmat->alloc_alignment : 1ul,
 		    dmat->common.boundary, attr);
-		dmat->bounce_flags |= BF_KMEM_ALLOC;
+		dmat->bounce_flags |= BF_KMEM_ALLOC_CONTIG;
 	}
 	if (*vaddr == NULL) {
 		CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d",
@@ -856,7 +858,8 @@ bounce_bus_dmamap_load_buffer(bus_dma_tag_t dmat, bus_dmamap_t map, void *buf,
 		 * Compute the segment size, and adjust counts.
 		 */
 		sgsize = buflen;
-		if ((map->flags & DMAMAP_FROM_DMAMEM) == 0)
+		if ((map->flags & DMAMAP_FROM_DMAMEM) == 0 ||
+		    (dmat->bounce_flags & BF_KMEM_ALLOC_CONTIG) == 0)
 			sgsize = MIN(sgsize, PAGE_SIZE - (curaddr & PAGE_MASK));
 
 		if (map->pagesneeded != 0 &&
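
Read with De Morgan's law, the new condition in the final hunk says that
only memory known to be physically contiguous (dmamem allocated via
kmem_alloc_contig_domainset()) may form a segment that crosses a page
boundary; everything else is clamped to the current page.  A sketch of
the equivalent logic (may_cross_page is an illustrative local, not a
variable in the tree):

        bool may_cross_page =
            (map->flags & DMAMAP_FROM_DMAMEM) != 0 &&
            (dmat->bounce_flags & BF_KMEM_ALLOC_CONTIG) != 0;
        if (!may_cross_page)
                sgsize = MIN(sgsize, PAGE_SIZE - (curaddr & PAGE_MASK));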