arm/174461: [patch] Fix off-by-one in arm9/arm10 cache maintenance routines

Ian Lepore freebsd at damnhippie.dyndns.org
Sat Dec 15 20:00:00 UTC 2012


>Number:         174461
>Category:       arm
>Synopsis:       [patch] Fix off-by-one in arm9/arm10 cache maintenance routines
>Confidential:   no
>Severity:       serious
>Priority:       medium
>Responsible:    freebsd-arm
>State:          open
>Quarter:        
>Keywords:       
>Date-Required:
>Class:          sw-bug
>Submitter-Id:   current-users
>Arrival-Date:   Sat Dec 15 20:00:00 UTC 2012
>Closed-Date:
>Last-Modified:
>Originator:     Ian Lepore <freebsd at damnhippie.dyndns.org>
>Release:        FreeBSD 10.0-CURRENT arm
>Organization:
Symmetricom, Inc.
>Environment:
FreeBSD dpcur 10.0-CURRENT FreeBSD 10.0-CURRENT #23 r243920M: Sat Dec 15 11:31:47 MST 2012     ilepore at revolution.hippie.lan:/local/build/staging/freebsd/dp10/obj/arm.arm/local/build/staging/freebsd/dp10/src/sys/DP  arm

>Description:
In all the routines that loop through a range of virtual addresses, the loop
is controlled by subtracting the cache line size from the total length of the
request.  After the subtract, a 'bpl' instruction was used, which branches if
the result of the subtraction is zero or greater, but we need to exit the
loop when the count hits zero.  Thus, all the bpl instructions in those loops
have been changed to 'bhi' (branch if greater than zero).

In addition, the two routines that walk through the cache using set-and-index
were correct, but confusing.  The loop control for those has been simplified,
just so that it's easier to see by examination that the code is correct.

Routines for other arm architectures and generations still have the bpl
instruction, but compensate for the off-by-one situation by decrementing
the count register by one before entering the loop.  Just for the sake of
consistancy, these should probably all be changed to remove the decrement
and use the correct branch instruction.  That would then make it easier to
see what appears superficially to be lots of duplication in these routines,
and some consolidation could happen.

>How-To-Repeat:
Build a kernel for Atmel ARM with INVARIANTS and INVARIANT_SUPPORT enabled.
When it boots, the uart devices fail to instantiate.

The reason was that the driver allocates a dma tag, then a dma buffer,
and accidentally the tag ended up laid out in memory immediately after the
buffer.  When the driver did the bus_dmamap_sync(PREREAD) on the buffer, the
off-by-one error caused the cache line immediately following that buffer (the 
first 32 bytes of the dma tag) to be erroniously invalidated.  Because of 
INVARIANTS, the physical memory under that dirty cache line was full of 
0xdeadc0de, so the invalidate operation corrupted the dma tag.  When the 
driver then attempted to allocate another buffer using that tag it failed, 
because the 0xdeadc0de values in the tag led to insane allocation decisions 
that caused malloc() to fail.  

Without INVARIANTS, either things end up in different places in memory, or 
the values in underlying memory that become exposed after the bad invalidate 
are harmless; either way, it accidentally works right most of the time.

>Fix:

--- arm9_arm10_cacheops_offbyone_fix.diff begins here ---
diff -r 0f2004466772 sys/arm/arm/cpufunc_asm_arm10.S
--- sys/arm/arm/cpufunc_asm_arm10.S	Thu Dec 06 08:24:00 2012 -0700
+++ sys/arm/arm/cpufunc_asm_arm10.S	Sat Dec 15 11:30:41 2012 -0700
@@ -87,7 +87,7 @@ ENTRY_NP(arm10_icache_sync_range)
 	mcr	p15, 0, r0, c7, c10, 1	/* Clean D cache SE with VA */
 	add	r0, r0, ip
 	subs	r1, r1, ip
-	bpl	.Larm10_sync_next
+	bhi	.Larm10_sync_next
 	mcr	p15, 0, r0, c7, c10, 4	/* drain the write buffer */
 	bx	lr
 
@@ -108,12 +108,10 @@ ENTRY_NP(arm10_icache_sync_all)
 	orr	ip, s_max, i_max
 .Lnext_index:
 	mcr	p15, 0, ip, c7, c10, 2	/* Clean D cache SE with Set/Index */
-	sub	ip, ip, i_inc
-	tst	ip, i_max		/* Index 0 is last one */
-	bne	.Lnext_index		/* Next index */
-	mcr	p15, 0, ip, c7, c10, 2	/* Clean D cache SE with Set/Index */
+	subs	ip, ip, i_inc
+	bhs	.Lnext_index		/* Next index */
 	subs	s_max, s_max, s_inc
-	bpl	.Lnext_set		/* Next set */
+	bhs	.Lnext_set		/* Next set */
 	mcr	p15, 0, r0, c7, c10, 4	/* drain the write buffer */
 	bx	lr
 
@@ -133,7 +131,7 @@ ENTRY(arm10_dcache_wb_range)
 	mcr	p15, 0, r0, c7, c10, 1	/* Clean D cache SE with VA */
 	add	r0, r0, ip
 	subs	r1, r1, ip
-	bpl	.Larm10_wb_next
+	bhi	.Larm10_wb_next
 	mcr	p15, 0, r0, c7, c10, 4	/* drain the write buffer */
 	bx	lr
 	
@@ -150,7 +148,7 @@ ENTRY(arm10_dcache_wbinv_range)
 	mcr	p15, 0, r0, c7, c14, 1	/* Purge D cache SE with VA */
 	add	r0, r0, ip
 	subs	r1, r1, ip
-	bpl	.Larm10_wbinv_next
+	bhi	.Larm10_wbinv_next
 	mcr	p15, 0, r0, c7, c10, 4	/* drain the write buffer */
 	bx	lr
 	
@@ -171,7 +169,7 @@ ENTRY(arm10_dcache_inv_range)
 	mcr	p15, 0, r0, c7, c6, 1	/* Invalidate D cache SE with VA */
 	add	r0, r0, ip
 	subs	r1, r1, ip
-	bpl	.Larm10_inv_next
+	bhi	.Larm10_inv_next
 	mcr	p15, 0, r0, c7, c10, 4	/* drain the write buffer */
 	bx	lr
 
@@ -189,7 +187,7 @@ ENTRY(arm10_idcache_wbinv_range)
 	mcr	p15, 0, r0, c7, c14, 1	/* Purge D cache SE with VA */
 	add	r0, r0, ip
 	subs	r1, r1, ip
-	bpl	.Larm10_id_wbinv_next
+	bhi	.Larm10_id_wbinv_next
 	mcr	p15, 0, r0, c7, c10, 4	/* drain the write buffer */
 	bx	lr
 
@@ -211,12 +209,10 @@ ENTRY(arm10_dcache_wbinv_all)
 	orr	ip, s_max, i_max
 .Lnext_index_inv:
 	mcr	p15, 0, ip, c7, c14, 2	/* Purge D cache SE with Set/Index */
-	sub	ip, ip, i_inc
-	tst	ip, i_max		/* Index 0 is last one */
-	bne	.Lnext_index_inv		/* Next index */
-	mcr	p15, 0, ip, c7, c14, 2	/* Purge D cache SE with Set/Index */
+	subs	ip, ip, i_inc
+	bhs	.Lnext_index_inv		/* Next index */
 	subs	s_max, s_max, s_inc
-	bpl	.Lnext_set_inv		/* Next set */
+	bhs	.Lnext_set_inv		/* Next set */
 	mcr	p15, 0, r0, c7, c10, 4	/* drain the write buffer */
 	bx	lr
 
diff -r 0f2004466772 sys/arm/arm/cpufunc_asm_arm9.S
--- sys/arm/arm/cpufunc_asm_arm9.S	Thu Dec 06 08:24:00 2012 -0700
+++ sys/arm/arm/cpufunc_asm_arm9.S	Sat Dec 15 11:30:41 2012 -0700
@@ -81,7 +81,7 @@ ENTRY_NP(arm9_icache_sync_range)
 	mcr	p15, 0, r0, c7, c10, 1	/* Clean D cache SE with VA */
 	add	r0, r0, ip
 	subs	r1, r1, ip
-	bpl	.Larm9_sync_next
+	bhi	.Larm9_sync_next
 	mov	pc, lr
 
 ENTRY_NP(arm9_icache_sync_all)
@@ -101,12 +101,10 @@ ENTRY_NP(arm9_icache_sync_all)
 	orr	ip, s_max, i_max
 .Lnext_index:
 	mcr	p15, 0, ip, c7, c10, 2	/* Clean D cache SE with Set/Index */
-	sub	ip, ip, i_inc
-	tst	ip, i_max		/* Index 0 is last one */
-	bne	.Lnext_index		/* Next index */
-	mcr	p15, 0, ip, c7, c10, 2	/* Clean D cache SE with Set/Index */
+	subs	ip, ip, i_inc
+	bhs	.Lnext_index		/* Next index */
 	subs	s_max, s_max, s_inc
-	bpl	.Lnext_set		/* Next set */
+	bhs	.Lnext_set		/* Next set */
 	mov	pc, lr
 
 .Larm9_line_size:
@@ -125,7 +123,7 @@ ENTRY(arm9_dcache_wb_range)
 	mcr	p15, 0, r0, c7, c10, 1	/* Clean D cache SE with VA */
 	add	r0, r0, ip
 	subs	r1, r1, ip
-	bpl	.Larm9_wb_next
+	bhi	.Larm9_wb_next
 	mov	pc, lr
 	
 ENTRY(arm9_dcache_wbinv_range)
@@ -141,7 +139,7 @@ ENTRY(arm9_dcache_wbinv_range)
 	mcr	p15, 0, r0, c7, c14, 1	/* Purge D cache SE with VA */
 	add	r0, r0, ip
 	subs	r1, r1, ip
-	bpl	.Larm9_wbinv_next
+	bhi	.Larm9_wbinv_next
 	mov	pc, lr
 	
 /*
@@ -161,7 +159,7 @@ ENTRY(arm9_dcache_inv_range)
 	mcr	p15, 0, r0, c7, c6, 1	/* Invalidate D cache SE with VA */
 	add	r0, r0, ip
 	subs	r1, r1, ip
-	bpl	.Larm9_inv_next
+	bhi	.Larm9_inv_next
 	mov	pc, lr
 
 ENTRY(arm9_idcache_wbinv_range)
@@ -178,7 +176,7 @@ ENTRY(arm9_idcache_wbinv_range)
 	mcr	p15, 0, r0, c7, c14, 1	/* Purge D cache SE with VA */
 	add	r0, r0, ip
 	subs	r1, r1, ip
-	bpl	.Larm9_id_wbinv_next
+	bhi	.Larm9_id_wbinv_next
 	mov	pc, lr
 
 ENTRY_NP(arm9_idcache_wbinv_all)
@@ -199,12 +197,10 @@ ENTRY(arm9_dcache_wbinv_all)
 	orr	ip, s_max, i_max
 .Lnext_index_inv:
 	mcr	p15, 0, ip, c7, c14, 2	/* Purge D cache SE with Set/Index */
-	sub	ip, ip, i_inc
-	tst	ip, i_max		/* Index 0 is last one */
-	bne	.Lnext_index_inv		/* Next index */
-	mcr	p15, 0, ip, c7, c14, 2	/* Purge D cache SE with Set/Index */
+	subs	ip, ip, i_inc
+	bhs	.Lnext_index_inv		/* Next index */
 	subs	s_max, s_max, s_inc
-	bpl	.Lnext_set_inv		/* Next set */
+	bhs	.Lnext_set_inv		/* Next set */
 	mov	pc, lr
 
 .Larm9_cache_data:
--- arm9_arm10_cacheops_offbyone_fix.diff ends here ---

>Release-Note:
>Audit-Trail:
>Unformatted:


More information about the freebsd-arm mailing list