PERFORCE change 98497 for review

Kip Macy kmacy at FreeBSD.org
Sun Jun 4 20:23:21 UTC 2006


http://perforce.freebsd.org/chv.cgi?CH=98497

Change 98497 by kmacy at kmacy_storage:sun4v_work on 2006/06/04 20:21:27

	Align the stack to 64 bytes.
	Optimize the spill/fill handlers to use block-initializing stores.

Affected files ...

.. //depot/projects/kmacy_sun4v/src/contrib/gcc/config/sparc/sparc.h#3 edit
.. //depot/projects/kmacy_sun4v/src/sys/sun4v/sun4v/exception.S#61 edit

Differences ...

==== //depot/projects/kmacy_sun4v/src/contrib/gcc/config/sparc/sparc.h#3 (text+ko) ====

@@ -797,7 +797,7 @@
 /* ALIGN FRAMES on double word boundaries */
 
 #define SPARC_STACK_ALIGN(LOC) \
-  (TARGET_ARCH64 ? (((LOC)+15) & ~15) : (((LOC)+7) & ~7))
+  (TARGET_ARCH64 ? (((LOC)+63) & ~63) : (((LOC)+7) & ~7))
 
 /* Allocation boundary (in *bits*) for the code of a function.  */
 #define FUNCTION_BOUNDARY 32

==== //depot/projects/kmacy_sun4v/src/sys/sun4v/sun4v/exception.S#61 (text+ko) ====

@@ -140,6 +140,26 @@
 	storer	%i6, [%g5 + %g3]asi             ;\
 	storer	%i7, [%g5 + %g4]asi  
 
+/* 16 instructions: stxa %l0-%l7 to [%g1 + 0x00..0x38] and %i0-%i7 to [%g1 + 0x40..0x78] through %asi */
+#define	SPILL_ASI_64 \
+	stxa	%l0, [%g1 + 0x0]%asi             ;\
+	stxa	%i0, [%g1 + 0x40]%asi  /* issued right after %l0: presumably so each 64-byte half is first written at its base for the block-init ASI -- TODO confirm */           ;\
+	stxa	%l1, [%g1 + 0x8]%asi              ;\
+	stxa	%l2, [%g1 + 0x10]%asi             ;\
+	stxa	%l3, [%g1 + 0x18]%asi             ;\
+	stxa	%l4, [%g1 + 0x20]%asi             ;\
+	stxa	%l5, [%g1 + 0x28]%asi             ;\
+	stxa	%l6, [%g1 + 0x30]%asi             ;\
+	stxa	%l7, [%g1 + 0x38]%asi             ;\
+	stxa	%i1, [%g1 + 0x48]%asi             ;\
+	stxa	%i2, [%g1 + 0x50]%asi             ;\
+	stxa	%i3, [%g1 + 0x58]%asi             ;\
+	stxa	%i4, [%g1 + 0x60]%asi             ;\
+	stxa	%i5, [%g1 + 0x68]%asi             ;\
+	stxa	%i6, [%g1 + 0x70]%asi             ;\
+	stxa	%i7, [%g1 + 0x78]%asi  
+
+/* 23 instructions */
 #define	FILL(loader, bias, size, asi) \
 	mov	0 + bias, %g1			;\
 	loader	[%sp + %g1]asi, %l0		;\
@@ -165,20 +185,54 @@
 	loader	[%g5 + %g3]asi, %i6		;\
 	loader	[%g5 + %g4]asi, %i7
 
-#define	FILL_DW(asi) \
-	mov	0 + SPOFF, %g1			;\
-	ldda	[%sp + %g1]asi, %l0		;\
-	mov	16 + SPOFF, %g2			;\
-	ldda	[%sp + %g2]asi, %l2		;\
-	add	%sp, 32, %g3			;\
-	ldda	[%g3 + %g1]asi, %l4		;\
-	ldda	[%g3 + %g2]asi, %l6		;\
-	add	%g3, 32, %g3			;\
-	ldda	[%g3 + %g1]asi, %i0		;\
-	ldda	[%g3 + %g2]asi, %i2		;\
-	add	%g3, 32, %g3			;\
-	ldda	[%g3 + %g1]asi, %i4		;\
-	ldda	[%g3 + %g2]asi, %i6		
+#define	SPILL_ASI_SET(storer, size) /* 16 instructions: storer %l0-%l7 then %i0-%i7 to [%g1 + n * size] through %asi, n = 0..15 */ \
+	storer	%l0, [%g1 + (0 * size)]%asi     ;\
+	storer	%l1, [%g1 + (1 * size)]%asi     ;\
+	storer	%l2, [%g1 + (2 * size)]%asi     ;\
+	storer	%l3, [%g1 + (3 * size)]%asi     ;\
+	storer	%l4, [%g1 + (4 * size)]%asi     ;\
+	storer	%l5, [%g1 + (5 * size)]%asi     ;\
+	storer	%l6, [%g1 + (6 * size)]%asi     ;\
+	storer	%l7, [%g1 + (7 * size)]%asi     ;\
+	storer	%i0, [%g1 + (8 * size)]%asi     ;\
+	storer	%i1, [%g1 + (9 * size)]%asi     ;\
+	storer	%i2, [%g1 + (10 * size)]%asi     ;\
+	storer	%i3, [%g1 + (11 * size)]%asi     ;\
+	storer	%i4, [%g1 + (12 * size)]%asi     ;\
+	storer	%i5, [%g1 + (13 * size)]%asi     ;\
+	storer	%i6, [%g1 + (14 * size)]%asi     ;\
+	storer	%i7, [%g1 + (15 * size)]%asi 
+
+/* 16 instructions: loader from [%g1 + size * n] through %asi into %l0-%l7 (n = 0..7) then %i0-%i7 (n = 8..15) */
+#define	FILL_ASI_SET(loader, size) \
+	loader	[%g1 + 0x0]%asi, %l0		;\
+	loader	[%g1 + (size * 1)]%asi, %l1	;\
+	loader	[%g1 + (size * 2)]%asi, %l2	;\
+	loader	[%g1 + (size * 3)]%asi, %l3	;\
+	loader	[%g1 + (size * 4)]%asi, %l4	;\
+	loader	[%g1 + (size * 5)]%asi, %l5	;\
+	loader	[%g1 + (size * 6)]%asi, %l6	;\
+	loader	[%g1 + (size * 7)]%asi, %l7	;\
+	loader	[%g1 + (size * 8)]%asi, %i0	;\
+	loader	[%g1 + (size * 9)]%asi, %i1	;\
+	loader	[%g1 + (size * 10)]%asi, %i2	;\
+	loader	[%g1 + (size * 11)]%asi, %i3	;\
+	loader	[%g1 + (size * 12)]%asi, %i4	;\
+	loader	[%g1 + (size * 13)]%asi, %i5	;\
+	loader	[%g1 + (size * 14)]%asi, %i6	;\
+	loader	[%g1 + (size * 15)]%asi, %i7	
+	
+/* 9 instructions: one prefetch of [%g1 + 0x40] (no %asi) plus eight ldda loads through %asi at 0x10 strides, naming the even registers %l0..%i6; each ldda is expected to fill a register pair -- width depends on the ASI, TODO confirm */
+#define	FILL_DW \
+	prefetch [%g1 + 0x40], #one_read        ;\
+	ldda	[%g1 + 0]%asi, %l0		;\
+	ldda	[%g1 + 0x10]%asi, %l2		;\
+	ldda	[%g1 + 0x20]%asi, %l4		;\
+	ldda	[%g1 + 0x30]%asi, %l6		;\
+	ldda	[%g1 + 0x40]%asi, %i0		;\
+	ldda	[%g1 + 0x50]%asi, %i2		;\
+	ldda	[%g1 + 0x60]%asi, %i4		;\
+	ldda	[%g1 + 0x70]%asi, %i6		
 
 #include <sun4v/sun4v/wbuf.S>	
 	/*
@@ -372,8 +426,76 @@
 	.endm
 
 
-#define ALIGN_128   .align  128
+#define ALIGN_128	.align  128
+#define SYNC		#Sync
+#define LOOKASIDE	#Lookaside
+
+#define USE_FAST_SPILLFILL /* selects the first set of 64-bit spill/fill handler macros below */
+	
+#ifdef USE_FAST_SPILLFILL /* these variants load asival into %asi; asival_unaligned is not referenced */
+#define spill_64bit_asi(asival, asival_unaligned, target)	\
+	wr	%g0, asival, %asi  ;            \
+	add	%sp, SPOFF, %g1	   ;            \
+	SPILL_ASI_64               ;	        \
+	membar	LOOKASIDE          ;            \
+	saved			   ;		\
+	retry			   ;		\
+	.skip (31-21)*4	/* 21 instructions precede this pad; it places the ba below at instruction index 31 from the macro start */	   ;		\
+	ba,a,pt %xcc, fault_64bit_##target ; \
+	ALIGN_128	
+
+#define	spill_64clean(asival, asival_unaligned, target)		\
+	wr	%g0, asival, %asi  ;            \
+	add	%sp, SPOFF, %g1	   ;            \
+	SPILL_ASI_64               ; 	        \
+	membar	LOOKASIDE          ;            \
+	b	spill_clean	   ;		\
+	  mov	WSTATE_USER64, %g7 ; 		\
+	.skip (31-21)*4	/* 21 instructions precede this pad */	   ;		\
+	ba,a,pt %xcc, fault_64bit_##target ; 	\
+	ALIGN_128	
+
+#define fill_64bit_asi(asival, asival_unaligned, target)	\
+	add	%sp, SPOFF, %g1	   ;            \
+	wr	%g0, asival, %asi  ;            \
+	FILL_DW                    ; 		\
+	restored		   ;		\
+	retry			   ;		\
+	.skip (31-13)*4	/* 13 instructions precede this pad (FILL_DW is 9) */	   ;		\
+	ba,a,pt %xcc, fault_64bit_##target ; \
+	ALIGN_128	
+#else /* !USE_FAST_SPILLFILL: these variants load asival_unaligned into %asi and use SPILL_ASI_SET / FILL_ASI_SET */
+#define spill_64bit_asi(asival, asival_unaligned, target)	\
+	wr	%g0, asival_unaligned, %asi ;   \
+	add	%sp, SPOFF, %g1	  ;             \
+	SPILL_ASI_SET(stxa, 8)     ;	        \
+	saved			   ;		\
+	retry			   ;		\
+	.skip (31-20)*4	/* 20 instructions precede this pad */	   ;		\
+	ba,a,pt %xcc, fault_64bit_##target ; \
+	ALIGN_128	
+
+#define	spill_64clean(asival, asival_unaligned, target)		\
+	wr	%g0, asival_unaligned, %asi  ;            \
+	add	%sp, SPOFF, %g1	   ;            \
+	SPILL_ASI_SET(stxa, 8)     ; 	        \
+	b	spill_clean	   ;		\
+	  mov	WSTATE_USER64, %g7 ; 		\
+	.skip (31-20)*4	/* 20 instructions precede this pad */	   ;		\
+	ba,a,pt %xcc, fault_64bit_##target ; 	\
+	ALIGN_128	
+
+#define fill_64bit_asi(asival, asival_unaligned, target)	\
+	wr	%g0, asival_unaligned, %asi  ;  \
+	add	%sp, SPOFF, %g1	  ;             \
+	FILL_ASI_SET(ldxa, 8)      ; 		\
+	restored		   ;		\
+	retry			   ;		\
+	.skip (31-20)*4	/* 20 instructions precede this pad */	   ;		\
+	ba,a,pt %xcc, fault_64bit_##target ; \
+	ALIGN_128	
+#endif /* USE_FAST_SPILLFILL */
+
 #define spill_32bit_asi(asi, target)		\
 	srl	%sp, 0, %sp	;		\
 	SPILL_FILL_MAGIC_TRAP_ON;               \
@@ -385,16 +507,6 @@
 	ba,a,pt %xcc, fault_32bit_##target ; \
 	ALIGN_128
 
-#define spill_64bit_asi(asi, target)		\
-	SPILL_FILL_MAGIC_TRAP_ON   ;            \
-	SPILL_ASI(stxa, SPOFF, 8, asi) ;	\
-	saved			   ;		\
-	SPILL_FILL_MAGIC_TRAP_OFF  ;		\
-	retry			   ;		\
-	.skip (31-27)*4		   ;		\
-	ba,a,pt %xcc, fault_64bit_##target ; \
-	ALIGN_128	
-
 #define	spill_32clean(asi, target)		\
 	srl	%sp, 0, %sp	; 		\
 	SPILL_FILL_MAGIC_TRAP_ON;               \
@@ -405,15 +517,6 @@
 	ba,a,pt    %xcc, fault_32bit_##target ; \
 	ALIGN_128	
 	
-#define	spill_64clean(asi, target)		\
-	SPILL_FILL_MAGIC_TRAP_ON;               \
-	SPILL_ASI(stxa, SPOFF, 8, asi) ; 	\
-	b	spill_clean	   ;		\
-	  mov	WSTATE_USER64, %g7 ; 		\
-	.skip (31-26)*4		   ;		\
-	ba,a,pt %xcc, fault_64bit_##target ; 	\
-	ALIGN_128	
-
 #define fill_32bit_asi(asi, target)		\
 	srl	%sp, 0, %sp	;		\
 	SPILL_FILL_MAGIC_TRAP_ON;               \
@@ -424,22 +527,32 @@
 	ba,a,pt %xcc, fault_32bit_##target ; \
 	ALIGN_128	
 
-#define fill_64bit_asi(asi, target)		\
-	SPILL_FILL_MAGIC_TRAP_ON;               \
-	FILL(ldxa, SPOFF, 8, asi)  ; 		\
-	restored		   ;		\
-	SPILL_FILL_MAGIC_TRAP_OFF  ;		\
-	retry			   ;		\
-	.skip (31-27)*4		   ;		\
-	ba,a,pt %xcc, fault_64bit_##target ; \
-	.align 128
+.align 128
+ENTRY(fill_64bit_slow_fn0) /* per-register ldxa fill; uses %g1 and %asi as found on entry (nothing here writes them) */
+fill_slow_start: /* range start marker; compared against %g7 in a later hunk of this change */
+	FILL_ASI_SET(ldxa, 8);                  
+	restored		;               
+	retry 			;               
+	.skip (31-18)*4	/* 18 instructions precede this pad (16 loads, restored, retry) */	   ;		
+	ba,a,pt %xcc, fault_64bit_fn0 ;
+	.align 128	
+END(fill_64bit_slow_fn0)
+ENTRY(fill_64bit_slow_not) /* same sequence as fill_64bit_slow_fn0, but the trailing branch goes to fault_64bit_not */
+	FILL_ASI_SET(ldxa, 8);                  
+	restored		;               
+	retry 			;               
+	.skip (31-18)*4	/* 18 instructions precede this pad */	   ;		
+	ba,a,pt %xcc, fault_64bit_not ; 
+	.align 128	
+END(fill_64bit_slow_not)
+fill_slow_end: /* range end marker; compared against %g7 in a later hunk of this change */
 		
 	.macro	spill_32bit_primary_sn0
 	spill_32bit_asi(ASI_AIUP, sn0)
 	.endm
 
 	.macro	spill_64bit_primary_sn0
-	spill_64bit_asi(ASI_AIUP, sn0)
+	spill_64bit_asi(ASI_LDSTBI_AIUP, ASI_AIUP, sn0)
 	.endm
 
 	.macro spill_32clean_primary_sn0
@@ -447,7 +560,7 @@
 	.endm
 			
 	.macro spill_64clean_primary_sn0
-	spill_64clean(ASI_AIUP, sn0)
+	spill_64clean(ASI_LDSTBI_AIUP, ASI_AIUP, sn0)
 	.endm
 
 	.macro spill_32bit_nucleus_not
@@ -455,7 +568,7 @@
 	.endm
 
 	.macro spill_64bit_nucleus_not
-	spill_64bit_asi(ASI_N,not)
+	spill_64bit_asi(ASI_LDSTBI_N, ASI_N, not)
 	.endm
 
 	.macro	spill_32bit_secondary_so0
@@ -463,7 +576,7 @@
 	.endm
 
 	.macro	spill_64bit_secondary_so0
-	spill_64bit_asi(ASI_AIUS, so0)
+	spill_64bit_asi(ASI_LDSTBI_AIUS, ASI_AIUS, so0)
 	.endm
 	
 	.macro	fill_32bit_primary_fn0
@@ -471,7 +584,7 @@
 	.endm
 
 	.macro	fill_64bit_primary_fn0
-	fill_64bit_asi(ASI_AIUP, fn0)
+	fill_64bit_asi(ASI_LDSTBI_AIUP, ASI_AIUP, fn0)
 	.endm
 
 	.macro fill_32bit_nucleus_not
@@ -479,7 +592,7 @@
 	.endm
 
 	.macro fill_64bit_nucleus_not
-	fill_64bit_asi(ASI_N, not)
+	fill_64bit_asi(ASI_LDSTBI_N, ASI_N, not)
 	.endm
 
 	.macro	spill_32bit_tt1_primary_sn1
@@ -748,12 +861,19 @@
 	tl0_reserved		4			! 0xbc
 tl0_fill_n_normal:
 	tl0_reserved		4			! 0xc0
+tl0_fill_1_normal:	
 	fill_32bit_primary_fn0				! 0xc4 
+tl0_fill_2_normal:	
 	fill_64bit_primary_fn0				! 0xc8
+tl0_fill_3_normal:	
 	fill_32bit_primary_fn0				! 0xcc
+tl0_fill_4_normal:	
 	fill_64bit_primary_fn0				! 0xd0
+tl0_fill_5_normal:	
 	fill_32bit_nucleus_not				! 0xd4
+tl0_fill_6_normal:	
 	fill_64bit_nucleus_not				! 0xd8
+tl0_fill_7_normal:	
 	fill_mixed					! 0xdc
 tl0_fill_n_other_e0:
 	tl0_reserved		32			! 0xe0-0xff
@@ -1074,6 +1194,7 @@
 	ba,a	3f
 #endif
 4:
+	membar	#Lookaside
 	fill_64bit_rtt(ASI_AIUP)
 	.global	rtt_fill_end
 rtt_fill_end:
@@ -1343,6 +1464,8 @@
 #define LOADSTORE #LoadStore 
 #define STORESTORE #StoreStore
 
+
+#define WORKING
 #ifdef WORKING
 #define ENTER LOADLOAD
 #define EXIT LOADSTORE|STORESTORE
@@ -1741,8 +1864,17 @@
 	bgeu,pn %xcc, 1f
 	 nop
 	set	fault_rtt_fn1, %g7
-	ba,a	2f
+	ba,a	4f
 1:
+	set	fill_slow_start, %g6
+	cmp	%g7, %g6
+	bleu,a,pn %xcc, 2f
+	  nop
+	set	fill_slow_end, %g6
+	cmp	%g7, %g6
+	blu,a,pn %xcc, 3f
+	  nop
+2:		
 	set	tl1_end, %g6
 	cmp	%g7, %g6
 	bgeu,a,pn %xcc, ptl1_panic
@@ -1756,9 +1888,10 @@
 	cmp	%g6, WTRAP_TYPE
 	bne,a,pn %xcc, ptl1_panic
 	  mov	PTL1_BAD_TRAP, %g1
+3:		
 	andn	%g7, WTRAP_ALIGN, %g7
 	add	%g7, WTRAP_FAULTOFF, %g7
-2:	
+4:	
 	wrpr	%g0, %g7, %tnpc
 	wrpr	%g0, 1, %gl
 	rdpr	%tt, %g5
@@ -1767,13 +1900,13 @@
 	ldxa	[%g7 + MMFSA_D_ADDR]%asi, %g6
 	ldxa	[%g7 + MMFSA_D_CTX]%asi, %g7
 	cmp	%g5, T_ALIGNMENT
-	be,pn	%xcc, 3f
+	be,pn	%xcc, 5f
 	  nop
 	srlx	%g6, PAGE_SHIFT, %g6 
 	sllx	%g6, PAGE_SHIFT, %g6	! mask off bottom
 	or	%g6, %g7, %g6
 	done
-3:
+5:
 	sllx	%g7, TRAP_CTX_SHIFT, %g7
 	or	%g7, %g5, %g5
 	done


More information about the p4-projects mailing list