PERFORCE change 98497 for review
Kip Macy
kmacy at FreeBSD.org
Sun Jun 4 20:23:21 UTC 2006
http://perforce.freebsd.org/chv.cgi?CH=98497
Change 98497 by kmacy at kmacy_storage:sun4v_work on 2006/06/04 20:21:27
align stack to 64 bytes
optimize spill/fill handlers to use block initializing stores
Affected files ...
.. //depot/projects/kmacy_sun4v/src/contrib/gcc/config/sparc/sparc.h#3 edit
.. //depot/projects/kmacy_sun4v/src/sys/sun4v/sun4v/exception.S#61 edit
Differences ...
==== //depot/projects/kmacy_sun4v/src/contrib/gcc/config/sparc/sparc.h#3 (text+ko) ====
@@ -797,7 +797,7 @@
/* ALIGN FRAMES on double word boundaries */
#define SPARC_STACK_ALIGN(LOC) \
- (TARGET_ARCH64 ? (((LOC)+15) & ~15) : (((LOC)+7) & ~7))
+ (TARGET_ARCH64 ? (((LOC)+63) & ~63) : (((LOC)+7) & ~7))
/* Allocation boundary (in *bits*) for the code of a function. */
#define FUNCTION_BOUNDARY 32
==== //depot/projects/kmacy_sun4v/src/sys/sun4v/sun4v/exception.S#61 (text+ko) ====
@@ -140,6 +140,26 @@
storer %i6, [%g5 + %g3]asi ;\
storer %i7, [%g5 + %g4]asi
+/* 16 instructions: stxa %l0-%l7 to [%g1 + 0x0..0x38] and %i0-%i7 to [%g1 + 0x40..0x78] via %asi; %g1 and %asi must be set by the user of this macro */
+#define SPILL_ASI_64 \
+ stxa %l0, [%g1 + 0x0]%asi ;\
+ stxa %i0, [%g1 + 0x40]%asi /* issued ahead of %l1-%l7; presumably so the second 64-byte half is touched first at its base -- TODO confirm */ ;\
+ stxa %l1, [%g1 + 0x8]%asi ;\
+ stxa %l2, [%g1 + 0x10]%asi ;\
+ stxa %l3, [%g1 + 0x18]%asi ;\
+ stxa %l4, [%g1 + 0x20]%asi ;\
+ stxa %l5, [%g1 + 0x28]%asi ;\
+ stxa %l6, [%g1 + 0x30]%asi ;\
+ stxa %l7, [%g1 + 0x38]%asi ;\
+ stxa %i1, [%g1 + 0x48]%asi ;\
+ stxa %i2, [%g1 + 0x50]%asi ;\
+ stxa %i3, [%g1 + 0x58]%asi ;\
+ stxa %i4, [%g1 + 0x60]%asi ;\
+ stxa %i5, [%g1 + 0x68]%asi ;\
+ stxa %i6, [%g1 + 0x70]%asi ;\
+ stxa %i7, [%g1 + 0x78]%asi
+
+/* 23 instructions */
#define FILL(loader, bias, size, asi) \
mov 0 + bias, %g1 ;\
loader [%sp + %g1]asi, %l0 ;\
@@ -165,20 +185,54 @@
loader [%g5 + %g3]asi, %i6 ;\
loader [%g5 + %g4]asi, %i7
-#define FILL_DW(asi) \
- mov 0 + SPOFF, %g1 ;\
- ldda [%sp + %g1]asi, %l0 ;\
- mov 16 + SPOFF, %g2 ;\
- ldda [%sp + %g2]asi, %l2 ;\
- add %sp, 32, %g3 ;\
- ldda [%g3 + %g1]asi, %l4 ;\
- ldda [%g3 + %g2]asi, %l6 ;\
- add %g3, 32, %g3 ;\
- ldda [%g3 + %g1]asi, %i0 ;\
- ldda [%g3 + %g2]asi, %i2 ;\
- add %g3, 32, %g3 ;\
- ldda [%g3 + %g1]asi, %i4 ;\
- ldda [%g3 + %g2]asi, %i6
+#define SPILL_ASI_SET(storer, size) /* 16 instructions: 'storer' writes %l0-%l7 then %i0-%i7 to consecutive 'size'-byte slots at %g1 via %asi; %g1 and %asi must be set by the user of this macro */ \
+ storer %l0, [%g1 + (0 * size)]%asi ;\
+ storer %l1, [%g1 + (1 * size)]%asi ;\
+ storer %l2, [%g1 + (2 * size)]%asi ;\
+ storer %l3, [%g1 + (3 * size)]%asi ;\
+ storer %l4, [%g1 + (4 * size)]%asi ;\
+ storer %l5, [%g1 + (5 * size)]%asi ;\
+ storer %l6, [%g1 + (6 * size)]%asi ;\
+ storer %l7, [%g1 + (7 * size)]%asi ;\
+ storer %i0, [%g1 + (8 * size)]%asi ;\
+ storer %i1, [%g1 + (9 * size)]%asi ;\
+ storer %i2, [%g1 + (10 * size)]%asi ;\
+ storer %i3, [%g1 + (11 * size)]%asi ;\
+ storer %i4, [%g1 + (12 * size)]%asi ;\
+ storer %i5, [%g1 + (13 * size)]%asi ;\
+ storer %i6, [%g1 + (14 * size)]%asi ;\
+ storer %i7, [%g1 + (15 * size)]%asi
+
+/* 16 instructions: 'loader' reads %l0-%l7 then %i0-%i7 from consecutive 'size'-byte slots at %g1 via %asi; %g1 and %asi must be set by the user of this macro */
+#define FILL_ASI_SET(loader, size) \
+ loader [%g1 + 0x0]%asi, %l0 ;\
+ loader [%g1 + (size * 1)]%asi, %l1 ;\
+ loader [%g1 + (size * 2)]%asi, %l2 ;\
+ loader [%g1 + (size * 3)]%asi, %l3 ;\
+ loader [%g1 + (size * 4)]%asi, %l4 ;\
+ loader [%g1 + (size * 5)]%asi, %l5 ;\
+ loader [%g1 + (size * 6)]%asi, %l6 ;\
+ loader [%g1 + (size * 7)]%asi, %l7 ;\
+ loader [%g1 + (size * 8)]%asi, %i0 ;\
+ loader [%g1 + (size * 9)]%asi, %i1 ;\
+ loader [%g1 + (size * 10)]%asi, %i2 ;\
+ loader [%g1 + (size * 11)]%asi, %i3 ;\
+ loader [%g1 + (size * 12)]%asi, %i4 ;\
+ loader [%g1 + (size * 13)]%asi, %i5 ;\
+ loader [%g1 + (size * 14)]%asi, %i6 ;\
+ loader [%g1 + (size * 15)]%asi, %i7
+
+/* 9 instructions: one prefetch, then eight ldda at 16-byte strides from [%g1 + 0x0..0x70] via %asi into the even registers %l0,%l2,%l4,%l6,%i0,%i2,%i4,%i6; %g1 and %asi must be set by the user of this macro */
+#define FILL_DW \
+ prefetch [%g1 + 0x40], #one_read /* plain prefetch of the second 64-byte half; unlike the loads below it is not issued through %asi */ ;\
+ ldda [%g1 + 0]%asi, %l0 ;\
+ ldda [%g1 + 0x10]%asi, %l2 ;\
+ ldda [%g1 + 0x20]%asi, %l4 ;\
+ ldda [%g1 + 0x30]%asi, %l6 ;\
+ ldda [%g1 + 0x40]%asi, %i0 ;\
+ ldda [%g1 + 0x50]%asi, %i2 ;\
+ ldda [%g1 + 0x60]%asi, %i4 ;\
+ ldda [%g1 + 0x70]%asi, %i6
#include <sun4v/sun4v/wbuf.S>
/*
@@ -372,8 +426,76 @@
.endm
-#define ALIGN_128 .align 128
+#define ALIGN_128 .align 128
+#define SYNC #Sync
+#define LOOKASIDE #Lookaside
+
+#define USE_FAST_SPILLFILL
+#ifdef USE_FAST_SPILLFILL
+#define spill_64bit_asi(asival, asival_unaligned, target) /* USE_FAST_SPILLFILL variant: only asival is used, asival_unaligned is ignored */ \
+ wr %g0, asival, %asi ; \
+ add %sp, SPOFF, %g1 ; \
+ SPILL_ASI_64 ; \
+ membar LOOKASIDE ; \
+ saved ; \
+ retry ; \
+ .skip (31-21)*4 /* 21 insns above (wr, add, 16 stores, membar, saved, retry); pad so the ba below sits at byte offset 124 (31*4) */ ; \
+ ba,a,pt %xcc, fault_64bit_##target ; \
+ ALIGN_128
+
+#define spill_64clean(asival, asival_unaligned, target) /* USE_FAST_SPILLFILL variant: only asival is used, asival_unaligned is ignored */ \
+ wr %g0, asival, %asi ; \
+ add %sp, SPOFF, %g1 ; \
+ SPILL_ASI_64 ; \
+ membar LOOKASIDE ; \
+ b spill_clean ; \
+ mov WSTATE_USER64, %g7 /* sits in the delay slot of the branch to spill_clean */ ; \
+ .skip (31-21)*4 /* 21 insns above (wr, add, 16 stores, membar, b, mov); pad so the ba below sits at byte offset 124 (31*4) */ ; \
+ ba,a,pt %xcc, fault_64bit_##target ; \
+ ALIGN_128
+
+#define fill_64bit_asi(asival, asival_unaligned, target) /* USE_FAST_SPILLFILL variant: only asival is used, asival_unaligned is ignored */ \
+ add %sp, SPOFF, %g1 ; \
+ wr %g0, asival, %asi ; \
+ FILL_DW ; \
+ restored ; \
+ retry ; \
+ .skip (31-13)*4 /* 13 insns above (add, wr, 9 from FILL_DW, restored, retry); pad so the ba below sits at byte offset 124 (31*4) */ ; \
+ ba,a,pt %xcc, fault_64bit_##target ; \
+ ALIGN_128
+#else
+#define spill_64bit_asi(asival, asival_unaligned, target) /* non-fast (#else) variant: only asival_unaligned is used, asival is ignored */ \
+ wr %g0, asival_unaligned, %asi ; \
+ add %sp, SPOFF, %g1 ; \
+ SPILL_ASI_SET(stxa, 8) ; \
+ saved ; \
+ retry ; \
+ .skip (31-20)*4 /* 20 insns above (wr, add, 16 stores, saved, retry); pad so the ba below sits at byte offset 124 (31*4) */ ; \
+ ba,a,pt %xcc, fault_64bit_##target ; \
+ ALIGN_128
+
+#define spill_64clean(asival, asival_unaligned, target) /* non-fast (#else) variant: only asival_unaligned is used, asival is ignored */ \
+ wr %g0, asival_unaligned, %asi ; \
+ add %sp, SPOFF, %g1 ; \
+ SPILL_ASI_SET(stxa, 8) ; \
+ b spill_clean ; \
+ mov WSTATE_USER64, %g7 /* sits in the delay slot of the branch to spill_clean */ ; \
+ .skip (31-20)*4 /* 20 insns above (wr, add, 16 stores, b, mov); pad so the ba below sits at byte offset 124 (31*4) */ ; \
+ ba,a,pt %xcc, fault_64bit_##target ; \
+ ALIGN_128
+
+#define fill_64bit_asi(asival, asival_unaligned, target) /* non-fast (#else) variant: only asival_unaligned is used, asival is ignored */ \
+ wr %g0, asival_unaligned, %asi ; \
+ add %sp, SPOFF, %g1 ; \
+ FILL_ASI_SET(ldxa, 8) ; \
+ restored ; \
+ retry ; \
+ .skip (31-20)*4 /* 20 insns above (wr, add, 16 loads, restored, retry); pad so the ba below sits at byte offset 124 (31*4) */ ; \
+ ba,a,pt %xcc, fault_64bit_##target ; \
+ ALIGN_128
+#endif
+
#define spill_32bit_asi(asi, target) \
srl %sp, 0, %sp ; \
SPILL_FILL_MAGIC_TRAP_ON; \
@@ -385,16 +507,6 @@
ba,a,pt %xcc, fault_32bit_##target ; \
ALIGN_128
-#define spill_64bit_asi(asi, target) \
- SPILL_FILL_MAGIC_TRAP_ON ; \
- SPILL_ASI(stxa, SPOFF, 8, asi) ; \
- saved ; \
- SPILL_FILL_MAGIC_TRAP_OFF ; \
- retry ; \
- .skip (31-27)*4 ; \
- ba,a,pt %xcc, fault_64bit_##target ; \
- ALIGN_128
-
#define spill_32clean(asi, target) \
srl %sp, 0, %sp ; \
SPILL_FILL_MAGIC_TRAP_ON; \
@@ -405,15 +517,6 @@
ba,a,pt %xcc, fault_32bit_##target ; \
ALIGN_128
-#define spill_64clean(asi, target) \
- SPILL_FILL_MAGIC_TRAP_ON; \
- SPILL_ASI(stxa, SPOFF, 8, asi) ; \
- b spill_clean ; \
- mov WSTATE_USER64, %g7 ; \
- .skip (31-26)*4 ; \
- ba,a,pt %xcc, fault_64bit_##target ; \
- ALIGN_128
-
#define fill_32bit_asi(asi, target) \
srl %sp, 0, %sp ; \
SPILL_FILL_MAGIC_TRAP_ON; \
@@ -424,22 +527,32 @@
ba,a,pt %xcc, fault_32bit_##target ; \
ALIGN_128
-#define fill_64bit_asi(asi, target) \
- SPILL_FILL_MAGIC_TRAP_ON; \
- FILL(ldxa, SPOFF, 8, asi) ; \
- restored ; \
- SPILL_FILL_MAGIC_TRAP_OFF ; \
- retry ; \
- .skip (31-27)*4 ; \
- ba,a,pt %xcc, fault_64bit_##target ; \
- .align 128
+.align 128 /* two out-of-line 64-bit fill handlers follow, each padded to 128 bytes */
+ENTRY(fill_64bit_slow_fn0)
+fill_slow_start: /* lower bound of the range the fault fix-up code in this change compares %g7 against */
+ FILL_ASI_SET(ldxa, 8); /* %g1 and %asi are not set here; they must already be established on entry */
+ restored ;
+ retry ;
+ .skip (31-18)*4 ; /* 18 insns above (16 loads, restored, retry); pad so the ba below sits at byte offset 124 (31*4) */
+ ba,a,pt %xcc, fault_64bit_fn0 ;
+ .align 128
+END(fill_64bit_slow_fn0)
+ENTRY(fill_64bit_slow_not)
+ FILL_ASI_SET(ldxa, 8); /* %g1 and %asi are not set here; they must already be established on entry */
+ restored ;
+ retry ;
+ .skip (31-18)*4 ; /* 18 insns above (16 loads, restored, retry); pad so the ba below sits at byte offset 124 (31*4) */
+ ba,a,pt %xcc, fault_64bit_not ;
+ .align 128
+END(fill_64bit_slow_not)
+fill_slow_end: /* upper bound of the range the fault fix-up code in this change compares %g7 against */
.macro spill_32bit_primary_sn0
spill_32bit_asi(ASI_AIUP, sn0)
.endm
.macro spill_64bit_primary_sn0
- spill_64bit_asi(ASI_AIUP, sn0)
+ spill_64bit_asi(ASI_LDSTBI_AIUP, ASI_AIUP, sn0)
.endm
.macro spill_32clean_primary_sn0
@@ -447,7 +560,7 @@
.endm
.macro spill_64clean_primary_sn0
- spill_64clean(ASI_AIUP, sn0)
+ spill_64clean(ASI_LDSTBI_AIUP, ASI_AIUP, sn0)
.endm
.macro spill_32bit_nucleus_not
@@ -455,7 +568,7 @@
.endm
.macro spill_64bit_nucleus_not
- spill_64bit_asi(ASI_N,not)
+ spill_64bit_asi(ASI_LDSTBI_N, ASI_N, not)
.endm
.macro spill_32bit_secondary_so0
@@ -463,7 +576,7 @@
.endm
.macro spill_64bit_secondary_so0
- spill_64bit_asi(ASI_AIUS, so0)
+ spill_64bit_asi(ASI_LDSTBI_AIUS, ASI_AIUS, so0)
.endm
.macro fill_32bit_primary_fn0
@@ -471,7 +584,7 @@
.endm
.macro fill_64bit_primary_fn0
- fill_64bit_asi(ASI_AIUP, fn0)
+ fill_64bit_asi(ASI_LDSTBI_AIUP, ASI_AIUP, fn0)
.endm
.macro fill_32bit_nucleus_not
@@ -479,7 +592,7 @@
.endm
.macro fill_64bit_nucleus_not
- fill_64bit_asi(ASI_N, not)
+ fill_64bit_asi(ASI_LDSTBI_N, ASI_N, not)
.endm
.macro spill_32bit_tt1_primary_sn1
@@ -748,12 +861,19 @@
tl0_reserved 4 ! 0xbc
tl0_fill_n_normal:
tl0_reserved 4 ! 0xc0
+tl0_fill_1_normal:
fill_32bit_primary_fn0 ! 0xc4
+tl0_fill_2_normal:
fill_64bit_primary_fn0 ! 0xc8
+tl0_fill_3_normal:
fill_32bit_primary_fn0 ! 0xcc
+tl0_fill_4_normal:
fill_64bit_primary_fn0 ! 0xd0
+tl0_fill_5_normal:
fill_32bit_nucleus_not ! 0xd4
+tl0_fill_6_normal:
fill_64bit_nucleus_not ! 0xd8
+tl0_fill_7_normal:
fill_mixed ! 0xdc
tl0_fill_n_other_e0:
tl0_reserved 32 ! 0xe0-0xff
@@ -1074,6 +1194,7 @@
ba,a 3f
#endif
4:
+ membar #Lookaside
fill_64bit_rtt(ASI_AIUP)
.global rtt_fill_end
rtt_fill_end:
@@ -1343,6 +1464,8 @@
#define LOADSTORE #LoadStore
#define STORESTORE #StoreStore
+
+#define WORKING
#ifdef WORKING
#define ENTER LOADLOAD
#define EXIT LOADSTORE|STORESTORE
@@ -1741,8 +1864,17 @@
bgeu,pn %xcc, 1f
nop
set fault_rtt_fn1, %g7
- ba,a 2f
+ ba,a 4f
1:
+ set fill_slow_start, %g6
+ cmp %g7, %g6
+ bleu,a,pn %xcc, 2f
+ nop
+ set fill_slow_end, %g6
+ cmp %g7, %g6
+ blu,a,pn %xcc, 3f
+ nop
+2:
set tl1_end, %g6
cmp %g7, %g6
bgeu,a,pn %xcc, ptl1_panic
@@ -1756,9 +1888,10 @@
cmp %g6, WTRAP_TYPE
bne,a,pn %xcc, ptl1_panic
mov PTL1_BAD_TRAP, %g1
+3:
andn %g7, WTRAP_ALIGN, %g7
add %g7, WTRAP_FAULTOFF, %g7
-2:
+4:
wrpr %g0, %g7, %tnpc
wrpr %g0, 1, %gl
rdpr %tt, %g5
@@ -1767,13 +1900,13 @@
ldxa [%g7 + MMFSA_D_ADDR]%asi, %g6
ldxa [%g7 + MMFSA_D_CTX]%asi, %g7
cmp %g5, T_ALIGNMENT
- be,pn %xcc, 3f
+ be,pn %xcc, 5f
nop
srlx %g6, PAGE_SHIFT, %g6
sllx %g6, PAGE_SHIFT, %g6 ! mask off bottom
or %g6, %g7, %g6
done
-3:
+5:
sllx %g7, TRAP_CTX_SHIFT, %g7
or %g7, %g5, %g5
done
More information about the p4-projects
mailing list