svn commit: r341272 - in head: lib/libc/amd64/string sys/amd64/amd64
Mateusz Guzik
mjg at FreeBSD.org
Fri Nov 30 00:45:11 UTC 2018
Author: mjg
Date: Fri Nov 30 00:45:10 2018
New Revision: 341272
URL: https://svnweb.freebsd.org/changeset/base/341272
Log:
amd64: tidy up copying backwards in memmove
For the non-ERMS case the code used to handle possible trailing bytes with
movsb first and then followed it up with movsq. This also happened
to alter how calculations were done for the other cases.
Handle the tail with regular movs, just like when copying forward.
Use leaq to calculate the right offset from the get-go, instead of
doing a separate add and sub.
This adjusts the offset for non-rep cases so that they can be used
to handle the tail.
The routine is still a work in progress.
Sponsored by: The FreeBSD Foundation
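To illustrate the offset computation mentioned in the log, here is a minimal
sketch of the new backward-copy setup, assuming %rdi holds the destination,
%rsi the source and %rcx the length; the two leaq and the quadword move mirror
the diff below, while the standalone framing and the label name are purely
hypothetical.

	.text
	.globl	backward_tail_sketch		/* hypothetical label, illustration only */
backward_tail_sketch:
	/* old: addq %rcx,%rdi / addq %rcx,%rsi, backed up again later with a sub */
	leaq	-8(%rdi,%rcx),%rdi		/* rdi = dst + len - 8 in one step */
	leaq	-8(%rsi,%rcx),%rsi		/* rsi = src + len - 8 */
	movq	(%rsi),%rdx			/* last full quadword of the buffer ... */
	movq	%rdx,(%rdi)			/* ... copied with a regular mov */
	ret

Biasing the pointers by -8 is what lets the small-size cases further down
address the tail with plain movs, just as the forward path does.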
Modified:
head/lib/libc/amd64/string/memmove.S
head/sys/amd64/amd64/support.S
Modified: head/lib/libc/amd64/string/memmove.S
==============================================================================
--- head/lib/libc/amd64/string/memmove.S Fri Nov 30 00:00:51 2018 (r341271)
+++ head/lib/libc/amd64/string/memmove.S Fri Nov 30 00:45:10 2018 (r341272)
@@ -150,24 +150,24 @@ __FBSDID("$FreeBSD$");
*/
ALIGN_TEXT
2:
- addq %rcx,%rdi
- addq %rcx,%rsi
+ cmpq $256,%rcx
+ ja 2256f
+ leaq -8(%rdi,%rcx),%rdi
+ leaq -8(%rsi,%rcx),%rsi
+
cmpq $32,%rcx
jb 2016f
- cmpq $256,%rcx
- ja 2256f
-
2032:
+ movq (%rsi),%rdx
+ movq %rdx,(%rdi)
movq -8(%rsi),%rdx
movq %rdx,-8(%rdi)
movq -16(%rsi),%rdx
movq %rdx,-16(%rdi)
movq -24(%rsi),%rdx
movq %rdx,-24(%rdi)
- movq -32(%rsi),%rdx
- movq %rdx,-32(%rdi)
leaq -32(%rsi),%rsi
leaq -32(%rdi),%rdi
subq $32,%rcx
@@ -181,10 +181,10 @@ __FBSDID("$FreeBSD$");
2016:
cmpb $16,%cl
jl 2008f
+ movq (%rsi),%rdx
+ movq %rdx,(%rdi)
movq -8(%rsi),%rdx
movq %rdx,-8(%rdi)
- movq -16(%rsi),%rdx
- movq %rdx,-16(%rdi)
subb $16,%cl
jz 2000f
leaq -16(%rsi),%rsi
@@ -192,8 +192,8 @@ __FBSDID("$FreeBSD$");
2008:
cmpb $8,%cl
jl 2004f
- movq -8(%rsi),%rdx
- movq %rdx,-8(%rdi)
+ movq (%rsi),%rdx
+ movq %rdx,(%rdi)
subb $8,%cl
jz 2000f
leaq -8(%rsi),%rsi
@@ -201,8 +201,8 @@ __FBSDID("$FreeBSD$");
2004:
cmpb $4,%cl
jl 2002f
- movl -4(%rsi),%edx
- movl %edx,-4(%rdi)
+ movl 4(%rsi),%edx
+ movl %edx,4(%rdi)
subb $4,%cl
jz 2000f
leaq -4(%rsi),%rsi
@@ -210,8 +210,8 @@ __FBSDID("$FreeBSD$");
2002:
cmpb $2,%cl
jl 2001f
- movw -2(%rsi),%dx
- movw %dx,-2(%rdi)
+ movw 6(%rsi),%dx
+ movw %dx,6(%rdi)
subb $2,%cl
jz 2000f
leaq -2(%rsi),%rsi
@@ -219,33 +219,31 @@ __FBSDID("$FreeBSD$");
2001:
cmpb $1,%cl
jl 2000f
- movb -1(%rsi),%dl
- movb %dl,-1(%rdi)
+ movb 7(%rsi),%dl
+ movb %dl,7(%rdi)
2000:
\end
ret
ALIGN_TEXT
2256:
- decq %rdi
- decq %rsi
std
.if \erms == 1
+ leaq -1(%rdi,%rcx),%rdi
+ leaq -1(%rsi,%rcx),%rsi
rep
movsb
+ cld
.else
- andq $7,%rcx /* any fractional bytes? */
- je 3f
- rep
- movsb
-3:
- movq %rdx,%rcx /* copy remainder by 32-bit words */
+ leaq -8(%rdi,%rcx),%rdi
+ leaq -8(%rsi,%rcx),%rsi
shrq $3,%rcx
- subq $7,%rsi
- subq $7,%rdi
rep
movsq
-.endif
cld
+ movq %rdx,%rcx
+ andb $7,%cl
+ jne 2004b
+.endif
\end
ret
.endif
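As a reading aid, the non-ERMS path for copies larger than 256 bytes from the
hunk above, with editorial comments added; the instructions and the 2004 label
are taken verbatim from the diff, the comments are not. After rep movsq with
the direction flag set, %rsi and %rdi end up exactly 8 bytes below the end of
the 0-7 leftover bytes, which is the state the 2004/2002/2001 cases expect, so
the rep path can jump straight back into the regular tail.

	std				/* copy downwards */
	leaq	-8(%rdi,%rcx),%rdi	/* point at the last quadword of dst */
	leaq	-8(%rsi,%rcx),%rsi	/* ... and of src */
	shrq	$3,%rcx			/* number of whole quadwords */
	rep
	movsq				/* bulk copy, high to low */
	cld				/* restore the direction flag */
	movq	%rdx,%rcx		/* %rdx still holds the original length */
	andb	$7,%cl			/* 0-7 trailing bytes remain */
	jne	2004b			/* reuse the regular backward tail */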
Modified: head/sys/amd64/amd64/support.S
==============================================================================
--- head/sys/amd64/amd64/support.S Fri Nov 30 00:00:51 2018 (r341271)
+++ head/sys/amd64/amd64/support.S Fri Nov 30 00:45:10 2018 (r341272)
@@ -313,24 +313,24 @@ END(memcmp)
*/
ALIGN_TEXT
2:
- addq %rcx,%rdi
- addq %rcx,%rsi
+ cmpq $256,%rcx
+ ja 2256f
+ leaq -8(%rdi,%rcx),%rdi
+ leaq -8(%rsi,%rcx),%rsi
+
cmpq $32,%rcx
jb 2016f
- cmpq $256,%rcx
- ja 2256f
-
2032:
+ movq (%rsi),%rdx
+ movq %rdx,(%rdi)
movq -8(%rsi),%rdx
movq %rdx,-8(%rdi)
movq -16(%rsi),%rdx
movq %rdx,-16(%rdi)
movq -24(%rsi),%rdx
movq %rdx,-24(%rdi)
- movq -32(%rsi),%rdx
- movq %rdx,-32(%rdi)
leaq -32(%rsi),%rsi
leaq -32(%rdi),%rdi
subq $32,%rcx
@@ -344,10 +344,10 @@ END(memcmp)
2016:
cmpb $16,%cl
jl 2008f
+ movq (%rsi),%rdx
+ movq %rdx,(%rdi)
movq -8(%rsi),%rdx
movq %rdx,-8(%rdi)
- movq -16(%rsi),%rdx
- movq %rdx,-16(%rdi)
subb $16,%cl
jz 2000f
leaq -16(%rsi),%rsi
@@ -355,8 +355,8 @@ END(memcmp)
2008:
cmpb $8,%cl
jl 2004f
- movq -8(%rsi),%rdx
- movq %rdx,-8(%rdi)
+ movq (%rsi),%rdx
+ movq %rdx,(%rdi)
subb $8,%cl
jz 2000f
leaq -8(%rsi),%rsi
@@ -364,8 +364,8 @@ END(memcmp)
2004:
cmpb $4,%cl
jl 2002f
- movl -4(%rsi),%edx
- movl %edx,-4(%rdi)
+ movl 4(%rsi),%edx
+ movl %edx,4(%rdi)
subb $4,%cl
jz 2000f
leaq -4(%rsi),%rsi
@@ -373,8 +373,8 @@ END(memcmp)
2002:
cmpb $2,%cl
jl 2001f
- movw -2(%rsi),%dx
- movw %dx,-2(%rdi)
+ movw 6(%rsi),%dx
+ movw %dx,6(%rdi)
subb $2,%cl
jz 2000f
leaq -2(%rsi),%rsi
@@ -382,33 +382,31 @@ END(memcmp)
2001:
cmpb $1,%cl
jl 2000f
- movb -1(%rsi),%dl
- movb %dl,-1(%rdi)
+ movb 7(%rsi),%dl
+ movb %dl,7(%rdi)
2000:
\end
ret
ALIGN_TEXT
2256:
- decq %rdi
- decq %rsi
std
.if \erms == 1
+ leaq -1(%rdi,%rcx),%rdi
+ leaq -1(%rsi,%rcx),%rsi
rep
movsb
+ cld
.else
- andq $7,%rcx /* any fractional bytes? */
- je 3f
- rep
- movsb
-3:
- movq %rdx,%rcx /* copy remainder by 32-bit words */
+ leaq -8(%rdi,%rcx),%rdi
+ leaq -8(%rsi,%rcx),%rsi
shrq $3,%rcx
- subq $7,%rsi
- subq $7,%rdi
rep
movsq
-.endif
cld
+ movq %rdx,%rcx
+ andb $7,%cl
+ jne 2004b
+.endif
\end
ret
.endif