svn commit: r342240 - in stable/12: lib/libc/amd64/string sys/amd64/amd64
Mateusz Guzik
mjg at FreeBSD.org
Wed Dec 19 21:25:44 UTC 2018
Author: mjg
Date: Wed Dec 19 21:25:43 2018
New Revision: 342240
URL: https://svnweb.freebsd.org/changeset/base/342240
Log:
MFC r341272,r341273,r341351
amd64: tidy up copying backwards in memmove
amd64: remove stale attribution for memmove work
amd64: handle small memmove buffers with overlapping stores
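
The last of the three changes is easiest to see in C: a buffer of 8..16 bytes
can be copied with two possibly overlapping 8-byte accesses, one anchored at the
start and one at the end, with both loads issued before either store. The new
100816 branch in the diff below does exactly this with %rdx/%r8 and the
-8(%rsi,%rcx) addressing mode. A minimal sketch of the idea, with illustrative
helper names (load64, store64, copy_8_to_16) that are not from the tree:

    #include <stdint.h>
    #include <string.h>

    /* Unaligned 64-bit accesses; memcpy is the portable idiom and
     * optimizing compilers reduce it to a single move on amd64. */
    static inline uint64_t
    load64(const void *p)
    {
        uint64_t v;

        memcpy(&v, p, 8);
        return (v);
    }

    static inline void
    store64(void *p, uint64_t v)
    {

        memcpy(p, &v, 8);
    }

    /*
     * Copy 8 <= len <= 16 bytes: one qword covering the head, one
     * covering the tail.  The two stores may overlap in the middle,
     * and since both loads happen before either store, overlapping
     * src/dst regions are handled for free as well.
     */
    static void
    copy_8_to_16(unsigned char *dst, const unsigned char *src, size_t len)
    {
        uint64_t head, tail;

        head = load64(src);
        tail = load64(src + len - 8);
        store64(dst, head);
        store64(dst + len - 8, tail);
    }

The 16..32 case works the same way with four qwords, and the 4..8, 2..4 and
1..2 cases with correspondingly narrower accesses, which is why each rewritten
size branch below now ends in its own ret instead of falling through.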
Modified:
stable/12/lib/libc/amd64/string/memmove.S
stable/12/sys/amd64/amd64/support.S
Directory Properties:
stable/12/ (props changed)
Modified: stable/12/lib/libc/amd64/string/memmove.S
==============================================================================
--- stable/12/lib/libc/amd64/string/memmove.S Wed Dec 19 21:04:06 2018 (r342239)
+++ stable/12/lib/libc/amd64/string/memmove.S Wed Dec 19 21:25:43 2018 (r342240)
@@ -34,8 +34,6 @@ __FBSDID("$FreeBSD$");
/*
* memmove(dst, src, cnt)
* rdi, rsi, rdx
- * Contains parts of bcopy written by:
- * ws at tools.de (Wolfgang Solfrank, TooLs GmbH) +49-228-985800
*/
/*
@@ -44,11 +42,19 @@ __FBSDID("$FreeBSD$");
* rsi - source
* rdx - count
*
- * The macro possibly clobbers the above and: rcx, r8.
- * It does not clobber rax, r10 nor r11.
+ * The macro possibly clobbers the above and: rcx, r8, r9, r10
+ * It does not clobber rax nor r11.
*/
.macro MEMMOVE erms overlap begin end
\begin
+
+ /*
+ * For sizes 0..32 all data is read before it is written, so there
+ * is no correctness issue with direction of copying.
+ */
+ cmpq $32,%rcx
+ jbe 101632f
+
.if \overlap == 1
movq %rdi,%r8
subq %rsi,%r8
@@ -56,13 +62,10 @@ __FBSDID("$FreeBSD$");
jb 2f
.endif
- cmpq $32,%rcx
- jb 1016f
-
cmpq $256,%rcx
ja 1256f
-1032:
+103200:
movq (%rsi),%rdx
movq %rdx,(%rdi)
movq 8(%rsi),%rdx
@@ -75,56 +78,62 @@ __FBSDID("$FreeBSD$");
leaq 32(%rdi),%rdi
subq $32,%rcx
cmpq $32,%rcx
- jae 1032b
+ jae 103200b
cmpb $0,%cl
- jne 1016f
+ jne 101632f
\end
ret
ALIGN_TEXT
-1016:
+101632:
cmpb $16,%cl
- jl 1008f
+ jl 100816f
movq (%rsi),%rdx
+ movq 8(%rsi),%r8
+ movq -16(%rsi,%rcx),%r9
+ movq -8(%rsi,%rcx),%r10
movq %rdx,(%rdi)
- movq 8(%rsi),%rdx
- movq %rdx,8(%rdi)
- subb $16,%cl
- jz 1000f
- leaq 16(%rsi),%rsi
- leaq 16(%rdi),%rdi
-1008:
+ movq %r8,8(%rdi)
+ movq %r9,-16(%rdi,%rcx)
+ movq %r10,-8(%rdi,%rcx)
+ \end
+ ret
+ ALIGN_TEXT
+100816:
cmpb $8,%cl
- jl 1004f
+ jl 100408f
movq (%rsi),%rdx
+ movq -8(%rsi,%rcx),%r8
movq %rdx,(%rdi)
- subb $8,%cl
- jz 1000f
- leaq 8(%rsi),%rsi
- leaq 8(%rdi),%rdi
-1004:
+ movq %r8,-8(%rdi,%rcx)
+ \end
+ ret
+ ALIGN_TEXT
+100408:
cmpb $4,%cl
- jl 1002f
+ jl 100204f
movl (%rsi),%edx
+ movl -4(%rsi,%rcx),%r8d
movl %edx,(%rdi)
- subb $4,%cl
- jz 1000f
- leaq 4(%rsi),%rsi
- leaq 4(%rdi),%rdi
-1002:
+ movl %r8d,-4(%rdi,%rcx)
+ \end
+ ret
+ ALIGN_TEXT
+100204:
cmpb $2,%cl
- jl 1001f
- movw (%rsi),%dx
+ jl 100001f
+ movzwl (%rsi),%edx
+ movzwl -2(%rsi,%rcx),%r8d
movw %dx,(%rdi)
- subb $2,%cl
- jz 1000f
- leaq 2(%rsi),%rsi
- leaq 2(%rdi),%rdi
-1001:
+ movw %r8w,-2(%rdi,%rcx)
+ \end
+ ret
+ ALIGN_TEXT
+100001:
cmpb $1,%cl
- jl 1000f
+ jl 100000f
movb (%rsi),%dl
movb %dl,(%rdi)
-1000:
+100000:
\end
ret
@@ -140,8 +149,8 @@ __FBSDID("$FreeBSD$");
rep
movsq
movq %rdx,%rcx
- andb $7,%cl /* any bytes left? */
- jne 1004b
+ andl $7,%ecx /* any bytes left? */
+ jne 100408b
.endif
\end
ret
@@ -180,24 +189,24 @@ __FBSDID("$FreeBSD$");
*/
ALIGN_TEXT
2:
- addq %rcx,%rdi
- addq %rcx,%rsi
+ cmpq $256,%rcx
+ ja 2256f
+ leaq -8(%rdi,%rcx),%rdi
+ leaq -8(%rsi,%rcx),%rsi
+
cmpq $32,%rcx
jb 2016f
- cmpq $256,%rcx
- ja 2256f
-
2032:
+ movq (%rsi),%rdx
+ movq %rdx,(%rdi)
movq -8(%rsi),%rdx
movq %rdx,-8(%rdi)
movq -16(%rsi),%rdx
movq %rdx,-16(%rdi)
movq -24(%rsi),%rdx
movq %rdx,-24(%rdi)
- movq -32(%rsi),%rdx
- movq %rdx,-32(%rdi)
leaq -32(%rsi),%rsi
leaq -32(%rdi),%rdi
subq $32,%rcx
@@ -211,10 +220,10 @@ __FBSDID("$FreeBSD$");
2016:
cmpb $16,%cl
jl 2008f
+ movq (%rsi),%rdx
+ movq %rdx,(%rdi)
movq -8(%rsi),%rdx
movq %rdx,-8(%rdi)
- movq -16(%rsi),%rdx
- movq %rdx,-16(%rdi)
subb $16,%cl
jz 2000f
leaq -16(%rsi),%rsi
@@ -222,8 +231,8 @@ __FBSDID("$FreeBSD$");
2008:
cmpb $8,%cl
jl 2004f
- movq -8(%rsi),%rdx
- movq %rdx,-8(%rdi)
+ movq (%rsi),%rdx
+ movq %rdx,(%rdi)
subb $8,%cl
jz 2000f
leaq -8(%rsi),%rsi
@@ -231,8 +240,8 @@ __FBSDID("$FreeBSD$");
2004:
cmpb $4,%cl
jl 2002f
- movl -4(%rsi),%edx
- movl %edx,-4(%rdi)
+ movl 4(%rsi),%edx
+ movl %edx,4(%rdi)
subb $4,%cl
jz 2000f
leaq -4(%rsi),%rsi
@@ -240,8 +249,8 @@ __FBSDID("$FreeBSD$");
2002:
cmpb $2,%cl
jl 2001f
- movw -2(%rsi),%dx
- movw %dx,-2(%rdi)
+ movw 6(%rsi),%dx
+ movw %dx,6(%rdi)
subb $2,%cl
jz 2000f
leaq -2(%rsi),%rsi
@@ -249,37 +258,36 @@ __FBSDID("$FreeBSD$");
2001:
cmpb $1,%cl
jl 2000f
- movb -1(%rsi),%dl
- movb %dl,-1(%rdi)
+ movb 7(%rsi),%dl
+ movb %dl,7(%rdi)
2000:
\end
ret
ALIGN_TEXT
2256:
- decq %rdi
- decq %rsi
std
.if \erms == 1
+ leaq -1(%rdi,%rcx),%rdi
+ leaq -1(%rsi,%rcx),%rsi
rep
movsb
+ cld
.else
- andq $7,%rcx /* any fractional bytes? */
- je 3f
- rep
- movsb
-3:
- movq %rdx,%rcx /* copy remainder by 32-bit words */
+ leaq -8(%rdi,%rcx),%rdi
+ leaq -8(%rsi,%rcx),%rsi
shrq $3,%rcx
- subq $7,%rsi
- subq $7,%rdi
rep
movsq
-.endif
cld
+ movq %rdx,%rcx
+ andb $7,%cl
+ jne 2004b
+.endif
\end
ret
.endif
.endm
+
.macro MEMMOVE_BEGIN
movq %rdi,%rax
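
The same macro change lands in the kernel copy below. For reference, the
dispatch the macro now implements looks roughly like the following C (function
names are hypothetical): sizes up to 32 bytes read everything before writing
anything, so they need no direction check at all, and larger sizes only have to
copy backwards when the destination starts inside the source, i.e. when
(dst - src) < len as an unsigned comparison.

    #include <stdint.h>
    #include <string.h>

    /* len <= 32: stage the data first, then store it.  The assembly
     * keeps it in registers rather than a stack buffer, but the
     * property that matters is the same: every load precedes every
     * store, so copy direction is irrelevant. */
    static void
    copy_small(unsigned char *d, const unsigned char *s, size_t len)
    {
        unsigned char tmp[32];

        memcpy(tmp, s, len);
        memcpy(d, tmp, len);
    }

    static void
    copy_forwards(unsigned char *d, const unsigned char *s, size_t len)
    {
        for (size_t i = 0; i < len; i++)
            d[i] = s[i];
    }

    static void
    copy_backwards(unsigned char *d, const unsigned char *s, size_t len)
    {
        while (len-- > 0)
            d[len] = s[len];
    }

    void *
    memmove_sketch(void *dst, const void *src, size_t len)
    {
        unsigned char *d = dst;
        const unsigned char *s = src;

        if (len <= 32)
            copy_small(d, s, len);
        else if ((uintptr_t)d - (uintptr_t)s < (uintptr_t)len)
            copy_backwards(d, s, len);    /* dst lands inside src */
        else
            copy_forwards(d, s, len);
        return (dst);
    }

The overlap == 0 variant (plain memcpy) skips the subtraction and compare
entirely, which is why that check sits under .if \overlap == 1 while the new
size-32 check sits above it.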
Modified: stable/12/sys/amd64/amd64/support.S
==============================================================================
--- stable/12/sys/amd64/amd64/support.S Wed Dec 19 21:04:06 2018 (r342239)
+++ stable/12/sys/amd64/amd64/support.S Wed Dec 19 21:25:43 2018 (r342240)
@@ -197,8 +197,6 @@ END(memcmp)
/*
* memmove(dst, src, cnt)
* rdi, rsi, rdx
- * Adapted from bcopy written by:
- * ws at tools.de (Wolfgang Solfrank, TooLs GmbH) +49-228-985800
*/
/*
@@ -207,11 +205,19 @@ END(memcmp)
* rsi - source
* rdx - count
*
- * The macro possibly clobbers the above and: rcx, r8.
- * It does not clobber rax, r10 nor r11.
+ * The macro possibly clobbers the above and: rcx, r8, r9, r10
+ * It does not clobber rax nor r11.
*/
.macro MEMMOVE erms overlap begin end
\begin
+
+ /*
+ * For sizes 0..32 all data is read before it is written, so there
+ * is no correctness issue with direction of copying.
+ */
+ cmpq $32,%rcx
+ jbe 101632f
+
.if \overlap == 1
movq %rdi,%r8
subq %rsi,%r8
@@ -219,13 +225,10 @@ END(memcmp)
jb 2f
.endif
- cmpq $32,%rcx
- jb 1016f
-
cmpq $256,%rcx
ja 1256f
-1032:
+103200:
movq (%rsi),%rdx
movq %rdx,(%rdi)
movq 8(%rsi),%rdx
@@ -238,56 +241,62 @@ END(memcmp)
leaq 32(%rdi),%rdi
subq $32,%rcx
cmpq $32,%rcx
- jae 1032b
+ jae 103200b
cmpb $0,%cl
- jne 1016f
+ jne 101632f
\end
ret
ALIGN_TEXT
-1016:
+101632:
cmpb $16,%cl
- jl 1008f
+ jl 100816f
movq (%rsi),%rdx
+ movq 8(%rsi),%r8
+ movq -16(%rsi,%rcx),%r9
+ movq -8(%rsi,%rcx),%r10
movq %rdx,(%rdi)
- movq 8(%rsi),%rdx
- movq %rdx,8(%rdi)
- subb $16,%cl
- jz 1000f
- leaq 16(%rsi),%rsi
- leaq 16(%rdi),%rdi
-1008:
+ movq %r8,8(%rdi)
+ movq %r9,-16(%rdi,%rcx)
+ movq %r10,-8(%rdi,%rcx)
+ \end
+ ret
+ ALIGN_TEXT
+100816:
cmpb $8,%cl
- jl 1004f
+ jl 100408f
movq (%rsi),%rdx
+ movq -8(%rsi,%rcx),%r8
movq %rdx,(%rdi)
- subb $8,%cl
- jz 1000f
- leaq 8(%rsi),%rsi
- leaq 8(%rdi),%rdi
-1004:
+ movq %r8,-8(%rdi,%rcx)
+ \end
+ ret
+ ALIGN_TEXT
+100408:
cmpb $4,%cl
- jl 1002f
+ jl 100204f
movl (%rsi),%edx
+ movl -4(%rsi,%rcx),%r8d
movl %edx,(%rdi)
- subb $4,%cl
- jz 1000f
- leaq 4(%rsi),%rsi
- leaq 4(%rdi),%rdi
-1002:
+ movl %r8d,-4(%rdi,%rcx)
+ \end
+ ret
+ ALIGN_TEXT
+100204:
cmpb $2,%cl
- jl 1001f
- movw (%rsi),%dx
+ jl 100001f
+ movzwl (%rsi),%edx
+ movzwl -2(%rsi,%rcx),%r8d
movw %dx,(%rdi)
- subb $2,%cl
- jz 1000f
- leaq 2(%rsi),%rsi
- leaq 2(%rdi),%rdi
-1001:
+ movw %r8w,-2(%rdi,%rcx)
+ \end
+ ret
+ ALIGN_TEXT
+100001:
cmpb $1,%cl
- jl 1000f
+ jl 100000f
movb (%rsi),%dl
movb %dl,(%rdi)
-1000:
+100000:
\end
ret
@@ -303,8 +312,8 @@ END(memcmp)
rep
movsq
movq %rdx,%rcx
- andb $7,%cl /* any bytes left? */
- jne 1004b
+ andl $7,%ecx /* any bytes left? */
+ jne 100408b
.endif
\end
ret
@@ -343,24 +352,24 @@ END(memcmp)
*/
ALIGN_TEXT
2:
- addq %rcx,%rdi
- addq %rcx,%rsi
+ cmpq $256,%rcx
+ ja 2256f
+ leaq -8(%rdi,%rcx),%rdi
+ leaq -8(%rsi,%rcx),%rsi
+
cmpq $32,%rcx
jb 2016f
- cmpq $256,%rcx
- ja 2256f
-
2032:
+ movq (%rsi),%rdx
+ movq %rdx,(%rdi)
movq -8(%rsi),%rdx
movq %rdx,-8(%rdi)
movq -16(%rsi),%rdx
movq %rdx,-16(%rdi)
movq -24(%rsi),%rdx
movq %rdx,-24(%rdi)
- movq -32(%rsi),%rdx
- movq %rdx,-32(%rdi)
leaq -32(%rsi),%rsi
leaq -32(%rdi),%rdi
subq $32,%rcx
@@ -374,10 +383,10 @@ END(memcmp)
2016:
cmpb $16,%cl
jl 2008f
+ movq (%rsi),%rdx
+ movq %rdx,(%rdi)
movq -8(%rsi),%rdx
movq %rdx,-8(%rdi)
- movq -16(%rsi),%rdx
- movq %rdx,-16(%rdi)
subb $16,%cl
jz 2000f
leaq -16(%rsi),%rsi
@@ -385,8 +394,8 @@ END(memcmp)
2008:
cmpb $8,%cl
jl 2004f
- movq -8(%rsi),%rdx
- movq %rdx,-8(%rdi)
+ movq (%rsi),%rdx
+ movq %rdx,(%rdi)
subb $8,%cl
jz 2000f
leaq -8(%rsi),%rsi
@@ -394,8 +403,8 @@ END(memcmp)
2004:
cmpb $4,%cl
jl 2002f
- movl -4(%rsi),%edx
- movl %edx,-4(%rdi)
+ movl 4(%rsi),%edx
+ movl %edx,4(%rdi)
subb $4,%cl
jz 2000f
leaq -4(%rsi),%rsi
@@ -403,8 +412,8 @@ END(memcmp)
2002:
cmpb $2,%cl
jl 2001f
- movw -2(%rsi),%dx
- movw %dx,-2(%rdi)
+ movw 6(%rsi),%dx
+ movw %dx,6(%rdi)
subb $2,%cl
jz 2000f
leaq -2(%rsi),%rsi
@@ -412,33 +421,31 @@ END(memcmp)
2001:
cmpb $1,%cl
jl 2000f
- movb -1(%rsi),%dl
- movb %dl,-1(%rdi)
+ movb 7(%rsi),%dl
+ movb %dl,7(%rdi)
2000:
\end
ret
ALIGN_TEXT
2256:
- decq %rdi
- decq %rsi
std
.if \erms == 1
+ leaq -1(%rdi,%rcx),%rdi
+ leaq -1(%rsi,%rcx),%rsi
rep
movsb
+ cld
.else
- andq $7,%rcx /* any fractional bytes? */
- je 3f
- rep
- movsb
-3:
- movq %rdx,%rcx /* copy remainder by 32-bit words */
+ leaq -8(%rdi,%rcx),%rdi
+ leaq -8(%rsi,%rcx),%rsi
shrq $3,%rcx
- subq $7,%rsi
- subq $7,%rdi
rep
movsq
-.endif
cld
+ movq %rdx,%rcx
+ andb $7,%cl
+ jne 2004b
+.endif
\end
ret
.endif
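
Finally, the reworked backwards path (labels 2032 and 2256 in both copies of
the macro) amounts to moving whole qwords from the end toward the start and
then finishing the leftover len & 7 bytes, which the assembly does either with
a 32-byte unrolled loop or, above 256 bytes, with std; rep movsq; cld before
branching back into the small-size tail. A simplified, non-unrolled C rendering
of that structure (again with illustrative helper names):

    #include <stdint.h>
    #include <string.h>

    static inline uint64_t
    load64(const void *p)
    {
        uint64_t v;

        memcpy(&v, p, 8);
        return (v);
    }

    static inline void
    store64(void *p, uint64_t v)
    {

        memcpy(p, &v, 8);
    }

    /*
     * Backwards copy for len > 32 when dst overlaps the tail of src.
     * Whole qwords are moved from the highest offsets down; each chunk
     * is loaded before it is stored, and every byte written lies at a
     * source offset that has already been read, so no source data is
     * clobbered early.  The remaining 0..7 head bytes go last, also
     * back to front.
     */
    static void
    copy_backwards_qwords(unsigned char *d, const unsigned char *s, size_t len)
    {
        while (len >= 8) {
            len -= 8;
            store64(d + len, load64(s + len));
        }
        while (len-- > 0)
            d[len] = s[len];
    }

In the erms variant rep movsb moves every byte itself, so only the non-erms
path needs the andb $7,%cl / jne 2004b remainder handling seen above.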