svn commit: r340252 - head/sys/amd64/amd64
Mateusz Guzik
mjg at FreeBSD.org
Thu Nov 8 15:12:38 UTC 2018
Author: mjg
Date: Thu Nov 8 15:12:36 2018
New Revision: 340252
URL: https://svnweb.freebsd.org/changeset/base/340252
Log:
amd64: align memset buffers to 16 bytes before using rep stos
Both the Intel manual and Agner Fog's docs suggest aligning to 16 bytes.
See the review for benchmark results.
Reviewed by: kib (previous version)
Sponsored by: The FreeBSD Foundation
Differential Revision: https://reviews.freebsd.org/D17661
Modified:
head/sys/amd64/amd64/support.S
Modified: head/sys/amd64/amd64/support.S
==============================================================================
--- head/sys/amd64/amd64/support.S Thu Nov 8 14:46:21 2018 (r340251)
+++ head/sys/amd64/amd64/support.S Thu Nov 8 15:12:36 2018 (r340252)
@@ -515,24 +515,38 @@ END(memcpy_erms)
1256:
movq %rdi,%r9
movq %r10,%rax
+ testl $15,%edi
+ jnz 3f
+1:
.if \erms == 1
rep
stosb
movq %r9,%rax
.else
+ movq %rcx,%rdx
shrq $3,%rcx
rep
stosq
movq %r9,%rax
andl $7,%edx
- jnz 1f
+ jnz 2f
POP_FRAME_POINTER
ret
-1:
+2:
movq %r10,-8(%rdi,%rdx)
.endif
POP_FRAME_POINTER
ret
+ ALIGN_TEXT
+3:
+ movq %r10,(%rdi)
+ movq %r10,8(%rdi)
+ movq %rdi,%r8
+ andq $15,%r8
+ leaq -16(%rcx,%r8),%rcx
+ neg %r8
+ leaq 16(%rdi,%r8),%rdi
+ jmp 1b
.endm
ENTRY(memset_std)
More information about the svn-src-all
mailing list