svn commit: r338853 - head/sys/amd64/amd64
Mateusz Guzik
mjg at FreeBSD.org
Fri Sep 21 12:27:37 UTC 2018
Author: mjg
Date: Fri Sep 21 12:27:36 2018
New Revision: 338853
URL: https://svnweb.freebsd.org/changeset/base/338853
Log:
amd64: check for small size in memmove, memcpy and memset
If the size is 15 bytes or less, avoid spinning up rep just to copy the 8
bytes. In my tests on EPYC and old Intel microarchs without ERMS (like
Westmere) it provided a nice win over the current version (e.g. for EPYC
memset with 15 bytes of size goes from 59712651 ops/s to 70600095) all
while almost not pessimizing the other cases.
Data collected during package building shows that sizes < 16 bytes are pretty
common.
Verified with the glibc test suite.
Approved by: re (kib)
Modified:
head/sys/amd64/amd64/support.S
Modified: head/sys/amd64/amd64/support.S
==============================================================================
--- head/sys/amd64/amd64/support.S Fri Sep 21 09:27:32 2018 (r338852)
+++ head/sys/amd64/amd64/support.S Fri Sep 21 12:27:36 2018 (r338853)
@@ -116,6 +116,8 @@ ENTRY(memmove_std)
cmpq %rcx,%r8 /* overlapping && src < dst? */
jb 2f
+ cmpq $15,%rcx
+ jbe 1f
shrq $3,%rcx /* copy by 64-bit words */
rep
movsq
@@ -124,6 +126,7 @@ ENTRY(memmove_std)
jne 1f
POP_FRAME_POINTER
ret
+ ALIGN_TEXT
1:
rep
movsb
@@ -191,6 +194,8 @@ ENTRY(memcpy_std)
PUSH_FRAME_POINTER
movq %rdi,%rax
movq %rdx,%rcx
+ cmpq $15,%rcx
+ jbe 1f
shrq $3,%rcx /* copy by 64-bit words */
rep
movsq
@@ -199,6 +204,7 @@ ENTRY(memcpy_std)
jne 1f
POP_FRAME_POINTER
ret
+ ALIGN_TEXT
1:
rep
movsb
@@ -227,6 +233,8 @@ ENTRY(memset_std)
movzbq %sil,%r8
movabs $0x0101010101010101,%rax
imulq %r8,%rax
+ cmpq $15,%rcx
+ jbe 1f
shrq $3,%rcx
rep
stosq
@@ -236,6 +244,7 @@ ENTRY(memset_std)
movq %r9,%rax
POP_FRAME_POINTER
ret
+ ALIGN_TEXT
1:
rep
stosb
More information about the svn-src-head
mailing list