svn commit: r338853 - head/sys/amd64/amd64

Mateusz Guzik mjg at FreeBSD.org
Fri Sep 21 12:27:37 UTC 2018


Author: mjg
Date: Fri Sep 21 12:27:36 2018
New Revision: 338853
URL: https://svnweb.freebsd.org/changeset/base/338853

Log:
  amd64: check for small size in memmove, memcpy and memset
  
  If the size is 15 bytes or less, avoid spinning up rep just to copy the 8
  bytes. In my tests on EPYC and older Intel microarchitectures without ERMS
  (like Westmere) it provided a nice win over the current version (e.g. for
  EPYC, memset with a size of 15 bytes goes from 59712651 ops/s to 70600095),
  all while almost not pessimizing the other cases.
  
  Data collected during package building shows that sizes below 16 are pretty
  common.
  
  Verified with the glibc test suite.
  
  Approved by:	re (kib)

Modified:
  head/sys/amd64/amd64/support.S

Modified: head/sys/amd64/amd64/support.S
==============================================================================
--- head/sys/amd64/amd64/support.S	Fri Sep 21 09:27:32 2018	(r338852)
+++ head/sys/amd64/amd64/support.S	Fri Sep 21 12:27:36 2018	(r338853)
@@ -116,6 +116,8 @@ ENTRY(memmove_std)
 	cmpq	%rcx,%r8			/* overlapping && src < dst? */
 	jb	2f
 
+	cmpq	$15,%rcx
+	jbe	1f
 	shrq	$3,%rcx				/* copy by 64-bit words */
 	rep
 	movsq
@@ -124,6 +126,7 @@ ENTRY(memmove_std)
 	jne	1f
 	POP_FRAME_POINTER
 	ret
+	ALIGN_TEXT
 1:
 	rep
 	movsb
@@ -191,6 +194,8 @@ ENTRY(memcpy_std)
 	PUSH_FRAME_POINTER
 	movq	%rdi,%rax
 	movq	%rdx,%rcx
+	cmpq	$15,%rcx
+	jbe	1f
 	shrq	$3,%rcx				/* copy by 64-bit words */
 	rep
 	movsq
@@ -199,6 +204,7 @@ ENTRY(memcpy_std)
 	jne	1f
 	POP_FRAME_POINTER
 	ret
+	ALIGN_TEXT
 1:
 	rep
 	movsb
@@ -227,6 +233,8 @@ ENTRY(memset_std)
 	movzbq	%sil,%r8
 	movabs	$0x0101010101010101,%rax
 	imulq	%r8,%rax
+	cmpq	$15,%rcx
+	jbe	1f
 	shrq	$3,%rcx
 	rep
 	stosq
@@ -236,6 +244,7 @@ ENTRY(memset_std)
 	movq	%r9,%rax
 	POP_FRAME_POINTER
 	ret
+	ALIGN_TEXT
 1:
 	rep
 	stosb


More information about the svn-src-head mailing list