svn commit: r340252 - head/sys/amd64/amd64

Mateusz Guzik mjg at FreeBSD.org
Thu Nov 8 15:12:38 UTC 2018


Author: mjg
Date: Thu Nov  8 15:12:36 2018
New Revision: 340252
URL: https://svnweb.freebsd.org/changeset/base/340252

Log:
  amd64: align memset buffers to 16 bytes before using rep stos
  
  Both the Intel manual and Agner Fog's docs suggest aligning to 16 bytes.
  
  See the review for benchmark results.
  
  Reviewed by:	kib (previous version)
  Sponsored by:	The FreeBSD Foundation
  Differential Revision:	https://reviews.freebsd.org/D17661

Modified:
  head/sys/amd64/amd64/support.S

Modified: head/sys/amd64/amd64/support.S
==============================================================================
--- head/sys/amd64/amd64/support.S	Thu Nov  8 14:46:21 2018	(r340251)
+++ head/sys/amd64/amd64/support.S	Thu Nov  8 15:12:36 2018	(r340252)
@@ -515,24 +515,38 @@ END(memcpy_erms)
 1256:
 	movq	%rdi,%r9
 	movq	%r10,%rax
+	testl	$15,%edi
+	jnz	3f
+1:
 .if \erms == 1
 	rep
 	stosb
 	movq	%r9,%rax
 .else
+	movq	%rcx,%rdx
 	shrq	$3,%rcx
 	rep
 	stosq
 	movq	%r9,%rax
 	andl	$7,%edx
-	jnz	1f
+	jnz	2f
 	POP_FRAME_POINTER
 	ret
-1:
+2:
 	movq	%r10,-8(%rdi,%rdx)
 .endif
 	POP_FRAME_POINTER
 	ret
+	ALIGN_TEXT
+3:
+	movq	%r10,(%rdi)
+	movq	%r10,8(%rdi)
+	movq	%rdi,%r8
+	andq	$15,%r8
+	leaq	-16(%rcx,%r8),%rcx
+	neg	%r8
+	leaq	16(%rdi,%r8),%rdi
+	jmp	1b
 .endm
 
 ENTRY(memset_std)


More information about the svn-src-all mailing list