svn commit: r339579 - head/sys/amd64/amd64

Mateusz Guzik mjg at FreeBSD.org
Mon Oct 22 06:44:21 UTC 2018


Author: mjg
Date: Mon Oct 22 06:44:20 2018
New Revision: 339579
URL: https://svnweb.freebsd.org/changeset/base/339579

Log:
  amd64: finish the tail in memset with an overlapping store
  
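  Instead of finding the exact store size that fits the tail, we can just
  shift the target by -8 + tail and finish with a single 8-byte store that
  overlaps the area already written. Doing a blind write to a previously
  rep stosq'ed area comes with a penalty, so do it only when the tail is
  non-zero.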
  
  Sample win on EPYC when zeroing a 257-byte buffer (tail = 1) aligned to
  16 bytes:
  before: 44782846 ops/s
  after:  46118614 ops/s
  
  Idea stolen from NetBSD.
  
  Sponsored by:	The FreeBSD Foundation

Modified:
  head/sys/amd64/amd64/support.S

Modified: head/sys/amd64/amd64/support.S
==============================================================================
--- head/sys/amd64/amd64/support.S	Mon Oct 22 04:12:51 2018	(r339578)
+++ head/sys/amd64/amd64/support.S	Mon Oct 22 06:44:20 2018	(r339579)
@@ -524,9 +524,12 @@ END(memcpy_erms)
 	rep
 	stosq
 	movq	%r9,%rax
-	movq	%rdx,%rcx
-	andb	$7,%cl
-	jne	1004b
+	andl	$7,%edx
+	jnz	1f
+	POP_FRAME_POINTER
+	ret
+1:
+	movq	%r10,-8(%rdi,%rdx)
 .endif
 	POP_FRAME_POINTER
 	ret


More information about the svn-src-head mailing list