svn commit: r338968 - head/sys/amd64/amd64

Mateusz Guzik mjg at FreeBSD.org
Thu Sep 27 15:27:54 UTC 2018


Author: mjg
Date: Thu Sep 27 15:27:53 2018
New Revision: 338968
URL: https://svnweb.freebsd.org/changeset/base/338968

Log:
  amd64: mostly depessimize copystr
  
  - remove a forward branch in the common case
  - replace xchg + lodsb/stosb loop with simple movs
  
  A simple test on an Intel(R) Core(TM) i7-4600U CPU @ 2.10GHz, copying
  /foo/bar/baz in a loop, goes from 295715863 ops/s to 465807408 ops/s.
  
  Further changes are pending.
  
  Reviewed by:	kib
  Approved by:	re (gjb)
  Differential Revision:	https://reviews.freebsd.org/D17281

Modified:
  head/sys/amd64/amd64/support.S

Modified: head/sys/amd64/amd64/support.S
==============================================================================
--- head/sys/amd64/amd64/support.S	Thu Sep 27 15:24:16 2018	(r338967)
+++ head/sys/amd64/amd64/support.S	Thu Sep 27 15:27:53 2018	(r338968)
@@ -1122,34 +1122,33 @@ ENTRY(copystr)
 	PUSH_FRAME_POINTER
 	movq	%rdx,%r8			/* %r8 = maxlen */
 
-	xchgq	%rdi,%rsi
-	incq	%rdx
+	incq    %rdx
 1:
 	decq	%rdx
 	jz	4f
-	lodsb
-	stosb
-	orb	%al,%al
+	movb	(%rdi),%al
+	movb	%al,(%rsi)
+	incq	%rsi
+	incq	%rdi
+	testb	%al,%al
 	jnz	1b
 
 	/* Success -- 0 byte reached */
 	decq	%rdx
 	xorl	%eax,%eax
-	jmp	6f
-4:
-	/* rdx is zero -- return ENAMETOOLONG */
-	movq	$ENAMETOOLONG,%rax
-
-6:
-
+2:
 	testq	%rcx,%rcx
-	jz	7f
+	jz      3f
 	/* set *lencopied and return %rax */
 	subq	%rdx,%r8
 	movq	%r8,(%rcx)
-7:
+3:
 	POP_FRAME_POINTER
 	ret
+4:
+	/* rdx is zero -- return ENAMETOOLONG */
+	movl    $ENAMETOOLONG,%eax
+	jmp	2b
 END(copystr)
 
 /*


More information about the svn-src-head mailing list