svn commit: r333413 - head/sys/amd64/amd64

Mateusz Guzik mjg at FreeBSD.org
Wed May 9 15:16:26 UTC 2018


Author: mjg
Date: Wed May  9 15:16:25 2018
New Revision: 333413
URL: https://svnweb.freebsd.org/changeset/base/333413

Log:
  amd64: depessimize bcmp for small buffers
  
  Adapt assembly generated by clang for memcmp and use it for compares of
  at most 64 bytes (which are the vast majority).
  
  Sample result of doing stats on Broadwell (% of samples):
  before: 4.0 kernel     bcmp                 cache_lookup
  after : 0.7 kernel     bcmp                 cache_lookup
  
  The routine is most definitely still not optimal. Anyone interested in
  spending time improving it is welcome to take over.
  
  Reviewed by:	kib

Modified:
  head/sys/amd64/amd64/support.S

Modified: head/sys/amd64/amd64/support.S
==============================================================================
--- head/sys/amd64/amd64/support.S	Wed May  9 14:50:32 2018	(r333412)
+++ head/sys/amd64/amd64/support.S	Wed May  9 15:16:25 2018	(r333413)
@@ -98,17 +98,40 @@ END(sse2_pagezero)
 
 ENTRY(bcmp)
 	PUSH_FRAME_POINTER
+	test	%rdx,%rdx
+	je	1f
+	cmpq	$64,%rdx
+	jg	4f
+
+	xor	%ecx,%ecx
+2:
+	movzbl	(%rdi,%rcx,1),%eax
+	movzbl	(%rsi,%rcx,1),%r8d
+	cmp	%r8b,%al
+	jne	3f
+	add	$0x1,%rcx
+	cmp	%rcx,%rdx
+	jne	2b
+1:
+	xor	%eax,%eax
+	POP_FRAME_POINTER
+	retq
+3:
+	mov	$1,%eax
+	POP_FRAME_POINTER
+	retq
+4:
 	movq	%rdx,%rcx
 	shrq	$3,%rcx
 	repe
 	cmpsq
-	jne	1f
+	jne	5f
 
 	movq	%rdx,%rcx
 	andq	$7,%rcx
 	repe
 	cmpsb
-1:
+5:
 	setne	%al
 	movsbl	%al,%eax
 	POP_FRAME_POINTER


More information about the svn-src-all mailing list