svn commit: r333413 - head/sys/amd64/amd64
Mateusz Guzik
mjg at FreeBSD.org
Wed May 9 15:16:26 UTC 2018
Author: mjg
Date: Wed May 9 15:16:25 2018
New Revision: 333413
URL: https://svnweb.freebsd.org/changeset/base/333413
Log:
amd64: depessimize bcmp for small buffers
Adapt the assembly generated by clang for memcmp and use it for compares of
at most 64 bytes (which are the vast majority).
Sample results from gathering stats on Broadwell (% of samples):
before: 4.0 kernel bcmp cache_lookup
after : 0.7 kernel bcmp cache_lookup
The routine is most definitely still not optimal. Anyone interested in
spending time improving it is welcome to take over.
Reviewed by: kib
Modified:
head/sys/amd64/amd64/support.S
Modified: head/sys/amd64/amd64/support.S
==============================================================================
--- head/sys/amd64/amd64/support.S Wed May 9 14:50:32 2018 (r333412)
+++ head/sys/amd64/amd64/support.S Wed May 9 15:16:25 2018 (r333413)
@@ -98,17 +98,40 @@ END(sse2_pagezero)
ENTRY(bcmp)
PUSH_FRAME_POINTER
+ test %rdx,%rdx
+ je 1f
+ cmpq $64,%rdx
+ jg 4f
+
+ xor %ecx,%ecx
+2:
+ movzbl (%rdi,%rcx,1),%eax
+ movzbl (%rsi,%rcx,1),%r8d
+ cmp %r8b,%al
+ jne 3f
+ add $0x1,%rcx
+ cmp %rcx,%rdx
+ jne 2b
+1:
+ xor %eax,%eax
+ POP_FRAME_POINTER
+ retq
+3:
+ mov $1,%eax
+ POP_FRAME_POINTER
+ retq
+4:
movq %rdx,%rcx
shrq $3,%rcx
repe
cmpsq
- jne 1f
+ jne 5f
movq %rdx,%rcx
andq $7,%rcx
repe
cmpsb
-1:
+5:
setne %al
movsbl %al,%eax
POP_FRAME_POINTER
More information about the svn-src-all
mailing list