Page MenuHomeFreeBSD

D41442.diff
No OneTemporary

D41442.diff

diff --git a/lib/libc/amd64/string/memcmp.S b/lib/libc/amd64/string/memcmp.S
--- a/lib/libc/amd64/string/memcmp.S
+++ b/lib/libc/amd64/string/memcmp.S
@@ -1,9 +1,12 @@
/*-
- * Copyright (c) 2018 The FreeBSD Foundation
+ * Copyright (c) 2018, 2023 The FreeBSD Foundation
*
* This software was developed by Mateusz Guzik <mjg@FreeBSD.org>
* under sponsorship from the FreeBSD Foundation.
*
+ * Portions of this software were developed by Robert Clausecker
+ * <fuz@FreeBSD.org> under sponsorship from the FreeBSD Foundation.
+ *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -27,6 +30,10 @@
*/
#include <machine/asm.h>
+#include <machine/param.h>
+
+#include "amd64_archlevel.h"
+
/*
* Note: this routine was written with kernel use in mind (read: no simd),
* it is only present in userspace as a temporary measure until something
@@ -36,10 +43,15 @@
#define ALIGN_TEXT .p2align 4,0x90 /* 16-byte alignment, nop filled */
#ifdef BCMP
-ENTRY(bcmp)
-#else
-ENTRY(memcmp)
+#define memcmp bcmp
#endif
+
+ARCHFUNCS(memcmp)
+ ARCHFUNC(memcmp, scalar)
+ ARCHFUNC(memcmp, baseline)
+ENDARCHFUNCS(memcmp)
+
+ARCHENTRY(memcmp, scalar)
xorl %eax,%eax
10:
cmpq $16,%rdx
@@ -157,7 +169,6 @@
1:
leal 1(%eax),%eax
ret
-END(bcmp)
#else
/*
* We need to compute the difference between strings.
@@ -230,7 +241,165 @@
2:
subl %r8d,%eax
ret
-END(memcmp)
#endif
+ARCHEND(memcmp, scalar)
+
+ARCHENTRY(memcmp, baseline)
+ cmp $32, %rdx # enough to permit use of the long kernel?
+ ja .Llong
+
+ test %rdx, %rdx # zero bytes buffer?
+ je .L0
+
+ /*
+ * Compare strings of 1--32 bytes. We want to do this by
+ * loading into two xmm registers and then comparing. To avoid
+ * crossing into unmapped pages, we either load 32 bytes from
+ * the start of the buffer or 32 bytes before its end, depending
+ * on whether there is a page boundary between the overread area
+ * or not.
+ */
+
+ /* check for page boundaries overreads */
+ lea 31(%rdi), %eax # end of overread
+ lea 31(%rsi), %r8d
+ lea -1(%rdi, %rdx, 1), %ecx # last character in buffer
+ lea -1(%rsi, %rdx, 1), %r9d
+ xor %ecx, %eax
+ xor %r9d, %r8d
+ test $PAGE_SIZE, %eax # are they on different pages?
+ jz 0f
+
+ /* fix up rdi */
+ movdqu -32(%rdi, %rdx, 1), %xmm0
+ movdqu -16(%rdi, %rdx, 1), %xmm1
+ lea -8(%rsp), %rdi # end of replacement buffer
+ sub %rdx, %rdi # start of replacement buffer
+ movdqa %xmm0, -40(%rsp) # copy to replacement buffer
+ movdqa %xmm1, -24(%rsp)
+
+0: test $PAGE_SIZE, %r8d
+ jz 0f
+
+ /* fix up rsi */
+ movdqu -32(%rsi, %rdx, 1), %xmm0
+ movdqu -16(%rsi, %rdx, 1), %xmm1
+ lea -40(%rsp), %rsi # end of replacement buffer
+ sub %rdx, %rsi # start of replacement buffer
+ movdqa %xmm0, -72(%rsp) # copy to replacement buffer
+ movdqa %xmm1, -56(%rsp)
+
+ /* load data and compare properly */
+0: movdqu 16(%rdi), %xmm1
+ movdqu 16(%rsi), %xmm3
+ movdqu (%rdi), %xmm0
+ movdqu (%rsi), %xmm2
+ mov %edx, %ecx
+ mov $-1, %edx
+ shl %cl, %rdx # ones where the buffer is not
+ pcmpeqb %xmm3, %xmm1
+ pcmpeqb %xmm2, %xmm0
+ pmovmskb %xmm1, %ecx
+ pmovmskb %xmm0, %eax
+ shl $16, %ecx
+ or %ecx, %eax # ones where the buffers match
+ or %edx, %eax # including where the buffer is not
+ not %eax # ones where there is a mismatch
+#ifndef BCMP
+ bsf %eax, %edx # location of the first mismatch
+ cmovz %eax, %edx # including if there is no mismatch
+ movzbl (%rdi, %rdx, 1), %eax # mismatching bytes
+ movzbl (%rsi, %rdx, 1), %edx
+ sub %edx, %eax
+#endif
+ ret
+
+ /* empty input */
+.L0: xor %eax, %eax
+ ret
+
+ /* compare 33+ bytes */
+ ALIGN_TEXT
+.Llong: movdqu (%rdi), %xmm0 # load head
+ movdqu (%rsi), %xmm2
+ mov %rdi, %rcx
+ sub %rdi, %rsi # express rsi as distance from rdi
+ and $~0xf, %rdi # align rdi to 16 bytes
+ movdqu 16(%rsi, %rdi, 1), %xmm1
+ pcmpeqb 16(%rdi), %xmm1 # compare second half of this iteration
+ add %rcx, %rdx # pointer to last byte in buffer
+ pcmpeqb %xmm2, %xmm0
+ pmovmskb %xmm0, %eax
+ xor $0xffff, %eax # any mismatch?
+ jne .Lmismatch_head
+ add $64, %rdi # advance to next iteration
+ jmp 1f # and get going with the loop
+
+ /* process buffer 32 bytes at a time */
+ ALIGN_TEXT
+0: movdqu -32(%rsi, %rdi, 1), %xmm0
+ movdqu -16(%rsi, %rdi, 1), %xmm1
+ pcmpeqb -32(%rdi), %xmm0
+ pcmpeqb -16(%rdi), %xmm1
+ add $32, %rdi # advance to next iteration
+1: pand %xmm0, %xmm1 # 0xff where both halves matched
+ pmovmskb %xmm1, %eax
+ cmp $0xffff, %eax # all bytes matched?
+ jne .Lmismatch
+ cmp %rdx, %rdi # end of buffer reached?
+ jb 0b
+
+ /* less than 32 bytes left to compare */
+ movdqu -16(%rdx), %xmm1 # load 32 byte tail through end pointer
+ movdqu -16(%rdx, %rsi, 1), %xmm3
+ movdqu -32(%rdx), %xmm0
+ movdqu -32(%rdx, %rsi, 1), %xmm2
+ pcmpeqb %xmm3, %xmm1
+ pcmpeqb %xmm2, %xmm0
+ pmovmskb %xmm1, %ecx
+ pmovmskb %xmm0, %eax
+ shl $16, %ecx
+ or %ecx, %eax # ones where the buffers match
+ not %eax # ones where there is a mismatch
+#ifndef BCMP
+ bsf %eax, %ecx # location of the first mismatch
+ cmovz %eax, %ecx # including if there is no mismatch
+ add %rcx, %rdx # pointer to potential mismatch
+ movzbl -32(%rdx), %eax # mismatching bytes
+ movzbl -32(%rdx, %rsi, 1), %edx
+ sub %edx, %eax
+#endif
+ ret
+
+#ifdef BCMP
+.Lmismatch:
+ mov $1, %eax
+.Lmismatch_head:
+ ret
+#else /* memcmp */
+.Lmismatch_head:
+ tzcnt %eax, %eax # location of mismatch
+ add %rax, %rcx # pointer to mismatch
+ movzbl (%rcx), %eax # mismatching bytes
+ movzbl (%rcx, %rsi, 1), %ecx
+ sub %ecx, %eax
+ ret
+
+.Lmismatch:
+ movdqu -48(%rsi, %rdi, 1), %xmm1
+ pcmpeqb -48(%rdi), %xmm1 # reconstruct xmm1 before PAND
+ pmovmskb %xmm0, %eax # mismatches in first 16 bytes
+ pmovmskb %xmm1, %edx # mismatches in second 16 bytes
+ shl $16, %edx
+ or %edx, %eax # mismatches in both
+ not %eax # matches in both
+ tzcnt %eax, %eax # location of mismatch
+ add %rax, %rdi # pointer to mismatch
+ movzbl -64(%rdi), %eax # mismatching bytes
+ movzbl -64(%rdi, %rsi, 1), %ecx
+ sub %ecx, %eax
+ ret
+#endif
+ARCHEND(memcmp, baseline)
.section .note.GNU-stack,"",%progbits

File Metadata

Mime Type
text/plain
Expires
Sat, Sep 28, 4:09 AM (20 h, 51 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
12976327
Default Alt Text
D41442.diff (6 KB)

Event Timeline