Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F96911470
D41442.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
6 KB
Referenced Files
None
Subscribers
None
D41442.diff
View Options
diff --git a/lib/libc/amd64/string/memcmp.S b/lib/libc/amd64/string/memcmp.S
--- a/lib/libc/amd64/string/memcmp.S
+++ b/lib/libc/amd64/string/memcmp.S
@@ -1,9 +1,12 @@
/*-
- * Copyright (c) 2018 The FreeBSD Foundation
+ * Copyright (c) 2018, 2023 The FreeBSD Foundation
*
* This software was developed by Mateusz Guzik <mjg@FreeBSD.org>
* under sponsorship from the FreeBSD Foundation.
*
+ * Portions of this software were developed by Robert Clausecker
+ * <fuz@FreeBSD.org> under sponsorship from the FreeBSD Foundation.
+ *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -27,6 +30,10 @@
*/
#include <machine/asm.h>
+#include <machine/param.h>
+
+#include "amd64_archlevel.h"
+
/*
* Note: this routine was written with kernel use in mind (read: no simd),
* it is only present in userspace as a temporary measure until something
@@ -36,10 +43,15 @@
#define ALIGN_TEXT .p2align 4,0x90 /* 16-byte alignment, nop filled */
#ifdef BCMP
-ENTRY(bcmp)
-#else
-ENTRY(memcmp)
+#define memcmp bcmp
#endif
+
+ARCHFUNCS(memcmp)
+ ARCHFUNC(memcmp, scalar)
+ ARCHFUNC(memcmp, baseline)
+ENDARCHFUNCS(memcmp)
+
+ARCHENTRY(memcmp, scalar)
xorl %eax,%eax
10:
cmpq $16,%rdx
@@ -157,7 +169,6 @@
1:
leal 1(%eax),%eax
ret
-END(bcmp)
#else
/*
* We need to compute the difference between strings.
@@ -230,7 +241,165 @@
2:
subl %r8d,%eax
ret
-END(memcmp)
#endif
+ARCHEND(memcmp, scalar)
+
+ARCHENTRY(memcmp, baseline)
+ cmp $32, %rdx # enough to permit use of the long kernel?
+ ja .Llong
+
+ test %rdx, %rdx # zero bytes buffer?
+ je .L0
+
+	/*
+	 * Compare strings of 1--32 bytes.  We want to do this by
+	 * loading into two xmm registers and then comparing.  To avoid
+	 * reading into an unmapped page, we either load 32 bytes from
+	 * the start of the buffer or the 32 bytes ending at its last
+	 * byte, depending on whether the overread area crosses a page
+	 * boundary.
+	 */
+
+	/* check for page-boundary overreads */
+ lea 31(%rdi), %eax # end of overread
+ lea 31(%rsi), %r8d
+ lea -1(%rdi, %rdx, 1), %ecx # last character in buffer
+ lea -1(%rsi, %rdx, 1), %r9d
+ xor %ecx, %eax
+ xor %r9d, %r8d
+ test $PAGE_SIZE, %eax # are they on different pages?
+ jz 0f
+
+ /* fix up rdi */
+ movdqu -32(%rdi, %rdx, 1), %xmm0
+ movdqu -16(%rdi, %rdx, 1), %xmm1
+ lea -8(%rsp), %rdi # end of replacement buffer
+ sub %rdx, %rdi # start of replacement buffer
+ movdqa %xmm0, -40(%rsp) # copy to replacement buffer
+ movdqa %xmm1, -24(%rsp)
+
+0: test $PAGE_SIZE, %r8d
+ jz 0f
+
+ /* fix up rsi */
+ movdqu -32(%rsi, %rdx, 1), %xmm0
+ movdqu -16(%rsi, %rdx, 1), %xmm1
+ lea -40(%rsp), %rsi # end of replacement buffer
+ sub %rdx, %rsi # start of replacement buffer
+ movdqa %xmm0, -72(%rsp) # copy to replacement buffer
+ movdqa %xmm1, -56(%rsp)
+
+ /* load data and compare properly */
+0: movdqu 16(%rdi), %xmm1
+ movdqu 16(%rsi), %xmm3
+ movdqu (%rdi), %xmm0
+ movdqu (%rsi), %xmm2
+ mov %edx, %ecx
+ mov $-1, %edx
+ shl %cl, %rdx # ones where the buffer is not
+ pcmpeqb %xmm3, %xmm1
+ pcmpeqb %xmm2, %xmm0
+ pmovmskb %xmm1, %ecx
+ pmovmskb %xmm0, %eax
+ shl $16, %ecx
+ or %ecx, %eax # ones where the buffers match
+ or %edx, %eax # including where the buffer is not
+ not %eax # ones where there is a mismatch
+#ifndef BCMP
+ bsf %eax, %edx # location of the first mismatch
+ cmovz %eax, %edx # including if there is no mismatch
+ movzbl (%rdi, %rdx, 1), %eax # mismatching bytes
+ movzbl (%rsi, %rdx, 1), %edx
+ sub %edx, %eax
+#endif
+ ret
+
+ /* empty input */
+.L0: xor %eax, %eax
+ ret
+
+ /* compare 33+ bytes */
+ ALIGN_TEXT
+.Llong: movdqu (%rdi), %xmm0 # load head
+ movdqu (%rsi), %xmm2
+ mov %rdi, %rcx
+ sub %rdi, %rsi # express rsi as distance from rdi
+ and $~0xf, %rdi # align rdi to 16 bytes
+ movdqu 16(%rsi, %rdi, 1), %xmm1
+ pcmpeqb 16(%rdi), %xmm1 # compare second half of this iteration
+ add %rcx, %rdx # pointer to last byte in buffer
+ pcmpeqb %xmm2, %xmm0
+ pmovmskb %xmm0, %eax
+ xor $0xffff, %eax # any mismatch?
+ jne .Lmismatch_head
+ add $64, %rdi # advance to next iteration
+ jmp 1f # and get going with the loop
+
+ /* process buffer 32 bytes at a time */
+ ALIGN_TEXT
+0: movdqu -32(%rsi, %rdi, 1), %xmm0
+ movdqu -16(%rsi, %rdi, 1), %xmm1
+ pcmpeqb -32(%rdi), %xmm0
+ pcmpeqb -16(%rdi), %xmm1
+ add $32, %rdi # advance to next iteration
+1: pand %xmm0, %xmm1 # 0xff where both halves matched
+ pmovmskb %xmm1, %eax
+ cmp $0xffff, %eax # all bytes matched?
+ jne .Lmismatch
+ cmp %rdx, %rdi # end of buffer reached?
+ jb 0b
+
+ /* less than 32 bytes left to compare */
+ movdqu -16(%rdx), %xmm1 # load 32 byte tail through end pointer
+ movdqu -16(%rdx, %rsi, 1), %xmm3
+ movdqu -32(%rdx), %xmm0
+ movdqu -32(%rdx, %rsi, 1), %xmm2
+ pcmpeqb %xmm3, %xmm1
+ pcmpeqb %xmm2, %xmm0
+ pmovmskb %xmm1, %ecx
+ pmovmskb %xmm0, %eax
+ shl $16, %ecx
+ or %ecx, %eax # ones where the buffers match
+ not %eax # ones where there is a mismatch
+#ifndef BCMP
+ bsf %eax, %ecx # location of the first mismatch
+ cmovz %eax, %ecx # including if there is no mismatch
+ add %rcx, %rdx # pointer to potential mismatch
+ movzbl -32(%rdx), %eax # mismatching bytes
+ movzbl -32(%rdx, %rsi, 1), %edx
+ sub %edx, %eax
+#endif
+ ret
+
+#ifdef BCMP
+.Lmismatch:
+ mov $1, %eax
+.Lmismatch_head:
+ ret
+#else /* memcmp */
+.Lmismatch_head:
+ tzcnt %eax, %eax # location of mismatch
+ add %rax, %rcx # pointer to mismatch
+ movzbl (%rcx), %eax # mismatching bytes
+ movzbl (%rcx, %rsi, 1), %ecx
+ sub %ecx, %eax
+ ret
+
+.Lmismatch:
+ movdqu -48(%rsi, %rdi, 1), %xmm1
+ pcmpeqb -48(%rdi), %xmm1 # reconstruct xmm1 before PAND
+ pmovmskb %xmm0, %eax # mismatches in first 16 bytes
+ pmovmskb %xmm1, %edx # mismatches in second 16 bytes
+ shl $16, %edx
+ or %edx, %eax # mismatches in both
+ not %eax # matches in both
+ tzcnt %eax, %eax # location of mismatch
+ add %rax, %rdi # pointer to mismatch
+ movzbl -64(%rdi), %eax # mismatching bytes
+ movzbl -64(%rdi, %rsi, 1), %ecx
+ sub %ecx, %eax
+ ret
+#endif
+ARCHEND(memcmp, baseline)
.section .note.GNU-stack,"",%progbits
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Sat, Sep 28, 4:09 AM (20 h, 51 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
12976327
Default Alt Text
D41442.diff (6 KB)
Attached To
Mode
D41442: lib/libc/amd64/string: add baseline implementation of memcmp, bcmp
Attached
Detach File
Event Timeline
Log In to Comment