Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F102144601
D41333.id125633.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
5 KB
Referenced Files
None
Subscribers
None
D41333.id125633.diff
View Options
diff --git a/lib/libc/amd64/string/Makefile.inc b/lib/libc/amd64/string/Makefile.inc
--- a/lib/libc/amd64/string/Makefile.inc
+++ b/lib/libc/amd64/string/Makefile.inc
@@ -8,6 +8,7 @@
memmove.S \
memset.S \
strcat.S \
+ strchrnul.S \
strcmp.S \
strlen.S \
stpcpy.S
diff --git a/lib/libc/amd64/string/strchrnul.S b/lib/libc/amd64/string/strchrnul.S
new file mode 100644
--- /dev/null
+++ b/lib/libc/amd64/string/strchrnul.S
@@ -0,0 +1,167 @@
+/*-
+ * Copyright (c) 2023 The FreeBSD Foundation
+ *
+ * This software was developed by Robert Clausecker <fuz@FreeBSD.org>
+ * under sponsorship from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ''AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE
+ */
+
+#include <machine/asm.h>
+
+#include "amd64_archlevel.h"
+
+#define ALIGN_TEXT .p2align 4,0x90 # 16-byte alignment, nop-filled
+
+ /* export strchrnul as a weak symbol with the same value as __strchrnul */
+ .weak strchrnul
+ .set strchrnul, __strchrnul
+
+ /*
+ * List of __strchrnul implementations defined in this file.
+ * NOTE(review): the ARCHFUNCS/ARCHFUNC/ENDARCHFUNCS macros come from
+ * amd64_archlevel.h; presumably they build the table used to select
+ * one of the variants below at run time -- confirm against that header.
+ */
+ARCHFUNCS(__strchrnul)
+ ARCHFUNC(__strchrnul, scalar)
+ ARCHFUNC(__strchrnul, baseline)
+ENDARCHFUNCS(__strchrnul)
+
+/*
+ * strchrnul(str, c)
+ * This is implemented like strlen(str), but we check for the
+ * presence of both NUL and c in each iteration.
+ *
+ * In:  %rdi = str, %sil = c (the other bits of %rsi are ignored)
+ * Out: %rax = pointer to the first byte of str equal to c or NUL
+ *
+ * Register roles once set up:
+ *   %rdi  8-byte aligned read cursor
+ *   %rsi  c replicated into all eight bytes
+ *   %r8   -0x0101010101010101 (so lea can subtract 0x01..01)
+ *   %r9   0x8080808080808080
+ *   %r10  0x01 in each byte of the first word that precedes str
+ *   %rax, %rcx, %rdx, %r11  scratch
+ *
+ * A byte x is flagged by (x - 0x01..01) & ~x & 0x80..80.  The borrow
+ * of the subtraction can additionally flag a 0x01 byte located right
+ * above a 0x00 byte.  As only the lowest flag is evaluated, this is
+ * harmless provided the 0x00 byte is itself a legitimate match.
+ */
+ARCHENTRY(__strchrnul, scalar)
+ mov %edi, %ecx # keep the low bits of str for the head mask
+ and $~7, %rdi # align to 8 byte
+ movzbl %sil, %esi # clear stray high bits
+ movabs $0x0101010101010101, %r8
+ mov (%rdi), %rax # load first word
+ imul %r8, %rsi # replicate char 8 times
+
+ /*
+ * Unaligned input: align to 8 bytes. Then proceed the same
+ * way as with aligned input, but prevent matches before the
+ * beginning of the string. This is achieved by oring 0x01
+ * into each byte of the first word that precedes the string,
+ * both in str and in str ^ c. Merely masking the result is
+ * not sufficient: a NUL or c byte right before the string
+ * makes the subtraction borrow into the first byte of the
+ * string, which is then flagged by mistake if it happens to
+ * be 0x01 or c ^ 0x01 respectively.
+ */
+ shl $3, %ecx # byte offset -> bit offset (shl uses cl mod 64)
+ mov %r8, %r10
+ add $8, %rdi # advance past the first word
+ shl %cl, %r10 # 0x01 where the string is
+ xor %r8, %r10 # 0x01 where it is not
+ neg %r8 # negate 01..01 so we can use lea
+ movabs $0x8080808080808080, %r9
+
+ mov %rsi, %rcx
+ xor %rax, %rcx # str ^ c
+ or %r10, %rax # str without NUL bytes before it
+ or %r10, %rcx # (str ^ c) without matches before it
+ lea (%rax, %r8, 1), %rdx # str - 0x01..01
+ lea (%rcx, %r8, 1), %r11 # (str ^ c) - 0x01..01
+ not %rax # ~str
+ not %rcx # ~(str ^ c)
+ and %rdx, %rax # (str - 0x01..01) & ~str
+ and %r11, %rcx # ((str ^ c) - 0x01..01) & ~(str ^ c)
+ or %rcx, %rax # matches for both
+ and %r9, %rax # not including junk bits
+ jnz 1f
+
+ /* main loop unrolled twice */
+ ALIGN_TEXT
+0: mov (%rdi), %rax # str
+ mov %rsi, %rcx
+ xor %rax, %rcx # str ^ c
+ lea (%rax, %r8, 1), %rdx # str - 0x01..01
+ lea (%rcx, %r8, 1), %r11 # (str ^ c) - 0x01..01
+ not %rax # ~str
+ not %rcx # ~(str ^ c)
+ and %rdx, %rax # (str - 0x01..01) & ~str
+ and %r11, %rcx # ((str ^ c) - 0x01..01) & ~(str ^ c)
+ or %rcx, %rax # matches for both
+ and %r9, %rax # not including junk bits
+ jnz 2f # %rdi still points at this word
+
+ mov 8(%rdi), %rax # str
+ add $16, %rdi
+ mov %rsi, %rcx
+ xor %rax, %rcx # str ^ c
+ lea (%rax, %r8, 1), %rdx # str - 0x01..01
+ lea (%rcx, %r8, 1), %r11 # (str ^ c) - 0x01..01
+ not %rax # ~str
+ not %rcx # ~(str ^ c)
+ and %rdx, %rax # (str - 0x01..01) & ~str
+ and %r11, %rcx # ((str ^ c) - 0x01..01) & ~(str ^ c)
+ or %rcx, %rax # matches for both
+ and %r9, %rax # not including junk bits
+ jz 0b
+
+ /* NUL or c found; %rax is nonzero here */
+1: sub $8, %rdi # undo advance past buffer
+2: tzcnt %rax, %rax # first NUL or c byte match
+ shr $3, %eax # scale from bit to byte index
+ add %rdi, %rax # pointer to found c or NUL
+ ret
+ARCHEND(__strchrnul, scalar)
+
+ARCHENTRY(__strchrnul, baseline)
+ /*
+ * SSE2 variant of strchrnul(str, c): examines 16 bytes per
+ * aligned load.
+ *
+ * In:  %rdi = str, %sil = c (only the low byte reaches the compare)
+ * Out: %rax = pointer to the first byte of str equal to c or NUL
+ *
+ * %xmm0 holds c in all 16 bytes once the broadcast is complete;
+ * %xmm1 and %xmm2 are scratch. %edx masks off the bytes of the
+ * first block that lie before str.
+ */
+ mov %edi, %ecx
+ and $~0xf, %rdi # align to 16 byte
+ movdqa (%rdi), %xmm1 # load the block holding the string start
+ movd %esi, %xmm0
+ and $0xf, %ecx # distance from (%rdi) to start of string
+ pxor %xmm2, %xmm2 # all zero, to compare against NUL
+ mov $-1, %edx
+ punpcklbw %xmm0, %xmm0 # c -> cc
+ shl %cl, %edx # bits corresponding to bytes in the string
+ punpcklwd %xmm0, %xmm0 # cc -> cccc
+ add $16, %rdi # advance past the first block
+
+ /* check for match in head */
+ pcmpeqb %xmm1, %xmm2 # NUL bytes present?
+ pshufd $0, %xmm0, %xmm0 # cccc -> cccccccccccccccc
+ pcmpeqb %xmm0, %xmm1 # c present?
+ por %xmm2, %xmm1 # either present?
+ pmovmskb %xmm1, %eax # one bit per byte of the block
+ and %edx, %eax # match in the string?
+ jnz 1f
+
+ /* main loop unrolled twice */
+ ALIGN_TEXT
+0: movdqa (%rdi), %xmm1
+ pxor %xmm2, %xmm2 # previous compare overwrote the zero register
+ pcmpeqb %xmm1, %xmm2 # NUL bytes present?
+ pcmpeqb %xmm0, %xmm1 # c present?
+ por %xmm2, %xmm1 # either present?
+ pmovmskb %xmm1, %eax
+ test %eax, %eax # match in the string?
+ jnz 2f # %rdi still points at this block
+
+ movdqa 16(%rdi), %xmm1
+ add $32, %rdi
+ pxor %xmm2, %xmm2
+ pcmpeqb %xmm1, %xmm2 # NUL bytes present?
+ pcmpeqb %xmm0, %xmm1 # c present?
+ por %xmm2, %xmm1 # either present?
+ pmovmskb %xmm1, %eax
+ test %eax, %eax # match in the string?
+ jz 0b
+
+ /* match found; %eax is nonzero, its lowest set bit is the byte index */
+1: sub $16, %rdi # undo advance past buffer
+2: tzcnt %eax, %eax # where is the match?
+ add %rdi, %rax # pointer to found c or NUL
+ ret
+ARCHEND(__strchrnul, baseline)
+
+ .section .note.GNU-stack,"",%progbits
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Sat, Nov 9, 4:24 AM (10 h, 37 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
14549936
Default Alt Text
D41333.id125633.diff (5 KB)
Attached To
Mode
D41333: SIMD-enhanced strchrnul(3)
Attached
Detach File
Event Timeline
Log In to Comment