Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F102171796
D41557.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
9 KB
Referenced Files
None
Subscribers
None
D41557.diff
View Options
diff --git a/lib/libc/amd64/string/Makefile.inc b/lib/libc/amd64/string/Makefile.inc
--- a/lib/libc/amd64/string/Makefile.inc
+++ b/lib/libc/amd64/string/Makefile.inc
@@ -10,5 +10,6 @@
strcat.S \
strchrnul.S \
strcmp.S \
+ strcspn.S \
strlen.S \
strcpy.c
diff --git a/lib/libc/amd64/string/strcspn.S b/lib/libc/amd64/string/strcspn.S
new file mode 100644
--- /dev/null
+++ b/lib/libc/amd64/string/strcspn.S
@@ -0,0 +1,368 @@
+/*
+ * Copyright (c) 2023 The FreeBSD Foundation
+ *
+ * This software was developed by Robert Clausecker <fuz@FreeBSD.org>
+ * under sponsorship from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ''AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE
+ */
+
+#include <machine/asm.h>
+#include <machine/param.h>
+
+#include "amd64_archlevel.h"
+
+#define ALIGN_TEXT .p2align 4,0x90 /* 16-byte alignment, nop filled */
+
+ /*
+ * Implementation table for strcspn. It names the two kernels defined
+ * in this file: the scalar one and the x86_64_v2 one.
+ *
+ * NOTE(review): ARCHFUNCS, ARCHFUNC, NOARCHFUNC and ENDARCHFUNCS come
+ * from amd64_archlevel.h, which is not visible here. Presumably the
+ * entries are listed by increasing architecture level, NOARCHFUNC
+ * marks a level without a dedicated kernel, and the best kernel the
+ * CPU supports is picked at run time -- confirm against that header.
+ */
+ARCHFUNCS(strcspn)
+ ARCHFUNC(strcspn, scalar)
+ NOARCHFUNC
+ ARCHFUNC(strcspn, x86_64_v2)
+ENDARCHFUNCS(strcspn)
+
+ /*
+ * Scalar strcspn kernel.
+ *
+ * In: %rdi = string to scan, %rsi = set of characters to look for
+ * Out: %rax = number of characters at the start of the string that
+ * precede the first character found in the set (or the
+ * terminating NUL if there is none)
+ *
+ * Three code paths, chosen by the first two bytes of the set:
+ * - empty set: tail call to strlen
+ * - one character: call strchrnul and subtract the string start
+ * - otherwise: build a 256 byte membership table on the stack
+ * (one byte per possible character value) and scan
+ * the string four bytes per iteration
+ *
+ * The frame is set up with push/sub so that %rsp stays 16 byte
+ * aligned for the call to strchrnul.
+ */
+ARCHENTRY(strcspn, scalar)
+ push %rbp # align stack to enable function call
+ mov %rsp, %rbp
+ sub $256, %rsp # allocate space for lookup table
+
+ /* check for special cases */
+ movzbl (%rsi), %eax # first character in the set
+ test %eax, %eax
+ jz .Lstrlen # empty set
+
+ movzbl 1(%rsi), %edx # second character in the set
+ test %edx, %edx
+ jz .Lstrchr # set has exactly one character
+
+ /* no special case matches -- prepare lookup table */
+ /* clear all 256 bytes, 32 bytes per iteration, from the top down */
+ xor %r8d, %r8d
+ mov $28, %ecx # quadword index of the last 32 byte group
+0: mov %r8, (%rsp, %rcx, 8)
+ mov %r8, 8(%rsp, %rcx, 8)
+ mov %r8, 16(%rsp, %rcx, 8)
+ mov %r8, 24(%rsp, %rcx, 8)
+ sub $4, %ecx
+ jnc 0b # stop once the index drops below zero
+
+ add $2, %rsi # the first two set characters are in eax/edx
+ movb $1, (%rsp, %rax, 1) # register first chars in set
+ movb $1, (%rsp, %rdx, 1)
+ mov %rdi, %rax # a copy of the source to iterate over
+
+ /*
+ * process remaining chars in set
+ *
+ * Each byte is registered before it is tested for NUL, so the
+ * terminator ends up in the table as well. This is what makes
+ * the scan below stop at the end of the string.
+ */
+ ALIGN_TEXT
+0: movzbl (%rsi), %ecx
+ movb $1, (%rsp, %rcx, 1)
+ test %ecx, %ecx
+ jz 1f
+
+ movzbl 1(%rsi), %ecx
+ movb $1, (%rsp, %rcx, 1)
+ test %ecx, %ecx
+ jz 1f
+
+ add $2, %rsi
+ jmp 0b
+
+ /*
+ * find match
+ *
+ * Bytes 0..3 at (%rax) are looked up in turn; a later byte is only
+ * read after the earlier ones were found not to be in the table
+ * (and hence not NUL).
+ */
+ ALIGN_TEXT
+1: movzbl (%rax), %ecx
+ cmpb $0, (%rsp, %rcx, 1)
+ jne 2f # hit at byte 0
+
+ movzbl 1(%rax), %ecx
+ cmpb $0, (%rsp, %rcx, 1)
+ jne 3f # hit at byte 1
+
+ movzbl 2(%rax), %ecx
+ cmpb $0, (%rsp, %rcx, 1)
+ jne 4f # hit at byte 2
+
+ movzbl 3(%rax), %ecx
+ add $4, %rax
+ cmpb $0, (%rsp, %rcx, 1)
+ je 1b # no hit in these four bytes
+
+ /*
+ * Hit at byte 3, but %rax has already advanced by 4. The ladder
+ * below adds 1 to the final difference per rung entered (dec of the
+ * base, inc of the cursor), so byte 3 enters with the cursor at
+ * byte 1 and picks up 2 more.
+ */
+ sub $3, %rax
+4: dec %rdi
+3: inc %rax
+2: sub %rdi, %rax # number of characters preceding match
+ leave
+ ret
+
+ /* set is empty, degrades to strlen */
+.Lstrlen:
+ leave # drop the frame, %rdi still holds the string
+ jmp CNAME(strlen) # tail call
+
+ /* just one character in set, degrades to strchrnul */
+.Lstrchr:
+ mov %rdi, (%rsp) # stash a copy of the string
+ mov %eax, %esi # find the character in the set
+ call CNAME(strchrnul)
+ sub (%rsp), %rax # length of prefix before match
+ leave
+ ret
+ARCHEND(strcspn, scalar)
+
+	/*
+	 * x86-64-v2 strcspn kernel.
+	 *
+	 * In:	%rdi = string to scan, %rsi = set of characters to look for
+	 * Out:	%rax = number of characters at the start of the string
+	 *	that precede the first character found in the set (or the
+	 *	terminating NUL if there is none)
+	 *
+	 * This kernel uses pcmpistri to do the heavy lifting.
+	 * We provide five code paths, depending on set size:
+	 *
+	 *      0: call strlen()
+	 *      1: call strchrnul()
+	 *  2--16: one pcmpistri per 16 bytes of input
+	 * 17--32: two pcmpistri per 16 bytes of input
+	 *   >=33: fall back to look up table
+	 *
+	 * pcmpistri is always used with imm8 = 0 and with the set as the
+	 * register operand written last (AT&T order).  It then yields
+	 *
+	 *	%ecx	index of the first string byte that is in the set,
+	 *		16 if there is none; bytes at or behind a NUL in
+	 *		the string chunk never match
+	 *	CF	set if there is a match
+	 *	ZF	set if the string chunk contains a NUL byte
+	 *
+	 * Use of the 256 byte stack frame:
+	 *
+	 *	pcmpistri paths:  0(%rsp)  32 bytes for a fake string head
+	 *			 32(%rsp)  up to 48 bytes of set, copied
+	 *				   as aligned 16 byte chunks
+	 *	look up table:	  0(%rsp)  256 byte membership table
+	 */
+ARCHENTRY(strcspn, x86_64_v2)
+	push		%rbp
+	mov		%rsp, %rbp
+	sub		$256, %rsp		# keeps %rsp 16 byte aligned
+
+	/* check for special cases */
+	movzbl		(%rsi), %eax
+	test		%eax, %eax		# empty set?
+	jz		.Lstrlenv2
+
+	cmpb		$0, 1(%rsi)		# single character set?
+	jz		.Lstrchrv2
+
+	/* find set size and copy up to 48 bytes of it to 32(%rsp) */
+	mov		%esi, %ecx
+	and		$~0xf, %rsi		# align set pointer
+	movdqa		(%rsi), %xmm0
+	pxor		%xmm1, %xmm1
+	and		$0xf, %ecx		# amount of bytes rsi is past alignment
+	xor		%edx, %edx
+	pcmpeqb		%xmm0, %xmm1		# end of set reached?
+	movdqa		%xmm0, 32(%rsp)		# transfer head of set to stack
+	pmovmskb	%xmm1, %eax
+	shr		%cl, %eax		# clear out junk before set
+	test		%eax, %eax		# end of set reached?
+	jnz		0f
+
+	movdqa		16(%rsi), %xmm0		# second chunk of the set
+	mov		$16, %edx
+	sub		%ecx, %edx		# length of set preceding xmm0
+	pxor		%xmm1, %xmm1
+	pcmpeqb		%xmm0, %xmm1
+	movdqa		%xmm0, 48(%rsp)
+	movdqu		32(%rsp, %rcx, 1), %xmm2 # head of set
+	pmovmskb	%xmm1, %eax
+	test		%eax, %eax
+	jnz		1f
+
+	movdqa		32(%rsi), %xmm0		# third chunk
+	add		$16, %edx
+	pxor		%xmm1, %xmm1
+	pcmpeqb		%xmm0, %xmm1
+	movdqa		%xmm0, 64(%rsp)
+	pmovmskb	%xmm1, %eax
+	test		%eax, %eax		# still not done?
+	jz		.Lgt32v2
+
+0:	movdqu		32(%rsp, %rcx, 1), %xmm2 # head of set
+1:	tzcnt		%eax, %eax		# eax is nonzero on every path here
+	add		%eax, %edx		# length of set (excluding NUL byte)
+	cmp		$32, %edx		# above 32 bytes?
+	ja		.Lgt32v2
+
+	/*
+	 * At this point we know that we want to use pcmpistri.
+	 * One last problem obtains: the head of the string is not
+	 * aligned and may cross a page boundary.  If this is the case,
+	 * we take the part before the page boundary and repeat the
+	 * last byte to fill up the xmm register.  The filler cannot
+	 * change the result: if that byte is NUL or in the set, its
+	 * real occurrence comes first.
+	 */
+	mov		%rdi, %rax		# save original string pointer
+	lea		15(%rdi), %esi		# last byte of the head
+	xor		%edi, %esi
+	test		$PAGE_SIZE, %esi	# does the head cross a page?
+	jz		0f
+
+	/* head crosses page: copy to stack to fix up */
+	and		$~0xf, %rax		# align head pointer temporarily
+	movzbl		15(%rax), %esi		# last head byte on the page
+	movdqa		(%rax), %xmm0
+	movabs		$0x0101010101010101, %r8
+	imul		%r8, %rsi		# repeated 8 times
+	movdqa		%xmm0, (%rsp)		# head word on stack
+	mov		%rsi, 16(%rsp)		# followed by filler (last byte x16)
+	mov		%rsi, 24(%rsp)
+	mov		%edi, %eax
+	and		$0xf, %eax		# offset of head from alignment
+	add		%rsp, %rax		# pointer to fake head
+
+0:	movdqu		(%rax), %xmm0		# load head (fake or real)
+	lea		16(%rdi), %rax
+	and		$~0xf, %rax		# second 16 bytes of string (aligned)
+	cmp		$16, %edx		# 17--32 bytes?
+	ja		.Lgt16v2
+
+	/* set is 2--16 bytes in size */
+
+	/* _SIDD_UBYTE_OPS|_SIDD_CMP_EQUAL_ANY|_SIDD_LEAST_SIGNIFICANT */
+	pcmpistri	$0, %xmm0, %xmm2	# match in head?
+	jbe		.Lheadmatchv2
+
+	ALIGN_TEXT
+0:	pcmpistri	$0, (%rax), %xmm2
+	jbe		1f			# match or end of string?
+	pcmpistri	$0, 16(%rax), %xmm2
+	lea		32(%rax), %rax		# lea leaves the flags alone
+	ja		0b			# neither match nor end of string?
+
+	lea		-16(%rax), %rax		# go back to second half
+1:	jc		2f			# jump if match found
+	movdqa		(%rax), %xmm0		# reload string piece
+	pxor		%xmm1, %xmm1
+	pcmpeqb		%xmm1, %xmm0		# where is the NUL byte?
+	pmovmskb	%xmm0, %ecx
+	tzcnt		%ecx, %ecx		# location of NUL byte in (%rax)
+2:	sub		%rdi, %rax		# offset of chunk from beginning of string
+	add		%rcx, %rax		# prefix length before match/NUL
+	leave
+	ret
+
+	/* match or end of string within the head (2--16 byte sets) */
+.Lheadmatchv2:
+	jc		2f			# jump if match found
+	pxor		%xmm1, %xmm1
+	pcmpeqb		%xmm1, %xmm0
+	pmovmskb	%xmm0, %ecx
+	tzcnt		%ecx, %ecx		# location of NUL byte
+2:	mov		%ecx, %eax		# prefix length before match/NUL
+	leave
+	ret
+
+	/*
+	 * set is 17--32 bytes in size
+	 *
+	 * Every chunk of the string is matched against both halves of
+	 * the set before any decision is made.  This is required for
+	 * two reasons: a chunk holding the end of the string may still
+	 * have a match with the second half in front of the NUL byte,
+	 * and a match with the second half may precede a match with the
+	 * first half.  The result for a chunk is the lower of the two
+	 * match indices, or the location of the NUL byte if neither
+	 * half matches.
+	 *
+	 * %r9 holds the address in the string that the chunk in %xmm0
+	 * stands for, %rax the next aligned chunk to load.
+	 */
+.Lgt16v2:
+	movdqu		48(%rsp, %rcx, 1), %xmm3 # second part of set
+	mov		%rdi, %r9		# the head starts at the string
+	jmp		1f			# the head is already in %xmm0
+
+	ALIGN_TEXT
+0:	movdqa		(%rax), %xmm0		# next chunk of the string
+	mov		%rax, %r9
+	add		$16, %rax
+1:	pcmpistri	$0, %xmm0, %xmm2	# match with first set half?
+	mov		%ecx, %r8d		# its index, 16 if none
+	pcmpistri	$0, %xmm0, %xmm3
+	jbe		2f			# match with second half or end of string?
+	cmp		$16, %r8d
+	je		0b			# no match with first half either?
+
+2:	cmp		%r8d, %ecx
+	cmova		%r8d, %ecx		# earliest match in either half, 16 if none
+	cmp		$16, %ecx
+	jb		3f			# jump if match found
+
+	/* no match, so we got here on ZF=1: the chunk holds a NUL byte */
+	pxor		%xmm1, %xmm1
+	pcmpeqb		%xmm1, %xmm0		# where is the NUL byte?
+	pmovmskb	%xmm0, %ecx
+	tzcnt		%ecx, %ecx		# location of NUL byte in chunk
+3:	sub		%rdi, %r9		# offset of chunk from beginning of string
+	lea		(%r9, %rcx, 1), %rax	# prefix length before match/NUL
+	leave
+	ret
+
+	/* set is empty, degrades to strlen */
+.Lstrlenv2:
+	leave					# %rdi still holds the string
+	jmp		CNAME(strlen)		# tail call
+
+	/* just one character in set, degrades to strchrnul */
+.Lstrchrv2:
+	mov		%rdi, (%rsp)		# stash a copy of the string
+	mov		%eax, %esi		# find this character
+	call		CNAME(strchrnul)
+	sub		(%rsp), %rax		# length of prefix before match
+	leave
+	ret
+
+	/* set is >=33 bytes in size */
+.Lgt32v2:
+	xorps		%xmm0, %xmm0
+	mov		$256-64, %edx
+
+	/* clear out look up table, 64 bytes at a time from the top down */
+0:	movaps		%xmm0, (%rsp, %rdx, 1)
+	movaps		%xmm0, 16(%rsp, %rdx, 1)
+	movaps		%xmm0, 32(%rsp, %rdx, 1)
+	movaps		%xmm0, 48(%rsp, %rdx, 1)
+	sub		$64, %edx
+	jnc		0b
+
+	add		%rcx, %rsi		# restore set pointer
+	mov		%rdi, %rax		# keep a copy of the string
+
+	/*
+	 * initialise look up table
+	 *
+	 * Each byte is registered before it is tested for NUL, so the
+	 * terminator is in the table as well and ends the scan below.
+	 */
+	ALIGN_TEXT
+0:	movzbl		(%rsi), %ecx
+	movb		$1, (%rsp, %rcx, 1)
+	test		%ecx, %ecx
+	jz		1f
+
+	movzbl		1(%rsi), %ecx
+	movb		$1, (%rsp, %rcx, 1)
+	test		%ecx, %ecx
+	jz		1f
+
+	movzbl		2(%rsi), %ecx
+	movb		$1, (%rsp, %rcx, 1)
+	test		%ecx, %ecx
+	jz		1f
+
+	movzbl		3(%rsi), %ecx
+	movb		$1, (%rsp, %rcx, 1)
+	test		%ecx, %ecx
+	jz		1f
+
+	add		$4, %rsi
+	jmp		0b
+
+	/* find match */
+	ALIGN_TEXT
+1:	movzbl		(%rax), %ecx
+	cmpb		$0, (%rsp, %rcx, 1)
+	jne		2f			# hit at byte 0
+
+	movzbl		1(%rax), %ecx
+	cmpb		$0, (%rsp, %rcx, 1)
+	jne		3f			# hit at byte 1
+
+	movzbl		2(%rax), %ecx
+	cmpb		$0, (%rsp, %rcx, 1)
+	jne		4f			# hit at byte 2
+
+	movzbl		3(%rax), %ecx
+	add		$4, %rax
+	cmpb		$0, (%rsp, %rcx, 1)
+	je		1b			# no hit in these four bytes
+
+	/*
+	 * Hit at byte 3 with %rax already advanced by 4.  Each rung
+	 * entered below adds 1 to the final difference.
+	 */
+	sub		$3, %rax
+4:	dec		%rdi
+3:	inc		%rax
+2:	sub		%rdi, %rax		# number of characters preceding match
+	leave
+	ret
+ARCHEND(strcspn, x86_64_v2)
+
+ .section .note.GNU-stack,"",%progbits
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Sat, Nov 9, 11:55 AM (20 h, 48 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
14559649
Default Alt Text
D41557.diff (9 KB)
Attached To
Mode
D41557: lib/libc/amd64/string: add strcspn(3) scalar, x86-64-v2 implementation, unit tests
Attached
Detach File
Event Timeline
Log In to Comment