Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F109462940
D48500.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
237 KB
Referenced Files
None
Subscribers
None
D48500.diff
View Options
diff --git a/contrib/cortex-strings/.gitignore b/contrib/cortex-strings/.gitignore
deleted file mode 100644
--- a/contrib/cortex-strings/.gitignore
+++ /dev/null
@@ -1,11 +0,0 @@
-*.a
-*.o
-*.la
-*.lo
-*.png
-*.pyc
-.deps
-.dirstamp
-.libs
-try-*
-cache.txt
diff --git a/contrib/cortex-strings/Makefile.am b/contrib/cortex-strings/Makefile.am
deleted file mode 100644
--- a/contrib/cortex-strings/Makefile.am
+++ /dev/null
@@ -1,327 +0,0 @@
-# Copyright (c) 2011, Linaro Limited
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are met:
-# * Redistributions of source code must retain the above copyright
-# notice, this list of conditions and the following disclaimer.
-# * Redistributions in binary form must reproduce the above copyright
-# notice, this list of conditions and the following disclaimer in the
-# documentation and/or other materials provided with the distribution.
-# * Neither the name of the Linaro nor the
-# names of its contributors may be used to endorse or promote products
-# derived from this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
-# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
-# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-#
-
-# Top level Makefile for cortex-strings
-
-# Used to record the compiler version in the executables
-COMPILER = $(shell $(CC) --version 2>&1 | head -n1)
-
-# The main library
-lib_LTLIBRARIES = \
- libcortex-strings.la
-
-## Test suite
-check_PROGRAMS = \
- tests/test-memchr \
- tests/test-memcmp \
- tests/test-memcpy \
- tests/test-memmove \
- tests/test-memset \
- tests/test-strchr \
- tests/test-strcmp \
- tests/test-strcpy \
- tests/test-strlen \
- tests/test-strncmp \
- tests/test-strnlen
-
-# Options for the tests
-tests_cflags = -I$(srcdir)/tests $(AM_CFLAGS)
-tests_ldadd = libcortex-strings.la
-tests_test_memchr_LDADD = $(tests_ldadd)
-tests_test_memchr_CFLAGS = $(tests_cflags)
-tests_test_memcmp_LDADD = $(tests_ldadd)
-tests_test_memcmp_CFLAGS = $(tests_cflags)
-tests_test_memcpy_LDADD = $(tests_ldadd)
-tests_test_memcpy_CFLAGS = $(tests_cflags)
-tests_test_memmove_LDADD = $(tests_ldadd)
-tests_test_memmove_CFLAGS = $(tests_cflags)
-tests_test_memset_LDADD = $(tests_ldadd)
-tests_test_memset_CFLAGS = $(tests_cflags)
-tests_test_strchr_LDADD = $(tests_ldadd)
-tests_test_strchr_CFLAGS = $(tests_cflags)
-tests_test_strcmp_LDADD = $(tests_ldadd)
-tests_test_strcmp_CFLAGS = $(tests_cflags)
-tests_test_strcpy_LDADD = $(tests_ldadd)
-tests_test_strcpy_CFLAGS = $(tests_cflags)
-tests_test_strlen_LDADD = $(tests_ldadd)
-tests_test_strlen_CFLAGS = $(tests_cflags)
-tests_test_strncmp_LDADD = $(tests_ldadd)
-tests_test_strncmp_CFLAGS = $(tests_cflags)
-
-TESTS = $(check_PROGRAMS)
-
-## Benchmarks
-noinst_PROGRAMS = \
- dhry \
- dhry-native \
- try-none \
- try-this \
- try-plain \
- try-newlib-c \
- try-bionic-c \
- try-glibc-c
-
-# Good 'ol Dhrystone
-dhry_SOURCES = \
- benchmarks/dhry/dhry_1.c \
- benchmarks/dhry/dhry_2.c \
- benchmarks/dhry/dhry.h
-
-dhry_CFLAGS = -Dcompiler="\"$(COMPILER)\"" -Doptions="\"$(CFLAGS)\""
-dhry_LDADD = libcortex-strings.la
-
-dhry_native_SOURCES = $(dhry_SOURCES)
-dhry_native_CFLAGS = $(dhry_CFLAGS)
-
-# Benchmark harness
-noinst_LIBRARIES = \
- libmulti.a \
- libbionic-c.a \
- libglibc-c.a \
- libnewlib-c.a \
- libplain.a
-
-libmulti_a_SOURCES = \
- benchmarks/multi/harness.c
-
-libmulti_a_CFLAGS = -DVERSION=\"$(VERSION)\" $(AM_CFLAGS)
-
-## Other architecture independant implementaions
-libbionic_c_a_SOURCES = \
- reference/bionic-c/bcopy.c \
- reference/bionic-c/memchr.c \
- reference/bionic-c/memcmp.c \
- reference/bionic-c/memcpy.c \
- reference/bionic-c/memset.c \
- reference/bionic-c/strchr.c \
- reference/bionic-c/strcmp.c \
- reference/bionic-c/strcpy.c \
- reference/bionic-c/strlen.c
-
-libglibc_c_a_SOURCES = \
- reference/glibc-c/memchr.c \
- reference/glibc-c/memcmp.c \
- reference/glibc-c/memcpy.c \
- reference/glibc-c/memset.c \
- reference/glibc-c/strchr.c \
- reference/glibc-c/strcmp.c \
- reference/glibc-c/strcpy.c \
- reference/glibc-c/strlen.c \
- reference/glibc-c/wordcopy.c \
- reference/glibc-c/memcopy.h \
- reference/glibc-c/pagecopy.h
-
-libnewlib_c_a_SOURCES = \
- reference/newlib-c/memchr.c \
- reference/newlib-c/memcmp.c \
- reference/newlib-c/memcpy.c \
- reference/newlib-c/memset.c \
- reference/newlib-c/strchr.c \
- reference/newlib-c/strcmp.c \
- reference/newlib-c/strcpy.c \
- reference/newlib-c/strlen.c \
- reference/newlib-c/shim.h
-
-libplain_a_SOURCES = \
- reference/plain/memset.c \
- reference/plain/memcpy.c \
- reference/plain/strcmp.c \
- reference/plain/strcpy.c
-
-try_none_SOURCES =
-try_none_LDADD = libmulti.a -lrt
-try_this_SOURCES =
-try_this_LDADD = libmulti.a libcortex-strings.la -lrt
-try_bionic_c_SOURCES =
-try_bionic_c_LDADD = libmulti.a libbionic-c.a -lrt
-try_glibc_c_SOURCES =
-try_glibc_c_LDADD = libmulti.a libglibc-c.a -lrt
-try_newlib_c_SOURCES =
-try_newlib_c_LDADD = libmulti.a libnewlib-c.a -lrt
-try_plain_SOURCES =
-try_plain_LDADD = libmulti.a libplain.a -lrt
-
-# Architecture specific
-
-if HOST_AARCH32
-
-if WITH_NEON
-# Pull in the NEON specific files
-neon_bionic_a9_sources = \
- reference/bionic-a9/memcpy.S \
- reference/bionic-a9/memset.S
-neon_bionic_a15_sources = \
- reference/bionic-a15/memcpy.S \
- reference/bionic-a15/memset.S
-fpu_flags = -mfpu=neon
-else
-if WITH_VFP
-fpu_flags = -mfpu=vfp
-else
-fpu_flags = -msoft-float
-endif
-endif
-
-# Benchmarks and example programs
-noinst_PROGRAMS += \
- try-bionic-a9 \
- try-bionic-a15 \
- try-csl \
- try-glibc \
- try-newlib \
- try-newlib-xscale
-
-# Libraries used in the benchmarks and examples
-noinst_LIBRARIES += \
- libbionic-a9.a \
- libbionic-a15.a \
- libcsl.a \
- libglibc.a \
- libnewlib.a \
- libnewlib-xscale.a
-
-# Main library
-libcortex_strings_la_SOURCES = \
- src/thumb-2/strcpy.c \
- src/arm/memchr.S \
- src/arm/strchr.S \
- src/thumb-2/strlen.S \
- src/arm/memset.S \
- src/arm/memcpy.S \
- src/arm/strcmp.S
-
-# Libraries containing the difference reference versions
-libbionic_a9_a_SOURCES = \
- $(neon_bionic_a9_sources) \
- reference/bionic-a9/memcmp.S \
- reference/bionic-a9/strcmp.S \
- reference/bionic-a9/strcpy.S \
- reference/bionic-a9/strlen.c
-
-libbionic_a9_a_CFLAGS = -Wa,-mimplicit-it=thumb
-
-libbionic_a15_a_SOURCES = \
- $(neon_bionic_a15_sources) \
- reference/bionic-a15/memcmp.S \
- reference/bionic-a15/strcmp.S \
- reference/bionic-a15/strcpy.S \
- reference/bionic-a15/strlen.c
-
-libbionic_a15_a_CFLAGS = -Wa,-mimplicit-it=thumb
-
-libcsl_a_SOURCES = \
- reference/csl/memcpy.c \
- reference/csl/memset.c \
- reference/csl/arm_asm.h
-
-libglibc_a_SOURCES = \
- reference/glibc/memcpy.S \
- reference/glibc/memset.S \
- reference/glibc/strchr.S \
- reference/glibc/strlen.S
-
-libnewlib_a_SOURCES = \
- reference/newlib/memcpy.S \
- reference/newlib/strcmp.S \
- reference/newlib/strcpy.c \
- reference/newlib/strlen.c \
- reference/newlib/arm_asm.h \
- reference/newlib/shim.h
-
-libnewlib_xscale_a_SOURCES = \
- reference/newlib-xscale/memchr.c \
- reference/newlib-xscale/memcpy.c \
- reference/newlib-xscale/memset.c \
- reference/newlib-xscale/strchr.c \
- reference/newlib-xscale/strcmp.c \
- reference/newlib-xscale/strcpy.c \
- reference/newlib-xscale/strlen.c \
- reference/newlib-xscale/xscale.h
-
-# Flags for the benchmark helpers
-try_bionic_a9_SOURCES =
-try_bionic_a9_LDADD = libmulti.a libbionic-a9.a -lrt
-try_bionic_a15_SOURCES =
-try_bionic_a15_LDADD = libmulti.a libbionic-a15.a -lrt
-try_csl_SOURCES =
-try_csl_LDADD = libmulti.a libcsl.a -lrt
-try_glibc_SOURCES =
-try_glibc_LDADD = libmulti.a libglibc.a -lrt
-try_newlib_SOURCES =
-try_newlib_LDADD = libmulti.a libnewlib.a -lrt
-try_newlib_xscale_SOURCES =
-try_newlib_xscale_LDADD = libmulti.a libnewlib-xscale.a -lrt
-
-AM_CPPFLAGS = $(fpu_flags)
-AM_LDFLAGS = $(fpu_flags)
-
-endif
-
-# aarch64 specific
-if HOST_AARCH64
-
-libcortex_strings_la_SOURCES = \
- src/aarch64/memchr.S \
- src/aarch64/memcmp.S \
- src/aarch64/memcpy.S \
- src/aarch64/memmove.S \
- src/aarch64/memset.S \
- src/aarch64/strchr.S \
- src/aarch64/strchrnul.S \
- src/aarch64/strcmp.S \
- src/aarch64/strcpy.S \
- src/aarch64/strlen.S \
- src/aarch64/strncmp.S \
- src/aarch64/strnlen.S
-
-endif
-
-libcortex_strings_la_LDFLAGS = -version-info 1:0:0
-
-AM_CFLAGS = \
- -std=gnu99 -Wall \
- -fno-builtin -fno-stack-protector -U_FORTIFY_SOURCE \
- $(AM_CPPFLAGS)
-
-if WITH_SUBMACHINE
-AM_CFLAGS += \
- -mtune=$(submachine)
-endif
-
-EXTRA_DIST = \
- tests/hp-timing.h \
- tests/test-string.h \
- tests/test-skeleton.c \
- scripts/add-license.sh \
- scripts/bench.py \
- scripts/fixup.py \
- scripts/libplot.py \
- scripts/plot-align.py \
- scripts/plot.py \
- scripts/plot-sizes.py \
- scripts/plot-top.py \
- scripts/trim.sh \
- autogen.sh
diff --git a/contrib/cortex-strings/README b/contrib/cortex-strings/README
deleted file mode 100644
--- a/contrib/cortex-strings/README
+++ /dev/null
@@ -1,111 +0,0 @@
-= Cortex-A String Routines =
-
-This package contains optimised string routines including memcpy(), memset(),
-strcpy(), strlen() for the ARM Cortex-A series of cores.
-
-Various implementations of these routines are provided, including generic
-implementations for ARMv7-A cores with/without Neon, Thumb2 implementations
-and generic implementations for cores supporting AArch64.
-
-== Getting started ==
-First configure and then install libcortex-strings.so. To make other
-applications use this library, either add -lcortex-strings to the link
-command or use LD_PRELOAD to load the library into existing applications.
-
-Our intent is to get these routines into the common C libraries such
-as GLIBC, Bionic, and Newlib. Your system may already include them!
-
-== Contents ==
- * src/ contains the routines themselves
- * tests/ contains the unit tests
- * reference/ contains reference copies of other ARM-focused
- implementations gathered from around the Internet
- * benchmarks/ contains various benchmarks, tools, and scripts used to
- check and report on the different implementations.
-
-The src directory contains different variants organised by the
-implementation they run on and optional features used. For example:
- * src/thumb-2 contains generic non-NEON routines for AArch32 (with Thumb-2).
- * src/arm contains tuned routines for Cortex-A class processors.
- * src/aarch64 contains generic routines for AArch64.
- * src/thumb contains generic routines for armv6-M (with Thumb).
-
-== Reference versions ==
-reference/ contains versions collected from various popular Open
-Source libraries. These have been modified for use in benchmarking.
-Please refer to the individual files for any licensing terms.
-
-The routines were collected from the following releases:
- * EGLIBC 2.13
- * Newlib 1.19.0
- * Bionic android-2.3.5_r1
-
-== Licensing ==
-All Linaro-authored routines are under the modified BSD license:
-
-Copyright (c) 2011, Linaro Limited
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in the
- documentation and/or other materials provided with the distribution.
- * Neither the name of the Linaro nor the
- names of its contributors may be used to endorse or promote products
- derived from this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
-ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
-DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-All ARM-authored routines are under the modified BSD license:
-
-Copyright (c) 2014 ARM Ltd
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in the
- documentation and/or other materials provided with the distribution.
- * Neither the name of the Linaro nor the
- names of its contributors may be used to endorse or promote products
- derived from this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
-ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
-DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-All third party routines are under a GPL compatible license.
-
-== Notes and Limitations ==
-Some of the implementations have been collected from other
-projects and have a variety of licenses and copyright holders.
-
-== Style ==
-Assembly code attempts to follow the GLIBC coding convetions. They
-are:
- * Copyright headers in C style comment blocks
- * Instructions indented with one tab
- * Operands indented with one tab
- * Text is wrapped at 70 characters
- * End of line comments are fine
diff --git a/contrib/cortex-strings/autogen.sh b/contrib/cortex-strings/autogen.sh
deleted file mode 100755
--- a/contrib/cortex-strings/autogen.sh
+++ /dev/null
@@ -1,69 +0,0 @@
-#!/bin/sh
-#
-# autogen.sh glue for hplip
-#
-# HPLIP used to have five or so different autotools trees. Upstream
-# has reduced it to two. Still, this script is capable of cleaning
-# just about any possible mess of autoconf files.
-#
-# BE CAREFUL with trees that are not completely automake-generated,
-# this script deletes all Makefile.in files it can find.
-#
-# Requires: automake 1.9, autoconf 2.57+
-# Conflicts: autoconf 2.13
-set -e
-
-# Refresh GNU autotools toolchain.
-echo Cleaning autotools files...
-find -type d -name autom4te.cache -print0 | xargs -0 rm -rf \;
-find -type f \( -name missing -o -name install-sh -o -name mkinstalldirs \
- -o -name depcomp -o -name ltmain.sh -o -name configure \
- -o -name config.sub -o -name config.guess \
- -o -name Makefile.in \) -print0 | xargs -0 rm -f
-
-echo Running autoreconf...
-autoreconf --force --install
-
-# For the Debian package build
-test -d debian && {
- # link these in Debian builds
- rm -f config.sub config.guess
- ln -s /usr/share/misc/config.sub .
- ln -s /usr/share/misc/config.guess .
-
- # refresh list of executable scripts, to avoid possible breakage if
- # upstream tarball does not include the file or if it is mispackaged
- # for whatever reason.
- [ "$1" = "updateexec" ] && {
- echo Generating list of executable files...
- rm -f debian/executable.files
- find -type f -perm +111 ! -name '.*' -fprint debian/executable.files
- }
-
- # Remove any files in upstream tarball that we don't have in the Debian
- # package (because diff cannot remove files)
- version=`dpkg-parsechangelog | awk '/Version:/ { print $2 }' | sed -e 's/-[^-]\+$//'`
- source=`dpkg-parsechangelog | awk '/Source:/ { print $2 }' | tr -d ' '`
- if test -r ../${source}_${version}.orig.tar.gz ; then
- echo Generating list of files that should be removed...
- rm -f debian/deletable.files
- touch debian/deletable.files
- [ -e debian/tmp ] && rm -rf debian/tmp
- mkdir debian/tmp
- ( cd debian/tmp ; tar -zxf ../../../${source}_${version}.orig.tar.gz )
- find debian/tmp/ -type f ! -name '.*' -print0 | xargs -0 -ri echo '{}' | \
- while read -r i ; do
- if test -e "${i}" ; then
- filename=$(echo "${i}" | sed -e 's#.*debian/tmp/[^/]\+/##')
- test -e "${filename}" || echo "${filename}" >>debian/deletable.files
- fi
- done
- rm -fr debian/tmp
- else
- echo Emptying list of files that should be deleted...
- rm -f debian/deletable.files
- touch debian/deletable.files
- fi
-}
-
-exit 0
diff --git a/contrib/cortex-strings/benchmarks/dhry/dhry.h b/contrib/cortex-strings/benchmarks/dhry/dhry.h
deleted file mode 100644
--- a/contrib/cortex-strings/benchmarks/dhry/dhry.h
+++ /dev/null
@@ -1,311 +0,0 @@
-/*
- **************************************************************************
- * DHRYSTONE 2.1 BENCHMARK PC VERSION
- **************************************************************************
- *
- * "DHRYSTONE" Benchmark Program
- * -----------------------------
- *
- * Version: C, Version 2.1
- *
- * File: dhry.h (part 1 of 3)
- *
- * Date: May 25, 1988
- *
- * Author: Reinhold P. Weicker
- * Siemens AG, AUT E 51
- * Postfach 3220
- * 8520 Erlangen
- * Germany (West)
- * Phone: [+49]-9131-7-20330
- * (8-17 Central European Time)
- * Usenet: ..!mcsun!unido!estevax!weicker
- *
- * Original Version (in Ada) published in
- * "Communications of the ACM" vol. 27., no. 10 (Oct. 1984),
- * pp. 1013 - 1030, together with the statistics
- * on which the distribution of statements etc. is based.
- *
- * In this C version, the following C library functions are used:
- * - strcpy, strcmp (inside the measurement loop)
- * - printf, scanf (outside the measurement loop)
- * In addition, Berkeley UNIX system calls "times ()" or "time ()"
- * are used for execution time measurement. For measurements
- * on other systems, these calls have to be changed.
- *
- * Collection of Results:
- * Reinhold Weicker (address see above) and
- *
- * Rick Richardson
- * PC Research. Inc.
- * 94 Apple Orchard Drive
- * Tinton Falls, NJ 07724
- * Phone: (201) 389-8963 (9-17 EST)
- * Usenet: ...!uunet!pcrat!rick
- *
- * Please send results to Rick Richardson and/or Reinhold Weicker.
- * Complete information should be given on hardware and software used.
- * Hardware information includes: Machine type, CPU, type and size
- * of caches; for microprocessors: clock frequency, memory speed
- * (number of wait states).
- * Software information includes: Compiler (and runtime library)
- * manufacturer and version, compilation switches, OS version.
- * The Operating System version may give an indication about the
- * compiler; Dhrystone itself performs no OS calls in the measurement
- * loop.
- *
- * The complete output generated by the program should be mailed
- * such that at least some checks for correctness can be made.
- *
- **************************************************************************
- *
- * This version has changes made by Roy Longbottom to conform to a common
- * format for a series of standard benchmarks for PCs:
- *
- * Running time greater than 5 seconds due to inaccuracy of the PC clock.
- *
- * Automatic adjustment of run time, no manually inserted parameters.
- *
- * Initial display of calibration times to confirm linearity.
- *
- * Display of results within one screen (or at a slow speed as the test
- * progresses) so that it can be seen to have run successfully.
- *
- * Facilities to type in details of system used etc.
- *
- * All results and details appended to a results file.
- *
- *
- * Roy Longbottom
- * 101323.2241@compuserve.com
- *
- **************************************************************************
- *
- * For details of history, changes, other defines, benchmark construction
- * statistics see official versions from ftp.nosc.mil/pub/aburto where
- * the latest table of results (dhry.tbl) are available. See also
- * netlib@ornl.gov
- *
- **************************************************************************
- *
- * Defines: The following "Defines" are possible:
- * -DREG=register (default: Not defined)
- * As an approximation to what an average C programmer
- * might do, the "register" storage class is applied
- * (if enabled by -DREG=register)
- * - for local variables, if they are used (dynamically)
- * five or more times
- * - for parameters if they are used (dynamically)
- * six or more times
- * Note that an optimal "register" strategy is
- * compiler-dependent, and that "register" declarations
- * do not necessarily lead to faster execution.
- * -DNOSTRUCTASSIGN (default: Not defined)
- * Define if the C compiler does not support
- * assignment of structures.
- * -DNOENUMS (default: Not defined)
- * Define if the C compiler does not support
- * enumeration types.
- ***************************************************************************
- *
- * Compilation model and measurement (IMPORTANT):
- *
- * This C version of Dhrystone consists of three files:
- * - dhry.h (this file, containing global definitions and comments)
- * - dhry_1.c (containing the code corresponding to Ada package Pack_1)
- * - dhry_2.c (containing the code corresponding to Ada package Pack_2)
- *
- * The following "ground rules" apply for measurements:
- * - Separate compilation
- * - No procedure merging
- * - Otherwise, compiler optimizations are allowed but should be indicated
- * - Default results are those without register declarations
- * See the companion paper "Rationale for Dhrystone Version 2" for a more
- * detailed discussion of these ground rules.
- *
- * For 16-Bit processors (e.g. 80186, 80286), times for all compilation
- * models ("small", "medium", "large" etc.) should be given if possible,
- * together with a definition of these models for the compiler system used.
- *
- **************************************************************************
- * Examples of Pentium Results
- *
- * Dhrystone Benchmark Version 2.1 (Language: C)
- *
- * Month run 4/1996
- * PC model Escom
- * CPU Pentium
- * Clock MHz 100
- * Cache 256K
- * Options Neptune chipset
- * OS/DOS Windows 95
- * Compiler Watcom C/ C++ 10.5 Win386
- * OptLevel -otexan -zp8 -fp5 -5r
- * Run by Roy Longbottom
- * From UK
- * Mail 101323.2241@compuserve.com
- *
- * Final values (* implementation-dependent):
- *
- * Int_Glob: O.K. 5
- * Bool_Glob: O.K. 1
- * Ch_1_Glob: O.K. A
- * Ch_2_Glob: O.K. B
- * Arr_1_Glob[8]: O.K. 7
- * Arr_2_Glob8/7: O.K. 1600010
- * Ptr_Glob->
- * Ptr_Comp: * 98008
- * Discr: O.K. 0
- * Enum_Comp: O.K. 2
- * Int_Comp: O.K. 17
- * Str_Comp: O.K. DHRYSTONE PROGRAM, SOME STRING
- * Next_Ptr_Glob->
- * Ptr_Comp: * 98008 same as above
- * Discr: O.K. 0
- * Enum_Comp: O.K. 1
- * Int_Comp: O.K. 18
- * Str_Comp: O.K. DHRYSTONE PROGRAM, SOME STRING
- * Int_1_Loc: O.K. 5
- * Int_2_Loc: O.K. 13
- * Int_3_Loc: O.K. 7
- * Enum_Loc: O.K. 1
- * Str_1_Loc: O.K. DHRYSTONE PROGRAM, 1'ST STRING
- * Str_2_Loc: O.K. DHRYSTONE PROGRAM, 2'ND STRING
- *
- * Register option Selected.
- *
- * Microseconds 1 loop: 4.53
- * Dhrystones / second: 220690
- * VAX MIPS rating: 125.61
- *
- *
- * Dhrystone Benchmark Version 2.1 (Language: C)
- *
- * Month run 4/1996
- * PC model Escom
- * CPU Pentium
- * Clock MHz 100
- * Cache 256K
- * Options Neptune chipset
- * OS/DOS Windows 95
- * Compiler Watcom C/ C++ 10.5 Win386
- * OptLevel No optimisation
- * Run by Roy Longbottom
- * From UK
- * Mail 101323.2241@compuserve.com
- *
- * Final values (* implementation-dependent):
- *
- * Int_Glob: O.K. 5
- * Bool_Glob: O.K. 1
- * Ch_1_Glob: O.K. A
- * Ch_2_Glob: O.K. B
- * Arr_1_Glob[8]: O.K. 7
- * Arr_2_Glob8/7: O.K. 320010
- * Ptr_Glob->
- * Ptr_Comp: * 98004
- * Discr: O.K. 0
- * Enum_Comp: O.K. 2
- * Int_Comp: O.K. 17
- * Str_Comp: O.K. DHRYSTONE PROGRAM, SOME STRING
- * Next_Ptr_Glob->
- * Ptr_Comp: * 98004 same as above
- * Discr: O.K. 0
- * Enum_Comp: O.K. 1
- * Int_Comp: O.K. 18
- * Str_Comp: O.K. DHRYSTONE PROGRAM, SOME STRING
- * Int_1_Loc: O.K. 5
- * Int_2_Loc: O.K. 13
- * Int_3_Loc: O.K. 7
- * Enum_Loc: O.K. 1
- * Str_1_Loc: O.K. DHRYSTONE PROGRAM, 1'ST STRING
- * Str_2_Loc: O.K. DHRYSTONE PROGRAM, 2'ND STRING
- *
- * Register option Not selected.
- *
- * Microseconds 1 loop: 20.06
- * Dhrystones / second: 49844
- * VAX MIPS rating: 28.37
- *
- **************************************************************************
- */
-
-/* Compiler and system dependent definitions: */
-
-#ifndef TIME
-#define TIMES
-#endif
- /* Use times(2) time function unless */
- /* explicitly defined otherwise */
-
-#ifdef TIMES
-/* #include <sys/types.h>
- #include <sys/times.h> */
- /* for "times" */
-#endif
-
-#define Mic_secs_Per_Second 1000000.0
- /* Berkeley UNIX C returns process times in seconds/HZ */
-
-#ifdef NOSTRUCTASSIGN
-#define structassign(d, s) memcpy(&(d), &(s), sizeof(d))
-#else
-#define structassign(d, s) d = s
-#endif
-
-#ifdef NOENUM
-#define Ident_1 0
-#define Ident_2 1
-#define Ident_3 2
-#define Ident_4 3
-#define Ident_5 4
- typedef int Enumeration;
-#else
- typedef enum {Ident_1, Ident_2, Ident_3, Ident_4, Ident_5}
- Enumeration;
-#endif
- /* for boolean and enumeration types in Ada, Pascal */
-
-/* General definitions: */
-
-#include <stdio.h>
-#include <string.h>
-
- /* for strcpy, strcmp */
-
-#define Null 0
- /* Value of a Null pointer */
-#define true 1
-#define false 0
-
-typedef int One_Thirty;
-typedef int One_Fifty;
-typedef char Capital_Letter;
-typedef int Boolean;
-typedef char Str_30 [31];
-typedef int Arr_1_Dim [50];
-typedef int Arr_2_Dim [50] [50];
-
-typedef struct record
- {
- struct record *Ptr_Comp;
- Enumeration Discr;
- union {
- struct {
- Enumeration Enum_Comp;
- int Int_Comp;
- char Str_Comp [31];
- } var_1;
- struct {
- Enumeration E_Comp_2;
- char Str_2_Comp [31];
- } var_2;
- struct {
- char Ch_1_Comp;
- char Ch_2_Comp;
- } var_3;
- } variant;
- } Rec_Type, *Rec_Pointer;
-
-
-
diff --git a/contrib/cortex-strings/benchmarks/dhry/dhry_1.c b/contrib/cortex-strings/benchmarks/dhry/dhry_1.c
deleted file mode 100644
--- a/contrib/cortex-strings/benchmarks/dhry/dhry_1.c
+++ /dev/null
@@ -1,778 +0,0 @@
-/*
- *************************************************************************
- *
- * "DHRYSTONE" Benchmark Program
- * -----------------------------
- *
- * Version: C, Version 2.1
- *
- * File: dhry_1.c (part 2 of 3)
- *
- * Date: May 25, 1988
- *
- * Author: Reinhold P. Weicker
- *
- *************************************************************************
- */
-
- #include <time.h>
- #include <stdlib.h>
- #include <stdio.h>
- #include "dhry.h"
- /*COMPILER COMPILER COMPILER COMPILER COMPILER COMPILER COMPILER*/
-
- #ifdef COW
- #define compiler "Watcom C/C++ 10.5 Win386"
- #define options " -otexan -zp8 -5r -ms"
- #endif
- #ifdef CNW
- #define compiler "Watcom C/C++ 10.5 Win386"
- #define options " No optimisation"
- #endif
- #ifdef COD
- #define compiler "Watcom C/C++ 10.5 Dos4GW"
- #define options " -otexan -zp8 -5r -ms"
- #endif
- #ifdef CND
- #define compiler "Watcom C/C++ 10.5 Dos4GW"
- #define options " No optimisation"
- #endif
- #ifdef CONT
- #define compiler "Watcom C/C++ 10.5 Win32NT"
- #define options " -otexan -zp8 -5r -ms"
- #endif
- #ifdef CNNT
- #define compiler "Watcom C/C++ 10.5 Win32NT"
- #define options " No optimisation"
- #endif
- #ifdef COO2
- #define compiler "Watcom C/C++ 10.5 OS/2-32"
- #define options " -otexan -zp8 -5r -ms"
- #endif
- #ifdef CNO2
- #define compiler "Watcom C/C++ 10.5 OS/2-32"
- #define options " No optimisation"
- #endif
-
-
-/* Global Variables: */
-
-Rec_Pointer Ptr_Glob,
- Next_Ptr_Glob;
-int Int_Glob;
- Boolean Bool_Glob;
- char Ch_1_Glob,
- Ch_2_Glob;
- int Arr_1_Glob [50];
- int Arr_2_Glob [50] [50];
- int getinput = 1;
-
-
- char Reg_Define[100] = "Register option Selected.";
-
- Enumeration Func_1 (Capital_Letter Ch_1_Par_Val,
- Capital_Letter Ch_2_Par_Val);
- /*
- forward declaration necessary since Enumeration may not simply be int
- */
-
- #ifndef ROPT
- #define REG
- /* REG becomes defined as empty */
- /* i.e. no register variables */
- #else
- #define REG register
- #endif
-
- void Proc_1 (REG Rec_Pointer Ptr_Val_Par);
- void Proc_2 (One_Fifty *Int_Par_Ref);
- void Proc_3 (Rec_Pointer *Ptr_Ref_Par);
- void Proc_4 ();
- void Proc_5 ();
- void Proc_6 (Enumeration Enum_Val_Par, Enumeration *Enum_Ref_Par);
- void Proc_7 (One_Fifty Int_1_Par_Val, One_Fifty Int_2_Par_Val,
- One_Fifty *Int_Par_Ref);
- void Proc_8 (Arr_1_Dim Arr_1_Par_Ref, Arr_2_Dim Arr_2_Par_Ref,
- int Int_1_Par_Val, int Int_2_Par_Val);
-
- Boolean Func_2 (Str_30 Str_1_Par_Ref, Str_30 Str_2_Par_Ref);
-
-
- /* variables for time measurement: */
-
- #define Too_Small_Time 2
- /* Measurements should last at least 2 seconds */
-
- double Begin_Time,
- End_Time,
- User_Time;
-
- double Microseconds,
- Dhrystones_Per_Second,
- Vax_Mips;
-
- /* end of variables for time measurement */
-
-
- void main (int argc, char *argv[])
- /*****/
-
- /* main program, corresponds to procedures */
- /* Main and Proc_0 in the Ada version */
- {
- double dtime();
-
- One_Fifty Int_1_Loc;
- REG One_Fifty Int_2_Loc;
- One_Fifty Int_3_Loc;
- REG char Ch_Index;
- Enumeration Enum_Loc;
- Str_30 Str_1_Loc;
- Str_30 Str_2_Loc;
- REG int Run_Index;
- REG int Number_Of_Runs;
- int endit, count = 10;
- FILE *Ap;
- char general[9][80] = {" "};
-
- /* Initializations */
- if (argc > 1)
- {
- switch (argv[1][0])
- {
- case 'N':
- getinput = 0;
- break;
- case 'n':
- getinput = 0;
- break;
- }
- }
-
- if ((Ap = fopen("Dhry.txt","a+")) == NULL)
- {
- printf("Can not open Dhry.txt\n\n");
- printf("Press any key\n");
- exit(1);
- }
-
-/***********************************************************************
- * Change for compiler and optimisation used *
- ***********************************************************************/
-
- Next_Ptr_Glob = (Rec_Pointer) malloc (sizeof (Rec_Type));
- Ptr_Glob = (Rec_Pointer) malloc (sizeof (Rec_Type));
-
- Ptr_Glob->Ptr_Comp = Next_Ptr_Glob;
- Ptr_Glob->Discr = Ident_1;
- Ptr_Glob->variant.var_1.Enum_Comp = Ident_3;
- Ptr_Glob->variant.var_1.Int_Comp = 40;
- strcpy (Ptr_Glob->variant.var_1.Str_Comp,
- "DHRYSTONE PROGRAM, SOME STRING");
- strcpy (Str_1_Loc, "DHRYSTONE PROGRAM, 1'ST STRING");
-
- Arr_2_Glob [8][7] = 10;
- /* Was missing in published program. Without this statement, */
- /* Arr_2_Glob [8][7] would have an undefined value. */
- /* Warning: With 16-Bit processors and Number_Of_Runs > 32000, */
- /* overflow may occur for this array element. */
-
- printf ("\n");
- printf ("Dhrystone Benchmark, Version 2.1 (Language: C or C++)\n");
- printf ("\n");
-
- if (getinput == 0)
- {
- printf ("No run time input data\n\n");
- }
- else
- {
- printf ("With run time input data\n\n");
- }
-
- printf ("Compiler %s\n", compiler);
- printf ("Optimisation %s\n", options);
- #ifdef ROPT
- printf ("Register option selected\n\n");
- #else
- printf ("Register option not selected\n\n");
- strcpy(Reg_Define, "Register option Not selected.");
- #endif
-
- /*
- if (Reg)
- {
- printf ("Program compiled with 'register' attribute\n");
- printf ("\n");
- }
- else
- {
- printf ("Program compiled without 'register' attribute\n");
- printf ("\n");
- }
-
- printf ("Please give the number of runs through the benchmark: ");
- {
- int n;
- scanf ("%d", &n);
- Number_Of_Runs = n;
- }
- printf ("\n");
- printf ("Execution starts, %d runs through Dhrystone\n",
- Number_Of_Runs);
- */
-
- Number_Of_Runs = 5000;
-
- do
- {
-
- Number_Of_Runs = Number_Of_Runs * 2;
- count = count - 1;
- Arr_2_Glob [8][7] = 10;
-
- /***************/
- /* Start timer */
- /***************/
-
- Begin_Time = dtime();
-
- for (Run_Index = 1; Run_Index <= Number_Of_Runs; ++Run_Index)
- {
-
- Proc_5();
- Proc_4();
- /* Ch_1_Glob == 'A', Ch_2_Glob == 'B', Bool_Glob == true */
- Int_1_Loc = 2;
- Int_2_Loc = 3;
- strcpy (Str_2_Loc, "DHRYSTONE PROGRAM, 2'ND STRING");
- Enum_Loc = Ident_2;
- Bool_Glob = ! Func_2 (Str_1_Loc, Str_2_Loc);
- /* Bool_Glob == 1 */
- while (Int_1_Loc < Int_2_Loc) /* loop body executed once */
- {
- Int_3_Loc = 5 * Int_1_Loc - Int_2_Loc;
- /* Int_3_Loc == 7 */
- Proc_7 (Int_1_Loc, Int_2_Loc, &Int_3_Loc);
- /* Int_3_Loc == 7 */
- Int_1_Loc += 1;
- } /* while */
- /* Int_1_Loc == 3, Int_2_Loc == 3, Int_3_Loc == 7 */
- Proc_8 (Arr_1_Glob, Arr_2_Glob, Int_1_Loc, Int_3_Loc);
- /* Int_Glob == 5 */
- Proc_1 (Ptr_Glob);
- for (Ch_Index = 'A'; Ch_Index <= Ch_2_Glob; ++Ch_Index)
- /* loop body executed twice */
- {
- if (Enum_Loc == Func_1 (Ch_Index, 'C'))
- /* then, not executed */
- {
- Proc_6 (Ident_1, &Enum_Loc);
- strcpy (Str_2_Loc, "DHRYSTONE PROGRAM, 3'RD STRING");
- Int_2_Loc = Run_Index;
- Int_Glob = Run_Index;
- }
- }
- /* Int_1_Loc == 3, Int_2_Loc == 3, Int_3_Loc == 7 */
- Int_2_Loc = Int_2_Loc * Int_1_Loc;
- Int_1_Loc = Int_2_Loc / Int_3_Loc;
- Int_2_Loc = 7 * (Int_2_Loc - Int_3_Loc) - Int_1_Loc;
- /* Int_1_Loc == 1, Int_2_Loc == 13, Int_3_Loc == 7 */
- Proc_2 (&Int_1_Loc);
- /* Int_1_Loc == 5 */
-
- } /* loop "for Run_Index" */
-
- /**************/
- /* Stop timer */
- /**************/
-
- End_Time = dtime();
- User_Time = End_Time - Begin_Time;
-
- printf ("%12.0f runs %6.2f seconds \n",(double) Number_Of_Runs, User_Time);
- if (User_Time > 5)
- {
- count = 0;
- }
- else
- {
- if (User_Time < 0.1)
- {
- Number_Of_Runs = Number_Of_Runs * 5;
- }
- }
- } /* calibrate/run do while */
- while (count >0);
-
- printf ("\n");
- printf ("Final values (* implementation-dependent):\n");
- printf ("\n");
- printf ("Int_Glob: ");
- if (Int_Glob == 5) printf ("O.K. ");
- else printf ("WRONG ");
- printf ("%d ", Int_Glob);
-
- printf ("Bool_Glob: ");
- if (Bool_Glob == 1) printf ("O.K. ");
- else printf ("WRONG ");
- printf ("%d\n", Bool_Glob);
-
- printf ("Ch_1_Glob: ");
- if (Ch_1_Glob == 'A') printf ("O.K. ");
- else printf ("WRONG ");
- printf ("%c ", Ch_1_Glob);
-
- printf ("Ch_2_Glob: ");
- if (Ch_2_Glob == 'B') printf ("O.K. ");
- else printf ("WRONG ");
- printf ("%c\n", Ch_2_Glob);
-
- printf ("Arr_1_Glob[8]: ");
- if (Arr_1_Glob[8] == 7) printf ("O.K. ");
- else printf ("WRONG ");
- printf ("%d ", Arr_1_Glob[8]);
-
- printf ("Arr_2_Glob8/7: ");
- if (Arr_2_Glob[8][7] == Number_Of_Runs + 10)
- printf ("O.K. ");
- else printf ("WRONG ");
- printf ("%10d\n", Arr_2_Glob[8][7]);
-
- printf ("Ptr_Glob-> ");
- printf (" Ptr_Comp: * %d\n", (int) Ptr_Glob->Ptr_Comp);
-
- printf (" Discr: ");
- if (Ptr_Glob->Discr == 0) printf ("O.K. ");
- else printf ("WRONG ");
- printf ("%d ", Ptr_Glob->Discr);
-
- printf ("Enum_Comp: ");
- if (Ptr_Glob->variant.var_1.Enum_Comp == 2)
- printf ("O.K. ");
- else printf ("WRONG ");
- printf ("%d\n", Ptr_Glob->variant.var_1.Enum_Comp);
-
- printf (" Int_Comp: ");
- if (Ptr_Glob->variant.var_1.Int_Comp == 17) printf ("O.K. ");
- else printf ("WRONG ");
- printf ("%d ", Ptr_Glob->variant.var_1.Int_Comp);
-
- printf ("Str_Comp: ");
- if (strcmp(Ptr_Glob->variant.var_1.Str_Comp,
- "DHRYSTONE PROGRAM, SOME STRING") == 0)
- printf ("O.K. ");
- else printf ("WRONG ");
- printf ("%s\n", Ptr_Glob->variant.var_1.Str_Comp);
-
- printf ("Next_Ptr_Glob-> ");
- printf (" Ptr_Comp: * %d", (int) Next_Ptr_Glob->Ptr_Comp);
- printf (" same as above\n");
-
- printf (" Discr: ");
- if (Next_Ptr_Glob->Discr == 0)
- printf ("O.K. ");
- else printf ("WRONG ");
- printf ("%d ", Next_Ptr_Glob->Discr);
-
- printf ("Enum_Comp: ");
- if (Next_Ptr_Glob->variant.var_1.Enum_Comp == 1)
- printf ("O.K. ");
- else printf ("WRONG ");
- printf ("%d\n", Next_Ptr_Glob->variant.var_1.Enum_Comp);
-
- printf (" Int_Comp: ");
- if (Next_Ptr_Glob->variant.var_1.Int_Comp == 18)
- printf ("O.K. ");
- else printf ("WRONG ");
- printf ("%d ", Next_Ptr_Glob->variant.var_1.Int_Comp);
-
- printf ("Str_Comp: ");
- if (strcmp(Next_Ptr_Glob->variant.var_1.Str_Comp,
- "DHRYSTONE PROGRAM, SOME STRING") == 0)
- printf ("O.K. ");
- else printf ("WRONG ");
- printf ("%s\n", Next_Ptr_Glob->variant.var_1.Str_Comp);
-
- printf ("Int_1_Loc: ");
- if (Int_1_Loc == 5)
- printf ("O.K. ");
- else printf ("WRONG ");
- printf ("%d ", Int_1_Loc);
-
- printf ("Int_2_Loc: ");
- if (Int_2_Loc == 13)
- printf ("O.K. ");
- else printf ("WRONG ");
- printf ("%d\n", Int_2_Loc);
-
- printf ("Int_3_Loc: ");
- if (Int_3_Loc == 7)
- printf ("O.K. ");
- else printf ("WRONG ");
- printf ("%d ", Int_3_Loc);
-
- printf ("Enum_Loc: ");
- if (Enum_Loc == 1)
- printf ("O.K. ");
- else printf ("WRONG ");
- printf ("%d\n", Enum_Loc);
-
- printf ("Str_1_Loc: ");
- if (strcmp(Str_1_Loc, "DHRYSTONE PROGRAM, 1'ST STRING") == 0)
- printf ("O.K. ");
- else printf ("WRONG ");
- printf ("%s\n", Str_1_Loc);
-
- printf ("Str_2_Loc: ");
- if (strcmp(Str_2_Loc, "DHRYSTONE PROGRAM, 2'ND STRING") == 0)
- printf ("O.K. ");
- else printf ("WRONG ");
- printf ("%s\n", Str_2_Loc);
-
- printf ("\n");
-
-
- if (User_Time < Too_Small_Time)
- {
- printf ("Measured time too small to obtain meaningful results\n");
- printf ("Please increase number of runs\n");
- printf ("\n");
- }
- else
- {
- Microseconds = User_Time * Mic_secs_Per_Second
- / (double) Number_Of_Runs;
- Dhrystones_Per_Second = (double) Number_Of_Runs / User_Time;
- Vax_Mips = Dhrystones_Per_Second / 1757.0;
-
- printf ("Microseconds for one run through Dhrystone: ");
- printf ("%12.2lf \n", Microseconds);
- printf ("Dhrystones per Second: ");
- printf ("%10.0lf \n", Dhrystones_Per_Second);
- printf ("VAX MIPS rating = ");
- printf ("%12.2lf \n",Vax_Mips);
- printf ("\n");
-
-/************************************************************************
- * Type details of hardware, software etc. *
- ************************************************************************/
-
- if (getinput == 1)
- {
- printf ("Enter the following which will be added with results to file DHRY.TXT\n");
- printf ("When submitting a number of results you need only provide details once\n");
- printf ("but a cross reference such as an abbreviated CPU type would be useful.\n");
- printf ("You can kill (exit or close) the program now and no data will be added.\n\n");
-
- printf ("PC Supplier/model ? ");
- gets(general[1]);
-
- printf ("CPU chip ? ");
- gets(general[2]);
-
- printf ("Clock MHz ? ");
- gets(general[3]);
-
- printf ("Cache size ? ");
- gets(general[4]);
-
- printf ("Chipset & H/W options ? ");
- gets(general[5]);
-
- printf ("OS/DOS version ? ");
- gets(general[6]);
-
- printf ("Your name ? ");
- gets(general[7]);
-
- printf ("Company/Location ? ");
- gets(general[8]);
-
- printf ("E-mail address ? ");
- gets(general[0]);
- }
-/************************************************************************
- * Add results to output file Dhry.txt *
- ************************************************************************/
- fprintf (Ap, "-------------------- -----------------------------------"
- "\n");
- fprintf (Ap, "Dhrystone Benchmark Version 2.1 (Language: C++)\n\n");
- fprintf (Ap, "PC model %s\n", general[1]);
- fprintf (Ap, "CPU %s\n", general[2]);
- fprintf (Ap, "Clock MHz %s\n", general[3]);
- fprintf (Ap, "Cache %s\n", general[4]);
- fprintf (Ap, "Options %s\n", general[5]);
- fprintf (Ap, "OS/DOS %s\n", general[6]);
- fprintf (Ap, "Compiler %s\n", compiler);
- fprintf (Ap, "OptLevel %s\n", options);
- fprintf (Ap, "Run by %s\n", general[7]);
- fprintf (Ap, "From %s\n", general[8]);
- fprintf (Ap, "Mail %s\n\n", general[0]);
-
- fprintf (Ap, "Final values (* implementation-dependent):\n");
- fprintf (Ap, "\n");
- fprintf (Ap, "Int_Glob: ");
- if (Int_Glob == 5) fprintf (Ap, "O.K. ");
- else fprintf (Ap, "WRONG ");
- fprintf (Ap, "%d\n", Int_Glob);
-
- fprintf (Ap, "Bool_Glob: ");
- if (Bool_Glob == 1) fprintf (Ap, "O.K. ");
- else fprintf (Ap, "WRONG ");
- fprintf (Ap, "%d\n", Bool_Glob);
-
- fprintf (Ap, "Ch_1_Glob: ");
- if (Ch_1_Glob == 'A') fprintf (Ap, "O.K. ");
- else fprintf (Ap, "WRONG ");
- fprintf (Ap, "%c\n", Ch_1_Glob);
-
- fprintf (Ap, "Ch_2_Glob: ");
- if (Ch_2_Glob == 'B') fprintf (Ap, "O.K. ");
- else fprintf (Ap, "WRONG ");
- fprintf (Ap, "%c\n", Ch_2_Glob);
-
- fprintf (Ap, "Arr_1_Glob[8]: ");
- if (Arr_1_Glob[8] == 7) fprintf (Ap, "O.K. ");
- else fprintf (Ap, "WRONG ");
- fprintf (Ap, "%d\n", Arr_1_Glob[8]);
-
- fprintf (Ap, "Arr_2_Glob8/7: ");
- if (Arr_2_Glob[8][7] == Number_Of_Runs + 10)
- fprintf (Ap, "O.K. ");
- else fprintf (Ap, "WRONG ");
- fprintf (Ap, "%10d\n", Arr_2_Glob[8][7]);
-
- fprintf (Ap, "Ptr_Glob-> \n");
- fprintf (Ap, " Ptr_Comp: * %d\n", (int) Ptr_Glob->Ptr_Comp);
-
- fprintf (Ap, " Discr: ");
- if (Ptr_Glob->Discr == 0) fprintf (Ap, "O.K. ");
- else fprintf (Ap, "WRONG ");
- fprintf (Ap, "%d\n", Ptr_Glob->Discr);
-
- fprintf (Ap, " Enum_Comp: ");
- if (Ptr_Glob->variant.var_1.Enum_Comp == 2)
- fprintf (Ap, "O.K. ");
- else fprintf (Ap, "WRONG ");
- fprintf (Ap, "%d\n", Ptr_Glob->variant.var_1.Enum_Comp);
-
- fprintf (Ap, " Int_Comp: ");
- if (Ptr_Glob->variant.var_1.Int_Comp == 17) fprintf (Ap, "O.K. ");
- else fprintf (Ap, "WRONG ");
- fprintf (Ap, "%d\n", Ptr_Glob->variant.var_1.Int_Comp);
-
- fprintf (Ap, " Str_Comp: ");
- if (strcmp(Ptr_Glob->variant.var_1.Str_Comp,
- "DHRYSTONE PROGRAM, SOME STRING") == 0)
- fprintf (Ap, "O.K. ");
- else fprintf (Ap, "WRONG ");
- fprintf (Ap, "%s\n", Ptr_Glob->variant.var_1.Str_Comp);
-
- fprintf (Ap, "Next_Ptr_Glob-> \n");
- fprintf (Ap, " Ptr_Comp: * %d", (int) Next_Ptr_Glob->Ptr_Comp);
- fprintf (Ap, " same as above\n");
-
- fprintf (Ap, " Discr: ");
- if (Next_Ptr_Glob->Discr == 0)
- fprintf (Ap, "O.K. ");
- else fprintf (Ap, "WRONG ");
- fprintf (Ap, "%d\n", Next_Ptr_Glob->Discr);
-
- fprintf (Ap, " Enum_Comp: ");
- if (Next_Ptr_Glob->variant.var_1.Enum_Comp == 1)
- fprintf (Ap, "O.K. ");
- else fprintf (Ap, "WRONG ");
- fprintf (Ap, "%d\n", Next_Ptr_Glob->variant.var_1.Enum_Comp);
-
- fprintf (Ap, " Int_Comp: ");
- if (Next_Ptr_Glob->variant.var_1.Int_Comp == 18)
- fprintf (Ap, "O.K. ");
- else fprintf (Ap, "WRONG ");
- fprintf (Ap, "%d\n", Next_Ptr_Glob->variant.var_1.Int_Comp);
-
- fprintf (Ap, " Str_Comp: ");
- if (strcmp(Next_Ptr_Glob->variant.var_1.Str_Comp,
- "DHRYSTONE PROGRAM, SOME STRING") == 0)
- fprintf (Ap, "O.K. ");
- else fprintf (Ap, "WRONG ");
- fprintf (Ap, "%s\n", Next_Ptr_Glob->variant.var_1.Str_Comp);
-
- fprintf (Ap, "Int_1_Loc: ");
- if (Int_1_Loc == 5)
- fprintf (Ap, "O.K. ");
- else fprintf (Ap, "WRONG ");
- fprintf (Ap, "%d\n", Int_1_Loc);
-
- fprintf (Ap, "Int_2_Loc: ");
- if (Int_2_Loc == 13)
- fprintf (Ap, "O.K. ");
- else fprintf (Ap, "WRONG ");
- fprintf (Ap, "%d\n", Int_2_Loc);
-
- fprintf (Ap, "Int_3_Loc: ");
- if (Int_3_Loc == 7)
- fprintf (Ap, "O.K. ");
- else fprintf (Ap, "WRONG ");
- fprintf (Ap, "%d\n", Int_3_Loc);
-
- fprintf (Ap, "Enum_Loc: ");
- if (Enum_Loc == 1)
- fprintf (Ap, "O.K. ");
- else fprintf (Ap, "WRONG ");
- fprintf (Ap, "%d\n", Enum_Loc);
-
- fprintf (Ap, "Str_1_Loc: ");
- if (strcmp(Str_1_Loc, "DHRYSTONE PROGRAM, 1'ST STRING") == 0)
- fprintf (Ap, "O.K. ");
- else fprintf (Ap, "WRONG ");
- fprintf (Ap, "%s\n", Str_1_Loc);
-
- fprintf (Ap, "Str_2_Loc: ");
- if (strcmp(Str_2_Loc, "DHRYSTONE PROGRAM, 2'ND STRING") == 0)
- fprintf (Ap, "O.K. ");
- else fprintf (Ap, "WRONG ");
- fprintf (Ap, "%s\n", Str_2_Loc);
-
-
- fprintf (Ap, "\n");
- fprintf(Ap,"%s\n",Reg_Define);
- fprintf (Ap, "\n");
- fprintf(Ap,"Microseconds 1 loop: %12.2lf\n",Microseconds);
- fprintf(Ap,"Dhrystones / second: %10.0lf\n",Dhrystones_Per_Second);
- fprintf(Ap,"VAX MIPS rating: %12.2lf\n\n",Vax_Mips);
- fclose(Ap);
- }
-
- printf ("\n");
- printf ("A new results file will have been created in the same directory as the\n");
- printf (".EXE files if one did not already exist. If you made a mistake on input, \n");
- printf ("you can use a text editor to correct it, delete the results or copy \n");
- printf ("them to a different file name. If you intend to run multiple tests you\n");
- printf ("you may wish to rename DHRY.TXT with a more informative title.\n\n");
- printf ("Please submit feedback and results files as a posting in Section 12\n");
- printf ("or to Roy_Longbottom@compuserve.com\n\n");
-
- if (getinput == 1)
- {
- printf("Press any key to exit\n");
- printf ("\nIf this is displayed you must close the window in the normal way\n");
- }
- }
-
-
- void Proc_1 (REG Rec_Pointer Ptr_Val_Par)
- /******************/
-
- /* executed once */
- {
- REG Rec_Pointer Next_Record = Ptr_Val_Par->Ptr_Comp;
- /* == Ptr_Glob_Next */
- /* Local variable, initialized with Ptr_Val_Par->Ptr_Comp, */
- /* corresponds to "rename" in Ada, "with" in Pascal */
-
- structassign (*Ptr_Val_Par->Ptr_Comp, *Ptr_Glob);
- Ptr_Val_Par->variant.var_1.Int_Comp = 5;
- Next_Record->variant.var_1.Int_Comp
- = Ptr_Val_Par->variant.var_1.Int_Comp;
- Next_Record->Ptr_Comp = Ptr_Val_Par->Ptr_Comp;
- Proc_3 (&Next_Record->Ptr_Comp);
- /* Ptr_Val_Par->Ptr_Comp->Ptr_Comp
- == Ptr_Glob->Ptr_Comp */
- if (Next_Record->Discr == Ident_1)
- /* then, executed */
- {
- Next_Record->variant.var_1.Int_Comp = 6;
- Proc_6 (Ptr_Val_Par->variant.var_1.Enum_Comp,
- &Next_Record->variant.var_1.Enum_Comp);
- Next_Record->Ptr_Comp = Ptr_Glob->Ptr_Comp;
- Proc_7 (Next_Record->variant.var_1.Int_Comp, 10,
- &Next_Record->variant.var_1.Int_Comp);
- }
- else /* not executed */
- structassign (*Ptr_Val_Par, *Ptr_Val_Par->Ptr_Comp);
- } /* Proc_1 */
-
-
- void Proc_2 (One_Fifty *Int_Par_Ref)
- /******************/
- /* executed once */
- /* *Int_Par_Ref == 1, becomes 4 */
-
- {
- One_Fifty Int_Loc;
- Enumeration Enum_Loc;
-
- Int_Loc = *Int_Par_Ref + 10;
- do /* executed once */
- if (Ch_1_Glob == 'A')
- /* then, executed */
- {
- Int_Loc -= 1;
- *Int_Par_Ref = Int_Loc - Int_Glob;
- Enum_Loc = Ident_1;
- } /* if */
- while (Enum_Loc != Ident_1); /* true */
- } /* Proc_2 */
-
-
- void Proc_3 (Rec_Pointer *Ptr_Ref_Par)
- /******************/
- /* executed once */
- /* Ptr_Ref_Par becomes Ptr_Glob */
-
- {
- if (Ptr_Glob != Null)
- /* then, executed */
- *Ptr_Ref_Par = Ptr_Glob->Ptr_Comp;
- Proc_7 (10, Int_Glob, &Ptr_Glob->variant.var_1.Int_Comp);
- } /* Proc_3 */
-
-
-void Proc_4 () /* without parameters */
- /*******/
- /* executed once */
- {
- Boolean Bool_Loc;
-
- Bool_Loc = Ch_1_Glob == 'A';
- Bool_Glob = Bool_Loc | Bool_Glob;
- Ch_2_Glob = 'B';
- } /* Proc_4 */
-
-
- void Proc_5 () /* without parameters */
- /*******/
- /* executed once */
- {
- Ch_1_Glob = 'A';
- Bool_Glob = false;
- } /* Proc_5 */
-
-
- /* Procedure for the assignment of structures, */
- /* if the C compiler doesn't support this feature */
- #ifdef NOSTRUCTASSIGN
- memcpy (d, s, l)
- register char *d;
- register char *s;
- register int l;
- {
- while (l--) *d++ = *s++;
- }
- #endif
-
-
-double dtime()
-{
-
- /* #include <ctype.h> */
-
- #define HZ CLOCKS_PER_SEC
- clock_t tnow;
-
- double q;
- tnow = clock();
- q = (double)tnow / (double)HZ;
- return q;
-}
diff --git a/contrib/cortex-strings/benchmarks/dhry/dhry_2.c b/contrib/cortex-strings/benchmarks/dhry/dhry_2.c
deleted file mode 100644
--- a/contrib/cortex-strings/benchmarks/dhry/dhry_2.c
+++ /dev/null
@@ -1,186 +0,0 @@
- /*
- *************************************************************************
- *
- * "DHRYSTONE" Benchmark Program
- * -----------------------------
- *
- * Version: C, Version 2.1
- *
- * File: dhry_2.c (part 3 of 3)
- *
- * Date: May 25, 1988
- *
- * Author: Reinhold P. Weicker
- *
- *************************************************************************
- */
-
- #include "dhry.h"
-
- #ifndef REG
- #define REG
- /* REG becomes defined as empty */
- /* i.e. no register variables */
- #else
- #define REG register
- #endif
-
- extern int Int_Glob;
- extern char Ch_1_Glob;
-
- Boolean Func_3 (Enumeration Enum_Par_Val);
-
- void Proc_6 (Enumeration Enum_Val_Par, Enumeration *Enum_Ref_Par)
- /*********************************/
- /* executed once */
- /* Enum_Val_Par == Ident_3, Enum_Ref_Par becomes Ident_2 */
-
- {
- *Enum_Ref_Par = Enum_Val_Par;
- if (! Func_3 (Enum_Val_Par))
- /* then, not executed */
- *Enum_Ref_Par = Ident_4;
- switch (Enum_Val_Par)
- {
- case Ident_1:
- *Enum_Ref_Par = Ident_1;
- break;
- case Ident_2:
- if (Int_Glob > 100)
- /* then */
- *Enum_Ref_Par = Ident_1;
- else *Enum_Ref_Par = Ident_4;
- break;
- case Ident_3: /* executed */
- *Enum_Ref_Par = Ident_2;
- break;
- case Ident_4: break;
- case Ident_5:
- *Enum_Ref_Par = Ident_3;
- break;
- } /* switch */
- } /* Proc_6 */
-
-
- void Proc_7 (One_Fifty Int_1_Par_Val, One_Fifty Int_2_Par_Val,
- One_Fifty *Int_Par_Ref)
- /**********************************************/
- /* executed three times */
- /* first call: Int_1_Par_Val == 2, Int_2_Par_Val == 3, */
- /* Int_Par_Ref becomes 7 */
- /* second call: Int_1_Par_Val == 10, Int_2_Par_Val == 5, */
- /* Int_Par_Ref becomes 17 */
- /* third call: Int_1_Par_Val == 6, Int_2_Par_Val == 10, */
- /* Int_Par_Ref becomes 18 */
-
- {
- One_Fifty Int_Loc;
-
- Int_Loc = Int_1_Par_Val + 2;
- *Int_Par_Ref = Int_2_Par_Val + Int_Loc;
- } /* Proc_7 */
-
-
- void Proc_8 (Arr_1_Dim Arr_1_Par_Ref, Arr_2_Dim Arr_2_Par_Ref,
- int Int_1_Par_Val, int Int_2_Par_Val)
- /*********************************************************************/
- /* executed once */
- /* Int_Par_Val_1 == 3 */
- /* Int_Par_Val_2 == 7 */
-
- {
- REG One_Fifty Int_Index;
- REG One_Fifty Int_Loc;
-
- Int_Loc = Int_1_Par_Val + 5;
- Arr_1_Par_Ref [Int_Loc] = Int_2_Par_Val;
- Arr_1_Par_Ref [Int_Loc+1] = Arr_1_Par_Ref [Int_Loc];
- Arr_1_Par_Ref [Int_Loc+30] = Int_Loc;
- for (Int_Index = Int_Loc; Int_Index <= Int_Loc+1; ++Int_Index)
- Arr_2_Par_Ref [Int_Loc] [Int_Index] = Int_Loc;
- Arr_2_Par_Ref [Int_Loc] [Int_Loc-1] += 1;
- Arr_2_Par_Ref [Int_Loc+20] [Int_Loc] = Arr_1_Par_Ref [Int_Loc];
- Int_Glob = 5;
- } /* Proc_8 */
-
-
- Enumeration Func_1 (Capital_Letter Ch_1_Par_Val,
- Capital_Letter Ch_2_Par_Val)
- /*************************************************/
- /* executed three times */
- /* first call: Ch_1_Par_Val == 'H', Ch_2_Par_Val == 'R' */
- /* second call: Ch_1_Par_Val == 'A', Ch_2_Par_Val == 'C' */
- /* third call: Ch_1_Par_Val == 'B', Ch_2_Par_Val == 'C' */
-
- {
- Capital_Letter Ch_1_Loc;
- Capital_Letter Ch_2_Loc;
-
- Ch_1_Loc = Ch_1_Par_Val;
- Ch_2_Loc = Ch_1_Loc;
- if (Ch_2_Loc != Ch_2_Par_Val)
- /* then, executed */
- return (Ident_1);
- else /* not executed */
- {
- Ch_1_Glob = Ch_1_Loc;
- return (Ident_2);
- }
- } /* Func_1 */
-
-
- Boolean Func_2 (Str_30 Str_1_Par_Ref, Str_30 Str_2_Par_Ref)
- /*************************************************/
- /* executed once */
- /* Str_1_Par_Ref == "DHRYSTONE PROGRAM, 1'ST STRING" */
- /* Str_2_Par_Ref == "DHRYSTONE PROGRAM, 2'ND STRING" */
-
- {
- REG One_Thirty Int_Loc;
- Capital_Letter Ch_Loc;
-
- Int_Loc = 2;
- while (Int_Loc <= 2) /* loop body executed once */
- if (Func_1 (Str_1_Par_Ref[Int_Loc],
- Str_2_Par_Ref[Int_Loc+1]) == Ident_1)
- /* then, executed */
- {
- Ch_Loc = 'A';
- Int_Loc += 1;
- } /* if, while */
- if (Ch_Loc >= 'W' && Ch_Loc < 'Z')
- /* then, not executed */
- Int_Loc = 7;
- if (Ch_Loc == 'R')
- /* then, not executed */
- return (true);
- else /* executed */
- {
- if (strcmp (Str_1_Par_Ref, Str_2_Par_Ref) > 0)
- /* then, not executed */
- {
- Int_Loc += 7;
- Int_Glob = Int_Loc;
- return (true);
- }
- else /* executed */
- return (false);
- } /* if Ch_Loc */
- } /* Func_2 */
-
-
- Boolean Func_3 (Enumeration Enum_Par_Val)
- /***************************/
- /* executed once */
- /* Enum_Par_Val == Ident_3 */
-
- {
- Enumeration Enum_Loc;
-
- Enum_Loc = Enum_Par_Val;
- if (Enum_Loc == Ident_3)
- /* then, executed */
- return (true);
- else /* not executed */
- return (false);
- } /* Func_3 */
diff --git a/contrib/cortex-strings/benchmarks/multi/harness.c b/contrib/cortex-strings/benchmarks/multi/harness.c
deleted file mode 100644
--- a/contrib/cortex-strings/benchmarks/multi/harness.c
+++ /dev/null
@@ -1,407 +0,0 @@
-/*
- * Copyright (c) 2011, Linaro Limited
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of the Linaro nor the
- * names of its contributors may be used to endorse or promote products
- * derived from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
- * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
- * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** A simple harness that times how long a string function takes to
- * run.
- */
-
-/* PENDING: Add EPL */
-
-#include <string.h>
-#include <time.h>
-#include <stdint.h>
-#include <stdlib.h>
-#include <stdio.h>
-#include <stdbool.h>
-#include <assert.h>
-#include <unistd.h>
-#include <errno.h>
-
-#define NUM_ELEMS(_x) (sizeof(_x) / sizeof((_x)[0]))
-
-#ifndef VERSION
-#define VERSION "(unknown version)"
-#endif
-
-/** Make sure a function is called by using the return value */
-#define SPOIL(_x) volatile long x = (long)(_x); (void)x
-
-/** Type of functions that can be tested */
-typedef void (*stub_t)(void *dest, void *src, size_t n);
-
-/** Meta data about one test */
-struct test
-{
- /** Test name */
- const char *name;
- /** Function to test */
- stub_t stub;
-};
-
-/** Flush the cache by reading a chunk of memory */
-static void empty(volatile char *against)
-{
- /* We know that there's a 16 k cache with 64 byte lines giving
- a total of 256 lines. Read randomly from 256*5 places should
- flush everything */
- int offset = (1024 - 256)*1024;
-
- for (int i = offset; i < offset + 16*1024*3; i += 64)
- {
- against[i];
- }
-}
-
-/** Stub that does nothing. Used for calibrating */
-static void xbounce(void *dest, void *src, size_t n)
-{
- SPOIL(0);
-}
-
-/** Stub that calls memcpy */
-static void xmemcpy(void *dest, void *src, size_t n)
-{
- SPOIL(memcpy(dest, src, n));
-}
-
-/** Stub that calls memset */
-static void xmemset(void *dest, void *src, size_t n)
-{
- SPOIL(memset(dest, 0, n));
-}
-
-/** Stub that calls memcmp */
-static void xmemcmp(void *dest, void *src, size_t n)
-{
- SPOIL(memcmp(dest, src, n));
-}
-
-/** Stub that calls strcpy */
-static void xstrcpy(void *dest, void *src, size_t n)
-{
- SPOIL(strcpy(dest, src));
-}
-
-/** Stub that calls strlen */
-static void xstrlen(void *dest, void *src, size_t n)
-{
- SPOIL(strlen(dest));
-}
-
-/** Stub that calls strcmp */
-static void xstrcmp(void *dest, void *src, size_t n)
-{
- SPOIL(strcmp(dest, src));
-}
-
-/** Stub that calls strchr */
-static void xstrchr(void *dest, void *src, size_t n)
-{
- /* Put the character at the end of the string and before the null */
- ((char *)src)[n-1] = 32;
- SPOIL(strchr(src, 32));
-}
-
-/** Stub that calls memchr */
-static void xmemchr(void *dest, void *src, size_t n)
-{
- /* Put the character at the end of the block */
- ((char *)src)[n-1] = 32;
- SPOIL(memchr(src, 32, n));
-}
-
-/** All functions that can be tested */
-static const struct test tests[] =
- {
- { "bounce", xbounce },
- { "memchr", xmemchr },
- { "memcpy", xmemcpy },
- { "memset", xmemset },
- { "memcmp", xmemcmp },
- { "strchr", xstrchr },
- { "strcmp", xstrcmp },
- { "strcpy", xstrcpy },
- { "strlen", xstrlen },
- { NULL }
- };
-
-/** Show basic usage */
-static void usage(const char* name)
-{
- printf("%s %s: run a string related benchmark.\n"
- "usage: %s [-c block-size] [-l loop-count] [-a alignment|src_alignment:dst_alignment] [-f] [-t test-name] [-r run-id]\n"
- , name, VERSION, name);
-
- printf("Tests:");
-
- for (const struct test *ptest = tests; ptest->name != NULL; ptest++)
- {
- printf(" %s", ptest->name);
- }
-
- printf("\n");
-
- exit(-1);
-}
-
-/** Find the test by name */
-static const struct test *find_test(const char *name)
-{
- if (name == NULL)
- {
- return tests + 0;
- }
- else
- {
- for (const struct test *p = tests; p->name != NULL; p++)
- {
- if (strcmp(p->name, name) == 0)
- {
- return p;
- }
- }
- }
-
- return NULL;
-}
-
-#define MIN_BUFFER_SIZE 1024*1024
-#define MAX_ALIGNMENT 256
-
-/** Take a pointer and ensure that the lower bits == alignment */
-static char *realign(char *p, int alignment)
-{
- uintptr_t pp = (uintptr_t)p;
- pp = (pp + (MAX_ALIGNMENT - 1)) & ~(MAX_ALIGNMENT - 1);
- pp += alignment;
-
- return (char *)pp;
-}
-
-static int parse_int_arg(const char *arg, const char *exe_name)
-{
- long int ret;
-
- errno = 0;
- ret = strtol(arg, NULL, 0);
-
- if (errno)
- {
- usage(exe_name);
- }
-
- return (int)ret;
-}
-
-static void parse_alignment_arg(const char *arg, const char *exe_name,
- int *src_alignment, int *dst_alignment)
-{
- long int ret;
- char *endptr;
-
- errno = 0;
- ret = strtol(arg, &endptr, 0);
-
- if (errno)
- {
- usage(exe_name);
- }
-
- *src_alignment = (int)ret;
-
- if (ret > 256 || ret < 1)
- {
- printf("Alignment should be in the range [1, 256].\n");
- usage(exe_name);
- }
-
- if (ret == 256)
- ret = 0;
-
- if (endptr && *endptr == ':')
- {
- errno = 0;
- ret = strtol(endptr + 1, NULL, 0);
-
- if (errno)
- {
- usage(exe_name);
- }
-
- if (ret > 256 || ret < 1)
- {
- printf("Alignment should be in the range [1, 256].\n");
- usage(exe_name);
- }
-
- if (ret == 256)
- ret = 0;
- }
-
- *dst_alignment = (int)ret;
-}
-
-/** Setup and run a test */
-int main(int argc, char **argv)
-{
- /* Size of src and dest buffers */
- size_t buffer_size = MIN_BUFFER_SIZE;
-
- /* Number of bytes per call */
- int count = 31;
- /* Number of times to run */
- int loops = 10000000;
- /* True to flush the cache each time */
- int flush = 0;
- /* Name of the test */
- const char *name = NULL;
- /* Alignment of buffers */
- int src_alignment = 8;
- int dst_alignment = 8;
- /* Name of the run */
- const char *run_id = "0";
-
- int opt;
-
- while ((opt = getopt(argc, argv, "c:l:ft:r:hva:")) > 0)
- {
- switch (opt)
- {
- case 'c':
- count = parse_int_arg(optarg, argv[0]);
- break;
- case 'l':
- loops = parse_int_arg(optarg, argv[0]);
- break;
- case 'a':
- parse_alignment_arg(optarg, argv[0], &src_alignment, &dst_alignment);
- break;
- case 'f':
- flush = 1;
- break;
- case 't':
- name = strdup(optarg);
- break;
- case 'r':
- run_id = strdup(optarg);
- break;
- case 'h':
- usage(argv[0]);
- break;
- default:
- usage(argv[0]);
- break;
- }
- }
-
- /* Find the test by name */
- const struct test *ptest = find_test(name);
-
- if (ptest == NULL)
- {
- usage(argv[0]);
- }
-
- if (count + MAX_ALIGNMENT * 2 > MIN_BUFFER_SIZE)
- {
- buffer_size = count + MAX_ALIGNMENT * 2;
- }
-
- /* Buffers to read and write from */
- char *src = malloc(buffer_size);
- char *dest = malloc(buffer_size);
-
- assert(src != NULL && dest != NULL);
-
- src = realign(src, src_alignment);
- dest = realign(dest, dst_alignment);
-
- /* Fill the buffer with non-zero, reproducable random data */
- srandom(1539);
-
- for (int i = 0; i < buffer_size; i++)
- {
- src[i] = (char)random() | 1;
- dest[i] = src[i];
- }
-
- /* Make sure the buffers are null terminated for any string tests */
- src[count] = 0;
- dest[count] = 0;
-
- struct timespec start, end;
- int err = clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &start);
- assert(err == 0);
-
- /* Preload */
- stub_t stub = ptest->stub;
-
- /* Run two variants to reduce the cost of testing for the flush */
- if (flush == 0)
- {
- for (int i = 0; i < loops; i++)
- {
- (*stub)(dest, src, count);
- }
- }
- else
- {
- for (int i = 0; i < loops; i++)
- {
- (*stub)(dest, src, count);
- empty(dest);
- }
- }
-
- err = clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &end);
- assert(err == 0);
-
- /* Drop any leading path and pull the variant name out of the executable */
- char *variant = strrchr(argv[0], '/');
-
- if (variant == NULL)
- {
- variant = argv[0];
- }
-
- variant = strstr(variant, "try-");
- assert(variant != NULL);
-
- double elapsed = (end.tv_sec - start.tv_sec) + (end.tv_nsec - start.tv_nsec) * 1e-9;
- /* Estimate the bounce time. Measured on a Panda. */
- double bounced = 0.448730 * loops / 50000000;
-
- /* Dump both machine and human readable versions */
- printf("%s:%s:%u:%u:%d:%d:%s:%.6f: took %.6f s for %u calls to %s of %u bytes. ~%.3f MB/s corrected.\n",
- variant + 4, ptest->name,
- count, loops, src_alignment, dst_alignment, run_id,
- elapsed,
- elapsed, loops, ptest->name, count,
- (double)loops*count/(elapsed - bounced)/(1024*1024));
-
- return 0;
-}
diff --git a/contrib/cortex-strings/configure.ac b/contrib/cortex-strings/configure.ac
deleted file mode 100644
--- a/contrib/cortex-strings/configure.ac
+++ /dev/null
@@ -1,88 +0,0 @@
-# Copyright (c) 2011-2012, Linaro Limited
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are met:
-# * Redistributions of source code must retain the above copyright
-# notice, this list of conditions and the following disclaimer.
-# * Redistributions in binary form must reproduce the above copyright
-# notice, this list of conditions and the following disclaimer in the
-# documentation and/or other materials provided with the distribution.
-# * Neither the name of the Linaro nor the
-# names of its contributors may be used to endorse or promote products
-# derived from this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
-# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
-# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-AC_INIT(cortex-strings, 1.1-2012.06~dev)
-AM_INIT_AUTOMAKE(foreign subdir-objects color-tests dist-bzip2)
-AC_CONFIG_HEADERS([config.h])
-AC_CONFIG_FILES(Makefile)
-AC_CANONICAL_HOST
-AM_PROG_AS
-AC_PROG_CC
-AC_PROG_LIBTOOL
-
-default_submachine=
-
-case $host in
-aarch64*-*-*)
- arch=aarch64
- ;;
-arm*-*-*)
- arch=aarch32
- default_submachine=cortex-a9
- ;;
-x86_64-*-*-*)
- arch=generic
- ;;
-*)
- AC_MSG_ERROR([unknown architecture $host])
- ;;
-esac
-
-AM_CONDITIONAL([HOST_AARCH32], [test x$arch = xaarch32])
-AM_CONDITIONAL([HOST_AARCH64], [test x$arch = xaarch64])
-AM_CONDITIONAL([HOST_GENERIC], [test x$arch = xgeneric])
-
-AC_ARG_WITH([cpu],
- AS_HELP_STRING([--with-cpu=CPU],
- [select code for CPU variant @<:@default=cortex-a9@:>@]]),
- [dnl
- case "$withval" in
- yes|'') AC_MSG_ERROR([--with-cpu requires an argument]) ;;
- no) ;;
- *) submachine="$withval" ;;
- esac
-],
-[submachine=$default_submachine])
-
-AC_SUBST(submachine)
-AM_CONDITIONAL([WITH_SUBMACHINE], [test x$submachine != x])
-
-AC_ARG_WITH([neon],
- AC_HELP_STRING([--with-neon],
- [include NEON specific routines @<:@default=yes@:>@]),
- [with_neon=$withval],
- [with_neon=yes])
-AC_SUBST(with_neon)
-AM_CONDITIONAL(WITH_NEON, test x$with_neon = xyes)
-
-AC_ARG_WITH([vfp],
- AC_HELP_STRING([--with-vfp],
- [include VFP specific routines @<:@default=yes@:>@]),
- [with_vfp=$withval],
- [with_vfp=yes])
-AC_SUBST(with_vfp)
-AM_CONDITIONAL(WITH_VFP, test x$with_vfp = xyes)
-
-AC_OUTPUT
diff --git a/contrib/cortex-strings/scripts/add-license.sh b/contrib/cortex-strings/scripts/add-license.sh
deleted file mode 100755
--- a/contrib/cortex-strings/scripts/add-license.sh
+++ /dev/null
@@ -1,79 +0,0 @@
-#!/bin/bash
-#
-# Add the modified BSD license to a file
-#
-
-f=`mktemp -d`
-trap "rm -rf $f" EXIT
-
-year=`date +%Y`
-cat > $f/original <<EOF
-Copyright (c) $year, Linaro Limited
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in the
- documentation and/or other materials provided with the distribution.
- * Neither the name of the Linaro nor the
- names of its contributors may be used to endorse or promote products
- derived from this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
-ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
-DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-EOF
-
-# Translate it to C style
-echo "/*" > $f/c
-sed -r 's/(.*)/ * \1/' $f/original | sed -r 's/ +$//' >> $f/c
-echo " */" >> $f/c
-echo >> $f/c
-
-# ...and shell style
-sed -r 's/(.*)/# \1/' $f/original | sed -r 's/ +$//' >> $f/shell
-echo '#' >> $f/shell
-echo >> $f/shell
-
-for name in $@; do
- if grep -q Copyright $name; then
- echo $name already has some type of copyright
- continue
- fi
-
- case $name in
- # These files don't have an explicit license
- *autogen.sh*)
- continue;;
- *reference/newlib/*)
- continue;;
- *reference/newlib-xscale/*)
- continue;;
- */dhry/*)
- continue;;
-
- *.c)
- src=$f/c
- ;;
- *.sh|*.am|*.ac)
- src=$f/shell
- ;;
- *)
- echo Unrecognied extension on $name
- continue
- esac
-
- cat $src $name > $f/next
- mv $f/next $name
- echo Updated $name
-done
diff --git a/contrib/cortex-strings/scripts/bench.py b/contrib/cortex-strings/scripts/bench.py
deleted file mode 100644
--- a/contrib/cortex-strings/scripts/bench.py
+++ /dev/null
@@ -1,175 +0,0 @@
-#!/usr/bin/env python
-
-"""Simple harness that benchmarks different variants of the routines,
-caches the results, and emits all of the records at the end.
-
-Results are generated for different values of:
- * Source
- * Routine
- * Length
- * Alignment
-"""
-
-import argparse
-import subprocess
-import math
-import sys
-
-# Prefix to the executables
-build = '../build/try-'
-
-ALL = 'memchr memcmp memcpy memset strchr strcmp strcpy strlen'
-
-HAS = {
- 'this': 'bounce memchr memcpy memset strchr strcmp strcpy strlen',
- 'bionic-a9': 'memcmp memcpy memset strcmp strcpy strlen',
- 'bionic-a15': 'memcmp memcpy memset strcmp strcpy strlen',
- 'bionic-c': ALL,
- 'csl': 'memcpy memset',
- 'glibc': 'memcpy memset strchr strlen',
- 'glibc-c': ALL,
- 'newlib': 'memcpy strcmp strcpy strlen',
- 'newlib-c': ALL,
- 'newlib-xscale': 'memchr memcpy memset strchr strcmp strcpy strlen',
- 'plain': 'memset memcpy strcmp strcpy',
-}
-
-BOUNCE_ALIGNMENTS = ['1']
-SINGLE_BUFFER_ALIGNMENTS = ['1', '2', '4', '8', '16', '32']
-DUAL_BUFFER_ALIGNMENTS = ['1:32', '2:32', '4:32', '8:32', '16:32', '32:32']
-
-ALIGNMENTS = {
- 'bounce': BOUNCE_ALIGNMENTS,
- 'memchr': SINGLE_BUFFER_ALIGNMENTS,
- 'memset': SINGLE_BUFFER_ALIGNMENTS,
- 'strchr': SINGLE_BUFFER_ALIGNMENTS,
- 'strlen': SINGLE_BUFFER_ALIGNMENTS,
- 'memcmp': DUAL_BUFFER_ALIGNMENTS,
- 'memcpy': DUAL_BUFFER_ALIGNMENTS,
- 'strcmp': DUAL_BUFFER_ALIGNMENTS,
- 'strcpy': DUAL_BUFFER_ALIGNMENTS,
-}
-
-VARIANTS = sorted(HAS.keys())
-FUNCTIONS = sorted(ALIGNMENTS.keys())
-
-NUM_RUNS = 5
-
-def run(cache, variant, function, bytes, loops, alignment, run_id, quiet=False):
- """Perform a single run, exercising the cache as appropriate."""
- key = ':'.join('%s' % x for x in (variant, function, bytes, loops, alignment, run_id))
-
- if key in cache:
- got = cache[key]
- else:
- xbuild = build
- cmd = '%(xbuild)s%(variant)s -t %(function)s -c %(bytes)s -l %(loops)s -a %(alignment)s -r %(run_id)s' % locals()
-
- try:
- got = subprocess.check_output(cmd.split()).strip()
- except OSError, ex:
- assert False, 'Error %s while running %s' % (ex, cmd)
-
- parts = got.split(':')
- took = float(parts[7])
-
- cache[key] = got
-
- if not quiet:
- print got
- sys.stdout.flush()
-
- return took
-
-def run_many(cache, variants, bytes, all_functions):
- # We want the data to come out in a useful order. So fix an
- # alignment and function, and do all sizes for a variant first
- bytes = sorted(bytes)
- mid = bytes[int(len(bytes)/1.5)]
-
- if not all_functions:
- # Use the ordering in 'this' as the default
- all_functions = HAS['this'].split()
-
- # Find all other functions
- for functions in HAS.values():
- for function in functions.split():
- if function not in all_functions:
- all_functions.append(function)
-
- for function in all_functions:
- for alignment in ALIGNMENTS[function]:
- for variant in variants:
- if function not in HAS[variant].split():
- continue
-
- # Run a tracer through and see how long it takes and
- # adjust the number of loops based on that. Not great
- # for memchr() and similar which are O(n), but it will
- # do
- f = 50000000
- want = 5.0
-
- loops = int(f / math.sqrt(max(1, mid)))
- took = run(cache, variant, function, mid, loops, alignment, 0,
- quiet=True)
- # Keep it reasonable for silly routines like bounce
- factor = min(20, max(0.05, want/took))
- f = f * factor
-
- # Round f to a few significant figures
- scale = 10**int(math.log10(f) - 1)
- f = scale*int(f/scale)
-
- for b in sorted(bytes):
- # Figure out the number of loops to give a roughly consistent run
- loops = int(f / math.sqrt(max(1, b)))
- for run_id in range(0, NUM_RUNS):
- run(cache, variant, function, b, loops, alignment,
- run_id)
-
-def run_top(cache):
- parser = argparse.ArgumentParser()
- parser.add_argument("-v", "--variants", nargs="+", help="library variant to run (run all if not specified)", default = VARIANTS, choices = VARIANTS)
- parser.add_argument("-f", "--functions", nargs="+", help="function to run (run all if not specified)", default = FUNCTIONS, choices = FUNCTIONS)
- parser.add_argument("-l", "--limit", type=int, help="upper limit to test to (in bytes)", default = 512*1024)
- args = parser.parse_args()
-
- # Test all powers of 2
- step1 = 2.0
- # Test intermediate powers of 1.4
- step2 = 1.4
-
- bytes = []
-
- for step in [step1, step2]:
- if step:
- # Figure out how many steps get us up to the top
- steps = int(round(math.log(args.limit) / math.log(step)))
- bytes.extend([int(step**x) for x in range(0, steps+1)])
-
- run_many(cache, args.variants, bytes, args.functions)
-
-def main():
- cachename = 'cache.txt'
-
- cache = {}
-
- try:
- with open(cachename) as f:
- for line in f:
- line = line.strip()
- parts = line.split(':')
- cache[':'.join(parts[:7])] = line
- except:
- pass
-
- try:
- run_top(cache)
- finally:
- with open(cachename, 'w') as f:
- for line in sorted(cache.values()):
- print >> f, line
-
-if __name__ == '__main__':
- main()
diff --git a/contrib/cortex-strings/scripts/fixup.py b/contrib/cortex-strings/scripts/fixup.py
deleted file mode 100644
--- a/contrib/cortex-strings/scripts/fixup.py
+++ /dev/null
@@ -1,27 +0,0 @@
-"""Simple script that enables target specific blocks based on the first argument.
-
-Matches comment blocks like this:
-
-/* For Foo: abc
-def
-*/
-
-and de-comments them giving:
-abc
-def
-"""
-import re
-import sys
-
-def main():
- key = sys.argv[1]
- expr = re.compile(r'/\* For %s:\s([^*]+)\*/' % key, re.M)
-
- for arg in sys.argv[2:]:
- with open(arg) as f:
- body = f.read()
- with open(arg, 'w') as f:
- f.write(expr.sub(r'\1', body))
-
-if __name__ == '__main__':
- main()
diff --git a/contrib/cortex-strings/scripts/libplot.py b/contrib/cortex-strings/scripts/libplot.py
deleted file mode 100644
--- a/contrib/cortex-strings/scripts/libplot.py
+++ /dev/null
@@ -1,78 +0,0 @@
-"""Shared routines for the plotters."""
-
-import fileinput
-import collections
-
-Record = collections.namedtuple('Record', 'variant function bytes loops src_alignment dst_alignment run_id elapsed rest')
-
-
-def make_colours():
- return iter('m b g r c y k pink orange brown grey'.split())
-
-def parse_value(v):
- """Turn text into a primitive"""
- try:
- if '.' in v:
- return float(v)
- else:
- return int(v)
- except ValueError:
- return v
-
-def create_column_tuple(record, names):
- cols = [getattr(record, name) for name in names]
- return tuple(cols)
-
-def unique(records, name, prefer=''):
- """Return the unique values of a column in the records"""
- if type(name) == tuple:
- values = list(set(create_column_tuple(x, name) for x in records))
- else:
- values = list(set(getattr(x, name) for x in records))
-
- if not values:
- return values
- elif type(values[0]) == str:
- return sorted(values, key=lambda x: '%-06d|%s' % (-prefer.find(x), x))
- else:
- return sorted(values)
-
-def alignments_equal(alignments):
- for alignment in alignments:
- if alignment[0] != alignment[1]:
- return False
- return True
-
-def parse_row(line):
- return Record(*[parse_value(y) for y in line.split(':')])
-
-def parse():
- """Parse a record file into named tuples, correcting for loop
- overhead along the way.
- """
- records = [parse_row(x) for x in fileinput.input()]
-
- # Pull out any bounce values
- costs = {}
-
- for record in [x for x in records if x.function=='bounce']:
- costs[(record.bytes, record.loops)] = record.elapsed
-
- # Fix up all of the records for cost
- out = []
-
- for record in records:
- if record.function == 'bounce':
- continue
-
- cost = costs.get((record.bytes, record.loops), None)
-
- if not cost:
- out.append(record)
- else:
- # Unfortunately you can't update a namedtuple...
- values = list(record)
- values[-2] -= cost
- out.append(Record(*values))
-
- return out
diff --git a/contrib/cortex-strings/scripts/plot-align.py b/contrib/cortex-strings/scripts/plot-align.py
deleted file mode 100644
--- a/contrib/cortex-strings/scripts/plot-align.py
+++ /dev/null
@@ -1,67 +0,0 @@
-#!/usr/bin/env python
-
-"""Plot the performance of different variants of one routine versus alignment.
-"""
-
-import libplot
-
-import pylab
-
-
-def plot(records, bytes, function):
- records = [x for x in records if x.bytes==bytes and x.function==function]
-
- variants = libplot.unique(records, 'variant', prefer='this')
- alignments = libplot.unique(records, ('src_alignment', 'dst_alignment'))
-
- X = pylab.arange(len(alignments))
- width = 1.0/(len(variants)+1)
-
- colours = libplot.make_colours()
-
- pylab.figure(1).set_size_inches((16, 12))
- pylab.clf()
-
- for i, variant in enumerate(variants):
- heights = []
-
- for alignment in alignments:
- matches = [x for x in records if x.variant==variant and x.src_alignment==alignment[0] and x.dst_alignment==alignment[1]]
-
- if matches:
- vals = [match.bytes*match.loops/match.elapsed/(1024*1024) for
- match in matches]
- mean = sum(vals)/len(vals)
- heights.append(mean)
- else:
- heights.append(0)
-
- pylab.bar(X+i*width, heights, width, color=colours.next(), label=variant)
-
-
- axes = pylab.axes()
- if libplot.alignments_equal(alignments):
- alignment_labels = ["%s" % x[0] for x in alignments]
- else:
- alignment_labels = ["%s:%s" % (x[0], x[1]) for x in alignments]
- axes.set_xticklabels(alignment_labels)
- axes.set_xticks(X + 0.5)
-
- pylab.title('Performance of different variants of %(function)s for %(bytes)d byte blocks' % locals())
- pylab.xlabel('Alignment')
- pylab.ylabel('Rate (MB/s)')
- pylab.legend(loc='lower right', ncol=3)
- pylab.grid()
- pylab.savefig('alignment-%(function)s-%(bytes)d.png' % locals(), dpi=72)
-
-def main():
- records = libplot.parse()
-
- for function in libplot.unique(records, 'function'):
- for bytes in libplot.unique(records, 'bytes'):
- plot(records, bytes, function)
-
- pylab.show()
-
-if __name__ == '__main__':
- main()
diff --git a/contrib/cortex-strings/scripts/plot-sizes.py b/contrib/cortex-strings/scripts/plot-sizes.py
deleted file mode 100644
--- a/contrib/cortex-strings/scripts/plot-sizes.py
+++ /dev/null
@@ -1,120 +0,0 @@
-#!/usr/bin/env python
-
-"""Plot the performance for different block sizes of one function across
-variants.
-"""
-
-import libplot
-
-import pylab
-import pdb
-import math
-
-def pretty_kb(v):
- if v < 1024:
- return '%d' % v
- else:
- if v % 1024 == 0:
- return '%d k' % (v//1024)
- else:
- return '%.1f k' % (v/1024)
-
-def plot(records, function, alignment=None, scale=1):
- variants = libplot.unique(records, 'variant', prefer='this')
- records = [x for x in records if x.function==function]
-
- if alignment != None:
- records = [x for x in records if x.src_alignment==alignment[0] and
- x.dst_alignment==alignment[1]]
-
- alignments = libplot.unique(records, ('src_alignment', 'dst_alignment'))
- if len(alignments) != 1:
- return False
- if libplot.alignments_equal(alignments):
- aalignment = alignments[0][0]
- else:
- aalignment = "%s:%s" % (alignments[0][0], alignments[0][1])
-
- bytes = libplot.unique(records, 'bytes')[0]
-
- colours = libplot.make_colours()
- all_x = []
-
- pylab.figure(1).set_size_inches((6.4*scale, 4.8*scale))
- pylab.clf()
-
- if 'str' in function:
- # The harness fills out to 16k. Anything past that is an
- # early match
- top = 16384
- else:
- top = 2**31
-
- for variant in variants:
- matches = [x for x in records if x.variant==variant and x.bytes <= top]
- matches.sort(key=lambda x: x.bytes)
-
- X = sorted(list(set([x.bytes for x in matches])))
- Y = []
- Yerr = []
- for xbytes in X:
- vals = [x.bytes*x.loops/x.elapsed/(1024*1024) for x in matches if x.bytes == xbytes]
- if len(vals) > 1:
- mean = sum(vals)/len(vals)
- Y.append(mean)
- if len(Yerr) == 0:
- Yerr = [[], []]
- err1 = max(vals) - mean
- assert err1 >= 0
- err2 = min(vals) - mean
- assert err2 <= 0
- Yerr[0].append(abs(err2))
- Yerr[1].append(err1)
- else:
- Y.append(vals[0])
-
- all_x.extend(X)
- colour = colours.next()
-
- if X:
- pylab.plot(X, Y, c=colour)
- if len(Yerr) > 0:
- pylab.errorbar(X, Y, yerr=Yerr, c=colour, label=variant, fmt='o')
- else:
- pylab.scatter(X, Y, c=colour, label=variant, edgecolors='none')
-
- pylab.legend(loc='upper left', ncol=3, prop={'size': 'small'})
- pylab.grid()
- pylab.title('%(function)s of %(aalignment)s byte aligned blocks' % locals())
- pylab.xlabel('Size (B)')
- pylab.ylabel('Rate (MB/s)')
-
- # Figure out how high the range goes
- top = max(all_x)
-
- power = int(round(math.log(max(all_x)) / math.log(2)))
-
- pylab.semilogx()
-
- pylab.axes().set_xticks([2**x for x in range(0, power+1)])
- pylab.axes().set_xticklabels([pretty_kb(2**x) for x in range(0, power+1)])
- pylab.xlim(0, top)
- pylab.ylim(0, pylab.ylim()[1])
- return True
-
-def main():
- records = libplot.parse()
-
- functions = libplot.unique(records, 'function')
- alignments = libplot.unique(records, ('src_alignment', 'dst_alignment'))
-
- for function in functions:
- for alignment in alignments:
- for scale in [1, 2.5]:
- if plot(records, function, alignment, scale):
- pylab.savefig('sizes-%s-%02d-%02d-%.1f.png' % (function, alignment[0], alignment[1], scale), dpi=72)
-
- pylab.show()
-
-if __name__ == '__main__':
- main()
diff --git a/contrib/cortex-strings/scripts/plot-top.py b/contrib/cortex-strings/scripts/plot-top.py
deleted file mode 100644
--- a/contrib/cortex-strings/scripts/plot-top.py
+++ /dev/null
@@ -1,61 +0,0 @@
-#!/usr/bin/env python
-
-"""Plot the performance of different variants of the string routines
-for one size.
-"""
-
-import libplot
-
-import pylab
-
-
-def plot(records, bytes):
- records = [x for x in records if x.bytes==bytes]
-
- variants = libplot.unique(records, 'variant', prefer='this')
- functions = libplot.unique(records, 'function')
-
- X = pylab.arange(len(functions))
- width = 1.0/(len(variants)+1)
-
- colours = libplot.make_colours()
-
- pylab.figure(1).set_size_inches((16, 12))
- pylab.clf()
-
- for i, variant in enumerate(variants):
- heights = []
-
- for function in functions:
- matches = [x for x in records if x.variant==variant and x.function==function and x.src_alignment==8]
-
- if matches:
- vals = [match.bytes*match.loops/match.elapsed/(1024*1024) for
- match in matches]
- mean = sum(vals)/len(vals)
- heights.append(mean)
- else:
- heights.append(0)
-
- pylab.bar(X+i*width, heights, width, color=colours.next(), label=variant)
-
- axes = pylab.axes()
- axes.set_xticklabels(functions)
- axes.set_xticks(X + 0.5)
-
- pylab.title('Performance of different variants for %d byte blocks' % bytes)
- pylab.ylabel('Rate (MB/s)')
- pylab.legend(loc='upper left', ncol=3)
- pylab.grid()
- pylab.savefig('top-%06d.png' % bytes, dpi=72)
-
-def main():
- records = libplot.parse()
-
- for bytes in libplot.unique(records, 'bytes'):
- plot(records, bytes)
-
- pylab.show()
-
-if __name__ == '__main__':
- main()
diff --git a/contrib/cortex-strings/scripts/plot.py b/contrib/cortex-strings/scripts/plot.py
deleted file mode 100644
--- a/contrib/cortex-strings/scripts/plot.py
+++ /dev/null
@@ -1,123 +0,0 @@
-"""Plot the results for each test. Spits out a set of images into the
-current directory.
-"""
-
-import libplot
-
-import fileinput
-import collections
-import pprint
-
-import pylab
-
-Record = collections.namedtuple('Record', 'variant test size loops src_alignment dst_alignment run_id rawtime comment time bytes rate')
-
-def unique(rows, name):
- """Takes a list of values, pulls out the named field, and returns
- a list of the unique values of this field.
- """
- return sorted(set(getattr(x, name) for x in rows))
-
-def to_float(v):
- """Convert a string into a better type.
-
- >>> to_float('foo')
- 'foo'
- >>> to_float('1.23')
- 1.23
- >>> to_float('45')
- 45
- """
- try:
- if '.' in v:
- return float(v)
- else:
- return int(v)
- except:
- return v
-
-def parse():
- # Split the input up
- rows = [x.strip().split(':') for x in fileinput.input()]
- # Automatically turn numbers into the base type
- rows = [[to_float(y) for y in x] for x in rows]
-
- # Scan once to calculate the overhead
- r = [Record(*(x + [0, 0, 0])) for x in rows]
- bounces = pylab.array([(x.loops, x.rawtime) for x in r if x.test == 'bounce'])
- fit = pylab.polyfit(bounces[:,0], bounces[:,1], 1)
-
- records = []
-
- for row in rows:
- # Make a dummy record so we can use the names
- r1 = Record(*(row + [0, 0, 0]))
-
- bytes = r1.size * r1.loops
- # Calculate the bounce time
- delta = pylab.polyval(fit, [r1.loops])
- time = r1.rawtime - delta
- rate = bytes / time
-
- records.append(Record(*(row + [time, bytes, rate])))
-
- return records
-
-def plot(records, field, scale, ylabel):
- variants = unique(records, 'variant')
- tests = unique(records, 'test')
-
- colours = libplot.make_colours()
-
- # A little hack. We want the 'all' record to be drawn last so
- # that it's obvious on the graph. Assume that no tests come
- # before it alphabetically
- variants.reverse()
-
- for test in tests:
- for variant in variants:
- v = [x for x in records if x.test==test and x.variant==variant]
- v.sort(key=lambda x: x.size)
- V = pylab.array([(x.size, getattr(x, field)) for x in v])
-
- # Ensure our results appear
- order = 1 if variant == 'this' else 0
-
- try:
- # A little hack. We want the 'all' to be obvious on
- # the graph
- if variant == 'all':
- pylab.scatter(V[:,0], V[:,1]/scale, label=variant)
- pylab.plot(V[:,0], V[:,1]/scale)
- else:
- pylab.plot(V[:,0], V[:,1]/scale, label=variant,
- zorder=order, c = colours.next())
-
- except Exception, ex:
- # michaelh1 likes to run this script while the test is
- # still running which can lead to bad data
- print ex, 'on %s of %s' % (variant, test)
-
- pylab.legend(loc='lower right', ncol=2, prop={'size': 'small'})
- pylab.xlabel('Block size (B)')
- pylab.ylabel(ylabel)
- pylab.title('%s %s' % (test, field))
- pylab.grid()
-
- pylab.savefig('%s-%s.png' % (test, field), dpi=100)
- pylab.semilogx(basex=2)
- pylab.savefig('%s-%s-semilog.png' % (test, field), dpi=100)
- pylab.clf()
-
-def test():
- import doctest
- doctest.testmod()
-
-def main():
- records = parse()
-
- plot(records, 'rate', 1024**2, 'Rate (MB/s)')
- plot(records, 'time', 1, 'Total time (s)')
-
-if __name__ == '__main__':
- main()
diff --git a/contrib/cortex-strings/scripts/trim.sh b/contrib/cortex-strings/scripts/trim.sh
deleted file mode 100755
--- a/contrib/cortex-strings/scripts/trim.sh
+++ /dev/null
@@ -1,9 +0,0 @@
-#!/bin/bash
-#
-# Trims the whitespace from around any given images
-#
-
-for i in $@; do
- convert $i -bordercolor white -border 1x1 -trim +repage -alpha off +dither -colors 32 PNG8:next-$i
- mv next-$i $i
-done
diff --git a/contrib/cortex-strings/src/aarch64/memchr.S b/contrib/cortex-strings/src/aarch64/memchr.S
deleted file mode 100644
--- a/contrib/cortex-strings/src/aarch64/memchr.S
+++ /dev/null
@@ -1,172 +0,0 @@
-/*
- * memchr - find a character in a memory zone
- *
- * Copyright (c) 2014, ARM Limited
- * All rights Reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of the company nor the names of its contributors
- * may be used to endorse or promote products derived from this
- * software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-/* Assumptions:
- *
- * ARMv8-a, AArch64
- * Neon Available.
- */
-
-/* Arguments and results. */
-#define srcin x0
-#define chrin w1
-#define cntin x2
-
-#define result x0
-
-#define src x3
-#define tmp x4
-#define wtmp2 w5
-#define synd x6
-#define soff x9
-#define cntrem x10
-
-#define vrepchr v0
-#define vdata1 v1
-#define vdata2 v2
-#define vhas_chr1 v3
-#define vhas_chr2 v4
-#define vrepmask v5
-#define vend v6
-
-/*
- * Core algorithm:
- *
- * For each 32-byte chunk we calculate a 64-bit syndrome value, with two bits
- * per byte. For each tuple, bit 0 is set if the relevant byte matched the
- * requested character and bit 1 is not used (faster than using a 32bit
- * syndrome). Since the bits in the syndrome reflect exactly the order in which
- * things occur in the original string, counting trailing zeros allows to
- * identify exactly which byte has matched.
- */
-
- .macro def_fn f p2align=0
- .text
- .p2align \p2align
- .global \f
- .type \f, %function
-\f:
- .endm
-
-def_fn memchr
- /* Do not dereference srcin if no bytes to compare. */
- cbz cntin, .Lzero_length
- /*
- * Magic constant 0x40100401 allows us to identify which lane matches
- * the requested byte.
- */
- mov wtmp2, #0x0401
- movk wtmp2, #0x4010, lsl #16
- dup vrepchr.16b, chrin
- /* Work with aligned 32-byte chunks */
- bic src, srcin, #31
- dup vrepmask.4s, wtmp2
- ands soff, srcin, #31
- and cntrem, cntin, #31
- b.eq .Lloop
-
- /*
- * Input string is not 32-byte aligned. We calculate the syndrome
- * value for the aligned 32 bytes block containing the first bytes
- * and mask the irrelevant part.
- */
-
- ld1 {vdata1.16b, vdata2.16b}, [src], #32
- sub tmp, soff, #32
- adds cntin, cntin, tmp
- cmeq vhas_chr1.16b, vdata1.16b, vrepchr.16b
- cmeq vhas_chr2.16b, vdata2.16b, vrepchr.16b
- and vhas_chr1.16b, vhas_chr1.16b, vrepmask.16b
- and vhas_chr2.16b, vhas_chr2.16b, vrepmask.16b
- addp vend.16b, vhas_chr1.16b, vhas_chr2.16b /* 256->128 */
- addp vend.16b, vend.16b, vend.16b /* 128->64 */
- mov synd, vend.d[0]
- /* Clear the soff*2 lower bits */
- lsl tmp, soff, #1
- lsr synd, synd, tmp
- lsl synd, synd, tmp
- /* The first block can also be the last */
- b.ls .Lmasklast
- /* Have we found something already? */
- cbnz synd, .Ltail
-
-.Lloop:
- ld1 {vdata1.16b, vdata2.16b}, [src], #32
- subs cntin, cntin, #32
- cmeq vhas_chr1.16b, vdata1.16b, vrepchr.16b
- cmeq vhas_chr2.16b, vdata2.16b, vrepchr.16b
- /* If we're out of data we finish regardless of the result */
- b.ls .Lend
- /* Use a fast check for the termination condition */
- orr vend.16b, vhas_chr1.16b, vhas_chr2.16b
- addp vend.2d, vend.2d, vend.2d
- mov synd, vend.d[0]
- /* We're not out of data, loop if we haven't found the character */
- cbz synd, .Lloop
-
-.Lend:
- /* Termination condition found, let's calculate the syndrome value */
- and vhas_chr1.16b, vhas_chr1.16b, vrepmask.16b
- and vhas_chr2.16b, vhas_chr2.16b, vrepmask.16b
- addp vend.16b, vhas_chr1.16b, vhas_chr2.16b /* 256->128 */
- addp vend.16b, vend.16b, vend.16b /* 128->64 */
- mov synd, vend.d[0]
- /* Only do the clear for the last possible block */
- b.hi .Ltail
-
-.Lmasklast:
- /* Clear the (32 - ((cntrem + soff) % 32)) * 2 upper bits */
- add tmp, cntrem, soff
- and tmp, tmp, #31
- sub tmp, tmp, #32
- neg tmp, tmp, lsl #1
- lsl synd, synd, tmp
- lsr synd, synd, tmp
-
-.Ltail:
- /* Count the trailing zeros using bit reversing */
- rbit synd, synd
- /* Compensate the last post-increment */
- sub src, src, #32
- /* Check that we have found a character */
- cmp synd, #0
- /* And count the leading zeros */
- clz synd, synd
- /* Compute the potential result */
- add result, src, synd, lsr #1
- /* Select result or NULL */
- csel result, xzr, result, eq
- ret
-
-.Lzero_length:
- mov result, #0
- ret
-
- .size memchr, . - memchr
diff --git a/contrib/cortex-strings/src/aarch64/memcmp.S b/contrib/cortex-strings/src/aarch64/memcmp.S
deleted file mode 100644
--- a/contrib/cortex-strings/src/aarch64/memcmp.S
+++ /dev/null
@@ -1,162 +0,0 @@
-/* memcmp - compare memory
-
- Copyright (c) 2013, Linaro Limited
- All rights reserved.
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are met:
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in the
- documentation and/or other materials provided with the distribution.
- * Neither the name of the Linaro nor the
- names of its contributors may be used to endorse or promote products
- derived from this software without specific prior written permission.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
-
-/* Assumptions:
- *
- * ARMv8-a, AArch64
- */
-
- .macro def_fn f p2align=0
- .text
- .p2align \p2align
- .global \f
- .type \f, %function
-\f:
- .endm
-
-/* Parameters and result. */
-#define src1 x0
-#define src2 x1
-#define limit x2
-#define result x0
-
-/* Internal variables. */
-#define data1 x3
-#define data1w w3
-#define data2 x4
-#define data2w w4
-#define has_nul x5
-#define diff x6
-#define endloop x7
-#define tmp1 x8
-#define tmp2 x9
-#define tmp3 x10
-#define pos x11
-#define limit_wd x12
-#define mask x13
-
-def_fn memcmp p2align=6
- cbz limit, .Lret0
- eor tmp1, src1, src2
- tst tmp1, #7
- b.ne .Lmisaligned8
- ands tmp1, src1, #7
- b.ne .Lmutual_align
- add limit_wd, limit, #7
- lsr limit_wd, limit_wd, #3
- /* Start of performance-critical section -- one 64B cache line. */
-.Lloop_aligned:
- ldr data1, [src1], #8
- ldr data2, [src2], #8
-.Lstart_realigned:
- subs limit_wd, limit_wd, #1
- eor diff, data1, data2 /* Non-zero if differences found. */
- csinv endloop, diff, xzr, ne /* Last Dword or differences. */
- cbz endloop, .Lloop_aligned
- /* End of performance-critical section -- one 64B cache line. */
-
- /* Not reached the limit, must have found a diff. */
- cbnz limit_wd, .Lnot_limit
-
- /* Limit % 8 == 0 => all bytes significant. */
- ands limit, limit, #7
- b.eq .Lnot_limit
-
- lsl limit, limit, #3 /* Bits -> bytes. */
- mov mask, #~0
-#ifdef __AARCH64EB__
- lsr mask, mask, limit
-#else
- lsl mask, mask, limit
-#endif
- bic data1, data1, mask
- bic data2, data2, mask
-
- orr diff, diff, mask
-.Lnot_limit:
-
-#ifndef __AARCH64EB__
- rev diff, diff
- rev data1, data1
- rev data2, data2
-#endif
- /* The MS-non-zero bit of DIFF marks either the first bit
- that is different, or the end of the significant data.
- Shifting left now will bring the critical information into the
- top bits. */
- clz pos, diff
- lsl data1, data1, pos
- lsl data2, data2, pos
- /* But we need to zero-extend (char is unsigned) the value and then
- perform a signed 32-bit subtraction. */
- lsr data1, data1, #56
- sub result, data1, data2, lsr #56
- ret
-
-.Lmutual_align:
- /* Sources are mutually aligned, but are not currently at an
- alignment boundary. Round down the addresses and then mask off
- the bytes that precede the start point. */
- bic src1, src1, #7
- bic src2, src2, #7
- add limit, limit, tmp1 /* Adjust the limit for the extra. */
- lsl tmp1, tmp1, #3 /* Bytes beyond alignment -> bits. */
- ldr data1, [src1], #8
- neg tmp1, tmp1 /* Bits to alignment -64. */
- ldr data2, [src2], #8
- mov tmp2, #~0
-#ifdef __AARCH64EB__
- /* Big-endian. Early bytes are at MSB. */
- lsl tmp2, tmp2, tmp1 /* Shift (tmp1 & 63). */
-#else
- /* Little-endian. Early bytes are at LSB. */
- lsr tmp2, tmp2, tmp1 /* Shift (tmp1 & 63). */
-#endif
- add limit_wd, limit, #7
- orr data1, data1, tmp2
- orr data2, data2, tmp2
- lsr limit_wd, limit_wd, #3
- b .Lstart_realigned
-
-.Lret0:
- mov result, #0
- ret
-
- .p2align 6
-.Lmisaligned8:
- sub limit, limit, #1
-1:
- /* Perhaps we can do better than this. */
- ldrb data1w, [src1], #1
- ldrb data2w, [src2], #1
- subs limit, limit, #1
- ccmp data1w, data2w, #0, cs /* NZCV = 0b0000. */
- b.eq 1b
- sub result, data1, data2
- ret
- .size memcmp, . - memcmp
diff --git a/contrib/cortex-strings/src/aarch64/memcpy.S b/contrib/cortex-strings/src/aarch64/memcpy.S
deleted file mode 100644
--- a/contrib/cortex-strings/src/aarch64/memcpy.S
+++ /dev/null
@@ -1,225 +0,0 @@
-/* Copyright (c) 2012, Linaro Limited
- All rights reserved.
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are met:
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in the
- documentation and/or other materials provided with the distribution.
- * Neither the name of the Linaro nor the
- names of its contributors may be used to endorse or promote products
- derived from this software without specific prior written permission.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
-
-/*
- * Copyright (c) 2015 ARM Ltd
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. The name of the company may not be used to endorse or promote
- * products derived from this software without specific prior written
- * permission.
- *
- * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
- * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
- * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
- * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-/* Assumptions:
- *
- * ARMv8-a, AArch64, unaligned accesses.
- *
- */
-
-#define dstin x0
-#define src x1
-#define count x2
-#define dst x3
-#define srcend x4
-#define dstend x5
-#define A_l x6
-#define A_lw w6
-#define A_h x7
-#define A_hw w7
-#define B_l x8
-#define B_lw w8
-#define B_h x9
-#define C_l x10
-#define C_h x11
-#define D_l x12
-#define D_h x13
-#define E_l src
-#define E_h count
-#define F_l dst
-#define F_h srcend
-#define tmp1 x9
-
-#define L(l) .L ## l
-
- .macro def_fn f p2align=0
- .text
- .p2align \p2align
- .global \f
- .type \f, %function
-\f:
- .endm
-
-/* Copies are split into 3 main cases: small copies of up to 16 bytes,
- medium copies of 17..96 bytes which are fully unrolled. Large copies
- of more than 96 bytes align the destination and use an unrolled loop
- processing 64 bytes per iteration.
- Small and medium copies read all data before writing, allowing any
- kind of overlap, and memmove tailcalls memcpy for these cases as
- well as non-overlapping copies.
-*/
-
-def_fn memcpy p2align=6
- prfm PLDL1KEEP, [src]
- add srcend, src, count
- add dstend, dstin, count
- cmp count, 16
- b.ls L(copy16)
- cmp count, 96
- b.hi L(copy_long)
-
- /* Medium copies: 17..96 bytes. */
- sub tmp1, count, 1
- ldp A_l, A_h, [src]
- tbnz tmp1, 6, L(copy96)
- ldp D_l, D_h, [srcend, -16]
- tbz tmp1, 5, 1f
- ldp B_l, B_h, [src, 16]
- ldp C_l, C_h, [srcend, -32]
- stp B_l, B_h, [dstin, 16]
- stp C_l, C_h, [dstend, -32]
-1:
- stp A_l, A_h, [dstin]
- stp D_l, D_h, [dstend, -16]
- ret
-
- .p2align 4
- /* Small copies: 0..16 bytes. */
-L(copy16):
- cmp count, 8
- b.lo 1f
- ldr A_l, [src]
- ldr A_h, [srcend, -8]
- str A_l, [dstin]
- str A_h, [dstend, -8]
- ret
- .p2align 4
-1:
- tbz count, 2, 1f
- ldr A_lw, [src]
- ldr A_hw, [srcend, -4]
- str A_lw, [dstin]
- str A_hw, [dstend, -4]
- ret
-
- /* Copy 0..3 bytes. Use a branchless sequence that copies the same
- byte 3 times if count==1, or the 2nd byte twice if count==2. */
-1:
- cbz count, 2f
- lsr tmp1, count, 1
- ldrb A_lw, [src]
- ldrb A_hw, [srcend, -1]
- ldrb B_lw, [src, tmp1]
- strb A_lw, [dstin]
- strb B_lw, [dstin, tmp1]
- strb A_hw, [dstend, -1]
-2: ret
-
- .p2align 4
- /* Copy 64..96 bytes. Copy 64 bytes from the start and
- 32 bytes from the end. */
-L(copy96):
- ldp B_l, B_h, [src, 16]
- ldp C_l, C_h, [src, 32]
- ldp D_l, D_h, [src, 48]
- ldp E_l, E_h, [srcend, -32]
- ldp F_l, F_h, [srcend, -16]
- stp A_l, A_h, [dstin]
- stp B_l, B_h, [dstin, 16]
- stp C_l, C_h, [dstin, 32]
- stp D_l, D_h, [dstin, 48]
- stp E_l, E_h, [dstend, -32]
- stp F_l, F_h, [dstend, -16]
- ret
-
- /* Align DST to 16 byte alignment so that we don't cross cache line
- boundaries on both loads and stores. There are at least 96 bytes
- to copy, so copy 16 bytes unaligned and then align. The loop
- copies 64 bytes per iteration and prefetches one iteration ahead. */
-
- .p2align 4
-L(copy_long):
- and tmp1, dstin, 15
- bic dst, dstin, 15
- ldp D_l, D_h, [src]
- sub src, src, tmp1
- add count, count, tmp1 /* Count is now 16 too large. */
- ldp A_l, A_h, [src, 16]
- stp D_l, D_h, [dstin]
- ldp B_l, B_h, [src, 32]
- ldp C_l, C_h, [src, 48]
- ldp D_l, D_h, [src, 64]!
- subs count, count, 128 + 16 /* Test and readjust count. */
- b.ls 2f
-1:
- stp A_l, A_h, [dst, 16]
- ldp A_l, A_h, [src, 16]
- stp B_l, B_h, [dst, 32]
- ldp B_l, B_h, [src, 32]
- stp C_l, C_h, [dst, 48]
- ldp C_l, C_h, [src, 48]
- stp D_l, D_h, [dst, 64]!
- ldp D_l, D_h, [src, 64]!
- subs count, count, 64
- b.hi 1b
-
- /* Write the last full set of 64 bytes. The remainder is at most 64
- bytes, so it is safe to always copy 64 bytes from the end even if
- there is just 1 byte left. */
-2:
- ldp E_l, E_h, [srcend, -64]
- stp A_l, A_h, [dst, 16]
- ldp A_l, A_h, [srcend, -48]
- stp B_l, B_h, [dst, 32]
- ldp B_l, B_h, [srcend, -32]
- stp C_l, C_h, [dst, 48]
- ldp C_l, C_h, [srcend, -16]
- stp D_l, D_h, [dst, 64]
- stp E_l, E_h, [dstend, -64]
- stp A_l, A_h, [dstend, -48]
- stp B_l, B_h, [dstend, -32]
- stp C_l, C_h, [dstend, -16]
- ret
-
- .size memcpy, . - memcpy
diff --git a/contrib/cortex-strings/src/aarch64/memmove.S b/contrib/cortex-strings/src/aarch64/memmove.S
deleted file mode 100644
--- a/contrib/cortex-strings/src/aarch64/memmove.S
+++ /dev/null
@@ -1,150 +0,0 @@
-/* Copyright (c) 2013, Linaro Limited
- All rights reserved.
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are met:
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in the
- documentation and/or other materials provided with the distribution.
- * Neither the name of the Linaro nor the
- names of its contributors may be used to endorse or promote products
- derived from this software without specific prior written permission.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
-
-/*
- * Copyright (c) 2015 ARM Ltd
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. The name of the company may not be used to endorse or promote
- * products derived from this software without specific prior written
- * permission.
- *
- * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
- * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
- * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
- * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-/* Assumptions:
- *
- * ARMv8-a, AArch64, unaligned accesses
- */
-
- .macro def_fn f p2align=0
- .text
- .p2align \p2align
- .global \f
- .type \f, %function
-\f:
- .endm
-
-/* Parameters and result. */
-#define dstin x0
-#define src x1
-#define count x2
-#define srcend x3
-#define dstend x4
-#define tmp1 x5
-#define A_l x6
-#define A_h x7
-#define B_l x8
-#define B_h x9
-#define C_l x10
-#define C_h x11
-#define D_l x12
-#define D_h x13
-#define E_l count
-#define E_h tmp1
-
-/* All memmoves up to 96 bytes are done by memcpy as it supports overlaps.
- Larger backwards copies are also handled by memcpy. The only remaining
- case is forward large copies. The destination is aligned, and an
- unrolled loop processes 64 bytes per iteration.
-*/
-
-def_fn memmove, 6
- sub tmp1, dstin, src
- cmp count, 96
- ccmp tmp1, count, 2, hi
- b.hs memcpy
-
- cbz tmp1, 3f
- add dstend, dstin, count
- add srcend, src, count
-
- /* Align dstend to 16 byte alignment so that we don't cross cache line
- boundaries on both loads and stores. There are at least 96 bytes
- to copy, so copy 16 bytes unaligned and then align. The loop
- copies 64 bytes per iteration and prefetches one iteration ahead. */
-
- and tmp1, dstend, 15
- ldp D_l, D_h, [srcend, -16]
- sub srcend, srcend, tmp1
- sub count, count, tmp1
- ldp A_l, A_h, [srcend, -16]
- stp D_l, D_h, [dstend, -16]
- ldp B_l, B_h, [srcend, -32]
- ldp C_l, C_h, [srcend, -48]
- ldp D_l, D_h, [srcend, -64]!
- sub dstend, dstend, tmp1
- subs count, count, 128
- b.ls 2f
- nop
-1:
- stp A_l, A_h, [dstend, -16]
- ldp A_l, A_h, [srcend, -16]
- stp B_l, B_h, [dstend, -32]
- ldp B_l, B_h, [srcend, -32]
- stp C_l, C_h, [dstend, -48]
- ldp C_l, C_h, [srcend, -48]
- stp D_l, D_h, [dstend, -64]!
- ldp D_l, D_h, [srcend, -64]!
- subs count, count, 64
- b.hi 1b
-
- /* Write the last full set of 64 bytes. The remainder is at most 64
- bytes, so it is safe to always copy 64 bytes from the start even if
- there is just 1 byte left. */
-2:
- ldp E_l, E_h, [src, 48]
- stp A_l, A_h, [dstend, -16]
- ldp A_l, A_h, [src, 32]
- stp B_l, B_h, [dstend, -32]
- ldp B_l, B_h, [src, 16]
- stp C_l, C_h, [dstend, -48]
- ldp C_l, C_h, [src]
- stp D_l, D_h, [dstend, -64]
- stp E_l, E_h, [dstin, 48]
- stp A_l, A_h, [dstin, 32]
- stp B_l, B_h, [dstin, 16]
- stp C_l, C_h, [dstin]
-3: ret
-
- .size memmove, . - memmove
diff --git a/contrib/cortex-strings/src/aarch64/memset.S b/contrib/cortex-strings/src/aarch64/memset.S
deleted file mode 100644
--- a/contrib/cortex-strings/src/aarch64/memset.S
+++ /dev/null
@@ -1,235 +0,0 @@
-/* Copyright (c) 2012, Linaro Limited
- All rights reserved.
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are met:
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in the
- documentation and/or other materials provided with the distribution.
- * Neither the name of the Linaro nor the
- names of its contributors may be used to endorse or promote products
- derived from this software without specific prior written permission.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
-
-/*
- * Copyright (c) 2015 ARM Ltd
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. The name of the company may not be used to endorse or promote
- * products derived from this software without specific prior written
- * permission.
- *
- * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
- * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
- * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
- * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-/* Assumptions:
- *
- * ARMv8-a, AArch64, unaligned accesses
- *
- */
-
-
-#define dstin x0
-#define val x1
-#define valw w1
-#define count x2
-#define dst x3
-#define dstend x4
-#define tmp1 x5
-#define tmp1w w5
-#define tmp2 x6
-#define tmp2w w6
-#define zva_len x7
-#define zva_lenw w7
-
-#define L(l) .L ## l
-
- .macro def_fn f p2align=0
- .text
- .p2align \p2align
- .global \f
- .type \f, %function
-\f:
- .endm
-
-def_fn memset p2align=6
-
- dup v0.16B, valw
- add dstend, dstin, count
-
- cmp count, 96
- b.hi L(set_long)
- cmp count, 16
- b.hs L(set_medium)
- mov val, v0.D[0]
-
- /* Set 0..15 bytes. */
- tbz count, 3, 1f
- str val, [dstin]
- str val, [dstend, -8]
- ret
- nop
-1: tbz count, 2, 2f
- str valw, [dstin]
- str valw, [dstend, -4]
- ret
-2: cbz count, 3f
- strb valw, [dstin]
- tbz count, 1, 3f
- strh valw, [dstend, -2]
-3: ret
-
- /* Set 17..96 bytes. */
-L(set_medium):
- str q0, [dstin]
- tbnz count, 6, L(set96)
- str q0, [dstend, -16]
- tbz count, 5, 1f
- str q0, [dstin, 16]
- str q0, [dstend, -32]
-1: ret
-
- .p2align 4
- /* Set 64..96 bytes. Write 64 bytes from the start and
- 32 bytes from the end. */
-L(set96):
- str q0, [dstin, 16]
- stp q0, q0, [dstin, 32]
- stp q0, q0, [dstend, -32]
- ret
-
- .p2align 3
- nop
-L(set_long):
- and valw, valw, 255
- bic dst, dstin, 15
- str q0, [dstin]
- cmp count, 256
- ccmp valw, 0, 0, cs
- b.eq L(try_zva)
-L(no_zva):
- sub count, dstend, dst /* Count is 16 too large. */
- add dst, dst, 16
- sub count, count, 64 + 16 /* Adjust count and bias for loop. */
-1: stp q0, q0, [dst], 64
- stp q0, q0, [dst, -32]
-L(tail64):
- subs count, count, 64
- b.hi 1b
-2: stp q0, q0, [dstend, -64]
- stp q0, q0, [dstend, -32]
- ret
-
- .p2align 3
-L(try_zva):
- mrs tmp1, dczid_el0
- tbnz tmp1w, 4, L(no_zva)
- and tmp1w, tmp1w, 15
- cmp tmp1w, 4 /* ZVA size is 64 bytes. */
- b.ne L(zva_128)
-
- /* Write the first and last 64 byte aligned block using stp rather
- than using DC ZVA. This is faster on some cores.
- */
-L(zva_64):
- str q0, [dst, 16]
- stp q0, q0, [dst, 32]
- bic dst, dst, 63
- stp q0, q0, [dst, 64]
- stp q0, q0, [dst, 96]
- sub count, dstend, dst /* Count is now 128 too large. */
- sub count, count, 128+64+64 /* Adjust count and bias for loop. */
- add dst, dst, 128
- nop
-1: dc zva, dst
- add dst, dst, 64
- subs count, count, 64
- b.hi 1b
- stp q0, q0, [dst, 0]
- stp q0, q0, [dst, 32]
- stp q0, q0, [dstend, -64]
- stp q0, q0, [dstend, -32]
- ret
-
- .p2align 3
-L(zva_128):
- cmp tmp1w, 5 /* ZVA size is 128 bytes. */
- b.ne L(zva_other)
-
- str q0, [dst, 16]
- stp q0, q0, [dst, 32]
- stp q0, q0, [dst, 64]
- stp q0, q0, [dst, 96]
- bic dst, dst, 127
- sub count, dstend, dst /* Count is now 128 too large. */
- sub count, count, 128+128 /* Adjust count and bias for loop. */
- add dst, dst, 128
-1: dc zva, dst
- add dst, dst, 128
- subs count, count, 128
- b.hi 1b
- stp q0, q0, [dstend, -128]
- stp q0, q0, [dstend, -96]
- stp q0, q0, [dstend, -64]
- stp q0, q0, [dstend, -32]
- ret
-
-L(zva_other):
- mov tmp2w, 4
- lsl zva_lenw, tmp2w, tmp1w
- add tmp1, zva_len, 64 /* Max alignment bytes written. */
- cmp count, tmp1
- blo L(no_zva)
-
- sub tmp2, zva_len, 1
- add tmp1, dst, zva_len
- add dst, dst, 16
- subs count, tmp1, dst /* Actual alignment bytes to write. */
- bic tmp1, tmp1, tmp2 /* Aligned dc zva start address. */
- beq 2f
-1: stp q0, q0, [dst], 64
- stp q0, q0, [dst, -32]
- subs count, count, 64
- b.hi 1b
-2: mov dst, tmp1
- sub count, dstend, tmp1 /* Remaining bytes to write. */
- subs count, count, zva_len
- b.lo 4f
-3: dc zva, dst
- add dst, dst, zva_len
- subs count, count, zva_len
- b.hs 3b
-4: add count, count, zva_len
- b L(tail64)
-
- .size memset, . - memset
diff --git a/contrib/cortex-strings/src/aarch64/strchr.S b/contrib/cortex-strings/src/aarch64/strchr.S
deleted file mode 100644
--- a/contrib/cortex-strings/src/aarch64/strchr.S
+++ /dev/null
@@ -1,165 +0,0 @@
-/*
- strchr - find a character in a string
-
- Copyright (c) 2014, ARM Limited
- All rights Reserved.
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are met:
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in the
- documentation and/or other materials provided with the distribution.
- * Neither the name of the company nor the names of its contributors
- may be used to endorse or promote products derived from this
- software without specific prior written permission.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
-
-/* Assumptions:
- *
- * ARMv8-a, AArch64
- * Neon Available.
- */
-
-/* Arguments and results. */
-#define srcin x0
-#define chrin w1
-
-#define result x0
-
-#define src x2
-#define tmp1 x3
-#define wtmp2 w4
-#define tmp3 x5
-
-#define vrepchr v0
-#define vdata1 v1
-#define vdata2 v2
-#define vhas_nul1 v3
-#define vhas_nul2 v4
-#define vhas_chr1 v5
-#define vhas_chr2 v6
-#define vrepmask_0 v7
-#define vrepmask_c v16
-#define vend1 v17
-#define vend2 v18
-
-/* Core algorithm.
-
- For each 32-byte hunk we calculate a 64-bit syndrome value, with
- two bits per byte (LSB is always in bits 0 and 1, for both big
- and little-endian systems). For each tuple, bit 0 is set iff
- the relevant byte matched the requested character; bit 1 is set
- iff the relevant byte matched the NUL end of string (we trigger
- off bit0 for the special case of looking for NUL). Since the bits
- in the syndrome reflect exactly the order in which things occur
- in the original string a count_trailing_zeros() operation will
- identify exactly which byte is causing the termination, and why. */
-
-/* Locals and temporaries. */
-
- .macro def_fn f p2align=0
- .text
- .p2align \p2align
- .global \f
- .type \f, %function
-\f:
- .endm
-
- .macro def_alias f a
- .weak \a
- .set \a,\f
- .endm
-
-def_fn strchr
-def_alias strchr index
- /* Magic constant 0x40100401 to allow us to identify which lane
- matches the requested byte. Magic constant 0x80200802 used
- similarly for NUL termination. */
- mov wtmp2, #0x0401
- movk wtmp2, #0x4010, lsl #16
- dup vrepchr.16b, chrin
- bic src, srcin, #31 /* Work with aligned 32-byte hunks. */
- dup vrepmask_c.4s, wtmp2
- ands tmp1, srcin, #31
- add vrepmask_0.4s, vrepmask_c.4s, vrepmask_c.4s /* equiv: lsl #1 */
- b.eq .Lloop
-
- /* Input string is not 32-byte aligned. Rather than forcing
- the padding bytes to a safe value, we calculate the syndrome
- for all the bytes, but then mask off those bits of the
- syndrome that are related to the padding. */
- ld1 {vdata1.16b, vdata2.16b}, [src], #32
- neg tmp1, tmp1
- cmeq vhas_nul1.16b, vdata1.16b, #0
- cmeq vhas_chr1.16b, vdata1.16b, vrepchr.16b
- cmeq vhas_nul2.16b, vdata2.16b, #0
- cmeq vhas_chr2.16b, vdata2.16b, vrepchr.16b
- and vhas_nul1.16b, vhas_nul1.16b, vrepmask_0.16b
- and vhas_nul2.16b, vhas_nul2.16b, vrepmask_0.16b
- and vhas_chr1.16b, vhas_chr1.16b, vrepmask_c.16b
- and vhas_chr2.16b, vhas_chr2.16b, vrepmask_c.16b
- orr vend1.16b, vhas_nul1.16b, vhas_chr1.16b
- orr vend2.16b, vhas_nul2.16b, vhas_chr2.16b
- lsl tmp1, tmp1, #1
- addp vend1.16b, vend1.16b, vend2.16b // 256->128
- mov tmp3, #~0
- addp vend1.16b, vend1.16b, vend2.16b // 128->64
- lsr tmp1, tmp3, tmp1
-
- mov tmp3, vend1.d[0]
- bic tmp1, tmp3, tmp1 // Mask padding bits.
- cbnz tmp1, .Ltail
-
-.Lloop:
- ld1 {vdata1.16b, vdata2.16b}, [src], #32
- cmeq vhas_nul1.16b, vdata1.16b, #0
- cmeq vhas_chr1.16b, vdata1.16b, vrepchr.16b
- cmeq vhas_nul2.16b, vdata2.16b, #0
- cmeq vhas_chr2.16b, vdata2.16b, vrepchr.16b
- /* Use a fast check for the termination condition. */
- orr vend1.16b, vhas_nul1.16b, vhas_chr1.16b
- orr vend2.16b, vhas_nul2.16b, vhas_chr2.16b
- orr vend1.16b, vend1.16b, vend2.16b
- addp vend1.2d, vend1.2d, vend1.2d
- mov tmp1, vend1.d[0]
- cbz tmp1, .Lloop
-
- /* Termination condition found. Now need to establish exactly why
- we terminated. */
- and vhas_nul1.16b, vhas_nul1.16b, vrepmask_0.16b
- and vhas_nul2.16b, vhas_nul2.16b, vrepmask_0.16b
- and vhas_chr1.16b, vhas_chr1.16b, vrepmask_c.16b
- and vhas_chr2.16b, vhas_chr2.16b, vrepmask_c.16b
- orr vend1.16b, vhas_nul1.16b, vhas_chr1.16b
- orr vend2.16b, vhas_nul2.16b, vhas_chr2.16b
- addp vend1.16b, vend1.16b, vend2.16b // 256->128
- addp vend1.16b, vend1.16b, vend2.16b // 128->64
-
- mov tmp1, vend1.d[0]
-.Ltail:
- /* Count the trailing zeros, by bit reversing... */
- rbit tmp1, tmp1
- /* Re-bias source. */
- sub src, src, #32
- clz tmp1, tmp1 /* And counting the leading zeros. */
- /* Tmp1 is even if the target charager was found first. Otherwise
- we've found the end of string and we weren't looking for NUL. */
- tst tmp1, #1
- add result, src, tmp1, lsr #1
- csel result, result, xzr, eq
- ret
-
- .size strchr, . - strchr
diff --git a/contrib/cortex-strings/src/aarch64/strchrnul.S b/contrib/cortex-strings/src/aarch64/strchrnul.S
deleted file mode 100644
--- a/contrib/cortex-strings/src/aarch64/strchrnul.S
+++ /dev/null
@@ -1,144 +0,0 @@
-/*
- strchrnul - find a character or nul in a string
-
- Copyright (c) 2014, ARM Limited
- All rights Reserved.
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are met:
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in the
- documentation and/or other materials provided with the distribution.
- * Neither the name of the company nor the names of its contributors
- may be used to endorse or promote products derived from this
- software without specific prior written permission.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
-
-/* Assumptions:
- *
- * ARMv8-a, AArch64
- * Neon Available.
- */
-
-/* Arguments and results. */
-#define srcin x0
-#define chrin w1
-
-#define result x0
-
-#define src x2
-#define tmp1 x3
-#define wtmp2 w4
-#define tmp3 x5
-
-#define vrepchr v0
-#define vdata1 v1
-#define vdata2 v2
-#define vhas_nul1 v3
-#define vhas_nul2 v4
-#define vhas_chr1 v5
-#define vhas_chr2 v6
-#define vrepmask v7
-#define vend1 v16
-
-/* Core algorithm.
-
- For each 32-byte hunk we calculate a 64-bit syndrome value, with
- two bits per byte (LSB is always in bits 0 and 1, for both big
- and little-endian systems). For each tuple, bit 0 is set iff
- the relevant byte matched the requested character or nul. Since the
- bits in the syndrome reflect exactly the order in which things occur
- in the original string a count_trailing_zeros() operation will
- identify exactly which byte is causing the termination. */
-
-/* Locals and temporaries. */
-
- .macro def_fn f p2align=0
- .text
- .p2align \p2align
- .global \f
- .type \f, %function
-\f:
- .endm
-
-def_fn strchrnul
- /* Magic constant 0x40100401 to allow us to identify which lane
- matches the termination condition. */
- mov wtmp2, #0x0401
- movk wtmp2, #0x4010, lsl #16
- dup vrepchr.16b, chrin
- bic src, srcin, #31 /* Work with aligned 32-byte hunks. */
- dup vrepmask.4s, wtmp2
- ands tmp1, srcin, #31
- b.eq .Lloop
-
- /* Input string is not 32-byte aligned. Rather than forcing
- the padding bytes to a safe value, we calculate the syndrome
- for all the bytes, but then mask off those bits of the
- syndrome that are related to the padding. */
- ld1 {vdata1.16b, vdata2.16b}, [src], #32
- neg tmp1, tmp1
- cmeq vhas_nul1.16b, vdata1.16b, #0
- cmeq vhas_chr1.16b, vdata1.16b, vrepchr.16b
- cmeq vhas_nul2.16b, vdata2.16b, #0
- cmeq vhas_chr2.16b, vdata2.16b, vrepchr.16b
- orr vhas_chr1.16b, vhas_chr1.16b, vhas_nul1.16b
- orr vhas_chr2.16b, vhas_chr2.16b, vhas_nul2.16b
- and vhas_chr1.16b, vhas_chr1.16b, vrepmask.16b
- and vhas_chr2.16b, vhas_chr2.16b, vrepmask.16b
- lsl tmp1, tmp1, #1
- addp vend1.16b, vhas_chr1.16b, vhas_chr2.16b // 256->128
- mov tmp3, #~0
- addp vend1.16b, vend1.16b, vend1.16b // 128->64
- lsr tmp1, tmp3, tmp1
-
- mov tmp3, vend1.d[0]
- bic tmp1, tmp3, tmp1 // Mask padding bits.
- cbnz tmp1, .Ltail
-
-.Lloop:
- ld1 {vdata1.16b, vdata2.16b}, [src], #32
- cmeq vhas_nul1.16b, vdata1.16b, #0
- cmeq vhas_chr1.16b, vdata1.16b, vrepchr.16b
- cmeq vhas_nul2.16b, vdata2.16b, #0
- cmeq vhas_chr2.16b, vdata2.16b, vrepchr.16b
- /* Use a fast check for the termination condition. */
- orr vhas_chr1.16b, vhas_nul1.16b, vhas_chr1.16b
- orr vhas_chr2.16b, vhas_nul2.16b, vhas_chr2.16b
- orr vend1.16b, vhas_chr1.16b, vhas_chr2.16b
- addp vend1.2d, vend1.2d, vend1.2d
- mov tmp1, vend1.d[0]
- cbz tmp1, .Lloop
-
- /* Termination condition found. Now need to establish exactly why
- we terminated. */
- and vhas_chr1.16b, vhas_chr1.16b, vrepmask.16b
- and vhas_chr2.16b, vhas_chr2.16b, vrepmask.16b
- addp vend1.16b, vhas_chr1.16b, vhas_chr2.16b // 256->128
- addp vend1.16b, vend1.16b, vend1.16b // 128->64
-
- mov tmp1, vend1.d[0]
-.Ltail:
- /* Count the trailing zeros, by bit reversing... */
- rbit tmp1, tmp1
- /* Re-bias source. */
- sub src, src, #32
- clz tmp1, tmp1 /* ... and counting the leading zeros. */
- /* tmp1 is twice the offset into the fragment. */
- add result, src, tmp1, lsr #1
- ret
-
- .size strchrnul, . - strchrnul
diff --git a/contrib/cortex-strings/src/aarch64/strcmp.S b/contrib/cortex-strings/src/aarch64/strcmp.S
deleted file mode 100644
--- a/contrib/cortex-strings/src/aarch64/strcmp.S
+++ /dev/null
@@ -1,166 +0,0 @@
-/* Copyright (c) 2012, Linaro Limited
- All rights reserved.
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are met:
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in the
- documentation and/or other materials provided with the distribution.
- * Neither the name of the Linaro nor the
- names of its contributors may be used to endorse or promote products
- derived from this software without specific prior written permission.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
-
-/* Assumptions:
- *
- * ARMv8-a, AArch64
- */
-
- .macro def_fn f p2align=0
- .text
- .p2align \p2align
- .global \f
- .type \f, %function
-\f:
- .endm
-
-#define REP8_01 0x0101010101010101
-#define REP8_7f 0x7f7f7f7f7f7f7f7f
-#define REP8_80 0x8080808080808080
-
-/* Parameters and result. */
-#define src1 x0
-#define src2 x1
-#define result x0
-
-/* Internal variables. */
-#define data1 x2
-#define data1w w2
-#define data2 x3
-#define data2w w3
-#define has_nul x4
-#define diff x5
-#define syndrome x6
-#define tmp1 x7
-#define tmp2 x8
-#define tmp3 x9
-#define zeroones x10
-#define pos x11
-
- /* Start of performance-critical section -- one 64B cache line. */
-def_fn strcmp p2align=6
- eor tmp1, src1, src2
- mov zeroones, #REP8_01
- tst tmp1, #7
- b.ne .Lmisaligned8
- ands tmp1, src1, #7
- b.ne .Lmutual_align
- /* NUL detection works on the principle that (X - 1) & (~X) & 0x80
- (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
- can be done in parallel across the entire word. */
-.Lloop_aligned:
- ldr data1, [src1], #8
- ldr data2, [src2], #8
-.Lstart_realigned:
- sub tmp1, data1, zeroones
- orr tmp2, data1, #REP8_7f
- eor diff, data1, data2 /* Non-zero if differences found. */
- bic has_nul, tmp1, tmp2 /* Non-zero if NUL terminator. */
- orr syndrome, diff, has_nul
- cbz syndrome, .Lloop_aligned
- /* End of performance-critical section -- one 64B cache line. */
-
-#ifndef __AARCH64EB__
- rev syndrome, syndrome
- rev data1, data1
- /* The MS-non-zero bit of the syndrome marks either the first bit
- that is different, or the top bit of the first zero byte.
- Shifting left now will bring the critical information into the
- top bits. */
- clz pos, syndrome
- rev data2, data2
- lsl data1, data1, pos
- lsl data2, data2, pos
- /* But we need to zero-extend (char is unsigned) the value and then
- perform a signed 32-bit subtraction. */
- lsr data1, data1, #56
- sub result, data1, data2, lsr #56
- ret
-#else
- /* For big-endian we cannot use the trick with the syndrome value
- as carry-propagation can corrupt the upper bits if the trailing
- bytes in the string contain 0x01. */
- /* However, if there is no NUL byte in the dword, we can generate
- the result directly. We can't just subtract the bytes as the
- MSB might be significant. */
- cbnz has_nul, 1f
- cmp data1, data2
- cset result, ne
- cneg result, result, lo
- ret
-1:
- /* Re-compute the NUL-byte detection, using a byte-reversed value. */
- rev tmp3, data1
- sub tmp1, tmp3, zeroones
- orr tmp2, tmp3, #REP8_7f
- bic has_nul, tmp1, tmp2
- rev has_nul, has_nul
- orr syndrome, diff, has_nul
- clz pos, syndrome
- /* The MS-non-zero bit of the syndrome marks either the first bit
- that is different, or the top bit of the first zero byte.
- Shifting left now will bring the critical information into the
- top bits. */
- lsl data1, data1, pos
- lsl data2, data2, pos
- /* But we need to zero-extend (char is unsigned) the value and then
- perform a signed 32-bit subtraction. */
- lsr data1, data1, #56
- sub result, data1, data2, lsr #56
- ret
-#endif
-
-.Lmutual_align:
- /* Sources are mutually aligned, but are not currently at an
- alignment boundary. Round down the addresses and then mask off
- the bytes that preceed the start point. */
- bic src1, src1, #7
- bic src2, src2, #7
- lsl tmp1, tmp1, #3 /* Bytes beyond alignment -> bits. */
- ldr data1, [src1], #8
- neg tmp1, tmp1 /* Bits to alignment -64. */
- ldr data2, [src2], #8
- mov tmp2, #~0
-#ifdef __AARCH64EB__
- /* Big-endian. Early bytes are at MSB. */
- lsl tmp2, tmp2, tmp1 /* Shift (tmp1 & 63). */
-#else
- /* Little-endian. Early bytes are at LSB. */
- lsr tmp2, tmp2, tmp1 /* Shift (tmp1 & 63). */
-#endif
- orr data1, data1, tmp2
- orr data2, data2, tmp2
- b .Lstart_realigned
-
-.Lmisaligned8:
- /* We can do better than this. */
- ldrb data1w, [src1], #1
- ldrb data2w, [src2], #1
- cmp data1w, #1
- ccmp data1w, data2w, #0, cs /* NZCV = 0b0000. */
- b.eq .Lmisaligned8
- sub result, data1, data2
- ret
diff --git a/contrib/cortex-strings/src/aarch64/strcpy.S b/contrib/cortex-strings/src/aarch64/strcpy.S
deleted file mode 100644
--- a/contrib/cortex-strings/src/aarch64/strcpy.S
+++ /dev/null
@@ -1,336 +0,0 @@
-/*
- strcpy/stpcpy - copy a string returning pointer to start/end.
-
- Copyright (c) 2013, 2014, 2015 ARM Ltd.
- All Rights Reserved.
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are met:
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in the
- documentation and/or other materials provided with the distribution.
- * Neither the name of the company nor the names of its contributors
- may be used to endorse or promote products derived from this
- software without specific prior written permission.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
-
-/* Assumptions:
- *
- * ARMv8-a, AArch64, unaligned accesses, min page size 4k.
- */
-
-/* To build as stpcpy, define BUILD_STPCPY before compiling this file.
-
- To test the page crossing code path more thoroughly, compile with
- -DSTRCPY_TEST_PAGE_CROSS - this will force all copies through the slower
- entry path. This option is not intended for production use. */
-
-/* Arguments and results. */
-#define dstin x0
-#define srcin x1
-
-/* Locals and temporaries. */
-#define src x2
-#define dst x3
-#define data1 x4
-#define data1w w4
-#define data2 x5
-#define data2w w5
-#define has_nul1 x6
-#define has_nul2 x7
-#define tmp1 x8
-#define tmp2 x9
-#define tmp3 x10
-#define tmp4 x11
-#define zeroones x12
-#define data1a x13
-#define data2a x14
-#define pos x15
-#define len x16
-#define to_align x17
-
-#ifdef BUILD_STPCPY
-#define STRCPY stpcpy
-#else
-#define STRCPY strcpy
-#endif
-
- .macro def_fn f p2align=0
- .text
- .p2align \p2align
- .global \f
- .type \f, %function
-\f:
- .endm
-
- /* NUL detection works on the principle that (X - 1) & (~X) & 0x80
- (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
- can be done in parallel across the entire word. */
-
-#define REP8_01 0x0101010101010101
-#define REP8_7f 0x7f7f7f7f7f7f7f7f
-#define REP8_80 0x8080808080808080
-
- /* AArch64 systems have a minimum page size of 4k. We can do a quick
- page size check for crossing this boundary on entry and if we
- do not, then we can short-circuit much of the entry code. We
- expect early page-crossing strings to be rare (probability of
- 16/MIN_PAGE_SIZE ~= 0.4%), so the branch should be quite
- predictable, even with random strings.
-
- We don't bother checking for larger page sizes, the cost of setting
- up the correct page size is just not worth the extra gain from
- a small reduction in the cases taking the slow path. Note that
- we only care about whether the first fetch, which may be
- misaligned, crosses a page boundary - after that we move to aligned
- fetches for the remainder of the string. */
-
-#ifdef STRCPY_TEST_PAGE_CROSS
- /* Make everything that isn't Qword aligned look like a page cross. */
-#define MIN_PAGE_P2 4
-#else
-#define MIN_PAGE_P2 12
-#endif
-
-#define MIN_PAGE_SIZE (1 << MIN_PAGE_P2)
-
-def_fn STRCPY p2align=6
- /* For moderately short strings, the fastest way to do the copy is to
- calculate the length of the string in the same way as strlen, then
- essentially do a memcpy of the result. This avoids the need for
- multiple byte copies and further means that by the time we
- reach the bulk copy loop we know we can always use DWord
- accesses. We expect strcpy to rarely be called repeatedly
- with the same source string, so branch prediction is likely to
- always be difficult - we mitigate against this by preferring
- conditional select operations over branches whenever this is
- feasible. */
- and tmp2, srcin, #(MIN_PAGE_SIZE - 1)
- mov zeroones, #REP8_01
- and to_align, srcin, #15
- cmp tmp2, #(MIN_PAGE_SIZE - 16)
- neg tmp1, to_align
- /* The first fetch will straddle a (possible) page boundary iff
- srcin + 15 causes bit[MIN_PAGE_P2] to change value. A 16-byte
- aligned string will never fail the page align check, so will
- always take the fast path. */
- b.gt .Lpage_cross
-
-.Lpage_cross_ok:
- ldp data1, data2, [srcin]
-#ifdef __AARCH64EB__
- /* Because we expect the end to be found within 16 characters
- (profiling shows this is the most common case), it's worth
- swapping the bytes now to save having to recalculate the
- termination syndrome later. We preserve data1 and data2
- so that we can re-use the values later on. */
- rev tmp2, data1
- sub tmp1, tmp2, zeroones
- orr tmp2, tmp2, #REP8_7f
- bics has_nul1, tmp1, tmp2
- b.ne .Lfp_le8
- rev tmp4, data2
- sub tmp3, tmp4, zeroones
- orr tmp4, tmp4, #REP8_7f
-#else
- sub tmp1, data1, zeroones
- orr tmp2, data1, #REP8_7f
- bics has_nul1, tmp1, tmp2
- b.ne .Lfp_le8
- sub tmp3, data2, zeroones
- orr tmp4, data2, #REP8_7f
-#endif
- bics has_nul2, tmp3, tmp4
- b.eq .Lbulk_entry
-
- /* The string is short (<=16 bytes). We don't know exactly how
- short though, yet. Work out the exact length so that we can
- quickly select the optimal copy strategy. */
-.Lfp_gt8:
- rev has_nul2, has_nul2
- clz pos, has_nul2
- mov tmp2, #56
- add dst, dstin, pos, lsr #3 /* Bits to bytes. */
- sub pos, tmp2, pos
-#ifdef __AARCH64EB__
- lsr data2, data2, pos
-#else
- lsl data2, data2, pos
-#endif
- str data2, [dst, #1]
- str data1, [dstin]
-#ifdef BUILD_STPCPY
- add dstin, dst, #8
-#endif
- ret
-
-.Lfp_le8:
- rev has_nul1, has_nul1
- clz pos, has_nul1
- add dst, dstin, pos, lsr #3 /* Bits to bytes. */
- subs tmp2, pos, #24 /* Pos in bits. */
- b.lt .Lfp_lt4
-#ifdef __AARCH64EB__
- mov tmp2, #56
- sub pos, tmp2, pos
- lsr data2, data1, pos
- lsr data1, data1, #32
-#else
- lsr data2, data1, tmp2
-#endif
- /* 4->7 bytes to copy. */
- str data2w, [dst, #-3]
- str data1w, [dstin]
-#ifdef BUILD_STPCPY
- mov dstin, dst
-#endif
- ret
-.Lfp_lt4:
- cbz pos, .Lfp_lt2
- /* 2->3 bytes to copy. */
-#ifdef __AARCH64EB__
- lsr data1, data1, #48
-#endif
- strh data1w, [dstin]
- /* Fall-through, one byte (max) to go. */
-.Lfp_lt2:
- /* Null-terminated string. Last character must be zero! */
- strb wzr, [dst]
-#ifdef BUILD_STPCPY
- mov dstin, dst
-#endif
- ret
-
- .p2align 6
- /* Aligning here ensures that the entry code and main loop all lies
- within one 64-byte cache line. */
-.Lbulk_entry:
- sub to_align, to_align, #16
- stp data1, data2, [dstin]
- sub src, srcin, to_align
- sub dst, dstin, to_align
- b .Lentry_no_page_cross
-
- /* The inner loop deals with two Dwords at a time. This has a
- slightly higher start-up cost, but we should win quite quickly,
- especially on cores with a high number of issue slots per
- cycle, as we get much better parallelism out of the operations. */
-.Lmain_loop:
- stp data1, data2, [dst], #16
-.Lentry_no_page_cross:
- ldp data1, data2, [src], #16
- sub tmp1, data1, zeroones
- orr tmp2, data1, #REP8_7f
- sub tmp3, data2, zeroones
- orr tmp4, data2, #REP8_7f
- bic has_nul1, tmp1, tmp2
- bics has_nul2, tmp3, tmp4
- ccmp has_nul1, #0, #0, eq /* NZCV = 0000 */
- b.eq .Lmain_loop
-
- /* Since we know we are copying at least 16 bytes, the fastest way
- to deal with the tail is to determine the location of the
- trailing NUL, then (re)copy the 16 bytes leading up to that. */
- cmp has_nul1, #0
-#ifdef __AARCH64EB__
- /* For big-endian, carry propagation (if the final byte in the
- string is 0x01) means we cannot use has_nul directly. The
- easiest way to get the correct byte is to byte-swap the data
- and calculate the syndrome a second time. */
- csel data1, data1, data2, ne
- rev data1, data1
- sub tmp1, data1, zeroones
- orr tmp2, data1, #REP8_7f
- bic has_nul1, tmp1, tmp2
-#else
- csel has_nul1, has_nul1, has_nul2, ne
-#endif
- rev has_nul1, has_nul1
- clz pos, has_nul1
- add tmp1, pos, #72
- add pos, pos, #8
- csel pos, pos, tmp1, ne
- add src, src, pos, lsr #3
- add dst, dst, pos, lsr #3
- ldp data1, data2, [src, #-32]
- stp data1, data2, [dst, #-16]
-#ifdef BUILD_STPCPY
- sub dstin, dst, #1
-#endif
- ret
-
-.Lpage_cross:
- bic src, srcin, #15
- /* Start by loading two words at [srcin & ~15], then forcing the
- bytes that precede srcin to 0xff. This means they never look
- like termination bytes. */
- ldp data1, data2, [src]
- lsl tmp1, tmp1, #3 /* Bytes beyond alignment -> bits. */
- tst to_align, #7
- csetm tmp2, ne
-#ifdef __AARCH64EB__
- lsl tmp2, tmp2, tmp1 /* Shift (tmp1 & 63). */
-#else
- lsr tmp2, tmp2, tmp1 /* Shift (tmp1 & 63). */
-#endif
- orr data1, data1, tmp2
- orr data2a, data2, tmp2
- cmp to_align, #8
- csinv data1, data1, xzr, lt
- csel data2, data2, data2a, lt
- sub tmp1, data1, zeroones
- orr tmp2, data1, #REP8_7f
- sub tmp3, data2, zeroones
- orr tmp4, data2, #REP8_7f
- bic has_nul1, tmp1, tmp2
- bics has_nul2, tmp3, tmp4
- ccmp has_nul1, #0, #0, eq /* NZCV = 0000 */
- b.eq .Lpage_cross_ok
- /* We now need to make data1 and data2 look like they've been
- loaded directly from srcin. Do a rotate on the 128-bit value. */
- lsl tmp1, to_align, #3 /* Bytes->bits. */
- neg tmp2, to_align, lsl #3
-#ifdef __AARCH64EB__
- lsl data1a, data1, tmp1
- lsr tmp4, data2, tmp2
- lsl data2, data2, tmp1
- orr tmp4, tmp4, data1a
- cmp to_align, #8
- csel data1, tmp4, data2, lt
- rev tmp2, data1
- rev tmp4, data2
- sub tmp1, tmp2, zeroones
- orr tmp2, tmp2, #REP8_7f
- sub tmp3, tmp4, zeroones
- orr tmp4, tmp4, #REP8_7f
-#else
- lsr data1a, data1, tmp1
- lsl tmp4, data2, tmp2
- lsr data2, data2, tmp1
- orr tmp4, tmp4, data1a
- cmp to_align, #8
- csel data1, tmp4, data2, lt
- sub tmp1, data1, zeroones
- orr tmp2, data1, #REP8_7f
- sub tmp3, data2, zeroones
- orr tmp4, data2, #REP8_7f
-#endif
- bic has_nul1, tmp1, tmp2
- cbnz has_nul1, .Lfp_le8
- bic has_nul2, tmp3, tmp4
- b .Lfp_gt8
-
- .size STRCPY, . - STRCPY
diff --git a/contrib/cortex-strings/src/aarch64/strlen.S b/contrib/cortex-strings/src/aarch64/strlen.S
deleted file mode 100644
--- a/contrib/cortex-strings/src/aarch64/strlen.S
+++ /dev/null
@@ -1,233 +0,0 @@
-/* Copyright (c) 2013-2015, Linaro Limited
- All rights reserved.
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are met:
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in the
- documentation and/or other materials provided with the distribution.
- * Neither the name of the Linaro nor the
- names of its contributors may be used to endorse or promote products
- derived from this software without specific prior written permission.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
-
-/* Assumptions:
- *
- * ARMv8-a, AArch64, unaligned accesses, min page size 4k.
- */
-
-/* To test the page crossing code path more thoroughly, compile with
- -DTEST_PAGE_CROSS - this will force all calls through the slower
- entry path. This option is not intended for production use. */
-
-/* Arguments and results. */
-#define srcin x0
-#define len x0
-
-/* Locals and temporaries. */
-#define src x1
-#define data1 x2
-#define data2 x3
-#define has_nul1 x4
-#define has_nul2 x5
-#define tmp1 x4
-#define tmp2 x5
-#define tmp3 x6
-#define tmp4 x7
-#define zeroones x8
-
-#define L(l) .L ## l
-
- .macro def_fn f p2align=0
- .text
- .p2align \p2align
- .global \f
- .type \f, %function
-\f:
- .endm
-
- /* NUL detection works on the principle that (X - 1) & (~X) & 0x80
- (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
- can be done in parallel across the entire word. A faster check
- (X - 1) & 0x80 is zero for non-NUL ASCII characters, but gives
- false hits for characters 129..255. */
-
-#define REP8_01 0x0101010101010101
-#define REP8_7f 0x7f7f7f7f7f7f7f7f
-#define REP8_80 0x8080808080808080
-
-#ifdef TEST_PAGE_CROSS
-# define MIN_PAGE_SIZE 15
-#else
-# define MIN_PAGE_SIZE 4096
-#endif
-
- /* Since strings are short on average, we check the first 16 bytes
- of the string for a NUL character. In order to do an unaligned ldp
- safely we have to do a page cross check first. If there is a NUL
- byte we calculate the length from the 2 8-byte words using
- conditional select to reduce branch mispredictions (it is unlikely
- strlen will be repeatedly called on strings with the same length).
-
- If the string is longer than 16 bytes, we align src so don't need
- further page cross checks, and process 32 bytes per iteration
- using the fast NUL check. If we encounter non-ASCII characters,
- fallback to a second loop using the full NUL check.
-
- If the page cross check fails, we read 16 bytes from an aligned
- address, remove any characters before the string, and continue
- in the main loop using aligned loads. Since strings crossing a
- page in the first 16 bytes are rare (probability of
- 16/MIN_PAGE_SIZE ~= 0.4%), this case does not need to be optimized.
-
- AArch64 systems have a minimum page size of 4k. We don't bother
- checking for larger page sizes - the cost of setting up the correct
- page size is just not worth the extra gain from a small reduction in
- the cases taking the slow path. Note that we only care about
- whether the first fetch, which may be misaligned, crosses a page
- boundary. */
-
-def_fn strlen p2align=6
- and tmp1, srcin, MIN_PAGE_SIZE - 1
- mov zeroones, REP8_01
- cmp tmp1, MIN_PAGE_SIZE - 16
- b.gt L(page_cross)
- ldp data1, data2, [srcin]
-#ifdef __AARCH64EB__
- /* For big-endian, carry propagation (if the final byte in the
- string is 0x01) means we cannot use has_nul1/2 directly.
- Since we expect strings to be small and early-exit,
- byte-swap the data now so has_null1/2 will be correct. */
- rev data1, data1
- rev data2, data2
-#endif
- sub tmp1, data1, zeroones
- orr tmp2, data1, REP8_7f
- sub tmp3, data2, zeroones
- orr tmp4, data2, REP8_7f
- bics has_nul1, tmp1, tmp2
- bic has_nul2, tmp3, tmp4
- ccmp has_nul2, 0, 0, eq
- beq L(main_loop_entry)
-
- /* Enter with C = has_nul1 == 0. */
- csel has_nul1, has_nul1, has_nul2, cc
- mov len, 8
- rev has_nul1, has_nul1
- clz tmp1, has_nul1
- csel len, xzr, len, cc
- add len, len, tmp1, lsr 3
- ret
-
- /* The inner loop processes 32 bytes per iteration and uses the fast
- NUL check. If we encounter non-ASCII characters, use a second
- loop with the accurate NUL check. */
- .p2align 4
-L(main_loop_entry):
- bic src, srcin, 15
- sub src, src, 16
-L(main_loop):
- ldp data1, data2, [src, 32]!
-.Lpage_cross_entry:
- sub tmp1, data1, zeroones
- sub tmp3, data2, zeroones
- orr tmp2, tmp1, tmp3
- tst tmp2, zeroones, lsl 7
- bne 1f
- ldp data1, data2, [src, 16]
- sub tmp1, data1, zeroones
- sub tmp3, data2, zeroones
- orr tmp2, tmp1, tmp3
- tst tmp2, zeroones, lsl 7
- beq L(main_loop)
- add src, src, 16
-1:
- /* The fast check failed, so do the slower, accurate NUL check. */
- orr tmp2, data1, REP8_7f
- orr tmp4, data2, REP8_7f
- bics has_nul1, tmp1, tmp2
- bic has_nul2, tmp3, tmp4
- ccmp has_nul2, 0, 0, eq
- beq L(nonascii_loop)
-
- /* Enter with C = has_nul1 == 0. */
-L(tail):
-#ifdef __AARCH64EB__
- /* For big-endian, carry propagation (if the final byte in the
- string is 0x01) means we cannot use has_nul1/2 directly. The
- easiest way to get the correct byte is to byte-swap the data
- and calculate the syndrome a second time. */
- csel data1, data1, data2, cc
- rev data1, data1
- sub tmp1, data1, zeroones
- orr tmp2, data1, REP8_7f
- bic has_nul1, tmp1, tmp2
-#else
- csel has_nul1, has_nul1, has_nul2, cc
-#endif
- sub len, src, srcin
- rev has_nul1, has_nul1
- add tmp2, len, 8
- clz tmp1, has_nul1
- csel len, len, tmp2, cc
- add len, len, tmp1, lsr 3
- ret
-
-L(nonascii_loop):
- ldp data1, data2, [src, 16]!
- sub tmp1, data1, zeroones
- orr tmp2, data1, REP8_7f
- sub tmp3, data2, zeroones
- orr tmp4, data2, REP8_7f
- bics has_nul1, tmp1, tmp2
- bic has_nul2, tmp3, tmp4
- ccmp has_nul2, 0, 0, eq
- bne L(tail)
- ldp data1, data2, [src, 16]!
- sub tmp1, data1, zeroones
- orr tmp2, data1, REP8_7f
- sub tmp3, data2, zeroones
- orr tmp4, data2, REP8_7f
- bics has_nul1, tmp1, tmp2
- bic has_nul2, tmp3, tmp4
- ccmp has_nul2, 0, 0, eq
- beq L(nonascii_loop)
- b L(tail)
-
- /* Load 16 bytes from [srcin & ~15] and force the bytes that precede
- srcin to 0x7f, so we ignore any NUL bytes before the string.
- Then continue in the aligned loop. */
-L(page_cross):
- bic src, srcin, 15
- ldp data1, data2, [src]
- lsl tmp1, srcin, 3
- mov tmp4, -1
-#ifdef __AARCH64EB__
- /* Big-endian. Early bytes are at MSB. */
- lsr tmp1, tmp4, tmp1 /* Shift (tmp1 & 63). */
-#else
- /* Little-endian. Early bytes are at LSB. */
- lsl tmp1, tmp4, tmp1 /* Shift (tmp1 & 63). */
-#endif
- orr tmp1, tmp1, REP8_80
- orn data1, data1, tmp1
- orn tmp2, data2, tmp1
- tst srcin, 8
- csel data1, data1, tmp4, eq
- csel data2, data2, tmp2, eq
- b L(page_cross_entry)
-
- .size strlen, . - strlen
diff --git a/contrib/cortex-strings/src/aarch64/strncmp.S b/contrib/cortex-strings/src/aarch64/strncmp.S
deleted file mode 100644
--- a/contrib/cortex-strings/src/aarch64/strncmp.S
+++ /dev/null
@@ -1,222 +0,0 @@
-/* Copyright (c) 2013, Linaro Limited
- All rights reserved.
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are met:
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in the
- documentation and/or other materials provided with the distribution.
- * Neither the name of the Linaro nor the
- names of its contributors may be used to endorse or promote products
- derived from this software without specific prior written permission.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
-
-/* Assumptions:
- *
- * ARMv8-a, AArch64
- */
-
- .macro def_fn f p2align=0
- .text
- .p2align \p2align
- .global \f
- .type \f, %function
-\f:
- .endm
-
-#define REP8_01 0x0101010101010101
-#define REP8_7f 0x7f7f7f7f7f7f7f7f
-#define REP8_80 0x8080808080808080
-
-/* Parameters and result. */
-#define src1 x0
-#define src2 x1
-#define limit x2
-#define result x0
-
-/* Internal variables. */
-#define data1 x3
-#define data1w w3
-#define data2 x4
-#define data2w w4
-#define has_nul x5
-#define diff x6
-#define syndrome x7
-#define tmp1 x8
-#define tmp2 x9
-#define tmp3 x10
-#define zeroones x11
-#define pos x12
-#define limit_wd x13
-#define mask x14
-#define endloop x15
-
- .text
- .p2align 6
- .rep 7
- nop /* Pad so that the loop below fits a cache line. */
- .endr
-def_fn strncmp
- cbz limit, .Lret0 /* limit == 0: nothing to compare, return 0. */
- eor tmp1, src1, src2
- mov zeroones, #REP8_01
- tst tmp1, #7
- b.ne .Lmisaligned8 /* Low 3 address bits differ: byte-by-byte loop. */
- ands tmp1, src1, #7
- b.ne .Lmutual_align /* Same offset within a Dword, but not zero. */
- /* Calculate the number of full and partial words -1. */
- sub limit_wd, limit, #1 /* limit != 0, so no underflow. */
- lsr limit_wd, limit_wd, #3 /* Convert to Dwords. */
-
- /* NUL detection works on the principle that (X - 1) & (~X) & 0x80
- (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
- can be done in parallel across the entire word. */
- /* Start of performance-critical section -- one 64B cache line. */
-.Lloop_aligned:
- ldr data1, [src1], #8
- ldr data2, [src2], #8
-.Lstart_realigned:
- subs limit_wd, limit_wd, #1
- sub tmp1, data1, zeroones
- orr tmp2, data1, #REP8_7f
- eor diff, data1, data2 /* Non-zero if differences found. */
- csinv endloop, diff, xzr, pl /* Last Dword or differences. */
- bics has_nul, tmp1, tmp2 /* Non-zero if NUL terminator. */
- ccmp endloop, #0, #0, eq
- b.eq .Lloop_aligned
- /* End of performance-critical section -- one 64B cache line. */
-
- /* Not reached the limit, must have found the end or a diff. */
- tbz limit_wd, #63, .Lnot_limit
-
- /* Limit % 8 == 0 => all bytes significant. */
- ands limit, limit, #7
- b.eq .Lnot_limit
-
- lsl limit, limit, #3 /* Bytes -> bits. */
- mov mask, #~0
-#ifdef __AARCH64EB__
- lsr mask, mask, limit
-#else
- lsl mask, mask, limit
-#endif
- bic data1, data1, mask
- bic data2, data2, mask
-
- /* Make sure that the NUL byte is marked in the syndrome. */
- orr has_nul, has_nul, mask
-
-.Lnot_limit:
- orr syndrome, diff, has_nul
-
-#ifndef __AARCH64EB__
- rev syndrome, syndrome
- rev data1, data1
- /* The MS-non-zero bit of the syndrome marks either the first bit
- that is different, or the top bit of the first zero byte.
- Shifting left now will bring the critical information into the
- top bits. */
- clz pos, syndrome
- rev data2, data2
- lsl data1, data1, pos
- lsl data2, data2, pos
- /* But we need to zero-extend (char is unsigned) the value and then
- perform a signed 32-bit subtraction. */
- lsr data1, data1, #56
- sub result, data1, data2, lsr #56
- ret
-#else
- /* For big-endian we cannot use the trick with the syndrome value
- as carry-propagation can corrupt the upper bits if the trailing
- bytes in the string contain 0x01. */
- /* However, if there is no NUL byte in the dword, we can generate
- the result directly. We can't just subtract the bytes as the
- MSB might be significant. */
- cbnz has_nul, 1f
- cmp data1, data2
- cset result, ne
- cneg result, result, lo
- ret
-1:
- /* Re-compute the NUL-byte detection, using a byte-reversed value. */
- rev tmp3, data1
- sub tmp1, tmp3, zeroones
- orr tmp2, tmp3, #REP8_7f
- bic has_nul, tmp1, tmp2
- rev has_nul, has_nul
- orr syndrome, diff, has_nul
- clz pos, syndrome
- /* The MS-non-zero bit of the syndrome marks either the first bit
- that is different, or the top bit of the first zero byte.
- Shifting left now will bring the critical information into the
- top bits. */
- lsl data1, data1, pos
- lsl data2, data2, pos
- /* But we need to zero-extend (char is unsigned) the value and then
- perform a signed 32-bit subtraction. */
- lsr data1, data1, #56
- sub result, data1, data2, lsr #56
- ret
-#endif
-
-.Lmutual_align:
- /* Sources are mutually aligned, but are not currently at an
- alignment boundary. Round down the addresses and then mask off
- the bytes that precede the start point.
- We also need to adjust the limit calculations, but without
- overflowing if the limit is near ULONG_MAX. */
- bic src1, src1, #7
- bic src2, src2, #7
- ldr data1, [src1], #8
- neg tmp3, tmp1, lsl #3 /* 64 - bits(bytes beyond align). */
- ldr data2, [src2], #8
- mov tmp2, #~0
- sub limit_wd, limit, #1 /* limit != 0, so no underflow. */
-#ifdef __AARCH64EB__
- /* Big-endian. Early bytes are at MSB. */
- lsl tmp2, tmp2, tmp3 /* Shift (tmp3 & 63). */
-#else
- /* Little-endian. Early bytes are at LSB. */
- lsr tmp2, tmp2, tmp3 /* Shift (tmp3 & 63). */
-#endif
- and tmp3, limit_wd, #7
- lsr limit_wd, limit_wd, #3
- /* Adjust the limit. Only low 3 bits used, so overflow irrelevant. */
- add limit, limit, tmp1
- add tmp3, tmp3, tmp1
- orr data1, data1, tmp2
- orr data2, data2, tmp2
- add limit_wd, limit_wd, tmp3, lsr #3
- b .Lstart_realigned
-
-.Lret0:
- mov result, #0
- ret
-
- .p2align 6
-.Lmisaligned8:
- sub limit, limit, #1
-1:
- /* Perhaps we can do better than this. */
- ldrb data1w, [src1], #1
- ldrb data2w, [src2], #1
- subs limit, limit, #1
- ccmp data1w, #1, #0, cs /* NZCV = 0b0000. */
- ccmp data1w, data2w, #0, cs /* NZCV = 0b0000. */
- b.eq 1b
- sub result, data1, data2
- ret
- .size strncmp, . - strncmp
diff --git a/contrib/cortex-strings/src/aarch64/strnlen.S b/contrib/cortex-strings/src/aarch64/strnlen.S
deleted file mode 100644
--- a/contrib/cortex-strings/src/aarch64/strnlen.S
+++ /dev/null
@@ -1,181 +0,0 @@
-/* strnlen - calculate the length of a string with limit.
-
- Copyright (c) 2013, Linaro Limited
- All rights reserved.
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are met:
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in the
- documentation and/or other materials provided with the distribution.
- * Neither the name of the Linaro nor the
- names of its contributors may be used to endorse or promote products
- derived from this software without specific prior written permission.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
-
-/* Assumptions:
- *
- * ARMv8-a, AArch64
- */
-
-/* Arguments and results. */
-#define srcin x0
-#define len x0
-#define limit x1
-
-/* Locals and temporaries. */
-#define src x2
-#define data1 x3
-#define data2 x4
-#define data2a x5
-#define has_nul1 x6
-#define has_nul2 x7
-#define tmp1 x8
-#define tmp2 x9
-#define tmp3 x10
-#define tmp4 x11
-#define zeroones x12
-#define pos x13
-#define limit_wd x14
-
- .macro def_fn f p2align=0
- .text
- .p2align \p2align
- .global \f
- .type \f, %function
-\f:
- .endm
-
-#define REP8_01 0x0101010101010101
-#define REP8_7f 0x7f7f7f7f7f7f7f7f
-#define REP8_80 0x8080808080808080
-
- .text
- .p2align 6
-.Lstart:
- /* Pre-pad to ensure critical loop begins an icache line. */
- .rep 7
- nop
- .endr
- /* Put this code here to avoid wasting more space with pre-padding. */
-.Lhit_limit:
- mov len, limit
- ret
-
-def_fn strnlen
- cbz limit, .Lhit_limit /* limit == 0: return limit (0). */
- mov zeroones, #REP8_01
- bic src, srcin, #15
- ands tmp1, srcin, #15
- b.ne .Lmisaligned
- /* Calculate the number of full and partial words -1. */
- sub limit_wd, limit, #1 /* Limit != 0, so no underflow. */
- lsr limit_wd, limit_wd, #4 /* Convert to Qwords. */
-
- /* NUL detection works on the principle that (X - 1) & (~X) & 0x80
- (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
- can be done in parallel across the entire word. */
- /* The inner loop deals with two Dwords at a time. This has a
- slightly higher start-up cost, but we should win quite quickly,
- especially on cores with a high number of issue slots per
- cycle, as we get much better parallelism out of the operations. */
-
- /* Start of critical section -- keep to one 64Byte cache line. */
-.Lloop:
- ldp data1, data2, [src], #16
-.Lrealigned:
- sub tmp1, data1, zeroones
- orr tmp2, data1, #REP8_7f
- sub tmp3, data2, zeroones
- orr tmp4, data2, #REP8_7f
- bic has_nul1, tmp1, tmp2
- bic has_nul2, tmp3, tmp4
- subs limit_wd, limit_wd, #1
- orr tmp1, has_nul1, has_nul2
- ccmp tmp1, #0, #0, pl /* NZCV = 0000 */
- b.eq .Lloop
- /* End of critical section -- keep to one 64Byte cache line. */
-
- orr tmp1, has_nul1, has_nul2
- cbz tmp1, .Lhit_limit /* No null in final Qword. */
-
- /* We know there's a null in the final Qword. The easiest thing
- to do now is work out the length of the string and return
- MIN (len, limit). */
-
- sub len, src, srcin
- cbz has_nul1, .Lnul_in_data2
-#ifdef __AARCH64EB__
- mov data2, data1
-#endif
- sub len, len, #8
- mov has_nul2, has_nul1
-.Lnul_in_data2:
-#ifdef __AARCH64EB__
- /* For big-endian, carry propagation (if the final byte in the
- string is 0x01) means we cannot use has_nul directly. The
- easiest way to get the correct byte is to byte-swap the data
- and calculate the syndrome a second time. */
- rev data2, data2
- sub tmp1, data2, zeroones
- orr tmp2, data2, #REP8_7f
- bic has_nul2, tmp1, tmp2
-#endif
- sub len, len, #8
- rev has_nul2, has_nul2
- clz pos, has_nul2
- add len, len, pos, lsr #3 /* Bits to bytes. */
- cmp len, limit
- csel len, len, limit, ls /* Return the lower value. */
- ret
-
-.Lmisaligned:
- /* Deal with a partial first word.
- We're doing two things in parallel here;
- 1) Calculate the number of words (but avoiding overflow if
- limit is near ULONG_MAX) - to do this we need to work out
- limit + tmp1 - 1 as a 65-bit value before shifting it;
- 2) Load and mask the initial data words - we force the bytes
- before the ones we are interested in to 0xff - this ensures
- early bytes will not hit any zero detection. */
- sub limit_wd, limit, #1
- neg tmp4, tmp1
- cmp tmp1, #8 /* Flags are consumed by the csinv/csel at the end. */
-
- and tmp3, limit_wd, #15
- lsr limit_wd, limit_wd, #4
- mov tmp2, #~0
-
- ldp data1, data2, [src], #16
- lsl tmp4, tmp4, #3 /* Bytes beyond alignment -> bits. */
- add tmp3, tmp3, tmp1
-
-#ifdef __AARCH64EB__
- /* Big-endian. Early bytes are at MSB. */
- lsl tmp2, tmp2, tmp4 /* Shift (tmp4 & 63). */
-#else
- /* Little-endian. Early bytes are at LSB. */
- lsr tmp2, tmp2, tmp4 /* Shift (tmp4 & 63). */
-#endif
- add limit_wd, limit_wd, tmp3, lsr #4
-
- orr data1, data1, tmp2
- orr data2a, data2, tmp2
-
- csinv data1, data1, xzr, le /* Offset > 8: data1 is all padding, force to all ones. */
- csel data2, data2, data2a, le /* Offset > 8: use the masked copy of data2. */
- b .Lrealigned
- .size strnlen, . - .Lstart /* Include pre-padding in size. */
diff --git a/contrib/cortex-strings/src/arm/memchr.S b/contrib/cortex-strings/src/arm/memchr.S
deleted file mode 100644
--- a/contrib/cortex-strings/src/arm/memchr.S
+++ /dev/null
@@ -1,155 +0,0 @@
-/* Copyright (c) 2010-2011, Linaro Limited
- All rights reserved.
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions
- are met:
-
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
-
- * Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in the
- documentation and/or other materials provided with the distribution.
-
- * Neither the name of Linaro Limited nor the names of its
- contributors may be used to endorse or promote products derived
- from this software without specific prior written permission.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-/*
- Written by Dave Gilbert <david.gilbert@linaro.org>
-
- This memchr routine is optimised on a Cortex-A9 and should work on
- all ARMv7 processors. It has a fast path for short sizes, and has
- an optimised path for large data sets; the worst case is finding the
- match early in a large data set.
-
- */
-
-@ 2011-02-07 david.gilbert@linaro.org
-@ Extracted from local git a5b438d861
-@ 2011-07-14 david.gilbert@linaro.org
-@ Import endianness fix from local git ea786f1b
-@ 2011-12-07 david.gilbert@linaro.org
-@ Removed unneeded cbz from align loop
-
- .syntax unified
- .arch armv7-a
-
-@ this lets us check a flag in a 00/ff byte easily in either endianness
-#ifdef __ARMEB__
-#define CHARTSTMASK(c) 1<<(31-(c*8))
-#else
-#define CHARTSTMASK(c) 1<<(c*8)
-#endif
- .text
- .thumb
-
-@ ---------------------------------------------------------------------------
- .thumb_func
- .align 2
- .p2align 4,,15
- .global memchr
- .type memchr,%function
-memchr:
- @ r0 = start of memory to scan
- @ r1 = character to look for
- @ r2 = length
- @ returns r0 = pointer to character or NULL if not found
- and r1,r1,#0xff @ Don't think we can trust the caller to actually pass a char
-
- cmp r2,#16 @ If it's short don't bother with anything clever
- blt 20f
-
- tst r0, #7 @ If it's already aligned skip the next bit
- beq 10f
-
- @ Work up to an aligned point
-5:
- ldrb r3, [r0],#1
- subs r2, r2, #1
- cmp r3, r1
- beq 50f @ If it matches exit found
- tst r0, #7
- bne 5b @ If not aligned yet then do next byte
-
-10:
- @ At this point, we are aligned, we know we have at least 8 bytes to work with
- push {r4,r5,r6,r7}
- orr r1, r1, r1, lsl #8 @ expand the match word across to all bytes
- orr r1, r1, r1, lsl #16
- bic r4, r2, #7 @ Number of double words to work with
- mvns r7, #0 @ all F's
- movs r3, #0
-
-15:
- ldmia r0!,{r5,r6}
- subs r4, r4, #8
- eor r5,r5, r1 @ Get it so that r5,r6 have 00's where the bytes match the target
- eor r6,r6, r1
- uadd8 r5, r5, r7 @ Parallel add 0xff - sets the GE bits for anything that wasn't 0
- sel r5, r3, r7 @ bytes are 00 for non-00 bytes, or ff for 00 bytes - NOTE INVERSION
- uadd8 r6, r6, r7 @ Parallel add 0xff - sets the GE bits for anything that wasn't 0
- sel r6, r5, r7 @ chained....bytes are 00 for non-00 bytes, or ff for 00 bytes - NOTE INVERSION
- cbnz r6, 60f
- bne 15b @ (Flags from the subs above) If not run out of bytes then go around again
-
- pop {r4,r5,r6,r7}
- and r1,r1,#0xff @ Get r1 back to a single character from the expansion above
- and r2,r2,#7 @ Leave the count remaining as the number after the double words have been done
-
-20:
- cbz r2, 40f @ 0 length or hit the end already then not found
-
-21: @ Post aligned section, or just a short call
- ldrb r3,[r0],#1
- subs r2,r2,#1
- eor r3,r3,r1 @ r3 = 0 if match - doesn't break flags from sub
- cbz r3, 50f
- bne 21b @ on r2 flags
-
-40:
- movs r0,#0 @ not found
- bx lr
-
-50:
- subs r0,r0,#1 @ found
- bx lr
-
-60: @ We're here because the fast path found a hit - now we have to track down exactly which word it was
- @ r0 points to the start of the double word after the one that was tested
- @ r5 has the 00/ff pattern for the first word, r6 has the chained value
- cmp r5, #0
- itte eq
- moveq r5, r6 @ the end is in the 2nd word
- subeq r0,r0,#3 @ Points to 2nd byte of 2nd word
- subne r0,r0,#7 @ or 2nd byte of 1st word
-
- @ r0 currently points to the 2nd byte of the word containing the hit
- tst r5, # CHARTSTMASK(0) @ 1st character
- bne 61f
- adds r0,r0,#1
- tst r5, # CHARTSTMASK(1) @ 2nd character
- ittt eq
- addeq r0,r0,#1
- tsteq r5, # (3<<15) @ 2nd & 3rd character
- @ If not the 3rd must be the last one
- addeq r0,r0,#1
-
-61:
- pop {r4,r5,r6,r7}
- subs r0,r0,#1 @ r0 is one past the matching byte here; step back onto it
- bx lr
diff --git a/contrib/cortex-strings/src/arm/memcpy.S b/contrib/cortex-strings/src/arm/memcpy.S
deleted file mode 100644
--- a/contrib/cortex-strings/src/arm/memcpy.S
+++ /dev/null
@@ -1,617 +0,0 @@
-/* Copyright (c) 2013, Linaro Limited
- All rights reserved.
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions
- are met:
-
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
-
- * Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in the
- documentation and/or other materials provided with the distribution.
-
- * Neither the name of Linaro Limited nor the names of its
- contributors may be used to endorse or promote products derived
- from this software without specific prior written permission.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-/*
- This memcpy routine is optimised for Cortex-A15 cores and takes advantage
- of VFP or NEON when built with the appropriate flags.
-
- Assumptions:
-
- ARMv6 (ARMv7-a if using Neon)
- ARM state
- Unaligned accesses
-
- */
-
- .syntax unified
- /* This implementation requires ARM state. */
- .arm
-
-#ifdef __ARM_NEON__
-
- .fpu neon
- .arch armv7-a
-# define FRAME_SIZE 4
-# define USE_VFP
-# define USE_NEON
-
-#elif !defined (__SOFTFP__)
-
- .arch armv6
- .fpu vfpv2
-# define FRAME_SIZE 32
-# define USE_VFP
-
-#else
- .arch armv6
-# define FRAME_SIZE 32
-
-#endif
-
-/* Old versions of GAS incorrectly implement the NEON align semantics. */
-#ifdef BROKEN_ASM_NEON_ALIGN
-#define ALIGN(addr, align) addr,:align
-#else
-#define ALIGN(addr, align) addr:align
-#endif
-
-#define PC_OFFSET 8 /* PC pipeline compensation. */
-#define INSN_SIZE 4
-
-/* Call parameters. */
-#define dstin r0
-#define src r1
-#define count r2
-
-/* Locals. */
-#define tmp1 r3
-#define dst ip
-#define tmp2 r10
-
-#ifndef USE_NEON
-/* For bulk copies using GP registers. */
-#define A_l r2 /* Call-clobbered. */
-#define A_h r3 /* Call-clobbered. */
-#define B_l r4
-#define B_h r5
-#define C_l r6
-#define C_h r7
-#define D_l r8
-#define D_h r9
-#endif
-
-/* Number of lines ahead to pre-fetch data. If you change this the code
- below will need adjustment to compensate. */
-
-#define prefetch_lines 5
-
-#ifdef USE_VFP
- .macro cpy_line_vfp vreg, base
- vstr \vreg, [dst, #\base]
- vldr \vreg, [src, #\base]
- vstr d0, [dst, #\base + 8]
- vldr d0, [src, #\base + 8]
- vstr d1, [dst, #\base + 16]
- vldr d1, [src, #\base + 16]
- vstr d2, [dst, #\base + 24]
- vldr d2, [src, #\base + 24]
- vstr \vreg, [dst, #\base + 32]
- vldr \vreg, [src, #\base + prefetch_lines * 64 - 32]
- vstr d0, [dst, #\base + 40]
- vldr d0, [src, #\base + 40]
- vstr d1, [dst, #\base + 48]
- vldr d1, [src, #\base + 48]
- vstr d2, [dst, #\base + 56]
- vldr d2, [src, #\base + 56]
- .endm
-
- .macro cpy_tail_vfp vreg, base
- vstr \vreg, [dst, #\base]
- vldr \vreg, [src, #\base]
- vstr d0, [dst, #\base + 8]
- vldr d0, [src, #\base + 8]
- vstr d1, [dst, #\base + 16]
- vldr d1, [src, #\base + 16]
- vstr d2, [dst, #\base + 24]
- vldr d2, [src, #\base + 24]
- vstr \vreg, [dst, #\base + 32]
- vstr d0, [dst, #\base + 40]
- vldr d0, [src, #\base + 40]
- vstr d1, [dst, #\base + 48]
- vldr d1, [src, #\base + 48]
- vstr d2, [dst, #\base + 56]
- vldr d2, [src, #\base + 56]
- .endm
-#endif
-
- .macro def_fn f p2align=0
- .text
- .p2align \p2align
- .global \f
- .type \f, %function
-\f:
- .endm
-
-def_fn memcpy p2align=6
-
- mov dst, dstin /* Preserve dstin, we need to return it. */
- cmp count, #64
- bge .Lcpy_not_short
- /* Deal with small copies quickly by dropping straight into the
- exit block. */
-
-.Ltail63unaligned:
-#ifdef USE_NEON
- and tmp1, count, #0x38
- rsb tmp1, tmp1, #(56 - PC_OFFSET + INSN_SIZE)
- add pc, pc, tmp1 /* Computed jump into the copy sequence below. */
- vld1.8 {d0}, [src]! /* 14 words to go. */
- vst1.8 {d0}, [dst]!
- vld1.8 {d0}, [src]! /* 12 words to go. */
- vst1.8 {d0}, [dst]!
- vld1.8 {d0}, [src]! /* 10 words to go. */
- vst1.8 {d0}, [dst]!
- vld1.8 {d0}, [src]! /* 8 words to go. */
- vst1.8 {d0}, [dst]!
- vld1.8 {d0}, [src]! /* 6 words to go. */
- vst1.8 {d0}, [dst]!
- vld1.8 {d0}, [src]! /* 4 words to go. */
- vst1.8 {d0}, [dst]!
- vld1.8 {d0}, [src]! /* 2 words to go. */
- vst1.8 {d0}, [dst]!
-
- tst count, #4
- ldrne tmp1, [src], #4
- strne tmp1, [dst], #4
-#else
- /* Copy up to 15 full words of data. May not be aligned. */
- /* Cannot use VFP for unaligned data. */
- and tmp1, count, #0x3c
- add dst, dst, tmp1
- add src, src, tmp1
- rsb tmp1, tmp1, #(60 - PC_OFFSET/2 + INSN_SIZE/2)
- /* Jump directly into the sequence below at the correct offset. */
- add pc, pc, tmp1, lsl #1
-
- ldr tmp1, [src, #-60] /* 15 words to go. */
- str tmp1, [dst, #-60]
-
- ldr tmp1, [src, #-56] /* 14 words to go. */
- str tmp1, [dst, #-56]
- ldr tmp1, [src, #-52]
- str tmp1, [dst, #-52]
-
- ldr tmp1, [src, #-48] /* 12 words to go. */
- str tmp1, [dst, #-48]
- ldr tmp1, [src, #-44]
- str tmp1, [dst, #-44]
-
- ldr tmp1, [src, #-40] /* 10 words to go. */
- str tmp1, [dst, #-40]
- ldr tmp1, [src, #-36]
- str tmp1, [dst, #-36]
-
- ldr tmp1, [src, #-32] /* 8 words to go. */
- str tmp1, [dst, #-32]
- ldr tmp1, [src, #-28]
- str tmp1, [dst, #-28]
-
- ldr tmp1, [src, #-24] /* 6 words to go. */
- str tmp1, [dst, #-24]
- ldr tmp1, [src, #-20]
- str tmp1, [dst, #-20]
-
- ldr tmp1, [src, #-16] /* 4 words to go. */
- str tmp1, [dst, #-16]
- ldr tmp1, [src, #-12]
- str tmp1, [dst, #-12]
-
- ldr tmp1, [src, #-8] /* 2 words to go. */
- str tmp1, [dst, #-8]
- ldr tmp1, [src, #-4]
- str tmp1, [dst, #-4]
-#endif
-
- lsls count, count, #31 /* C = bit 1 of count; NE if bit 0 is set. */
- ldrhcs tmp1, [src], #2
- ldrbne src, [src] /* Src is dead, use as a scratch. */
- strhcs tmp1, [dst], #2
- strbne src, [dst]
- bx lr
-
-.Lcpy_not_short:
- /* At least 64 bytes to copy, but don't know the alignment yet. */
- str tmp2, [sp, #-FRAME_SIZE]! /* Allocate the frame and save tmp2 (r10). */
- and tmp2, src, #7
- and tmp1, dst, #7
- cmp tmp1, tmp2
- bne .Lcpy_notaligned
-
-#ifdef USE_VFP
- /* Magic dust alert! Force VFP on Cortex-A9. Experiments show
- that the FP pipeline is much better at streaming loads and
- stores. This is outside the critical loop. */
- vmov.f32 s0, s0
-#endif
-
- /* SRC and DST have the same mutual 64-bit alignment, but we may
- still need to pre-copy some bytes to get to natural alignment.
- We bring SRC and DST into full 64-bit alignment. */
- lsls tmp2, dst, #29
- beq 1f
- rsbs tmp2, tmp2, #0
- sub count, count, tmp2, lsr #29 /* tmp2 >> 29 = bytes needed to reach 8-byte alignment. */
- ldrmi tmp1, [src], #4
- strmi tmp1, [dst], #4
- lsls tmp2, tmp2, #2
- ldrhcs tmp1, [src], #2
- ldrbne tmp2, [src], #1
- strhcs tmp1, [dst], #2
- strbne tmp2, [dst], #1
-
-1:
- subs tmp2, count, #64 /* Use tmp2 for count. */
- blt .Ltail63aligned
-
- cmp tmp2, #512
- bge .Lcpy_body_long
-
-.Lcpy_body_medium: /* Count in tmp2. */
-#ifdef USE_VFP
-1:
- vldr d0, [src, #0]
- subs tmp2, tmp2, #64
- vldr d1, [src, #8]
- vstr d0, [dst, #0]
- vldr d0, [src, #16]
- vstr d1, [dst, #8]
- vldr d1, [src, #24]
- vstr d0, [dst, #16]
- vldr d0, [src, #32]
- vstr d1, [dst, #24]
- vldr d1, [src, #40]
- vstr d0, [dst, #32]
- vldr d0, [src, #48]
- vstr d1, [dst, #40]
- vldr d1, [src, #56]
- vstr d0, [dst, #48]
- add src, src, #64
- vstr d1, [dst, #56]
- add dst, dst, #64
- bge 1b
- tst tmp2, #0x3f
- beq .Ldone
-
-.Ltail63aligned: /* Count in tmp2. */
- and tmp1, tmp2, #0x38
- add dst, dst, tmp1
- add src, src, tmp1
- rsb tmp1, tmp1, #(56 - PC_OFFSET + INSN_SIZE)
- add pc, pc, tmp1 /* Computed jump into the copy sequence below. */
-
- vldr d0, [src, #-56] /* 14 words to go. */
- vstr d0, [dst, #-56]
- vldr d0, [src, #-48] /* 12 words to go. */
- vstr d0, [dst, #-48]
- vldr d0, [src, #-40] /* 10 words to go. */
- vstr d0, [dst, #-40]
- vldr d0, [src, #-32] /* 8 words to go. */
- vstr d0, [dst, #-32]
- vldr d0, [src, #-24] /* 6 words to go. */
- vstr d0, [dst, #-24]
- vldr d0, [src, #-16] /* 4 words to go. */
- vstr d0, [dst, #-16]
- vldr d0, [src, #-8] /* 2 words to go. */
- vstr d0, [dst, #-8]
-#else
- sub src, src, #8
- sub dst, dst, #8
-1:
- ldrd A_l, A_h, [src, #8]
- strd A_l, A_h, [dst, #8]
- ldrd A_l, A_h, [src, #16]
- strd A_l, A_h, [dst, #16]
- ldrd A_l, A_h, [src, #24]
- strd A_l, A_h, [dst, #24]
- ldrd A_l, A_h, [src, #32]
- strd A_l, A_h, [dst, #32]
- ldrd A_l, A_h, [src, #40]
- strd A_l, A_h, [dst, #40]
- ldrd A_l, A_h, [src, #48]
- strd A_l, A_h, [dst, #48]
- ldrd A_l, A_h, [src, #56]
- strd A_l, A_h, [dst, #56]
- ldrd A_l, A_h, [src, #64]!
- strd A_l, A_h, [dst, #64]!
- subs tmp2, tmp2, #64
- bge 1b
- tst tmp2, #0x3f
- bne 1f
- ldr tmp2,[sp], #FRAME_SIZE /* Restore tmp2 and pop the frame. */
- bx lr
-1:
- add src, src, #8
- add dst, dst, #8
-
-.Ltail63aligned: /* Count in tmp2. */
- /* Copy up to 7 d-words of data. Similar to Ltail63unaligned, but
- we know that the src and dest are 64-bit aligned so we can use
- LDRD/STRD to improve efficiency. */
- /* TMP2 is now negative, but we don't care about that. The bottom
- six bits still tell us how many bytes are left to copy. */
-
- and tmp1, tmp2, #0x38
- add dst, dst, tmp1
- add src, src, tmp1
- rsb tmp1, tmp1, #(56 - PC_OFFSET + INSN_SIZE)
- add pc, pc, tmp1 /* Computed jump into the copy sequence below. */
- ldrd A_l, A_h, [src, #-56] /* 14 words to go. */
- strd A_l, A_h, [dst, #-56]
- ldrd A_l, A_h, [src, #-48] /* 12 words to go. */
- strd A_l, A_h, [dst, #-48]
- ldrd A_l, A_h, [src, #-40] /* 10 words to go. */
- strd A_l, A_h, [dst, #-40]
- ldrd A_l, A_h, [src, #-32] /* 8 words to go. */
- strd A_l, A_h, [dst, #-32]
- ldrd A_l, A_h, [src, #-24] /* 6 words to go. */
- strd A_l, A_h, [dst, #-24]
- ldrd A_l, A_h, [src, #-16] /* 4 words to go. */
- strd A_l, A_h, [dst, #-16]
- ldrd A_l, A_h, [src, #-8] /* 2 words to go. */
- strd A_l, A_h, [dst, #-8]
-
-#endif
- tst tmp2, #4
- ldrne tmp1, [src], #4
- strne tmp1, [dst], #4
- lsls tmp2, tmp2, #31 /* Count (tmp2) now dead. */
- ldrhcs tmp1, [src], #2
- ldrbne tmp2, [src]
- strhcs tmp1, [dst], #2
- strbne tmp2, [dst]
-
-.Ldone:
- ldr tmp2, [sp], #FRAME_SIZE /* Restore tmp2 and pop the frame. */
- bx lr
-
-.Lcpy_body_long: /* Count in tmp2. */
-
- /* Long copy. We know that there's at least (prefetch_lines * 64)
- bytes to go. */
-#ifdef USE_VFP
- /* Don't use PLD. Instead, read some data in advance of the current
- copy position into a register. This should act like a PLD
- operation but we won't have to repeat the transfer. */
-
- vldr d3, [src, #0]
- vldr d4, [src, #64]
- vldr d5, [src, #128]
- vldr d6, [src, #192]
- vldr d7, [src, #256]
-
- vldr d0, [src, #8]
- vldr d1, [src, #16]
- vldr d2, [src, #24]
- add src, src, #32
-
- subs tmp2, tmp2, #prefetch_lines * 64 * 2
- blt 2f
-1:
- cpy_line_vfp d3, 0
- cpy_line_vfp d4, 64
- cpy_line_vfp d5, 128
- add dst, dst, #3 * 64
- add src, src, #3 * 64
- cpy_line_vfp d6, 0
- cpy_line_vfp d7, 64
- add dst, dst, #2 * 64
- add src, src, #2 * 64
- subs tmp2, tmp2, #prefetch_lines * 64
- bge 1b
-
-2:
- cpy_tail_vfp d3, 0
- cpy_tail_vfp d4, 64
- cpy_tail_vfp d5, 128
- add src, src, #3 * 64
- add dst, dst, #3 * 64
- cpy_tail_vfp d6, 0
- vstr d7, [dst, #64]
- vldr d7, [src, #64]
- vstr d0, [dst, #64 + 8]
- vldr d0, [src, #64 + 8]
- vstr d1, [dst, #64 + 16]
- vldr d1, [src, #64 + 16]
- vstr d2, [dst, #64 + 24]
- vldr d2, [src, #64 + 24]
- vstr d7, [dst, #64 + 32]
- add src, src, #96
- vstr d0, [dst, #64 + 40]
- vstr d1, [dst, #64 + 48]
- vstr d2, [dst, #64 + 56]
- add dst, dst, #128
- add tmp2, tmp2, #prefetch_lines * 64
- b .Lcpy_body_medium
-#else
- /* Long copy. Use an SMS style loop to maximize the I/O
- bandwidth of the core. We don't have enough spare registers
- to synthesise prefetching, so use PLD operations. */
- /* Pre-bias src and dst. */
- sub src, src, #8
- sub dst, dst, #8
- pld [src, #8]
- pld [src, #72]
- subs tmp2, tmp2, #64
- pld [src, #136]
- ldrd A_l, A_h, [src, #8]
- strd B_l, B_h, [sp, #8] /* Spill B/C/D (r4-r9) to the frame; restored after the loop. */
- ldrd B_l, B_h, [src, #16]
- strd C_l, C_h, [sp, #16]
- ldrd C_l, C_h, [src, #24]
- strd D_l, D_h, [sp, #24]
- pld [src, #200]
- ldrd D_l, D_h, [src, #32]!
- b 1f
- .p2align 6
-2:
- pld [src, #232]
- strd A_l, A_h, [dst, #40]
- ldrd A_l, A_h, [src, #40]
- strd B_l, B_h, [dst, #48]
- ldrd B_l, B_h, [src, #48]
- strd C_l, C_h, [dst, #56]
- ldrd C_l, C_h, [src, #56]
- strd D_l, D_h, [dst, #64]!
- ldrd D_l, D_h, [src, #64]!
- subs tmp2, tmp2, #64
-1:
- strd A_l, A_h, [dst, #8]
- ldrd A_l, A_h, [src, #8]
- strd B_l, B_h, [dst, #16]
- ldrd B_l, B_h, [src, #16]
- strd C_l, C_h, [dst, #24]
- ldrd C_l, C_h, [src, #24]
- strd D_l, D_h, [dst, #32]
- ldrd D_l, D_h, [src, #32]
- bcs 2b /* Carry comes from the most recent subs on tmp2. */
- /* Save the remaining bytes and restore the callee-saved regs. */
- strd A_l, A_h, [dst, #40]
- add src, src, #40
- strd B_l, B_h, [dst, #48]
- ldrd B_l, B_h, [sp, #8]
- strd C_l, C_h, [dst, #56]
- ldrd C_l, C_h, [sp, #16]
- strd D_l, D_h, [dst, #64]
- ldrd D_l, D_h, [sp, #24]
- add dst, dst, #72
- tst tmp2, #0x3f
- bne .Ltail63aligned
- ldr tmp2, [sp], #FRAME_SIZE /* Restore tmp2 and pop the frame. */
- bx lr
-#endif
-
-.Lcpy_notaligned:
- pld [src]
- pld [src, #64]
- /* There's at least 64 bytes to copy, but there is no mutual
- alignment. */
- /* Bring DST to 64-bit alignment. */
- lsls tmp2, dst, #29
- pld [src, #(2 * 64)]
- beq 1f
- rsbs tmp2, tmp2, #0
- sub count, count, tmp2, lsr #29 /* tmp2 >> 29 = bytes needed to reach 8-byte alignment. */
- ldrmi tmp1, [src], #4
- strmi tmp1, [dst], #4
- lsls tmp2, tmp2, #2
- ldrbne tmp1, [src], #1
- ldrhcs tmp2, [src], #2
- strbne tmp1, [dst], #1
- strhcs tmp2, [dst], #2
-1:
- pld [src, #(3 * 64)]
- subs count, count, #64
- ldrmi tmp2, [sp], #FRAME_SIZE /* Under 64 bytes left: pop the frame before the tail copy. */
- bmi .Ltail63unaligned
- pld [src, #(4 * 64)]
-
-#ifdef USE_NEON
- vld1.8 {d0-d3}, [src]!
- vld1.8 {d4-d7}, [src]!
- subs count, count, #64
- bmi 2f
-1:
- pld [src, #(4 * 64)]
- vst1.8 {d0-d3}, [ALIGN (dst, 64)]!
- vld1.8 {d0-d3}, [src]!
- vst1.8 {d4-d7}, [ALIGN (dst, 64)]!
- vld1.8 {d4-d7}, [src]!
- subs count, count, #64
- bpl 1b
-2:
- vst1.8 {d0-d3}, [ALIGN (dst, 64)]!
- vst1.8 {d4-d7}, [ALIGN (dst, 64)]!
- ands count, count, #0x3f
-#else
- /* Use an SMS style loop to maximize the I/O bandwidth. */
- sub src, src, #4
- sub dst, dst, #8
- subs tmp2, count, #64 /* Use tmp2 for count. */
- ldr A_l, [src, #4]
- ldr A_h, [src, #8]
- strd B_l, B_h, [sp, #8] /* Spill B/C/D (r4-r9) to the frame; restored after the loop. */
- ldr B_l, [src, #12]
- ldr B_h, [src, #16]
- strd C_l, C_h, [sp, #16]
- ldr C_l, [src, #20]
- ldr C_h, [src, #24]
- strd D_l, D_h, [sp, #24]
- ldr D_l, [src, #28]
- ldr D_h, [src, #32]!
- b 1f
- .p2align 6
-2:
- pld [src, #(5 * 64) - (32 - 4)]
- strd A_l, A_h, [dst, #40]
- ldr A_l, [src, #36]
- ldr A_h, [src, #40]
- strd B_l, B_h, [dst, #48]
- ldr B_l, [src, #44]
- ldr B_h, [src, #48]
- strd C_l, C_h, [dst, #56]
- ldr C_l, [src, #52]
- ldr C_h, [src, #56]
- strd D_l, D_h, [dst, #64]!
- ldr D_l, [src, #60]
- ldr D_h, [src, #64]!
- subs tmp2, tmp2, #64
-1:
- strd A_l, A_h, [dst, #8]
- ldr A_l, [src, #4]
- ldr A_h, [src, #8]
- strd B_l, B_h, [dst, #16]
- ldr B_l, [src, #12]
- ldr B_h, [src, #16]
- strd C_l, C_h, [dst, #24]
- ldr C_l, [src, #20]
- ldr C_h, [src, #24]
- strd D_l, D_h, [dst, #32]
- ldr D_l, [src, #28]
- ldr D_h, [src, #32]
- bcs 2b /* Carry comes from the most recent subs on tmp2. */
-
- /* Save the remaining bytes and restore the callee-saved regs. */
- strd A_l, A_h, [dst, #40]
- add src, src, #36
- strd B_l, B_h, [dst, #48]
- ldrd B_l, B_h, [sp, #8]
- strd C_l, C_h, [dst, #56]
- ldrd C_l, C_h, [sp, #16]
- strd D_l, D_h, [dst, #64]
- ldrd D_l, D_h, [sp, #24]
- add dst, dst, #72
- ands count, tmp2, #0x3f
-#endif
- ldr tmp2, [sp], #FRAME_SIZE /* Restore tmp2 and pop the frame; ldr leaves the flags intact. */
- bne .Ltail63unaligned
- bx lr
-
- .size memcpy, . - memcpy
diff --git a/contrib/cortex-strings/src/arm/memset.S b/contrib/cortex-strings/src/arm/memset.S
deleted file mode 100644
--- a/contrib/cortex-strings/src/arm/memset.S
+++ /dev/null
@@ -1,122 +0,0 @@
-/* Copyright (c) 2010-2011, Linaro Limited
- All rights reserved.
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions
- are met:
-
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
-
- * Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in the
- documentation and/or other materials provided with the distribution.
-
- * Neither the name of Linaro Limited nor the names of its
- contributors may be used to endorse or promote products derived
- from this software without specific prior written permission.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-/*
- Written by Dave Gilbert <david.gilbert@linaro.org>
-
- This memset routine is optimised on a Cortex-A9 and should work on
- all ARMv7 processors.
-
- */
-
- .syntax unified
- .arch armv7-a
-
-@ 2011-08-30 david.gilbert@linaro.org
-@ Extracted from local git 2f11b436
-
-@ this lets us check a flag in a 00/ff byte easily in either endianness
-#ifdef __ARMEB__
-#define CHARTSTMASK(c) 1<<(31-(c*8))
-#else
-#define CHARTSTMASK(c) 1<<(c*8)
-#endif
- .text
- .thumb
-
-@ ---------------------------------------------------------------------------
- .thumb_func
- .align 2
- .p2align 4,,15
- .global memset
- .type memset,%function
-memset:
- @ r0 = address
- @ r1 = character
- @ r2 = count
- @ returns original address in r0
-
- mov r3, r0 @ Leave r0 alone
- cbz r2, 10f @ Exit if 0 length
-
- tst r0, #7
- beq 2f @ Already aligned
-
- @ Ok, so we're misaligned here
-1:
- strb r1, [r3], #1
- subs r2,r2,#1
- tst r3, #7
- cbz r2, 10f @ Exit if we hit the end
- bne 1b @ go round again if still misaligned
-
-2:
- @ OK, so we're aligned
- push {r4,r5,r6,r7}
- bics r4, r2, #15 @ if less than 16 bytes then need to finish it off
- beq 5f
-
-3:
- @ POSIX says that ch is cast to an unsigned char. A uxtb is one
- @ byte and takes two cycles, where an AND is four bytes but one
- @ cycle.
- and r1, #0xFF
- orr r1, r1, r1, lsl#8 @ Same character into all bytes
- orr r1, r1, r1, lsl#16
- mov r5,r1
- mov r6,r1
- mov r7,r1
-
-4:
- subs r4,r4,#16
- stmia r3!,{r1,r5,r6,r7}
- bne 4b
- and r2,r2,#15
-
- @ At this point we're still aligned and we have upto align-1 bytes left to right
- @ we can avoid some of the byte-at-a time now by testing for some big chunks
- tst r2,#8
- itt ne
- subne r2,r2,#8
- stmiane r3!,{r1,r5}
-
-5:
- pop {r4,r5,r6,r7}
- cbz r2, 10f
-
- @ Got to do any last < alignment bytes
-6:
- subs r2,r2,#1
- strb r1,[r3],#1
- bne 6b
-
-10:
- bx lr @ goodbye
diff --git a/contrib/cortex-strings/src/arm/strchr.S b/contrib/cortex-strings/src/arm/strchr.S
deleted file mode 100644
--- a/contrib/cortex-strings/src/arm/strchr.S
+++ /dev/null
@@ -1,80 +0,0 @@
-/* Copyright (c) 2010-2011, Linaro Limited
- All rights reserved.
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions
- are met:
-
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
-
- * Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in the
- documentation and/or other materials provided with the distribution.
-
- * Neither the name of Linaro Limited nor the names of its
- contributors may be used to endorse or promote products derived
- from this software without specific prior written permission.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-/*
- Written by Dave Gilbert <david.gilbert@linaro.org>
-
- A very simple strchr routine, from benchmarks on A9 it's a bit faster than
- the current version in eglibc (2.12.1-0ubuntu14 package)
- I don't think doing a word at a time version is worth it since a lot
- of strchr cases are very short anyway.
-
- */
-
-@ 2011-02-07 david.gilbert@linaro.org
-@ Extracted from local git a5b438d861
-
- .syntax unified
- .arch armv7-a
-
- .text
- .thumb
-
-@ ---------------------------------------------------------------------------
-
- .thumb_func
- .align 2
- .p2align 4,,15
- .global strchr
- .type strchr,%function
-strchr:
- @ r0 = start of string
- @ r1 = character to match
- @ returns NULL for no match, or a pointer to the match
- and r1,r1, #255
-
-1:
- ldrb r2,[r0],#1
- cmp r2,r1
- cbz r2,10f
- bne 1b
-
- @ We're here if it matched
-5:
- subs r0,r0,#1
- bx lr
-
-10:
- @ We're here if we ran off the end
- cmp r1, #0 @ Corner case - you're allowed to search for the nil and get a pointer to it
- beq 5b @ A bit messy, if it's common we should branch at the start to a special loop
- mov r0,#0
- bx lr
diff --git a/contrib/cortex-strings/src/arm/strcmp.S b/contrib/cortex-strings/src/arm/strcmp.S
deleted file mode 100644
--- a/contrib/cortex-strings/src/arm/strcmp.S
+++ /dev/null
@@ -1,500 +0,0 @@
-/*
- * Copyright (c) 2012-2014 ARM Ltd
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. The name of the company may not be used to endorse or promote
- * products derived from this software without specific prior written
- * permission.
- *
- * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
- * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
- * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
- * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-/* Implementation of strcmp for ARMv7 when DSP instructions are
- available. Use ldrd to support wider loads, provided the data
- is sufficiently aligned. Use saturating arithmetic to optimize
- the compares. */
-
-/* Build Options:
- STRCMP_NO_PRECHECK: Don't run a quick pre-check of the first
- byte in the string. If comparing completely random strings
- the pre-check will save time, since there is a very high
- probability of a mismatch in the first character: we save
- significant overhead if this is the common case. However,
- if strings are likely to be identical (eg because we're
- verifying a hit in a hash table), then this check is largely
- redundant. */
-
-#define STRCMP_NO_PRECHECK 0
-
- /* This version uses Thumb-2 code. */
- .thumb
- .syntax unified
-
-#ifdef __ARM_BIG_ENDIAN
-#define S2LO lsl
-#define S2LOEQ lsleq
-#define S2HI lsr
-#define MSB 0x000000ff
-#define LSB 0xff000000
-#define BYTE0_OFFSET 24
-#define BYTE1_OFFSET 16
-#define BYTE2_OFFSET 8
-#define BYTE3_OFFSET 0
-#else /* not __ARM_BIG_ENDIAN */
-#define S2LO lsr
-#define S2LOEQ lsreq
-#define S2HI lsl
-#define BYTE0_OFFSET 0
-#define BYTE1_OFFSET 8
-#define BYTE2_OFFSET 16
-#define BYTE3_OFFSET 24
-#define MSB 0xff000000
-#define LSB 0x000000ff
-#endif /* not __ARM_BIG_ENDIAN */
-
- .macro def_fn f p2align=0
- .text
- .p2align \p2align
- .global \f
- .type \f, %function
-\f:
- .endm
-
-/* Parameters and result. */
-#define src1 r0
-#define src2 r1
-#define result r0 /* Overlaps src1. */
-
-/* Internal variables. */
-#define tmp1 r4
-#define tmp2 r5
-#define const_m1 r12
-
-/* Additional internal variables for 64-bit aligned data. */
-#define data1a r2
-#define data1b r3
-#define data2a r6
-#define data2b r7
-#define syndrome_a tmp1
-#define syndrome_b tmp2
-
-/* Additional internal variables for 32-bit aligned data. */
-#define data1 r2
-#define data2 r3
-#define syndrome tmp2
-
-
- /* Macro to compute and return the result value for word-aligned
- cases. */
- .macro strcmp_epilogue_aligned synd d1 d2 restore_r6
-#ifdef __ARM_BIG_ENDIAN
- /* If data1 contains a zero byte, then syndrome will contain a 1 in
- bit 7 of that byte. Otherwise, the highest set bit in the
- syndrome will highlight the first different bit. It is therefore
- sufficient to extract the eight bits starting with the syndrome
- bit. */
- clz tmp1, \synd
- lsl r1, \d2, tmp1
- .if \restore_r6
- ldrd r6, r7, [sp, #8]
- .endif
- .cfi_restore 6
- .cfi_restore 7
- lsl \d1, \d1, tmp1
- .cfi_remember_state
- lsr result, \d1, #24
- ldrd r4, r5, [sp], #16
- .cfi_restore 4
- .cfi_restore 5
- sub result, result, r1, lsr #24
- bx lr
-#else
- /* To use the big-endian trick we'd have to reverse all three words.
- that's slower than this approach. */
- rev \synd, \synd
- clz tmp1, \synd
- bic tmp1, tmp1, #7
- lsr r1, \d2, tmp1
- .cfi_remember_state
- .if \restore_r6
- ldrd r6, r7, [sp, #8]
- .endif
- .cfi_restore 6
- .cfi_restore 7
- lsr \d1, \d1, tmp1
- and result, \d1, #255
- and r1, r1, #255
- ldrd r4, r5, [sp], #16
- .cfi_restore 4
- .cfi_restore 5
- sub result, result, r1
-
- bx lr
-#endif
- .endm
-
- .text
- .p2align 5
-.Lstrcmp_start_addr:
-#if STRCMP_NO_PRECHECK == 0
-.Lfastpath_exit:
- sub r0, r2, r3
- bx lr
- nop
-#endif
-def_fn strcmp
-#if STRCMP_NO_PRECHECK == 0
- ldrb r2, [src1]
- ldrb r3, [src2]
- cmp r2, #1
- it cs
- cmpcs r2, r3
- bne .Lfastpath_exit
-#endif
- .cfi_startproc
- strd r4, r5, [sp, #-16]!
- .cfi_def_cfa_offset 16
- .cfi_offset 4, -16
- .cfi_offset 5, -12
- orr tmp1, src1, src2
- strd r6, r7, [sp, #8]
- .cfi_offset 6, -8
- .cfi_offset 7, -4
- mvn const_m1, #0
- lsl r2, tmp1, #29
- cbz r2, .Lloop_aligned8
-
-.Lnot_aligned:
- eor tmp1, src1, src2
- tst tmp1, #7
- bne .Lmisaligned8
-
- /* Deal with mutual misalignment by aligning downwards and then
- masking off the unwanted loaded data to prevent a difference. */
- and tmp1, src1, #7
- bic src1, src1, #7
- and tmp2, tmp1, #3
- bic src2, src2, #7
- lsl tmp2, tmp2, #3 /* Bytes -> bits. */
- ldrd data1a, data1b, [src1], #16
- tst tmp1, #4
- ldrd data2a, data2b, [src2], #16
- /* In thumb code we can't use MVN with a register shift, but
- we do have ORN. */
- S2HI tmp1, const_m1, tmp2
- orn data1a, data1a, tmp1
- orn data2a, data2a, tmp1
- beq .Lstart_realigned8
- orn data1b, data1b, tmp1
- mov data1a, const_m1
- orn data2b, data2b, tmp1
- mov data2a, const_m1
- b .Lstart_realigned8
-
- /* Unwind the inner loop by a factor of 2, giving 16 bytes per
- pass. */
- .p2align 5,,12 /* Don't start in the tail bytes of a cache line. */
- .p2align 2 /* Always word aligned. */
-.Lloop_aligned8:
- ldrd data1a, data1b, [src1], #16
- ldrd data2a, data2b, [src2], #16
-.Lstart_realigned8:
- uadd8 syndrome_b, data1a, const_m1 /* Only want GE bits, */
- eor syndrome_a, data1a, data2a
- sel syndrome_a, syndrome_a, const_m1
- cbnz syndrome_a, .Ldiff_in_a
- uadd8 syndrome_b, data1b, const_m1 /* Only want GE bits. */
- eor syndrome_b, data1b, data2b
- sel syndrome_b, syndrome_b, const_m1
- cbnz syndrome_b, .Ldiff_in_b
-
- ldrd data1a, data1b, [src1, #-8]
- ldrd data2a, data2b, [src2, #-8]
- uadd8 syndrome_b, data1a, const_m1 /* Only want GE bits, */
- eor syndrome_a, data1a, data2a
- sel syndrome_a, syndrome_a, const_m1
- uadd8 syndrome_b, data1b, const_m1 /* Only want GE bits. */
- eor syndrome_b, data1b, data2b
- sel syndrome_b, syndrome_b, const_m1
- /* Can't use CBZ for backwards branch. */
- orrs syndrome_b, syndrome_b, syndrome_a /* Only need if s_a == 0 */
- beq .Lloop_aligned8
-
-.Ldiff_found:
- cbnz syndrome_a, .Ldiff_in_a
-
-.Ldiff_in_b:
- strcmp_epilogue_aligned syndrome_b, data1b, data2b 1
-
-.Ldiff_in_a:
- .cfi_restore_state
- strcmp_epilogue_aligned syndrome_a, data1a, data2a 1
-
- .cfi_restore_state
-.Lmisaligned8:
- tst tmp1, #3
- bne .Lmisaligned4
- ands tmp1, src1, #3
- bne .Lmutual_align4
-
- /* Unrolled by a factor of 2, to reduce the number of post-increment
- operations. */
-.Lloop_aligned4:
- ldr data1, [src1], #8
- ldr data2, [src2], #8
-.Lstart_realigned4:
- uadd8 syndrome, data1, const_m1 /* Only need GE bits. */
- eor syndrome, data1, data2
- sel syndrome, syndrome, const_m1
- cbnz syndrome, .Laligned4_done
- ldr data1, [src1, #-4]
- ldr data2, [src2, #-4]
- uadd8 syndrome, data1, const_m1
- eor syndrome, data1, data2
- sel syndrome, syndrome, const_m1
- cmp syndrome, #0
- beq .Lloop_aligned4
-
-.Laligned4_done:
- strcmp_epilogue_aligned syndrome, data1, data2, 0
-
-.Lmutual_align4:
- .cfi_restore_state
- /* Deal with mutual misalignment by aligning downwards and then
- masking off the unwanted loaded data to prevent a difference. */
- lsl tmp1, tmp1, #3 /* Bytes -> bits. */
- bic src1, src1, #3
- ldr data1, [src1], #8
- bic src2, src2, #3
- ldr data2, [src2], #8
-
- /* In thumb code we can't use MVN with a register shift, but
- we do have ORN. */
- S2HI tmp1, const_m1, tmp1
- orn data1, data1, tmp1
- orn data2, data2, tmp1
- b .Lstart_realigned4
-
-.Lmisaligned4:
- ands tmp1, src1, #3
- beq .Lsrc1_aligned
- sub src2, src2, tmp1
- bic src1, src1, #3
- lsls tmp1, tmp1, #31
- ldr data1, [src1], #4
- beq .Laligned_m2
- bcs .Laligned_m1
-
-#if STRCMP_NO_PRECHECK == 1
- ldrb data2, [src2, #1]
- uxtb tmp1, data1, ror #BYTE1_OFFSET
- subs tmp1, tmp1, data2
- bne .Lmisaligned_exit
- cbz data2, .Lmisaligned_exit
-
-.Laligned_m2:
- ldrb data2, [src2, #2]
- uxtb tmp1, data1, ror #BYTE2_OFFSET
- subs tmp1, tmp1, data2
- bne .Lmisaligned_exit
- cbz data2, .Lmisaligned_exit
-
-.Laligned_m1:
- ldrb data2, [src2, #3]
- uxtb tmp1, data1, ror #BYTE3_OFFSET
- subs tmp1, tmp1, data2
- bne .Lmisaligned_exit
- add src2, src2, #4
- cbnz data2, .Lsrc1_aligned
-#else /* STRCMP_NO_PRECHECK */
- /* If we've done the pre-check, then we don't need to check the
- first byte again here. */
- ldrb data2, [src2, #2]
- uxtb tmp1, data1, ror #BYTE2_OFFSET
- subs tmp1, tmp1, data2
- bne .Lmisaligned_exit
- cbz data2, .Lmisaligned_exit
-
-.Laligned_m2:
- ldrb data2, [src2, #3]
- uxtb tmp1, data1, ror #BYTE3_OFFSET
- subs tmp1, tmp1, data2
- bne .Lmisaligned_exit
- cbnz data2, .Laligned_m1
-#endif
-
-.Lmisaligned_exit:
- .cfi_remember_state
- mov result, tmp1
- ldr r4, [sp], #16
- .cfi_restore 4
- bx lr
-
-#if STRCMP_NO_PRECHECK == 0
-.Laligned_m1:
- add src2, src2, #4
-#endif
-.Lsrc1_aligned:
- .cfi_restore_state
- /* src1 is word aligned, but src2 has no common alignment
- with it. */
- ldr data1, [src1], #4
- lsls tmp1, src2, #31 /* C=src2[1], Z=src2[0]. */
-
- bic src2, src2, #3
- ldr data2, [src2], #4
- bhi .Loverlap1 /* C=1, Z=0 => src2[1:0] = 0b11. */
- bcs .Loverlap2 /* C=1, Z=1 => src2[1:0] = 0b10. */
-
- /* (overlap3) C=0, Z=0 => src2[1:0] = 0b01. */
-.Loverlap3:
- bic tmp1, data1, #MSB
- uadd8 syndrome, data1, const_m1
- eors syndrome, tmp1, data2, S2LO #8
- sel syndrome, syndrome, const_m1
- bne 4f
- cbnz syndrome, 5f
- ldr data2, [src2], #4
- eor tmp1, tmp1, data1
- cmp tmp1, data2, S2HI #24
- bne 6f
- ldr data1, [src1], #4
- b .Loverlap3
-4:
- S2LO data2, data2, #8
- b .Lstrcmp_tail
-
-5:
- bics syndrome, syndrome, #MSB
- bne .Lstrcmp_done_equal
-
- /* We can only get here if the MSB of data1 contains 0, so
- fast-path the exit. */
- ldrb result, [src2]
- .cfi_remember_state
- ldrd r4, r5, [sp], #16
- .cfi_restore 4
- .cfi_restore 5
- /* R6/7 Not used in this sequence. */
- .cfi_restore 6
- .cfi_restore 7
- neg result, result
- bx lr
-
-6:
- .cfi_restore_state
- S2LO data1, data1, #24
- and data2, data2, #LSB
- b .Lstrcmp_tail
-
- .p2align 5,,12 /* Ensure at least 3 instructions in cache line. */
-.Loverlap2:
- and tmp1, data1, const_m1, S2LO #16
- uadd8 syndrome, data1, const_m1
- eors syndrome, tmp1, data2, S2LO #16
- sel syndrome, syndrome, const_m1
- bne 4f
- cbnz syndrome, 5f
- ldr data2, [src2], #4
- eor tmp1, tmp1, data1
- cmp tmp1, data2, S2HI #16
- bne 6f
- ldr data1, [src1], #4
- b .Loverlap2
-4:
- S2LO data2, data2, #16
- b .Lstrcmp_tail
-5:
- ands syndrome, syndrome, const_m1, S2LO #16
- bne .Lstrcmp_done_equal
-
- ldrh data2, [src2]
- S2LO data1, data1, #16
-#ifdef __ARM_BIG_ENDIAN
- lsl data2, data2, #16
-#endif
- b .Lstrcmp_tail
-
-6:
- S2LO data1, data1, #16
- and data2, data2, const_m1, S2LO #16
- b .Lstrcmp_tail
-
- .p2align 5,,12 /* Ensure at least 3 instructions in cache line. */
-.Loverlap1:
- and tmp1, data1, #LSB
- uadd8 syndrome, data1, const_m1
- eors syndrome, tmp1, data2, S2LO #24
- sel syndrome, syndrome, const_m1
- bne 4f
- cbnz syndrome, 5f
- ldr data2, [src2], #4
- eor tmp1, tmp1, data1
- cmp tmp1, data2, S2HI #8
- bne 6f
- ldr data1, [src1], #4
- b .Loverlap1
-4:
- S2LO data2, data2, #24
- b .Lstrcmp_tail
-5:
- tst syndrome, #LSB
- bne .Lstrcmp_done_equal
- ldr data2, [src2]
-6:
- S2LO data1, data1, #8
- bic data2, data2, #MSB
- b .Lstrcmp_tail
-
-.Lstrcmp_done_equal:
- mov result, #0
- .cfi_remember_state
- ldrd r4, r5, [sp], #16
- .cfi_restore 4
- .cfi_restore 5
- /* R6/7 not used in this sequence. */
- .cfi_restore 6
- .cfi_restore 7
- bx lr
-
-.Lstrcmp_tail:
- .cfi_restore_state
-#ifndef __ARM_BIG_ENDIAN
- rev data1, data1
- rev data2, data2
- /* Now everything looks big-endian... */
-#endif
- uadd8 tmp1, data1, const_m1
- eor tmp1, data1, data2
- sel syndrome, tmp1, const_m1
- clz tmp1, syndrome
- lsl data1, data1, tmp1
- lsl data2, data2, tmp1
- lsr result, data1, #24
- ldrd r4, r5, [sp], #16
- .cfi_restore 4
- .cfi_restore 5
- /* R6/7 not used in this sequence. */
- .cfi_restore 6
- .cfi_restore 7
- sub result, result, data2, lsr #24
- bx lr
- .cfi_endproc
- .size strcmp, . - .Lstrcmp_start_addr
diff --git a/contrib/cortex-strings/src/thumb-2/strcpy.c b/contrib/cortex-strings/src/thumb-2/strcpy.c
deleted file mode 100644
--- a/contrib/cortex-strings/src/thumb-2/strcpy.c
+++ /dev/null
@@ -1,173 +0,0 @@
-/*
- * Copyright (c) 2008 ARM Ltd
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. The name of the company may not be used to endorse or promote
- * products derived from this software without specific prior written
- * permission.
- *
- * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
- * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
- * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
- * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-/* For GLIBC:
-#include <string.h>
-#include <memcopy.h>
-
-#undef strcmp
-*/
-
-#ifdef __thumb2__
-#define magic1(REG) "#0x01010101"
-#define magic2(REG) "#0x80808080"
-#else
-#define magic1(REG) #REG
-#define magic2(REG) #REG ", lsl #7"
-#endif
-
-char* __attribute__((naked))
-strcpy (char* dst, const char* src)
-{
- asm (
-#if !(defined(__OPTIMIZE_SIZE__) || defined (PREFER_SIZE_OVER_SPEED) || \
- (defined (__thumb__) && !defined (__thumb2__)))
- "pld [r1, #0]\n\t"
- "eor r2, r0, r1\n\t"
- "mov ip, r0\n\t"
- "tst r2, #3\n\t"
- "bne 4f\n\t"
- "tst r1, #3\n\t"
- "bne 3f\n"
- "5:\n\t"
-#ifndef __thumb2__
- "str r5, [sp, #-4]!\n\t"
- "mov r5, #0x01\n\t"
- "orr r5, r5, r5, lsl #8\n\t"
- "orr r5, r5, r5, lsl #16\n\t"
-#endif
-
- "str r4, [sp, #-4]!\n\t"
- "tst r1, #4\n\t"
- "ldr r3, [r1], #4\n\t"
- "beq 2f\n\t"
- "sub r2, r3, "magic1(r5)"\n\t"
- "bics r2, r2, r3\n\t"
- "tst r2, "magic2(r5)"\n\t"
- "itt eq\n\t"
- "streq r3, [ip], #4\n\t"
- "ldreq r3, [r1], #4\n"
- "bne 1f\n\t"
- /* Inner loop. We now know that r1 is 64-bit aligned, so we
- can safely fetch up to two words. This allows us to avoid
- load stalls. */
- ".p2align 2\n"
- "2:\n\t"
- "pld [r1, #8]\n\t"
- "ldr r4, [r1], #4\n\t"
- "sub r2, r3, "magic1(r5)"\n\t"
- "bics r2, r2, r3\n\t"
- "tst r2, "magic2(r5)"\n\t"
- "sub r2, r4, "magic1(r5)"\n\t"
- "bne 1f\n\t"
- "str r3, [ip], #4\n\t"
- "bics r2, r2, r4\n\t"
- "tst r2, "magic2(r5)"\n\t"
- "itt eq\n\t"
- "ldreq r3, [r1], #4\n\t"
- "streq r4, [ip], #4\n\t"
- "beq 2b\n\t"
- "mov r3, r4\n"
- "1:\n\t"
-#ifdef __ARMEB__
- "rors r3, r3, #24\n\t"
-#endif
- "strb r3, [ip], #1\n\t"
- "tst r3, #0xff\n\t"
-#ifdef __ARMEL__
- "ror r3, r3, #8\n\t"
-#endif
- "bne 1b\n\t"
- "ldr r4, [sp], #4\n\t"
-#ifndef __thumb2__
- "ldr r5, [sp], #4\n\t"
-#endif
- "BX LR\n"
-
- /* Strings have the same offset from word alignment, but it's
- not zero. */
- "3:\n\t"
- "tst r1, #1\n\t"
- "beq 1f\n\t"
- "ldrb r2, [r1], #1\n\t"
- "strb r2, [ip], #1\n\t"
- "cmp r2, #0\n\t"
- "it eq\n"
- "BXEQ LR\n"
- "1:\n\t"
- "tst r1, #2\n\t"
- "beq 5b\n\t"
- "ldrh r2, [r1], #2\n\t"
-#ifdef __ARMEB__
- "tst r2, #0xff00\n\t"
- "iteet ne\n\t"
- "strneh r2, [ip], #2\n\t"
- "lsreq r2, r2, #8\n\t"
- "streqb r2, [ip]\n\t"
- "tstne r2, #0xff\n\t"
-#else
- "tst r2, #0xff\n\t"
- "itet ne\n\t"
- "strneh r2, [ip], #2\n\t"
- "streqb r2, [ip]\n\t"
- "tstne r2, #0xff00\n\t"
-#endif
- "bne 5b\n\t"
- "BX LR\n"
-
- /* src and dst do not have a common word-alignement. Fall back to
- byte copying. */
- "4:\n\t"
- "ldrb r2, [r1], #1\n\t"
- "strb r2, [ip], #1\n\t"
- "cmp r2, #0\n\t"
- "bne 4b\n\t"
- "BX LR"
-
-#elif !defined (__thumb__) || defined (__thumb2__)
- "mov r3, r0\n\t"
- "1:\n\t"
- "ldrb r2, [r1], #1\n\t"
- "strb r2, [r3], #1\n\t"
- "cmp r2, #0\n\t"
- "bne 1b\n\t"
- "BX LR"
-#else
- "mov r3, r0\n\t"
- "1:\n\t"
- "ldrb r2, [r1]\n\t"
- "add r1, r1, #1\n\t"
- "strb r2, [r3]\n\t"
- "add r3, r3, #1\n\t"
- "cmp r2, #0\n\t"
- "bne 1b\n\t"
- "BX LR"
-#endif
- );
-}
-/* For GLIBC: libc_hidden_builtin_def (strcpy) */
diff --git a/contrib/cortex-strings/src/thumb-2/strlen.S b/contrib/cortex-strings/src/thumb-2/strlen.S
deleted file mode 100644
--- a/contrib/cortex-strings/src/thumb-2/strlen.S
+++ /dev/null
@@ -1,150 +0,0 @@
-/* Copyright (c) 2010-2011,2013 Linaro Limited
- All rights reserved.
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions
- are met:
-
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
-
- * Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in the
- documentation and/or other materials provided with the distribution.
-
- * Neither the name of Linaro Limited nor the names of its
- contributors may be used to endorse or promote products derived
- from this software without specific prior written permission.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-/*
- Assumes:
- ARMv6T2, AArch32
-
- */
-
- .macro def_fn f p2align=0
- .text
- .p2align \p2align
- .global \f
- .type \f, %function
-\f:
- .endm
-
-#ifdef __ARMEB__
-#define S2LO lsl
-#define S2HI lsr
-#else
-#define S2LO lsr
-#define S2HI lsl
-#endif
-
- /* This code requires Thumb. */
- .thumb
- .syntax unified
-
-/* Parameters and result. */
-#define srcin r0
-#define result r0
-
-/* Internal variables. */
-#define src r1
-#define data1a r2
-#define data1b r3
-#define const_m1 r12
-#define const_0 r4
-#define tmp1 r4 /* Overlaps const_0 */
-#define tmp2 r5
-
-def_fn strlen p2align=6
- pld [srcin, #0]
- strd r4, r5, [sp, #-8]!
- bic src, srcin, #7
- mvn const_m1, #0
- ands tmp1, srcin, #7 /* (8 - bytes) to alignment. */
- pld [src, #32]
- bne.w .Lmisaligned8
- mov const_0, #0
- mov result, #-8
-.Lloop_aligned:
- /* Bytes 0-7. */
- ldrd data1a, data1b, [src]
- pld [src, #64]
- add result, result, #8
-.Lstart_realigned:
- uadd8 data1a, data1a, const_m1 /* Saturating GE<0:3> set. */
- sel data1a, const_0, const_m1 /* Select based on GE<0:3>. */
- uadd8 data1b, data1b, const_m1
- sel data1b, data1a, const_m1 /* Only used if d1a == 0. */
- cbnz data1b, .Lnull_found
-
- /* Bytes 8-15. */
- ldrd data1a, data1b, [src, #8]
- uadd8 data1a, data1a, const_m1 /* Saturating GE<0:3> set. */
- add result, result, #8
- sel data1a, const_0, const_m1 /* Select based on GE<0:3>. */
- uadd8 data1b, data1b, const_m1
- sel data1b, data1a, const_m1 /* Only used if d1a == 0. */
- cbnz data1b, .Lnull_found
-
- /* Bytes 16-23. */
- ldrd data1a, data1b, [src, #16]
- uadd8 data1a, data1a, const_m1 /* Saturating GE<0:3> set. */
- add result, result, #8
- sel data1a, const_0, const_m1 /* Select based on GE<0:3>. */
- uadd8 data1b, data1b, const_m1
- sel data1b, data1a, const_m1 /* Only used if d1a == 0. */
- cbnz data1b, .Lnull_found
-
- /* Bytes 24-31. */
- ldrd data1a, data1b, [src, #24]
- add src, src, #32
- uadd8 data1a, data1a, const_m1 /* Saturating GE<0:3> set. */
- add result, result, #8
- sel data1a, const_0, const_m1 /* Select based on GE<0:3>. */
- uadd8 data1b, data1b, const_m1
- sel data1b, data1a, const_m1 /* Only used if d1a == 0. */
- cmp data1b, #0
- beq .Lloop_aligned
-
-.Lnull_found:
- cmp data1a, #0
- itt eq
- addeq result, result, #4
- moveq data1a, data1b
-#ifndef __ARMEB__
- rev data1a, data1a
-#endif
- clz data1a, data1a
- ldrd r4, r5, [sp], #8
- add result, result, data1a, lsr #3 /* Bits -> Bytes. */
- bx lr
-
-.Lmisaligned8:
- ldrd data1a, data1b, [src]
- and tmp2, tmp1, #3
- rsb result, tmp1, #0
- lsl tmp2, tmp2, #3 /* Bytes -> bits. */
- tst tmp1, #4
- pld [src, #64]
- S2HI tmp2, const_m1, tmp2
- orn data1a, data1a, tmp2
- itt ne
- ornne data1b, data1b, tmp2
- movne data1a, const_m1
- mov const_0, #0
- b .Lstart_realigned
- .size strlen, . - strlen
-
diff --git a/contrib/cortex-strings/src/thumb/aeabi_idiv.S b/contrib/cortex-strings/src/thumb/aeabi_idiv.S
deleted file mode 100644
--- a/contrib/cortex-strings/src/thumb/aeabi_idiv.S
+++ /dev/null
@@ -1,318 +0,0 @@
-/*
- * Copyright (c) 2014 ARM Ltd
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. The name of the company may not be used to endorse or promote
- * products derived from this software without specific prior written
- * permission.
- *
- * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
- * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
- * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
- * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-/* An executable stack is *not* required for these functions. */
-
-.section .note.GNU-stack,"",%progbits
-.previous
-.eabi_attribute 25, 1
-
-/* ANSI concatenation macros. */
-
-#define CONCAT1(a, b) CONCAT2(a, b)
-#define CONCAT2(a, b) a ## b
-
-/* Use the right prefix for global labels. */
-
-#define SYM(x) CONCAT1 (__USER_LABEL_PREFIX__, x)
-
-#define TYPE(x) .type SYM(x),function
-#define SIZE(x) .size SYM(x), . - SYM(x)
-#define LSYM(x) .x
-
-.macro cfi_start start_label, end_label
- .pushsection .debug_frame
-LSYM(Lstart_frame):
- .4byte LSYM(Lend_cie) - LSYM(Lstart_cie)
-LSYM(Lstart_cie):
- .4byte 0xffffffff
- .byte 0x1
- .ascii "\0"
- .uleb128 0x1
- .sleb128 -4
- .byte 0xe
- .byte 0xc
- .uleb128 0xd
- .uleb128 0x0
-
- .align 2
-LSYM(Lend_cie):
- .4byte LSYM(Lend_fde)-LSYM(Lstart_fde)
-LSYM(Lstart_fde):
- .4byte LSYM(Lstart_frame)
- .4byte \start_label
- .4byte \end_label-\start_label
- .popsection
-.endm
-
-.macro cfi_end end_label
- .pushsection .debug_frame
- .align 2
-LSYM(Lend_fde):
- .popsection
-\end_label:
-.endm
-
-.macro THUMB_LDIV0 name signed
- push {r0, lr}
- movs r0, #0
- bl SYM(__aeabi_idiv0)
- pop {r1, pc}
-.endm
-
-.macro FUNC_END name
- SIZE (__\name)
-.endm
-
-.macro DIV_FUNC_END name signed
- cfi_start __\name, LSYM(Lend_div0)
-LSYM(Ldiv0):
- THUMB_LDIV0 \name \signed
- cfi_end LSYM(Lend_div0)
- FUNC_END \name
-.endm
-
-.macro THUMB_FUNC_START name
- .globl SYM (\name)
- TYPE (\name)
- .thumb_func
-SYM (\name):
-.endm
-
-.macro FUNC_START name
- .text
- .globl SYM (__\name)
- TYPE (__\name)
- .align 0
- .force_thumb
- .thumb_func
- .syntax unified
-SYM (__\name):
-.endm
-
-.macro FUNC_ALIAS new old
- .globl SYM (__\new)
- .thumb_set SYM (__\new), SYM (__\old)
-.endm
-
-/* Register aliases. */
-work .req r4
-dividend .req r0
-divisor .req r1
-overdone .req r2
-result .req r2
-curbit .req r3
-
-/* ------------------------------------------------------------------------ */
-/* Bodies of the division and modulo routines. */
-/* ------------------------------------------------------------------------ */
-.macro BranchToDiv n, label
- lsrs curbit, dividend, \n
- cmp curbit, divisor
- bcc \label
-.endm
-
-.macro DoDiv n
- lsrs curbit, dividend, \n
- cmp curbit, divisor
- bcc 1f
- lsls curbit, divisor, \n
- subs dividend, dividend, curbit
-
-1: adcs result, result
-.endm
-
-.macro THUMB1_Div_Positive
- movs result, #0
- BranchToDiv #1, LSYM(Lthumb1_div1)
- BranchToDiv #4, LSYM(Lthumb1_div4)
- BranchToDiv #8, LSYM(Lthumb1_div8)
- BranchToDiv #12, LSYM(Lthumb1_div12)
- BranchToDiv #16, LSYM(Lthumb1_div16)
-LSYM(Lthumb1_div_large_positive):
- movs result, #0xff
- lsls divisor, divisor, #8
- rev result, result
- lsrs curbit, dividend, #16
- cmp curbit, divisor
- bcc 1f
- asrs result, #8
- lsls divisor, divisor, #8
- beq LSYM(Ldivbyzero_waypoint)
-
-1: lsrs curbit, dividend, #12
- cmp curbit, divisor
- bcc LSYM(Lthumb1_div12)
- b LSYM(Lthumb1_div16)
-LSYM(Lthumb1_div_loop):
- lsrs divisor, divisor, #8
-LSYM(Lthumb1_div16):
- Dodiv #15
- Dodiv #14
- Dodiv #13
- Dodiv #12
-LSYM(Lthumb1_div12):
- Dodiv #11
- Dodiv #10
- Dodiv #9
- Dodiv #8
- bcs LSYM(Lthumb1_div_loop)
-LSYM(Lthumb1_div8):
- Dodiv #7
- Dodiv #6
- Dodiv #5
-LSYM(Lthumb1_div5):
- Dodiv #4
-LSYM(Lthumb1_div4):
- Dodiv #3
-LSYM(Lthumb1_div3):
- Dodiv #2
-LSYM(Lthumb1_div2):
- Dodiv #1
-LSYM(Lthumb1_div1):
- subs divisor, dividend, divisor
- bcs 1f
- mov divisor, dividend
-
-1: adcs result, result
- mov dividend, result
- bx lr
-
-LSYM(Ldivbyzero_waypoint):
- b LSYM(Ldiv0)
-.endm
-
-.macro THUMB1_Div_Negative
- lsrs result, divisor, #31
- beq 1f
- rsbs divisor, divisor, #0
-
-1: asrs curbit, dividend, #32
- bcc 2f
- rsbs dividend, dividend, #0
-
-2: eors curbit, result
- movs result, #0
- mov ip, curbit
- BranchToDiv #4, LSYM(Lthumb1_div_negative4)
- BranchToDiv #8, LSYM(Lthumb1_div_negative8)
-LSYM(Lthumb1_div_large):
- movs result, #0xfc
- lsls divisor, divisor, #6
- rev result, result
- lsrs curbit, dividend, #8
- cmp curbit, divisor
- bcc LSYM(Lthumb1_div_negative8)
-
- lsls divisor, divisor, #6
- asrs result, result, #6
- cmp curbit, divisor
- bcc LSYM(Lthumb1_div_negative8)
-
- lsls divisor, divisor, #6
- asrs result, result, #6
- cmp curbit, divisor
- bcc LSYM(Lthumb1_div_negative8)
-
- lsls divisor, divisor, #6
- beq LSYM(Ldivbyzero_negative)
- asrs result, result, #6
- b LSYM(Lthumb1_div_negative8)
-LSYM(Lthumb1_div_negative_loop):
- lsrs divisor, divisor, #6
-LSYM(Lthumb1_div_negative8):
- DoDiv #7
- DoDiv #6
- DoDiv #5
- DoDiv #4
-LSYM(Lthumb1_div_negative4):
- DoDiv #3
- DoDiv #2
- bcs LSYM(Lthumb1_div_negative_loop)
- DoDiv #1
- subs divisor, dividend, divisor
- bcs 1f
- mov divisor, dividend
-
-1: mov curbit, ip
- adcs result, result
- asrs curbit, curbit, #1
- mov dividend, result
- bcc 2f
- rsbs dividend, dividend, #0
- cmp curbit, #0
-
-2: bpl 3f
- rsbs divisor, divisor, #0
-
-3: bx lr
-
-LSYM(Ldivbyzero_negative):
- mov curbit, ip
- asrs curbit, curbit, #1
- bcc LSYM(Ldiv0)
- rsbs dividend, dividend, #0
-.endm
-
-/* ------------------------------------------------------------------------ */
-/* Start of the Real Functions */
-/* ------------------------------------------------------------------------ */
-
- FUNC_START aeabi_idiv0
- bx lr
- FUNC_END aeabi_idiv0
-
- FUNC_START divsi3
- FUNC_ALIAS aeabi_idiv divsi3
-
-LSYM(divsi3_skip_div0_test):
- mov curbit, dividend
- orrs curbit, divisor
- bmi LSYM(Lthumb1_div_negative)
-
-LSYM(Lthumb1_div_positive):
- THUMB1_Div_Positive
-
-LSYM(Lthumb1_div_negative):
- THUMB1_Div_Negative
-
- DIV_FUNC_END divsi3 signed
-
- FUNC_START aeabi_idivmod
-
- cmp r1, #0
- beq LSYM(Ldiv0)
- push {r0, r1, lr}
- bl LSYM(divsi3_skip_div0_test)
- POP {r1, r2, r3}
- mul r2, r0
- sub r1, r1, r2
- bx r3
-
- FUNC_END aeabi_idivmod
-/* ------------------------------------------------------------------------ */
diff --git a/contrib/cortex-strings/src/thumb/strcmp-armv6m.S b/contrib/cortex-strings/src/thumb/strcmp-armv6m.S
deleted file mode 100644
--- a/contrib/cortex-strings/src/thumb/strcmp-armv6m.S
+++ /dev/null
@@ -1,143 +0,0 @@
-/*
- * Copyright (c) 2014 ARM Ltd
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. The name of the company may not be used to endorse or promote
- * products derived from this software without specific prior written
- * permission.
- *
- * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
- * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
- * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
- * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-/* Implementation of strcmp for ARMv6m. This version is only used in
- ARMv6-M when we want an efficient implementation. Otherwize if the
- code size is preferred, strcmp-armv4t.S will be used. */
-
- .thumb_func
- .syntax unified
- .arch armv6-m
-
- .macro DoSub n, label
- subs r0, r0, r1
-#ifdef __ARM_BIG_ENDIAN
- lsrs r1, r4, \n
-#else
- lsls r1, r4, \n
-#endif
- orrs r1, r0
- bne \label
- .endm
-
- .macro Byte_Test n, label
- lsrs r0, r2, \n
- lsrs r1, r3, \n
- DoSub \n, \label
- .endm
-
- .text
- .p2align 0
- .global strcmp
- .type strcmp, %function
-strcmp:
- .cfi_startproc
- mov r2, r0
- push {r4, r5, r6, lr}
- orrs r2, r1
- lsls r2, r2, #30
- bne 6f
- ldr r5, =0x01010101
- lsls r6, r5, #7
-1:
- ldmia r0!, {r2}
- ldmia r1!, {r3}
- subs r4, r2, r5
- bics r4, r2
- ands r4, r6
- beq 3f
-
-#ifdef __ARM_BIG_ENDIAN
- Byte_Test #24, 4f
- Byte_Test #16, 4f
- Byte_Test #8, 4f
-
- b 7f
-3:
- cmp r2, r3
- beq 1b
- cmp r2, r3
-#else
- uxtb r0, r2
- uxtb r1, r3
- DoSub #24, 2f
-
- uxth r0, r2
- uxth r1, r3
- DoSub #16, 2f
-
- lsls r0, r2, #8
- lsls r1, r3, #8
- lsrs r0, r0, #8
- lsrs r1, r1, #8
- DoSub #8, 2f
-
- lsrs r0, r2, #24
- lsrs r1, r3, #24
- subs r0, r0, r1
-2:
- pop {r4, r5, r6, pc}
-
-3:
- cmp r2, r3
- beq 1b
- rev r0, r2
- rev r1, r3
- cmp r0, r1
-#endif
-
- bls 5f
- movs r0, #1
-4:
- pop {r4, r5, r6, pc}
-5:
- movs r0, #0
- mvns r0, r0
- pop {r4, r5, r6, pc}
-6:
- ldrb r2, [r0, #0]
- ldrb r3, [r1, #0]
- adds r0, #1
- adds r1, #1
- cmp r2, #0
- beq 7f
- cmp r2, r3
- bne 7f
- ldrb r2, [r0, #0]
- ldrb r3, [r1, #0]
- adds r0, #1
- adds r1, #1
- cmp r2, #0
- beq 7f
- cmp r2, r3
- beq 6b
-7:
- subs r0, r2, r3
- pop {r4, r5, r6, pc}
- .cfi_endproc
- .size strcmp, . - strcmp
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Thu, Feb 6, 9:43 AM (20 h, 51 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
16491370
Default Alt Text
D48500.diff (237 KB)
Attached To
Mode
D48500: Remove now-unused contrib/cortex-strings
Attached
Detach File
Event Timeline
Log In to Comment