Page MenuHomeFreeBSD

D48500.diff
No OneTemporary

D48500.diff

diff --git a/contrib/cortex-strings/.gitignore b/contrib/cortex-strings/.gitignore
deleted file mode 100644
--- a/contrib/cortex-strings/.gitignore
+++ /dev/null
@@ -1,11 +0,0 @@
-*.a
-*.o
-*.la
-*.lo
-*.png
-*.pyc
-.deps
-.dirstamp
-.libs
-try-*
-cache.txt
diff --git a/contrib/cortex-strings/Makefile.am b/contrib/cortex-strings/Makefile.am
deleted file mode 100644
--- a/contrib/cortex-strings/Makefile.am
+++ /dev/null
@@ -1,327 +0,0 @@
-# Copyright (c) 2011, Linaro Limited
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are met:
-# * Redistributions of source code must retain the above copyright
-# notice, this list of conditions and the following disclaimer.
-# * Redistributions in binary form must reproduce the above copyright
-# notice, this list of conditions and the following disclaimer in the
-# documentation and/or other materials provided with the distribution.
-# * Neither the name of the Linaro nor the
-# names of its contributors may be used to endorse or promote products
-# derived from this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
-# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
-# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-#
-
-# Top level Makefile for cortex-strings
-
-# Used to record the compiler version in the executables
-COMPILER = $(shell $(CC) --version 2>&1 | head -n1)
-
-# The main library
-lib_LTLIBRARIES = \
- libcortex-strings.la
-
-## Test suite
-check_PROGRAMS = \
- tests/test-memchr \
- tests/test-memcmp \
- tests/test-memcpy \
- tests/test-memmove \
- tests/test-memset \
- tests/test-strchr \
- tests/test-strcmp \
- tests/test-strcpy \
- tests/test-strlen \
- tests/test-strncmp \
- tests/test-strnlen
-
-# Options for the tests
-tests_cflags = -I$(srcdir)/tests $(AM_CFLAGS)
-tests_ldadd = libcortex-strings.la
-tests_test_memchr_LDADD = $(tests_ldadd)
-tests_test_memchr_CFLAGS = $(tests_cflags)
-tests_test_memcmp_LDADD = $(tests_ldadd)
-tests_test_memcmp_CFLAGS = $(tests_cflags)
-tests_test_memcpy_LDADD = $(tests_ldadd)
-tests_test_memcpy_CFLAGS = $(tests_cflags)
-tests_test_memmove_LDADD = $(tests_ldadd)
-tests_test_memmove_CFLAGS = $(tests_cflags)
-tests_test_memset_LDADD = $(tests_ldadd)
-tests_test_memset_CFLAGS = $(tests_cflags)
-tests_test_strchr_LDADD = $(tests_ldadd)
-tests_test_strchr_CFLAGS = $(tests_cflags)
-tests_test_strcmp_LDADD = $(tests_ldadd)
-tests_test_strcmp_CFLAGS = $(tests_cflags)
-tests_test_strcpy_LDADD = $(tests_ldadd)
-tests_test_strcpy_CFLAGS = $(tests_cflags)
-tests_test_strlen_LDADD = $(tests_ldadd)
-tests_test_strlen_CFLAGS = $(tests_cflags)
-tests_test_strncmp_LDADD = $(tests_ldadd)
-tests_test_strncmp_CFLAGS = $(tests_cflags)
-
-TESTS = $(check_PROGRAMS)
-
-## Benchmarks
-noinst_PROGRAMS = \
- dhry \
- dhry-native \
- try-none \
- try-this \
- try-plain \
- try-newlib-c \
- try-bionic-c \
- try-glibc-c
-
-# Good 'ol Dhrystone
-dhry_SOURCES = \
- benchmarks/dhry/dhry_1.c \
- benchmarks/dhry/dhry_2.c \
- benchmarks/dhry/dhry.h
-
-dhry_CFLAGS = -Dcompiler="\"$(COMPILER)\"" -Doptions="\"$(CFLAGS)\""
-dhry_LDADD = libcortex-strings.la
-
-dhry_native_SOURCES = $(dhry_SOURCES)
-dhry_native_CFLAGS = $(dhry_CFLAGS)
-
-# Benchmark harness
-noinst_LIBRARIES = \
- libmulti.a \
- libbionic-c.a \
- libglibc-c.a \
- libnewlib-c.a \
- libplain.a
-
-libmulti_a_SOURCES = \
- benchmarks/multi/harness.c
-
-libmulti_a_CFLAGS = -DVERSION=\"$(VERSION)\" $(AM_CFLAGS)
-
-## Other architecture independent implementations
-libbionic_c_a_SOURCES = \
- reference/bionic-c/bcopy.c \
- reference/bionic-c/memchr.c \
- reference/bionic-c/memcmp.c \
- reference/bionic-c/memcpy.c \
- reference/bionic-c/memset.c \
- reference/bionic-c/strchr.c \
- reference/bionic-c/strcmp.c \
- reference/bionic-c/strcpy.c \
- reference/bionic-c/strlen.c
-
-libglibc_c_a_SOURCES = \
- reference/glibc-c/memchr.c \
- reference/glibc-c/memcmp.c \
- reference/glibc-c/memcpy.c \
- reference/glibc-c/memset.c \
- reference/glibc-c/strchr.c \
- reference/glibc-c/strcmp.c \
- reference/glibc-c/strcpy.c \
- reference/glibc-c/strlen.c \
- reference/glibc-c/wordcopy.c \
- reference/glibc-c/memcopy.h \
- reference/glibc-c/pagecopy.h
-
-libnewlib_c_a_SOURCES = \
- reference/newlib-c/memchr.c \
- reference/newlib-c/memcmp.c \
- reference/newlib-c/memcpy.c \
- reference/newlib-c/memset.c \
- reference/newlib-c/strchr.c \
- reference/newlib-c/strcmp.c \
- reference/newlib-c/strcpy.c \
- reference/newlib-c/strlen.c \
- reference/newlib-c/shim.h
-
-libplain_a_SOURCES = \
- reference/plain/memset.c \
- reference/plain/memcpy.c \
- reference/plain/strcmp.c \
- reference/plain/strcpy.c
-
-try_none_SOURCES =
-try_none_LDADD = libmulti.a -lrt
-try_this_SOURCES =
-try_this_LDADD = libmulti.a libcortex-strings.la -lrt
-try_bionic_c_SOURCES =
-try_bionic_c_LDADD = libmulti.a libbionic-c.a -lrt
-try_glibc_c_SOURCES =
-try_glibc_c_LDADD = libmulti.a libglibc-c.a -lrt
-try_newlib_c_SOURCES =
-try_newlib_c_LDADD = libmulti.a libnewlib-c.a -lrt
-try_plain_SOURCES =
-try_plain_LDADD = libmulti.a libplain.a -lrt
-
-# Architecture specific
-
-if HOST_AARCH32
-
-if WITH_NEON
-# Pull in the NEON specific files
-neon_bionic_a9_sources = \
- reference/bionic-a9/memcpy.S \
- reference/bionic-a9/memset.S
-neon_bionic_a15_sources = \
- reference/bionic-a15/memcpy.S \
- reference/bionic-a15/memset.S
-fpu_flags = -mfpu=neon
-else
-if WITH_VFP
-fpu_flags = -mfpu=vfp
-else
-fpu_flags = -msoft-float
-endif
-endif
-
-# Benchmarks and example programs
-noinst_PROGRAMS += \
- try-bionic-a9 \
- try-bionic-a15 \
- try-csl \
- try-glibc \
- try-newlib \
- try-newlib-xscale
-
-# Libraries used in the benchmarks and examples
-noinst_LIBRARIES += \
- libbionic-a9.a \
- libbionic-a15.a \
- libcsl.a \
- libglibc.a \
- libnewlib.a \
- libnewlib-xscale.a
-
-# Main library
-libcortex_strings_la_SOURCES = \
- src/thumb-2/strcpy.c \
- src/arm/memchr.S \
- src/arm/strchr.S \
- src/thumb-2/strlen.S \
- src/arm/memset.S \
- src/arm/memcpy.S \
- src/arm/strcmp.S
-
-# Libraries containing the different reference versions
-libbionic_a9_a_SOURCES = \
- $(neon_bionic_a9_sources) \
- reference/bionic-a9/memcmp.S \
- reference/bionic-a9/strcmp.S \
- reference/bionic-a9/strcpy.S \
- reference/bionic-a9/strlen.c
-
-libbionic_a9_a_CFLAGS = -Wa,-mimplicit-it=thumb
-
-libbionic_a15_a_SOURCES = \
- $(neon_bionic_a15_sources) \
- reference/bionic-a15/memcmp.S \
- reference/bionic-a15/strcmp.S \
- reference/bionic-a15/strcpy.S \
- reference/bionic-a15/strlen.c
-
-libbionic_a15_a_CFLAGS = -Wa,-mimplicit-it=thumb
-
-libcsl_a_SOURCES = \
- reference/csl/memcpy.c \
- reference/csl/memset.c \
- reference/csl/arm_asm.h
-
-libglibc_a_SOURCES = \
- reference/glibc/memcpy.S \
- reference/glibc/memset.S \
- reference/glibc/strchr.S \
- reference/glibc/strlen.S
-
-libnewlib_a_SOURCES = \
- reference/newlib/memcpy.S \
- reference/newlib/strcmp.S \
- reference/newlib/strcpy.c \
- reference/newlib/strlen.c \
- reference/newlib/arm_asm.h \
- reference/newlib/shim.h
-
-libnewlib_xscale_a_SOURCES = \
- reference/newlib-xscale/memchr.c \
- reference/newlib-xscale/memcpy.c \
- reference/newlib-xscale/memset.c \
- reference/newlib-xscale/strchr.c \
- reference/newlib-xscale/strcmp.c \
- reference/newlib-xscale/strcpy.c \
- reference/newlib-xscale/strlen.c \
- reference/newlib-xscale/xscale.h
-
-# Flags for the benchmark helpers
-try_bionic_a9_SOURCES =
-try_bionic_a9_LDADD = libmulti.a libbionic-a9.a -lrt
-try_bionic_a15_SOURCES =
-try_bionic_a15_LDADD = libmulti.a libbionic-a15.a -lrt
-try_csl_SOURCES =
-try_csl_LDADD = libmulti.a libcsl.a -lrt
-try_glibc_SOURCES =
-try_glibc_LDADD = libmulti.a libglibc.a -lrt
-try_newlib_SOURCES =
-try_newlib_LDADD = libmulti.a libnewlib.a -lrt
-try_newlib_xscale_SOURCES =
-try_newlib_xscale_LDADD = libmulti.a libnewlib-xscale.a -lrt
-
-AM_CPPFLAGS = $(fpu_flags)
-AM_LDFLAGS = $(fpu_flags)
-
-endif
-
-# aarch64 specific
-if HOST_AARCH64
-
-libcortex_strings_la_SOURCES = \
- src/aarch64/memchr.S \
- src/aarch64/memcmp.S \
- src/aarch64/memcpy.S \
- src/aarch64/memmove.S \
- src/aarch64/memset.S \
- src/aarch64/strchr.S \
- src/aarch64/strchrnul.S \
- src/aarch64/strcmp.S \
- src/aarch64/strcpy.S \
- src/aarch64/strlen.S \
- src/aarch64/strncmp.S \
- src/aarch64/strnlen.S
-
-endif
-
-libcortex_strings_la_LDFLAGS = -version-info 1:0:0
-
-AM_CFLAGS = \
- -std=gnu99 -Wall \
- -fno-builtin -fno-stack-protector -U_FORTIFY_SOURCE \
- $(AM_CPPFLAGS)
-
-if WITH_SUBMACHINE
-AM_CFLAGS += \
- -mtune=$(submachine)
-endif
-
-EXTRA_DIST = \
- tests/hp-timing.h \
- tests/test-string.h \
- tests/test-skeleton.c \
- scripts/add-license.sh \
- scripts/bench.py \
- scripts/fixup.py \
- scripts/libplot.py \
- scripts/plot-align.py \
- scripts/plot.py \
- scripts/plot-sizes.py \
- scripts/plot-top.py \
- scripts/trim.sh \
- autogen.sh
diff --git a/contrib/cortex-strings/README b/contrib/cortex-strings/README
deleted file mode 100644
--- a/contrib/cortex-strings/README
+++ /dev/null
@@ -1,111 +0,0 @@
-= Cortex-A String Routines =
-
-This package contains optimised string routines including memcpy(), memset(),
-strcpy(), strlen() for the ARM Cortex-A series of cores.
-
-Various implementations of these routines are provided, including generic
-implementations for ARMv7-A cores with/without Neon, Thumb2 implementations
-and generic implementations for cores supporting AArch64.
-
-== Getting started ==
-First configure and then install libcortex-strings.so. To make other
-applications use this library, either add -lcortex-strings to the link
-command or use LD_PRELOAD to load the library into existing applications.
-
-Our intent is to get these routines into the common C libraries such
-as GLIBC, Bionic, and Newlib. Your system may already include them!
-
-== Contents ==
- * src/ contains the routines themselves
- * tests/ contains the unit tests
- * reference/ contains reference copies of other ARM-focused
- implementations gathered from around the Internet
- * benchmarks/ contains various benchmarks, tools, and scripts used to
- check and report on the different implementations.
-
-The src directory contains different variants organised by the
-implementation they run on and optional features used. For example:
- * src/thumb-2 contains generic non-NEON routines for AArch32 (with Thumb-2).
- * src/arm contains tuned routines for Cortex-A class processors.
- * src/aarch64 contains generic routines for AArch64.
- * src/thumb contains generic routines for armv6-M (with Thumb).
-
-== Reference versions ==
-reference/ contains versions collected from various popular Open
-Source libraries. These have been modified for use in benchmarking.
-Please refer to the individual files for any licensing terms.
-
-The routines were collected from the following releases:
- * EGLIBC 2.13
- * Newlib 1.19.0
- * Bionic android-2.3.5_r1
-
-== Licensing ==
-All Linaro-authored routines are under the modified BSD license:
-
-Copyright (c) 2011, Linaro Limited
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in the
- documentation and/or other materials provided with the distribution.
- * Neither the name of the Linaro nor the
- names of its contributors may be used to endorse or promote products
- derived from this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
-ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
-DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-All ARM-authored routines are under the modified BSD license:
-
-Copyright (c) 2014 ARM Ltd
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in the
- documentation and/or other materials provided with the distribution.
- * Neither the name of the Linaro nor the
- names of its contributors may be used to endorse or promote products
- derived from this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
-ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
-DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-All third party routines are under a GPL compatible license.
-
-== Notes and Limitations ==
-Some of the implementations have been collected from other
-projects and have a variety of licenses and copyright holders.
-
-== Style ==
-Assembly code attempts to follow the GLIBC coding conventions. They
-are:
- * Copyright headers in C style comment blocks
- * Instructions indented with one tab
- * Operands indented with one tab
- * Text is wrapped at 70 characters
- * End of line comments are fine
diff --git a/contrib/cortex-strings/autogen.sh b/contrib/cortex-strings/autogen.sh
deleted file mode 100755
--- a/contrib/cortex-strings/autogen.sh
+++ /dev/null
@@ -1,69 +0,0 @@
-#!/bin/sh
-#
-# autogen.sh glue for hplip
-#
-# HPLIP used to have five or so different autotools trees. Upstream
-# has reduced it to two. Still, this script is capable of cleaning
-# just about any possible mess of autoconf files.
-#
-# BE CAREFUL with trees that are not completely automake-generated,
-# this script deletes all Makefile.in files it can find.
-#
-# Requires: automake 1.9, autoconf 2.57+
-# Conflicts: autoconf 2.13
-set -e
-
-# Refresh GNU autotools toolchain.
-echo Cleaning autotools files...
-find . -type d -name autom4te.cache -print0 | xargs -0 rm -rf  # no trailing "\;": that is find -exec syntax and would reach rm as an operand named ";"
-find . -type f \( -name missing -o -name install-sh -o -name mkinstalldirs \
- -o -name depcomp -o -name ltmain.sh -o -name configure \
- -o -name config.sub -o -name config.guess \
- -o -name Makefile.in \) -print0 | xargs -0 rm -f
-
-echo Running autoreconf...
-autoreconf --force --install
-
-# For the Debian package build
-test -d debian && {
- # link these in Debian builds
- rm -f config.sub config.guess
- ln -s /usr/share/misc/config.sub .
- ln -s /usr/share/misc/config.guess .
-
- # refresh list of executable scripts, to avoid possible breakage if
- # upstream tarball does not include the file or if it is mispackaged
- # for whatever reason.
- [ "$1" = "updateexec" ] && {
- echo Generating list of executable files...
- rm -f debian/executable.files
- find . -type f -perm /111 ! -name '.*' -fprint debian/executable.files  # "/111" = any execute bit set; the older "+111" spelling is rejected by current GNU find
- }
-
- # Remove any files in upstream tarball that we don't have in the Debian
- # package (because diff cannot remove files)
- version=`dpkg-parsechangelog | awk '/Version:/ { print $2 }' | sed -e 's/-[^-]\+$//'`
- source=`dpkg-parsechangelog | awk '/Source:/ { print $2 }' | tr -d ' '`
- if test -r ../${source}_${version}.orig.tar.gz ; then
- echo Generating list of files that should be removed...
- rm -f debian/deletable.files
- touch debian/deletable.files
- [ -e debian/tmp ] && rm -rf debian/tmp
- mkdir debian/tmp
- ( cd debian/tmp ; tar -zxf ../../../${source}_${version}.orig.tar.gz )
- find debian/tmp/ -type f ! -name '.*' -print0 | xargs -0 -ri echo '{}' | \
- while read -r i ; do
- if test -e "${i}" ; then
- filename=$(echo "${i}" | sed -e 's#.*debian/tmp/[^/]\+/##')
- test -e "${filename}" || echo "${filename}" >>debian/deletable.files
- fi
- done
- rm -fr debian/tmp
- else
- echo Emptying list of files that should be deleted...
- rm -f debian/deletable.files
- touch debian/deletable.files
- fi
-}
-
-exit 0
diff --git a/contrib/cortex-strings/benchmarks/dhry/dhry.h b/contrib/cortex-strings/benchmarks/dhry/dhry.h
deleted file mode 100644
--- a/contrib/cortex-strings/benchmarks/dhry/dhry.h
+++ /dev/null
@@ -1,311 +0,0 @@
-/*
- **************************************************************************
- * DHRYSTONE 2.1 BENCHMARK PC VERSION
- **************************************************************************
- *
- * "DHRYSTONE" Benchmark Program
- * -----------------------------
- *
- * Version: C, Version 2.1
- *
- * File: dhry.h (part 1 of 3)
- *
- * Date: May 25, 1988
- *
- * Author: Reinhold P. Weicker
- * Siemens AG, AUT E 51
- * Postfach 3220
- * 8520 Erlangen
- * Germany (West)
- * Phone: [+49]-9131-7-20330
- * (8-17 Central European Time)
- * Usenet: ..!mcsun!unido!estevax!weicker
- *
- * Original Version (in Ada) published in
- * "Communications of the ACM" vol. 27., no. 10 (Oct. 1984),
- * pp. 1013 - 1030, together with the statistics
- * on which the distribution of statements etc. is based.
- *
- * In this C version, the following C library functions are used:
- * - strcpy, strcmp (inside the measurement loop)
- * - printf, scanf (outside the measurement loop)
- * In addition, Berkeley UNIX system calls "times ()" or "time ()"
- * are used for execution time measurement. For measurements
- * on other systems, these calls have to be changed.
- *
- * Collection of Results:
- * Reinhold Weicker (address see above) and
- *
- * Rick Richardson
- * PC Research. Inc.
- * 94 Apple Orchard Drive
- * Tinton Falls, NJ 07724
- * Phone: (201) 389-8963 (9-17 EST)
- * Usenet: ...!uunet!pcrat!rick
- *
- * Please send results to Rick Richardson and/or Reinhold Weicker.
- * Complete information should be given on hardware and software used.
- * Hardware information includes: Machine type, CPU, type and size
- * of caches; for microprocessors: clock frequency, memory speed
- * (number of wait states).
- * Software information includes: Compiler (and runtime library)
- * manufacturer and version, compilation switches, OS version.
- * The Operating System version may give an indication about the
- * compiler; Dhrystone itself performs no OS calls in the measurement
- * loop.
- *
- * The complete output generated by the program should be mailed
- * such that at least some checks for correctness can be made.
- *
- **************************************************************************
- *
- * This version has changes made by Roy Longbottom to conform to a common
- * format for a series of standard benchmarks for PCs:
- *
- * Running time greater than 5 seconds due to inaccuracy of the PC clock.
- *
- * Automatic adjustment of run time, no manually inserted parameters.
- *
- * Initial display of calibration times to confirm linearity.
- *
- * Display of results within one screen (or at a slow speed as the test
- * progresses) so that it can be seen to have run successfully.
- *
- * Facilities to type in details of system used etc.
- *
- * All results and details appended to a results file.
- *
- *
- * Roy Longbottom
- * 101323.2241@compuserve.com
- *
- **************************************************************************
- *
- * For details of history, changes, other defines, benchmark construction
- * statistics see official versions from ftp.nosc.mil/pub/aburto where
- * the latest table of results (dhry.tbl) are available. See also
- * netlib@ornl.gov
- *
- **************************************************************************
- *
- * Defines: The following "Defines" are possible:
- * -DREG=register (default: Not defined)
- * As an approximation to what an average C programmer
- * might do, the "register" storage class is applied
- * (if enabled by -DREG=register)
- * - for local variables, if they are used (dynamically)
- * five or more times
- * - for parameters if they are used (dynamically)
- * six or more times
- * Note that an optimal "register" strategy is
- * compiler-dependent, and that "register" declarations
- * do not necessarily lead to faster execution.
- * -DNOSTRUCTASSIGN (default: Not defined)
- * Define if the C compiler does not support
- * assignment of structures.
- * -DNOENUM (default: Not defined)
- * Define if the C compiler does not support
- * enumeration types.
- ***************************************************************************
- *
- * Compilation model and measurement (IMPORTANT):
- *
- * This C version of Dhrystone consists of three files:
- * - dhry.h (this file, containing global definitions and comments)
- * - dhry_1.c (containing the code corresponding to Ada package Pack_1)
- * - dhry_2.c (containing the code corresponding to Ada package Pack_2)
- *
- * The following "ground rules" apply for measurements:
- * - Separate compilation
- * - No procedure merging
- * - Otherwise, compiler optimizations are allowed but should be indicated
- * - Default results are those without register declarations
- * See the companion paper "Rationale for Dhrystone Version 2" for a more
- * detailed discussion of these ground rules.
- *
- * For 16-Bit processors (e.g. 80186, 80286), times for all compilation
- * models ("small", "medium", "large" etc.) should be given if possible,
- * together with a definition of these models for the compiler system used.
- *
- **************************************************************************
- * Examples of Pentium Results
- *
- * Dhrystone Benchmark Version 2.1 (Language: C)
- *
- * Month run 4/1996
- * PC model Escom
- * CPU Pentium
- * Clock MHz 100
- * Cache 256K
- * Options Neptune chipset
- * OS/DOS Windows 95
- * Compiler Watcom C/ C++ 10.5 Win386
- * OptLevel -otexan -zp8 -fp5 -5r
- * Run by Roy Longbottom
- * From UK
- * Mail 101323.2241@compuserve.com
- *
- * Final values (* implementation-dependent):
- *
- * Int_Glob: O.K. 5
- * Bool_Glob: O.K. 1
- * Ch_1_Glob: O.K. A
- * Ch_2_Glob: O.K. B
- * Arr_1_Glob[8]: O.K. 7
- * Arr_2_Glob8/7: O.K. 1600010
- * Ptr_Glob->
- * Ptr_Comp: * 98008
- * Discr: O.K. 0
- * Enum_Comp: O.K. 2
- * Int_Comp: O.K. 17
- * Str_Comp: O.K. DHRYSTONE PROGRAM, SOME STRING
- * Next_Ptr_Glob->
- * Ptr_Comp: * 98008 same as above
- * Discr: O.K. 0
- * Enum_Comp: O.K. 1
- * Int_Comp: O.K. 18
- * Str_Comp: O.K. DHRYSTONE PROGRAM, SOME STRING
- * Int_1_Loc: O.K. 5
- * Int_2_Loc: O.K. 13
- * Int_3_Loc: O.K. 7
- * Enum_Loc: O.K. 1
- * Str_1_Loc: O.K. DHRYSTONE PROGRAM, 1'ST STRING
- * Str_2_Loc: O.K. DHRYSTONE PROGRAM, 2'ND STRING
- *
- * Register option Selected.
- *
- * Microseconds 1 loop: 4.53
- * Dhrystones / second: 220690
- * VAX MIPS rating: 125.61
- *
- *
- * Dhrystone Benchmark Version 2.1 (Language: C)
- *
- * Month run 4/1996
- * PC model Escom
- * CPU Pentium
- * Clock MHz 100
- * Cache 256K
- * Options Neptune chipset
- * OS/DOS Windows 95
- * Compiler Watcom C/ C++ 10.5 Win386
- * OptLevel No optimisation
- * Run by Roy Longbottom
- * From UK
- * Mail 101323.2241@compuserve.com
- *
- * Final values (* implementation-dependent):
- *
- * Int_Glob: O.K. 5
- * Bool_Glob: O.K. 1
- * Ch_1_Glob: O.K. A
- * Ch_2_Glob: O.K. B
- * Arr_1_Glob[8]: O.K. 7
- * Arr_2_Glob8/7: O.K. 320010
- * Ptr_Glob->
- * Ptr_Comp: * 98004
- * Discr: O.K. 0
- * Enum_Comp: O.K. 2
- * Int_Comp: O.K. 17
- * Str_Comp: O.K. DHRYSTONE PROGRAM, SOME STRING
- * Next_Ptr_Glob->
- * Ptr_Comp: * 98004 same as above
- * Discr: O.K. 0
- * Enum_Comp: O.K. 1
- * Int_Comp: O.K. 18
- * Str_Comp: O.K. DHRYSTONE PROGRAM, SOME STRING
- * Int_1_Loc: O.K. 5
- * Int_2_Loc: O.K. 13
- * Int_3_Loc: O.K. 7
- * Enum_Loc: O.K. 1
- * Str_1_Loc: O.K. DHRYSTONE PROGRAM, 1'ST STRING
- * Str_2_Loc: O.K. DHRYSTONE PROGRAM, 2'ND STRING
- *
- * Register option Not selected.
- *
- * Microseconds 1 loop: 20.06
- * Dhrystones / second: 49844
- * VAX MIPS rating: 28.37
- *
- **************************************************************************
- */
-
-/* Compiler and system dependent definitions: */
-
-#ifndef TIME
-#define TIMES
-#endif
- /* Use times(2) time function unless */
- /* explicitly defined otherwise */
-
-#ifdef TIMES
-/* #include <sys/types.h>
- #include <sys/times.h> */
- /* for "times" */
-#endif
-
-#define Mic_secs_Per_Second 1000000.0
- /* Berkeley UNIX C returns process times in seconds/HZ */
-
-#ifdef NOSTRUCTASSIGN
-#define structassign(d, s) memcpy(&(d), &(s), sizeof(d))
-#else
-#define structassign(d, s) d = s
-#endif
-
-#ifdef NOENUM
-#define Ident_1 0
-#define Ident_2 1
-#define Ident_3 2
-#define Ident_4 3
-#define Ident_5 4
- typedef int Enumeration;
-#else
- typedef enum {Ident_1, Ident_2, Ident_3, Ident_4, Ident_5}
- Enumeration;
-#endif
- /* for boolean and enumeration types in Ada, Pascal */
-
-/* General definitions: */
-
-#include <stdio.h>
-#include <string.h>
-
- /* for strcpy, strcmp */
-
-#define Null 0
- /* Value of a Null pointer */
-#define true 1
-#define false 0
-
-typedef int One_Thirty;
-typedef int One_Fifty;
-typedef char Capital_Letter;
-typedef int Boolean;
-typedef char Str_30 [31];
-typedef int Arr_1_Dim [50];
-typedef int Arr_2_Dim [50] [50];
-
-typedef struct record  /* record with a fixed header followed by a union of three alternative layouts */
- {
- struct record *Ptr_Comp;  /* pointer to another record of this same type */
- Enumeration Discr;  /* presumably the discriminant selecting the active union member -- confirm against dhry_1.c */
- union {
- struct {  /* layout 1: enumeration, integer and string */
- Enumeration Enum_Comp;
- int Int_Comp;
- char Str_Comp [31];  /* 31 chars, the same size as the Str_30 typedef */
- } var_1;
- struct {  /* layout 2: enumeration and string */
- Enumeration E_Comp_2;
- char Str_2_Comp [31];
- } var_2;
- struct {  /* layout 3: two single characters */
- char Ch_1_Comp;
- char Ch_2_Comp;
- } var_3;
- } variant;
- } Rec_Type, *Rec_Pointer;  /* Rec_Type is the record itself, Rec_Pointer a pointer to it */
-
-
-
diff --git a/contrib/cortex-strings/benchmarks/dhry/dhry_1.c b/contrib/cortex-strings/benchmarks/dhry/dhry_1.c
deleted file mode 100644
--- a/contrib/cortex-strings/benchmarks/dhry/dhry_1.c
+++ /dev/null
@@ -1,778 +0,0 @@
-/*
- *************************************************************************
- *
- * "DHRYSTONE" Benchmark Program
- * -----------------------------
- *
- * Version: C, Version 2.1
- *
- * File: dhry_1.c (part 2 of 3)
- *
- * Date: May 25, 1988
- *
- * Author: Reinhold P. Weicker
- *
- *************************************************************************
- */
-
- #include <time.h>
- #include <stdlib.h>
- #include <stdio.h>
- #include "dhry.h"
- /*COMPILER COMPILER COMPILER COMPILER COMPILER COMPILER COMPILER*/
-
- #ifdef COW
- #define compiler "Watcom C/C++ 10.5 Win386"
- #define options " -otexan -zp8 -5r -ms"
- #endif
- #ifdef CNW
- #define compiler "Watcom C/C++ 10.5 Win386"
- #define options " No optimisation"
- #endif
- #ifdef COD
- #define compiler "Watcom C/C++ 10.5 Dos4GW"
- #define options " -otexan -zp8 -5r -ms"
- #endif
- #ifdef CND
- #define compiler "Watcom C/C++ 10.5 Dos4GW"
- #define options " No optimisation"
- #endif
- #ifdef CONT
- #define compiler "Watcom C/C++ 10.5 Win32NT"
- #define options " -otexan -zp8 -5r -ms"
- #endif
- #ifdef CNNT
- #define compiler "Watcom C/C++ 10.5 Win32NT"
- #define options " No optimisation"
- #endif
- #ifdef COO2
- #define compiler "Watcom C/C++ 10.5 OS/2-32"
- #define options " -otexan -zp8 -5r -ms"
- #endif
- #ifdef CNO2
- #define compiler "Watcom C/C++ 10.5 OS/2-32"
- #define options " No optimisation"
- #endif
-
-
-/* Global Variables: */
-
-Rec_Pointer Ptr_Glob,
- Next_Ptr_Glob;
-int Int_Glob;
- Boolean Bool_Glob;
- char Ch_1_Glob,
- Ch_2_Glob;
- int Arr_1_Glob [50];
- int Arr_2_Glob [50] [50];
- int getinput = 1;
-
-
- char Reg_Define[100] = "Register option Selected.";
-
- Enumeration Func_1 (Capital_Letter Ch_1_Par_Val,
- Capital_Letter Ch_2_Par_Val);
- /*
- forward declaration necessary since Enumeration may not simply be int
- */
-
- #ifndef ROPT
- #define REG
- /* REG becomes defined as empty */
- /* i.e. no register variables */
- #else
- #define REG register
- #endif
-
- void Proc_1 (REG Rec_Pointer Ptr_Val_Par);
- void Proc_2 (One_Fifty *Int_Par_Ref);
- void Proc_3 (Rec_Pointer *Ptr_Ref_Par);
- void Proc_4 ();
- void Proc_5 ();
- void Proc_6 (Enumeration Enum_Val_Par, Enumeration *Enum_Ref_Par);
- void Proc_7 (One_Fifty Int_1_Par_Val, One_Fifty Int_2_Par_Val,
- One_Fifty *Int_Par_Ref);
- void Proc_8 (Arr_1_Dim Arr_1_Par_Ref, Arr_2_Dim Arr_2_Par_Ref,
- int Int_1_Par_Val, int Int_2_Par_Val);
-
- Boolean Func_2 (Str_30 Str_1_Par_Ref, Str_30 Str_2_Par_Ref);
-
-
- /* variables for time measurement: */
-
- #define Too_Small_Time 2
- /* Measurements should last at least 2 seconds */
-
- double Begin_Time,
- End_Time,
- User_Time;
-
- double Microseconds,
- Dhrystones_Per_Second,
- Vax_Mips;
-
- /* end of variables for time measurement */
-
-
- void main (int argc, char *argv[])
- /*****/
-
- /* main program, corresponds to procedures */
- /* Main and Proc_0 in the Ada version */
- {
- double dtime();
-
- One_Fifty Int_1_Loc;
- REG One_Fifty Int_2_Loc;
- One_Fifty Int_3_Loc;
- REG char Ch_Index;
- Enumeration Enum_Loc;
- Str_30 Str_1_Loc;
- Str_30 Str_2_Loc;
- REG int Run_Index;
- REG int Number_Of_Runs;
- int endit, count = 10;
- FILE *Ap;
- char general[9][80] = {" "};
-
- /* Initializations */
- if (argc > 1)
- {
- switch (argv[1][0])
- {
- case 'N':
- getinput = 0;
- break;
- case 'n':
- getinput = 0;
- break;
- }
- }
-
- if ((Ap = fopen("Dhry.txt","a+")) == NULL)
- {
- printf("Can not open Dhry.txt\n\n");
- printf("Press any key\n");
- exit(1);
- }
-
-/***********************************************************************
- * Change for compiler and optimisation used *
- ***********************************************************************/
-
- Next_Ptr_Glob = (Rec_Pointer) malloc (sizeof (Rec_Type));
- Ptr_Glob = (Rec_Pointer) malloc (sizeof (Rec_Type));
-
- Ptr_Glob->Ptr_Comp = Next_Ptr_Glob;
- Ptr_Glob->Discr = Ident_1;
- Ptr_Glob->variant.var_1.Enum_Comp = Ident_3;
- Ptr_Glob->variant.var_1.Int_Comp = 40;
- strcpy (Ptr_Glob->variant.var_1.Str_Comp,
- "DHRYSTONE PROGRAM, SOME STRING");
- strcpy (Str_1_Loc, "DHRYSTONE PROGRAM, 1'ST STRING");
-
- Arr_2_Glob [8][7] = 10;
- /* Was missing in published program. Without this statement, */
- /* Arr_2_Glob [8][7] would have an undefined value. */
- /* Warning: With 16-Bit processors and Number_Of_Runs > 32000, */
- /* overflow may occur for this array element. */
-
- printf ("\n");
- printf ("Dhrystone Benchmark, Version 2.1 (Language: C or C++)\n");
- printf ("\n");
-
- if (getinput == 0)
- {
- printf ("No run time input data\n\n");
- }
- else
- {
- printf ("With run time input data\n\n");
- }
-
- printf ("Compiler %s\n", compiler);
- printf ("Optimisation %s\n", options);
- #ifdef ROPT
- printf ("Register option selected\n\n");
- #else
- printf ("Register option not selected\n\n");
- strcpy(Reg_Define, "Register option Not selected.");
- #endif
-
- /*
- if (Reg)
- {
- printf ("Program compiled with 'register' attribute\n");
- printf ("\n");
- }
- else
- {
- printf ("Program compiled without 'register' attribute\n");
- printf ("\n");
- }
-
- printf ("Please give the number of runs through the benchmark: ");
- {
- int n;
- scanf ("%d", &n);
- Number_Of_Runs = n;
- }
- printf ("\n");
- printf ("Execution starts, %d runs through Dhrystone\n",
- Number_Of_Runs);
- */
-
- Number_Of_Runs = 5000;
-
- do
- {
-
- Number_Of_Runs = Number_Of_Runs * 2;
- count = count - 1;
- Arr_2_Glob [8][7] = 10;
-
- /***************/
- /* Start timer */
- /***************/
-
- Begin_Time = dtime();
-
- for (Run_Index = 1; Run_Index <= Number_Of_Runs; ++Run_Index)
- {
-
- Proc_5();
- Proc_4();
- /* Ch_1_Glob == 'A', Ch_2_Glob == 'B', Bool_Glob == true */
- Int_1_Loc = 2;
- Int_2_Loc = 3;
- strcpy (Str_2_Loc, "DHRYSTONE PROGRAM, 2'ND STRING");
- Enum_Loc = Ident_2;
- Bool_Glob = ! Func_2 (Str_1_Loc, Str_2_Loc);
- /* Bool_Glob == 1 */
- while (Int_1_Loc < Int_2_Loc) /* loop body executed once */
- {
- Int_3_Loc = 5 * Int_1_Loc - Int_2_Loc;
- /* Int_3_Loc == 7 */
- Proc_7 (Int_1_Loc, Int_2_Loc, &Int_3_Loc);
- /* Int_3_Loc == 7 */
- Int_1_Loc += 1;
- } /* while */
- /* Int_1_Loc == 3, Int_2_Loc == 3, Int_3_Loc == 7 */
- Proc_8 (Arr_1_Glob, Arr_2_Glob, Int_1_Loc, Int_3_Loc);
- /* Int_Glob == 5 */
- Proc_1 (Ptr_Glob);
- for (Ch_Index = 'A'; Ch_Index <= Ch_2_Glob; ++Ch_Index)
- /* loop body executed twice */
- {
- if (Enum_Loc == Func_1 (Ch_Index, 'C'))
- /* then, not executed */
- {
- Proc_6 (Ident_1, &Enum_Loc);
- strcpy (Str_2_Loc, "DHRYSTONE PROGRAM, 3'RD STRING");
- Int_2_Loc = Run_Index;
- Int_Glob = Run_Index;
- }
- }
- /* Int_1_Loc == 3, Int_2_Loc == 3, Int_3_Loc == 7 */
- Int_2_Loc = Int_2_Loc * Int_1_Loc;
- Int_1_Loc = Int_2_Loc / Int_3_Loc;
- Int_2_Loc = 7 * (Int_2_Loc - Int_3_Loc) - Int_1_Loc;
- /* Int_1_Loc == 1, Int_2_Loc == 13, Int_3_Loc == 7 */
- Proc_2 (&Int_1_Loc);
- /* Int_1_Loc == 5 */
-
- } /* loop "for Run_Index" */
-
- /**************/
- /* Stop timer */
- /**************/
-
- End_Time = dtime();
- User_Time = End_Time - Begin_Time;
-
- printf ("%12.0f runs %6.2f seconds \n",(double) Number_Of_Runs, User_Time);
- if (User_Time > 5)
- {
- count = 0;
- }
- else
- {
- if (User_Time < 0.1)
- {
- Number_Of_Runs = Number_Of_Runs * 5;
- }
- }
- } /* calibrate/run do while */
- while (count >0);
-
- printf ("\n");
- printf ("Final values (* implementation-dependent):\n");
- printf ("\n");
- printf ("Int_Glob: ");
- if (Int_Glob == 5) printf ("O.K. ");
- else printf ("WRONG ");
- printf ("%d ", Int_Glob);
-
- printf ("Bool_Glob: ");
- if (Bool_Glob == 1) printf ("O.K. ");
- else printf ("WRONG ");
- printf ("%d\n", Bool_Glob);
-
- printf ("Ch_1_Glob: ");
- if (Ch_1_Glob == 'A') printf ("O.K. ");
- else printf ("WRONG ");
- printf ("%c ", Ch_1_Glob);
-
- printf ("Ch_2_Glob: ");
- if (Ch_2_Glob == 'B') printf ("O.K. ");
- else printf ("WRONG ");
- printf ("%c\n", Ch_2_Glob);
-
- printf ("Arr_1_Glob[8]: ");
- if (Arr_1_Glob[8] == 7) printf ("O.K. ");
- else printf ("WRONG ");
- printf ("%d ", Arr_1_Glob[8]);
-
- printf ("Arr_2_Glob8/7: ");
- if (Arr_2_Glob[8][7] == Number_Of_Runs + 10)
- printf ("O.K. ");
- else printf ("WRONG ");
- printf ("%10d\n", Arr_2_Glob[8][7]);
-
- printf ("Ptr_Glob-> ");
- printf (" Ptr_Comp: * %d\n", (int) Ptr_Glob->Ptr_Comp);
-
- printf (" Discr: ");
- if (Ptr_Glob->Discr == 0) printf ("O.K. ");
- else printf ("WRONG ");
- printf ("%d ", Ptr_Glob->Discr);
-
- printf ("Enum_Comp: ");
- if (Ptr_Glob->variant.var_1.Enum_Comp == 2)
- printf ("O.K. ");
- else printf ("WRONG ");
- printf ("%d\n", Ptr_Glob->variant.var_1.Enum_Comp);
-
- printf (" Int_Comp: ");
- if (Ptr_Glob->variant.var_1.Int_Comp == 17) printf ("O.K. ");
- else printf ("WRONG ");
- printf ("%d ", Ptr_Glob->variant.var_1.Int_Comp);
-
- printf ("Str_Comp: ");
- if (strcmp(Ptr_Glob->variant.var_1.Str_Comp,
- "DHRYSTONE PROGRAM, SOME STRING") == 0)
- printf ("O.K. ");
- else printf ("WRONG ");
- printf ("%s\n", Ptr_Glob->variant.var_1.Str_Comp);
-
- printf ("Next_Ptr_Glob-> ");
- printf (" Ptr_Comp: * %d", (int) Next_Ptr_Glob->Ptr_Comp);
- printf (" same as above\n");
-
- printf (" Discr: ");
- if (Next_Ptr_Glob->Discr == 0)
- printf ("O.K. ");
- else printf ("WRONG ");
- printf ("%d ", Next_Ptr_Glob->Discr);
-
- printf ("Enum_Comp: ");
- if (Next_Ptr_Glob->variant.var_1.Enum_Comp == 1)
- printf ("O.K. ");
- else printf ("WRONG ");
- printf ("%d\n", Next_Ptr_Glob->variant.var_1.Enum_Comp);
-
- printf (" Int_Comp: ");
- if (Next_Ptr_Glob->variant.var_1.Int_Comp == 18)
- printf ("O.K. ");
- else printf ("WRONG ");
- printf ("%d ", Next_Ptr_Glob->variant.var_1.Int_Comp);
-
- printf ("Str_Comp: ");
- if (strcmp(Next_Ptr_Glob->variant.var_1.Str_Comp,
- "DHRYSTONE PROGRAM, SOME STRING") == 0)
- printf ("O.K. ");
- else printf ("WRONG ");
- printf ("%s\n", Next_Ptr_Glob->variant.var_1.Str_Comp);
-
- printf ("Int_1_Loc: ");
- if (Int_1_Loc == 5)
- printf ("O.K. ");
- else printf ("WRONG ");
- printf ("%d ", Int_1_Loc);
-
- printf ("Int_2_Loc: ");
- if (Int_2_Loc == 13)
- printf ("O.K. ");
- else printf ("WRONG ");
- printf ("%d\n", Int_2_Loc);
-
- printf ("Int_3_Loc: ");
- if (Int_3_Loc == 7)
- printf ("O.K. ");
- else printf ("WRONG ");
- printf ("%d ", Int_3_Loc);
-
- printf ("Enum_Loc: ");
- if (Enum_Loc == 1)
- printf ("O.K. ");
- else printf ("WRONG ");
- printf ("%d\n", Enum_Loc);
-
- printf ("Str_1_Loc: ");
- if (strcmp(Str_1_Loc, "DHRYSTONE PROGRAM, 1'ST STRING") == 0)
- printf ("O.K. ");
- else printf ("WRONG ");
- printf ("%s\n", Str_1_Loc);
-
- printf ("Str_2_Loc: ");
- if (strcmp(Str_2_Loc, "DHRYSTONE PROGRAM, 2'ND STRING") == 0)
- printf ("O.K. ");
- else printf ("WRONG ");
- printf ("%s\n", Str_2_Loc);
-
- printf ("\n");
-
-
- if (User_Time < Too_Small_Time)
- {
- printf ("Measured time too small to obtain meaningful results\n");
- printf ("Please increase number of runs\n");
- printf ("\n");
- }
- else
- {
- Microseconds = User_Time * Mic_secs_Per_Second
- / (double) Number_Of_Runs;
- Dhrystones_Per_Second = (double) Number_Of_Runs / User_Time;
- Vax_Mips = Dhrystones_Per_Second / 1757.0;
-
- printf ("Microseconds for one run through Dhrystone: ");
- printf ("%12.2lf \n", Microseconds);
- printf ("Dhrystones per Second: ");
- printf ("%10.0lf \n", Dhrystones_Per_Second);
- printf ("VAX MIPS rating = ");
- printf ("%12.2lf \n",Vax_Mips);
- printf ("\n");
-
-/************************************************************************
- * Type details of hardware, software etc. *
- ************************************************************************/
-
- if (getinput == 1)
- {
- printf ("Enter the following which will be added with results to file DHRY.TXT\n");
- printf ("When submitting a number of results you need only provide details once\n");
- printf ("but a cross reference such as an abbreviated CPU type would be useful.\n");
- printf ("You can kill (exit or close) the program now and no data will be added.\n\n");
-
- printf ("PC Supplier/model ? ");
- gets(general[1]);
-
- printf ("CPU chip ? ");
- gets(general[2]);
-
- printf ("Clock MHz ? ");
- gets(general[3]);
-
- printf ("Cache size ? ");
- gets(general[4]);
-
- printf ("Chipset & H/W options ? ");
- gets(general[5]);
-
- printf ("OS/DOS version ? ");
- gets(general[6]);
-
- printf ("Your name ? ");
- gets(general[7]);
-
- printf ("Company/Location ? ");
- gets(general[8]);
-
- printf ("E-mail address ? ");
- gets(general[0]);
- }
-/************************************************************************
- * Add results to output file Dhry.txt *
- ************************************************************************/
- fprintf (Ap, "-------------------- -----------------------------------"
- "\n");
- fprintf (Ap, "Dhrystone Benchmark Version 2.1 (Language: C++)\n\n");
- fprintf (Ap, "PC model %s\n", general[1]);
- fprintf (Ap, "CPU %s\n", general[2]);
- fprintf (Ap, "Clock MHz %s\n", general[3]);
- fprintf (Ap, "Cache %s\n", general[4]);
- fprintf (Ap, "Options %s\n", general[5]);
- fprintf (Ap, "OS/DOS %s\n", general[6]);
- fprintf (Ap, "Compiler %s\n", compiler);
- fprintf (Ap, "OptLevel %s\n", options);
- fprintf (Ap, "Run by %s\n", general[7]);
- fprintf (Ap, "From %s\n", general[8]);
- fprintf (Ap, "Mail %s\n\n", general[0]);
-
- fprintf (Ap, "Final values (* implementation-dependent):\n");
- fprintf (Ap, "\n");
- fprintf (Ap, "Int_Glob: ");
- if (Int_Glob == 5) fprintf (Ap, "O.K. ");
- else fprintf (Ap, "WRONG ");
- fprintf (Ap, "%d\n", Int_Glob);
-
- fprintf (Ap, "Bool_Glob: ");
- if (Bool_Glob == 1) fprintf (Ap, "O.K. ");
- else fprintf (Ap, "WRONG ");
- fprintf (Ap, "%d\n", Bool_Glob);
-
- fprintf (Ap, "Ch_1_Glob: ");
- if (Ch_1_Glob == 'A') fprintf (Ap, "O.K. ");
- else fprintf (Ap, "WRONG ");
- fprintf (Ap, "%c\n", Ch_1_Glob);
-
- fprintf (Ap, "Ch_2_Glob: ");
- if (Ch_2_Glob == 'B') fprintf (Ap, "O.K. ");
- else fprintf (Ap, "WRONG ");
- fprintf (Ap, "%c\n", Ch_2_Glob);
-
- fprintf (Ap, "Arr_1_Glob[8]: ");
- if (Arr_1_Glob[8] == 7) fprintf (Ap, "O.K. ");
- else fprintf (Ap, "WRONG ");
- fprintf (Ap, "%d\n", Arr_1_Glob[8]);
-
- fprintf (Ap, "Arr_2_Glob8/7: ");
- if (Arr_2_Glob[8][7] == Number_Of_Runs + 10)
- fprintf (Ap, "O.K. ");
- else fprintf (Ap, "WRONG ");
- fprintf (Ap, "%10d\n", Arr_2_Glob[8][7]);
-
- fprintf (Ap, "Ptr_Glob-> \n");
- fprintf (Ap, " Ptr_Comp: * %d\n", (int) Ptr_Glob->Ptr_Comp);
-
- fprintf (Ap, " Discr: ");
- if (Ptr_Glob->Discr == 0) fprintf (Ap, "O.K. ");
- else fprintf (Ap, "WRONG ");
- fprintf (Ap, "%d\n", Ptr_Glob->Discr);
-
- fprintf (Ap, " Enum_Comp: ");
- if (Ptr_Glob->variant.var_1.Enum_Comp == 2)
- fprintf (Ap, "O.K. ");
- else fprintf (Ap, "WRONG ");
- fprintf (Ap, "%d\n", Ptr_Glob->variant.var_1.Enum_Comp);
-
- fprintf (Ap, " Int_Comp: ");
- if (Ptr_Glob->variant.var_1.Int_Comp == 17) fprintf (Ap, "O.K. ");
- else fprintf (Ap, "WRONG ");
- fprintf (Ap, "%d\n", Ptr_Glob->variant.var_1.Int_Comp);
-
- fprintf (Ap, " Str_Comp: ");
- if (strcmp(Ptr_Glob->variant.var_1.Str_Comp,
- "DHRYSTONE PROGRAM, SOME STRING") == 0)
- fprintf (Ap, "O.K. ");
- else fprintf (Ap, "WRONG ");
- fprintf (Ap, "%s\n", Ptr_Glob->variant.var_1.Str_Comp);
-
- fprintf (Ap, "Next_Ptr_Glob-> \n");
- fprintf (Ap, " Ptr_Comp: * %d", (int) Next_Ptr_Glob->Ptr_Comp);
- fprintf (Ap, " same as above\n");
-
- fprintf (Ap, " Discr: ");
- if (Next_Ptr_Glob->Discr == 0)
- fprintf (Ap, "O.K. ");
- else fprintf (Ap, "WRONG ");
- fprintf (Ap, "%d\n", Next_Ptr_Glob->Discr);
-
- fprintf (Ap, " Enum_Comp: ");
- if (Next_Ptr_Glob->variant.var_1.Enum_Comp == 1)
- fprintf (Ap, "O.K. ");
- else fprintf (Ap, "WRONG ");
- fprintf (Ap, "%d\n", Next_Ptr_Glob->variant.var_1.Enum_Comp);
-
- fprintf (Ap, " Int_Comp: ");
- if (Next_Ptr_Glob->variant.var_1.Int_Comp == 18)
- fprintf (Ap, "O.K. ");
- else fprintf (Ap, "WRONG ");
- fprintf (Ap, "%d\n", Next_Ptr_Glob->variant.var_1.Int_Comp);
-
- fprintf (Ap, " Str_Comp: ");
- if (strcmp(Next_Ptr_Glob->variant.var_1.Str_Comp,
- "DHRYSTONE PROGRAM, SOME STRING") == 0)
- fprintf (Ap, "O.K. ");
- else fprintf (Ap, "WRONG ");
- fprintf (Ap, "%s\n", Next_Ptr_Glob->variant.var_1.Str_Comp);
-
- fprintf (Ap, "Int_1_Loc: ");
- if (Int_1_Loc == 5)
- fprintf (Ap, "O.K. ");
- else fprintf (Ap, "WRONG ");
- fprintf (Ap, "%d\n", Int_1_Loc);
-
- fprintf (Ap, "Int_2_Loc: ");
- if (Int_2_Loc == 13)
- fprintf (Ap, "O.K. ");
- else fprintf (Ap, "WRONG ");
- fprintf (Ap, "%d\n", Int_2_Loc);
-
- fprintf (Ap, "Int_3_Loc: ");
- if (Int_3_Loc == 7)
- fprintf (Ap, "O.K. ");
- else fprintf (Ap, "WRONG ");
- fprintf (Ap, "%d\n", Int_3_Loc);
-
- fprintf (Ap, "Enum_Loc: ");
- if (Enum_Loc == 1)
- fprintf (Ap, "O.K. ");
- else fprintf (Ap, "WRONG ");
- fprintf (Ap, "%d\n", Enum_Loc);
-
- fprintf (Ap, "Str_1_Loc: ");
- if (strcmp(Str_1_Loc, "DHRYSTONE PROGRAM, 1'ST STRING") == 0)
- fprintf (Ap, "O.K. ");
- else fprintf (Ap, "WRONG ");
- fprintf (Ap, "%s\n", Str_1_Loc);
-
- fprintf (Ap, "Str_2_Loc: ");
- if (strcmp(Str_2_Loc, "DHRYSTONE PROGRAM, 2'ND STRING") == 0)
- fprintf (Ap, "O.K. ");
- else fprintf (Ap, "WRONG ");
- fprintf (Ap, "%s\n", Str_2_Loc);
-
-
- fprintf (Ap, "\n");
- fprintf(Ap,"%s\n",Reg_Define);
- fprintf (Ap, "\n");
- fprintf(Ap,"Microseconds 1 loop: %12.2lf\n",Microseconds);
- fprintf(Ap,"Dhrystones / second: %10.0lf\n",Dhrystones_Per_Second);
- fprintf(Ap,"VAX MIPS rating: %12.2lf\n\n",Vax_Mips);
- fclose(Ap);
- }
-
- printf ("\n");
- printf ("A new results file will have been created in the same directory as the\n");
- printf (".EXE files if one did not already exist. If you made a mistake on input, \n");
- printf ("you can use a text editor to correct it, delete the results or copy \n");
- printf ("them to a different file name. If you intend to run multiple tests you\n");
- printf ("you may wish to rename DHRY.TXT with a more informative title.\n\n");
- printf ("Please submit feedback and results files as a posting in Section 12\n");
- printf ("or to Roy_Longbottom@compuserve.com\n\n");
-
- if (getinput == 1)
- {
- printf("Press any key to exit\n");
- printf ("\nIf this is displayed you must close the window in the normal way\n");
- }
- }
-
-
- void Proc_1 (REG Rec_Pointer Ptr_Val_Par)
- /******************/
-
- /* executed once */
- {
- REG Rec_Pointer Next_Record = Ptr_Val_Par->Ptr_Comp;
- /* == Ptr_Glob_Next */
- /* Local variable, initialized with Ptr_Val_Par->Ptr_Comp, */
- /* corresponds to "rename" in Ada, "with" in Pascal */
-
- structassign (*Ptr_Val_Par->Ptr_Comp, *Ptr_Glob);
- Ptr_Val_Par->variant.var_1.Int_Comp = 5;
- Next_Record->variant.var_1.Int_Comp
- = Ptr_Val_Par->variant.var_1.Int_Comp;
- Next_Record->Ptr_Comp = Ptr_Val_Par->Ptr_Comp;
- Proc_3 (&Next_Record->Ptr_Comp);
- /* Ptr_Val_Par->Ptr_Comp->Ptr_Comp
- == Ptr_Glob->Ptr_Comp */
- if (Next_Record->Discr == Ident_1)
- /* then, executed */
- {
- Next_Record->variant.var_1.Int_Comp = 6;
- Proc_6 (Ptr_Val_Par->variant.var_1.Enum_Comp,
- &Next_Record->variant.var_1.Enum_Comp);
- Next_Record->Ptr_Comp = Ptr_Glob->Ptr_Comp;
- Proc_7 (Next_Record->variant.var_1.Int_Comp, 10,
- &Next_Record->variant.var_1.Int_Comp);
- }
- else /* not executed */
- structassign (*Ptr_Val_Par, *Ptr_Val_Par->Ptr_Comp);
- } /* Proc_1 */
-
-
- void Proc_2 (One_Fifty *Int_Par_Ref)
- /******************/
- /* executed once */
- /* *Int_Par_Ref == 1, becomes 4 */
-
- {
- One_Fifty Int_Loc;
- Enumeration Enum_Loc;
-
- Int_Loc = *Int_Par_Ref + 10;
- do /* executed once */
- if (Ch_1_Glob == 'A')
- /* then, executed */
- {
- Int_Loc -= 1;
- *Int_Par_Ref = Int_Loc - Int_Glob;
- Enum_Loc = Ident_1;
- } /* if */
- while (Enum_Loc != Ident_1); /* true */
- } /* Proc_2 */
-
-
- void Proc_3 (Rec_Pointer *Ptr_Ref_Par)
- /******************/
- /* executed once */
- /* Ptr_Ref_Par becomes Ptr_Glob */
-
- {
- if (Ptr_Glob != Null)
- /* then, executed */
- *Ptr_Ref_Par = Ptr_Glob->Ptr_Comp;
- Proc_7 (10, Int_Glob, &Ptr_Glob->variant.var_1.Int_Comp);
- } /* Proc_3 */
-
-
-void Proc_4 () /* without parameters */
- /*******/
- /* executed once */
- {
- Boolean Bool_Loc;
-
- Bool_Loc = Ch_1_Glob == 'A';
- Bool_Glob = Bool_Loc | Bool_Glob;
- Ch_2_Glob = 'B';
- } /* Proc_4 */
-
-
- void Proc_5 () /* without parameters */
- /*******/
- /* executed once */
- {
- Ch_1_Glob = 'A';
- Bool_Glob = false;
- } /* Proc_5 */
-
-
- /* Procedure for the assignment of structures, */
- /* if the C compiler doesn't support this feature */
- #ifdef NOSTRUCTASSIGN
- memcpy (d, s, l)
- register char *d;
- register char *s;
- register int l;
- {
- while (l--) *d++ = *s++;
- }
- #endif
-
-
-double dtime()
-{
-
- /* #include <ctype.h> */
-
- #define HZ CLOCKS_PER_SEC
- clock_t tnow;
-
- double q;
- tnow = clock();
- q = (double)tnow / (double)HZ;
- return q;
-}
diff --git a/contrib/cortex-strings/benchmarks/dhry/dhry_2.c b/contrib/cortex-strings/benchmarks/dhry/dhry_2.c
deleted file mode 100644
--- a/contrib/cortex-strings/benchmarks/dhry/dhry_2.c
+++ /dev/null
@@ -1,186 +0,0 @@
- /*
- *************************************************************************
- *
- * "DHRYSTONE" Benchmark Program
- * -----------------------------
- *
- * Version: C, Version 2.1
- *
- * File: dhry_2.c (part 3 of 3)
- *
- * Date: May 25, 1988
- *
- * Author: Reinhold P. Weicker
- *
- *************************************************************************
- */
-
- #include "dhry.h"
-
- #ifndef REG
- #define REG
- /* REG becomes defined as empty */
- /* i.e. no register variables */
- #else
- #define REG register
- #endif
-
- extern int Int_Glob;
- extern char Ch_1_Glob;
-
- Boolean Func_3 (Enumeration Enum_Par_Val);
-
- void Proc_6 (Enumeration Enum_Val_Par, Enumeration *Enum_Ref_Par)
- /*********************************/
- /* executed once */
- /* Enum_Val_Par == Ident_3, Enum_Ref_Par becomes Ident_2 */
-
- {
- *Enum_Ref_Par = Enum_Val_Par;
- if (! Func_3 (Enum_Val_Par))
- /* then, not executed */
- *Enum_Ref_Par = Ident_4;
- switch (Enum_Val_Par)
- {
- case Ident_1:
- *Enum_Ref_Par = Ident_1;
- break;
- case Ident_2:
- if (Int_Glob > 100)
- /* then */
- *Enum_Ref_Par = Ident_1;
- else *Enum_Ref_Par = Ident_4;
- break;
- case Ident_3: /* executed */
- *Enum_Ref_Par = Ident_2;
- break;
- case Ident_4: break;
- case Ident_5:
- *Enum_Ref_Par = Ident_3;
- break;
- } /* switch */
- } /* Proc_6 */
-
-
- void Proc_7 (One_Fifty Int_1_Par_Val, One_Fifty Int_2_Par_Val,
- One_Fifty *Int_Par_Ref)
- /**********************************************/
- /* executed three times */
- /* first call: Int_1_Par_Val == 2, Int_2_Par_Val == 3, */
- /* Int_Par_Ref becomes 7 */
- /* second call: Int_1_Par_Val == 10, Int_2_Par_Val == 5, */
- /* Int_Par_Ref becomes 17 */
- /* third call: Int_1_Par_Val == 6, Int_2_Par_Val == 10, */
- /* Int_Par_Ref becomes 18 */
-
- {
- One_Fifty Int_Loc;
-
- Int_Loc = Int_1_Par_Val + 2;
- *Int_Par_Ref = Int_2_Par_Val + Int_Loc;
- } /* Proc_7 */
-
-
- void Proc_8 (Arr_1_Dim Arr_1_Par_Ref, Arr_2_Dim Arr_2_Par_Ref,
- int Int_1_Par_Val, int Int_2_Par_Val)
- /*********************************************************************/
- /* executed once */
- /* Int_Par_Val_1 == 3 */
- /* Int_Par_Val_2 == 7 */
-
- {
- REG One_Fifty Int_Index;
- REG One_Fifty Int_Loc;
-
- Int_Loc = Int_1_Par_Val + 5;
- Arr_1_Par_Ref [Int_Loc] = Int_2_Par_Val;
- Arr_1_Par_Ref [Int_Loc+1] = Arr_1_Par_Ref [Int_Loc];
- Arr_1_Par_Ref [Int_Loc+30] = Int_Loc;
- for (Int_Index = Int_Loc; Int_Index <= Int_Loc+1; ++Int_Index)
- Arr_2_Par_Ref [Int_Loc] [Int_Index] = Int_Loc;
- Arr_2_Par_Ref [Int_Loc] [Int_Loc-1] += 1;
- Arr_2_Par_Ref [Int_Loc+20] [Int_Loc] = Arr_1_Par_Ref [Int_Loc];
- Int_Glob = 5;
- } /* Proc_8 */
-
-
- Enumeration Func_1 (Capital_Letter Ch_1_Par_Val,
- Capital_Letter Ch_2_Par_Val)
- /*************************************************/
- /* executed three times */
- /* first call: Ch_1_Par_Val == 'H', Ch_2_Par_Val == 'R' */
- /* second call: Ch_1_Par_Val == 'A', Ch_2_Par_Val == 'C' */
- /* third call: Ch_1_Par_Val == 'B', Ch_2_Par_Val == 'C' */
-
- {
- Capital_Letter Ch_1_Loc;
- Capital_Letter Ch_2_Loc;
-
- Ch_1_Loc = Ch_1_Par_Val;
- Ch_2_Loc = Ch_1_Loc;
- if (Ch_2_Loc != Ch_2_Par_Val)
- /* then, executed */
- return (Ident_1);
- else /* not executed */
- {
- Ch_1_Glob = Ch_1_Loc;
- return (Ident_2);
- }
- } /* Func_1 */
-
-
- Boolean Func_2 (Str_30 Str_1_Par_Ref, Str_30 Str_2_Par_Ref)
- /*************************************************/
- /* executed once */
- /* Str_1_Par_Ref == "DHRYSTONE PROGRAM, 1'ST STRING" */
- /* Str_2_Par_Ref == "DHRYSTONE PROGRAM, 2'ND STRING" */
-
- {
- REG One_Thirty Int_Loc;
- Capital_Letter Ch_Loc;
-
- Int_Loc = 2;
- while (Int_Loc <= 2) /* loop body executed once */
- if (Func_1 (Str_1_Par_Ref[Int_Loc],
- Str_2_Par_Ref[Int_Loc+1]) == Ident_1)
- /* then, executed */
- {
- Ch_Loc = 'A';
- Int_Loc += 1;
- } /* if, while */
- if (Ch_Loc >= 'W' && Ch_Loc < 'Z')
- /* then, not executed */
- Int_Loc = 7;
- if (Ch_Loc == 'R')
- /* then, not executed */
- return (true);
- else /* executed */
- {
- if (strcmp (Str_1_Par_Ref, Str_2_Par_Ref) > 0)
- /* then, not executed */
- {
- Int_Loc += 7;
- Int_Glob = Int_Loc;
- return (true);
- }
- else /* executed */
- return (false);
- } /* if Ch_Loc */
- } /* Func_2 */
-
-
- Boolean Func_3 (Enumeration Enum_Par_Val)
- /***************************/
- /* executed once */
- /* Enum_Par_Val == Ident_3 */
-
- {
- Enumeration Enum_Loc;
-
- Enum_Loc = Enum_Par_Val;
- if (Enum_Loc == Ident_3)
- /* then, executed */
- return (true);
- else /* not executed */
- return (false);
- } /* Func_3 */
diff --git a/contrib/cortex-strings/benchmarks/multi/harness.c b/contrib/cortex-strings/benchmarks/multi/harness.c
deleted file mode 100644
--- a/contrib/cortex-strings/benchmarks/multi/harness.c
+++ /dev/null
@@ -1,407 +0,0 @@
-/*
- * Copyright (c) 2011, Linaro Limited
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of the Linaro nor the
- * names of its contributors may be used to endorse or promote products
- * derived from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
- * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
- * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** A simple harness that times how long a string function takes to
- * run.
- */
-
-/* PENDING: Add EPL */
-
-#include <string.h>
-#include <time.h>
-#include <stdint.h>
-#include <stdlib.h>
-#include <stdio.h>
-#include <stdbool.h>
-#include <assert.h>
-#include <unistd.h>
-#include <errno.h>
-
-#define NUM_ELEMS(_x) (sizeof(_x) / sizeof((_x)[0]))
-
-#ifndef VERSION
-#define VERSION "(unknown version)"
-#endif
-
-/** Make sure a function is called by using the return value */
-#define SPOIL(_x) volatile long x = (long)(_x); (void)x
-
-/** Type of functions that can be tested */
-typedef void (*stub_t)(void *dest, void *src, size_t n);
-
-/** Meta data about one test */
-struct test
-{
- /** Test name */
- const char *name;
- /** Function to test */
- stub_t stub;
-};
-
-/** Flush the cache by reading a chunk of memory */
-static void empty(volatile char *against)
-{
- /* We know that there's a 16 k cache with 64 byte lines giving
- a total of 256 lines. Read randomly from 256*5 places should
- flush everything */
- int offset = (1024 - 256)*1024;
-
- for (int i = offset; i < offset + 16*1024*3; i += 64)
- {
- against[i];
- }
-}
-
-/** Stub that does nothing. Used for calibrating */
-static void xbounce(void *dest, void *src, size_t n)
-{
- SPOIL(0);
-}
-
-/** Stub that calls memcpy */
-static void xmemcpy(void *dest, void *src, size_t n)
-{
- SPOIL(memcpy(dest, src, n));
-}
-
-/** Stub that calls memset */
-static void xmemset(void *dest, void *src, size_t n)
-{
- SPOIL(memset(dest, 0, n));
-}
-
-/** Stub that calls memcmp */
-static void xmemcmp(void *dest, void *src, size_t n)
-{
- SPOIL(memcmp(dest, src, n));
-}
-
-/** Stub that calls strcpy */
-static void xstrcpy(void *dest, void *src, size_t n)
-{
- SPOIL(strcpy(dest, src));
-}
-
-/** Stub that calls strlen */
-static void xstrlen(void *dest, void *src, size_t n)
-{
- SPOIL(strlen(dest));
-}
-
-/** Stub that calls strcmp */
-static void xstrcmp(void *dest, void *src, size_t n)
-{
- SPOIL(strcmp(dest, src));
-}
-
-/** Stub that calls strchr */
-static void xstrchr(void *dest, void *src, size_t n)
-{
- /* Put the character at the end of the string and before the null */
- ((char *)src)[n-1] = 32;
- SPOIL(strchr(src, 32));
-}
-
-/** Stub that calls memchr */
-static void xmemchr(void *dest, void *src, size_t n)
-{
- /* Put the character at the end of the block */
- ((char *)src)[n-1] = 32;
- SPOIL(memchr(src, 32, n));
-}
-
-/** All functions that can be tested */
-static const struct test tests[] =
- {
- { "bounce", xbounce },
- { "memchr", xmemchr },
- { "memcpy", xmemcpy },
- { "memset", xmemset },
- { "memcmp", xmemcmp },
- { "strchr", xstrchr },
- { "strcmp", xstrcmp },
- { "strcpy", xstrcpy },
- { "strlen", xstrlen },
- { NULL }
- };
-
-/** Show basic usage */
-static void usage(const char* name)
-{
- printf("%s %s: run a string related benchmark.\n"
- "usage: %s [-c block-size] [-l loop-count] [-a alignment|src_alignment:dst_alignment] [-f] [-t test-name] [-r run-id]\n"
- , name, VERSION, name);
-
- printf("Tests:");
-
- for (const struct test *ptest = tests; ptest->name != NULL; ptest++)
- {
- printf(" %s", ptest->name);
- }
-
- printf("\n");
-
- exit(-1);
-}
-
-/** Find the test by name */
-static const struct test *find_test(const char *name)
-{
- if (name == NULL)
- {
- return tests + 0;
- }
- else
- {
- for (const struct test *p = tests; p->name != NULL; p++)
- {
- if (strcmp(p->name, name) == 0)
- {
- return p;
- }
- }
- }
-
- return NULL;
-}
-
-#define MIN_BUFFER_SIZE 1024*1024
-#define MAX_ALIGNMENT 256
-
-/** Take a pointer and ensure that the lower bits == alignment */
-static char *realign(char *p, int alignment)
-{
- uintptr_t pp = (uintptr_t)p;
- pp = (pp + (MAX_ALIGNMENT - 1)) & ~(MAX_ALIGNMENT - 1);
- pp += alignment;
-
- return (char *)pp;
-}
-
-static int parse_int_arg(const char *arg, const char *exe_name)
-{
- long int ret;
-
- errno = 0;
- ret = strtol(arg, NULL, 0);
-
- if (errno)
- {
- usage(exe_name);
- }
-
- return (int)ret;
-}
-
-static void parse_alignment_arg(const char *arg, const char *exe_name,
- int *src_alignment, int *dst_alignment)
-{
- long int ret;
- char *endptr;
-
- errno = 0;
- ret = strtol(arg, &endptr, 0);
-
- if (errno)
- {
- usage(exe_name);
- }
-
- *src_alignment = (int)ret;
-
- if (ret > 256 || ret < 1)
- {
- printf("Alignment should be in the range [1, 256].\n");
- usage(exe_name);
- }
-
- if (ret == 256)
- ret = 0;
-
- if (endptr && *endptr == ':')
- {
- errno = 0;
- ret = strtol(endptr + 1, NULL, 0);
-
- if (errno)
- {
- usage(exe_name);
- }
-
- if (ret > 256 || ret < 1)
- {
- printf("Alignment should be in the range [1, 256].\n");
- usage(exe_name);
- }
-
- if (ret == 256)
- ret = 0;
- }
-
- *dst_alignment = (int)ret;
-}
-
-/** Setup and run a test */
-int main(int argc, char **argv)
-{
- /* Size of src and dest buffers */
- size_t buffer_size = MIN_BUFFER_SIZE;
-
- /* Number of bytes per call */
- int count = 31;
- /* Number of times to run */
- int loops = 10000000;
- /* True to flush the cache each time */
- int flush = 0;
- /* Name of the test */
- const char *name = NULL;
- /* Alignment of buffers */
- int src_alignment = 8;
- int dst_alignment = 8;
- /* Name of the run */
- const char *run_id = "0";
-
- int opt;
-
- while ((opt = getopt(argc, argv, "c:l:ft:r:hva:")) > 0)
- {
- switch (opt)
- {
- case 'c':
- count = parse_int_arg(optarg, argv[0]);
- break;
- case 'l':
- loops = parse_int_arg(optarg, argv[0]);
- break;
- case 'a':
- parse_alignment_arg(optarg, argv[0], &src_alignment, &dst_alignment);
- break;
- case 'f':
- flush = 1;
- break;
- case 't':
- name = strdup(optarg);
- break;
- case 'r':
- run_id = strdup(optarg);
- break;
- case 'h':
- usage(argv[0]);
- break;
- default:
- usage(argv[0]);
- break;
- }
- }
-
- /* Find the test by name */
- const struct test *ptest = find_test(name);
-
- if (ptest == NULL)
- {
- usage(argv[0]);
- }
-
- if (count + MAX_ALIGNMENT * 2 > MIN_BUFFER_SIZE)
- {
- buffer_size = count + MAX_ALIGNMENT * 2;
- }
-
- /* Buffers to read and write from */
- char *src = malloc(buffer_size);
- char *dest = malloc(buffer_size);
-
- assert(src != NULL && dest != NULL);
-
- src = realign(src, src_alignment);
- dest = realign(dest, dst_alignment);
-
- /* Fill the buffer with non-zero, reproducable random data */
- srandom(1539);
-
- for (int i = 0; i < buffer_size; i++)
- {
- src[i] = (char)random() | 1;
- dest[i] = src[i];
- }
-
- /* Make sure the buffers are null terminated for any string tests */
- src[count] = 0;
- dest[count] = 0;
-
- struct timespec start, end;
- int err = clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &start);
- assert(err == 0);
-
- /* Preload */
- stub_t stub = ptest->stub;
-
- /* Run two variants to reduce the cost of testing for the flush */
- if (flush == 0)
- {
- for (int i = 0; i < loops; i++)
- {
- (*stub)(dest, src, count);
- }
- }
- else
- {
- for (int i = 0; i < loops; i++)
- {
- (*stub)(dest, src, count);
- empty(dest);
- }
- }
-
- err = clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &end);
- assert(err == 0);
-
- /* Drop any leading path and pull the variant name out of the executable */
- char *variant = strrchr(argv[0], '/');
-
- if (variant == NULL)
- {
- variant = argv[0];
- }
-
- variant = strstr(variant, "try-");
- assert(variant != NULL);
-
- double elapsed = (end.tv_sec - start.tv_sec) + (end.tv_nsec - start.tv_nsec) * 1e-9;
- /* Estimate the bounce time. Measured on a Panda. */
- double bounced = 0.448730 * loops / 50000000;
-
- /* Dump both machine and human readable versions */
- printf("%s:%s:%u:%u:%d:%d:%s:%.6f: took %.6f s for %u calls to %s of %u bytes. ~%.3f MB/s corrected.\n",
- variant + 4, ptest->name,
- count, loops, src_alignment, dst_alignment, run_id,
- elapsed,
- elapsed, loops, ptest->name, count,
- (double)loops*count/(elapsed - bounced)/(1024*1024));
-
- return 0;
-}
diff --git a/contrib/cortex-strings/configure.ac b/contrib/cortex-strings/configure.ac
deleted file mode 100644
--- a/contrib/cortex-strings/configure.ac
+++ /dev/null
@@ -1,88 +0,0 @@
-# Copyright (c) 2011-2012, Linaro Limited
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are met:
-# * Redistributions of source code must retain the above copyright
-# notice, this list of conditions and the following disclaimer.
-# * Redistributions in binary form must reproduce the above copyright
-# notice, this list of conditions and the following disclaimer in the
-# documentation and/or other materials provided with the distribution.
-# * Neither the name of the Linaro nor the
-# names of its contributors may be used to endorse or promote products
-# derived from this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
-# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
-# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-AC_INIT(cortex-strings, 1.1-2012.06~dev)
-AM_INIT_AUTOMAKE(foreign subdir-objects color-tests dist-bzip2)
-AC_CONFIG_HEADERS([config.h])
-AC_CONFIG_FILES(Makefile)
-AC_CANONICAL_HOST
-AM_PROG_AS
-AC_PROG_CC
-AC_PROG_LIBTOOL
-
-default_submachine=
-
-case $host in
-aarch64*-*-*)
- arch=aarch64
- ;;
-arm*-*-*)
- arch=aarch32
- default_submachine=cortex-a9
- ;;
-x86_64-*-*-*)
- arch=generic
- ;;
-*)
- AC_MSG_ERROR([unknown architecture $host])
- ;;
-esac
-
-AM_CONDITIONAL([HOST_AARCH32], [test x$arch = xaarch32])
-AM_CONDITIONAL([HOST_AARCH64], [test x$arch = xaarch64])
-AM_CONDITIONAL([HOST_GENERIC], [test x$arch = xgeneric])
-
-AC_ARG_WITH([cpu],
- AS_HELP_STRING([--with-cpu=CPU],
- [select code for CPU variant @<:@default=cortex-a9@:>@]]),
- [dnl
- case "$withval" in
- yes|'') AC_MSG_ERROR([--with-cpu requires an argument]) ;;
- no) ;;
- *) submachine="$withval" ;;
- esac
-],
-[submachine=$default_submachine])
-
-AC_SUBST(submachine)
-AM_CONDITIONAL([WITH_SUBMACHINE], [test x$submachine != x])
-
-AC_ARG_WITH([neon],
- AC_HELP_STRING([--with-neon],
- [include NEON specific routines @<:@default=yes@:>@]),
- [with_neon=$withval],
- [with_neon=yes])
-AC_SUBST(with_neon)
-AM_CONDITIONAL(WITH_NEON, test x$with_neon = xyes)
-
-AC_ARG_WITH([vfp],
- AC_HELP_STRING([--with-vfp],
- [include VFP specific routines @<:@default=yes@:>@]),
- [with_vfp=$withval],
- [with_vfp=yes])
-AC_SUBST(with_vfp)
-AM_CONDITIONAL(WITH_VFP, test x$with_vfp = xyes)
-
-AC_OUTPUT
diff --git a/contrib/cortex-strings/scripts/add-license.sh b/contrib/cortex-strings/scripts/add-license.sh
deleted file mode 100755
--- a/contrib/cortex-strings/scripts/add-license.sh
+++ /dev/null
@@ -1,79 +0,0 @@
-#!/bin/bash
-#
-# Add the modified BSD license to a file
-#
-
-f=`mktemp -d`
-trap "rm -rf $f" EXIT
-
-year=`date +%Y`
-cat > $f/original <<EOF
-Copyright (c) $year, Linaro Limited
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in the
- documentation and/or other materials provided with the distribution.
- * Neither the name of the Linaro nor the
- names of its contributors may be used to endorse or promote products
- derived from this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
-ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
-DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-EOF
-
-# Translate it to C style
-echo "/*" > $f/c
-sed -r 's/(.*)/ * \1/' $f/original | sed -r 's/ +$//' >> $f/c
-echo " */" >> $f/c
-echo >> $f/c
-
-# ...and shell style
-sed -r 's/(.*)/# \1/' $f/original | sed -r 's/ +$//' >> $f/shell
-echo '#' >> $f/shell
-echo >> $f/shell
-
-for name in $@; do
- if grep -q Copyright $name; then
- echo $name already has some type of copyright
- continue
- fi
-
- case $name in
- # These files don't have an explicit license
- *autogen.sh*)
- continue;;
- *reference/newlib/*)
- continue;;
- *reference/newlib-xscale/*)
- continue;;
- */dhry/*)
- continue;;
-
- *.c)
- src=$f/c
- ;;
- *.sh|*.am|*.ac)
- src=$f/shell
- ;;
- *)
- echo Unrecognied extension on $name
- continue
- esac
-
- cat $src $name > $f/next
- mv $f/next $name
- echo Updated $name
-done
diff --git a/contrib/cortex-strings/scripts/bench.py b/contrib/cortex-strings/scripts/bench.py
deleted file mode 100644
--- a/contrib/cortex-strings/scripts/bench.py
+++ /dev/null
@@ -1,175 +0,0 @@
-#!/usr/bin/env python
-
-"""Simple harness that benchmarks different variants of the routines,
-caches the results, and emits all of the records at the end.
-
-Results are generated for different values of:
- * Source
- * Routine
- * Length
- * Alignment
-"""
-
-import argparse
-import subprocess
-import math
-import sys
-
-# Prefix to the executables
-build = '../build/try-'
-
-ALL = 'memchr memcmp memcpy memset strchr strcmp strcpy strlen'
-
-HAS = {
- 'this': 'bounce memchr memcpy memset strchr strcmp strcpy strlen',
- 'bionic-a9': 'memcmp memcpy memset strcmp strcpy strlen',
- 'bionic-a15': 'memcmp memcpy memset strcmp strcpy strlen',
- 'bionic-c': ALL,
- 'csl': 'memcpy memset',
- 'glibc': 'memcpy memset strchr strlen',
- 'glibc-c': ALL,
- 'newlib': 'memcpy strcmp strcpy strlen',
- 'newlib-c': ALL,
- 'newlib-xscale': 'memchr memcpy memset strchr strcmp strcpy strlen',
- 'plain': 'memset memcpy strcmp strcpy',
-}
-
-BOUNCE_ALIGNMENTS = ['1']
-SINGLE_BUFFER_ALIGNMENTS = ['1', '2', '4', '8', '16', '32']
-DUAL_BUFFER_ALIGNMENTS = ['1:32', '2:32', '4:32', '8:32', '16:32', '32:32']
-
-ALIGNMENTS = {
- 'bounce': BOUNCE_ALIGNMENTS,
- 'memchr': SINGLE_BUFFER_ALIGNMENTS,
- 'memset': SINGLE_BUFFER_ALIGNMENTS,
- 'strchr': SINGLE_BUFFER_ALIGNMENTS,
- 'strlen': SINGLE_BUFFER_ALIGNMENTS,
- 'memcmp': DUAL_BUFFER_ALIGNMENTS,
- 'memcpy': DUAL_BUFFER_ALIGNMENTS,
- 'strcmp': DUAL_BUFFER_ALIGNMENTS,
- 'strcpy': DUAL_BUFFER_ALIGNMENTS,
-}
-
-VARIANTS = sorted(HAS.keys())
-FUNCTIONS = sorted(ALIGNMENTS.keys())
-
-NUM_RUNS = 5
-
-def run(cache, variant, function, bytes, loops, alignment, run_id, quiet=False):
- """Perform a single run, exercising the cache as appropriate."""
- key = ':'.join('%s' % x for x in (variant, function, bytes, loops, alignment, run_id))
-
- if key in cache:
- got = cache[key]
- else:
- xbuild = build
- cmd = '%(xbuild)s%(variant)s -t %(function)s -c %(bytes)s -l %(loops)s -a %(alignment)s -r %(run_id)s' % locals()
-
- try:
- got = subprocess.check_output(cmd.split()).strip()
- except OSError, ex:
- assert False, 'Error %s while running %s' % (ex, cmd)
-
- parts = got.split(':')
- took = float(parts[7])
-
- cache[key] = got
-
- if not quiet:
- print got
- sys.stdout.flush()
-
- return took
-
-def run_many(cache, variants, bytes, all_functions):
- # We want the data to come out in a useful order. So fix an
- # alignment and function, and do all sizes for a variant first
- bytes = sorted(bytes)
- mid = bytes[int(len(bytes)/1.5)]
-
- if not all_functions:
- # Use the ordering in 'this' as the default
- all_functions = HAS['this'].split()
-
- # Find all other functions
- for functions in HAS.values():
- for function in functions.split():
- if function not in all_functions:
- all_functions.append(function)
-
- for function in all_functions:
- for alignment in ALIGNMENTS[function]:
- for variant in variants:
- if function not in HAS[variant].split():
- continue
-
- # Run a tracer through and see how long it takes and
- # adjust the number of loops based on that. Not great
- # for memchr() and similar which are O(n), but it will
- # do
- f = 50000000
- want = 5.0
-
- loops = int(f / math.sqrt(max(1, mid)))
- took = run(cache, variant, function, mid, loops, alignment, 0,
- quiet=True)
- # Keep it reasonable for silly routines like bounce
- factor = min(20, max(0.05, want/took))
- f = f * factor
-
- # Round f to a few significant figures
- scale = 10**int(math.log10(f) - 1)
- f = scale*int(f/scale)
-
- for b in sorted(bytes):
- # Figure out the number of loops to give a roughly consistent run
- loops = int(f / math.sqrt(max(1, b)))
- for run_id in range(0, NUM_RUNS):
- run(cache, variant, function, b, loops, alignment,
- run_id)
-
-def run_top(cache):
- parser = argparse.ArgumentParser()
- parser.add_argument("-v", "--variants", nargs="+", help="library variant to run (run all if not specified)", default = VARIANTS, choices = VARIANTS)
- parser.add_argument("-f", "--functions", nargs="+", help="function to run (run all if not specified)", default = FUNCTIONS, choices = FUNCTIONS)
- parser.add_argument("-l", "--limit", type=int, help="upper limit to test to (in bytes)", default = 512*1024)
- args = parser.parse_args()
-
- # Test all powers of 2
- step1 = 2.0
- # Test intermediate powers of 1.4
- step2 = 1.4
-
- bytes = []
-
- for step in [step1, step2]:
- if step:
- # Figure out how many steps get us up to the top
- steps = int(round(math.log(args.limit) / math.log(step)))
- bytes.extend([int(step**x) for x in range(0, steps+1)])
-
- run_many(cache, args.variants, bytes, args.functions)
-
-def main():
- cachename = 'cache.txt'
-
- cache = {}
-
- try:
- with open(cachename) as f:
- for line in f:
- line = line.strip()
- parts = line.split(':')
- cache[':'.join(parts[:7])] = line
- except:
- pass
-
- try:
- run_top(cache)
- finally:
- with open(cachename, 'w') as f:
- for line in sorted(cache.values()):
- print >> f, line
-
-if __name__ == '__main__':
- main()
diff --git a/contrib/cortex-strings/scripts/fixup.py b/contrib/cortex-strings/scripts/fixup.py
deleted file mode 100644
--- a/contrib/cortex-strings/scripts/fixup.py
+++ /dev/null
@@ -1,27 +0,0 @@
-"""Simple script that enables target specific blocks based on the first argument.
-
-Matches comment blocks like this:
-
-/* For Foo: abc
-def
-*/
-
-and de-comments them giving:
-abc
-def
-"""
-import re
-import sys
-
-def main():
- key = sys.argv[1]
- expr = re.compile(r'/\* For %s:\s([^*]+)\*/' % key, re.M)
-
- for arg in sys.argv[2:]:
- with open(arg) as f:
- body = f.read()
- with open(arg, 'w') as f:
- f.write(expr.sub(r'\1', body))
-
-if __name__ == '__main__':
- main()
diff --git a/contrib/cortex-strings/scripts/libplot.py b/contrib/cortex-strings/scripts/libplot.py
deleted file mode 100644
--- a/contrib/cortex-strings/scripts/libplot.py
+++ /dev/null
@@ -1,78 +0,0 @@
-"""Shared routines for the plotters."""
-
-import fileinput
-import collections
-
-Record = collections.namedtuple('Record', 'variant function bytes loops src_alignment dst_alignment run_id elapsed rest')
-
-
-def make_colours():
- return iter('m b g r c y k pink orange brown grey'.split())
-
-def parse_value(v):
- """Turn text into a primitive"""
- try:
- if '.' in v:
- return float(v)
- else:
- return int(v)
- except ValueError:
- return v
-
-def create_column_tuple(record, names):
- cols = [getattr(record, name) for name in names]
- return tuple(cols)
-
-def unique(records, name, prefer=''):
- """Return the unique values of a column in the records"""
- if type(name) == tuple:
- values = list(set(create_column_tuple(x, name) for x in records))
- else:
- values = list(set(getattr(x, name) for x in records))
-
- if not values:
- return values
- elif type(values[0]) == str:
- return sorted(values, key=lambda x: '%-06d|%s' % (-prefer.find(x), x))
- else:
- return sorted(values)
-
-def alignments_equal(alignments):
- for alignment in alignments:
- if alignment[0] != alignment[1]:
- return False
- return True
-
-def parse_row(line):
- return Record(*[parse_value(y) for y in line.split(':')])
-
-def parse():
- """Parse a record file into named tuples, correcting for loop
- overhead along the way.
- """
- records = [parse_row(x) for x in fileinput.input()]
-
- # Pull out any bounce values
- costs = {}
-
- for record in [x for x in records if x.function=='bounce']:
- costs[(record.bytes, record.loops)] = record.elapsed
-
- # Fix up all of the records for cost
- out = []
-
- for record in records:
- if record.function == 'bounce':
- continue
-
- cost = costs.get((record.bytes, record.loops), None)
-
- if not cost:
- out.append(record)
- else:
- # Unfortunately you can't update a namedtuple...
- values = list(record)
- values[-2] -= cost
- out.append(Record(*values))
-
- return out
diff --git a/contrib/cortex-strings/scripts/plot-align.py b/contrib/cortex-strings/scripts/plot-align.py
deleted file mode 100644
--- a/contrib/cortex-strings/scripts/plot-align.py
+++ /dev/null
@@ -1,67 +0,0 @@
-#!/usr/bin/env python
-
-"""Plot the performance of different variants of one routine versus alignment.
-"""
-
-import libplot
-
-import pylab
-
-
-def plot(records, bytes, function):
- records = [x for x in records if x.bytes==bytes and x.function==function]
-
- variants = libplot.unique(records, 'variant', prefer='this')
- alignments = libplot.unique(records, ('src_alignment', 'dst_alignment'))
-
- X = pylab.arange(len(alignments))
- width = 1.0/(len(variants)+1)
-
- colours = libplot.make_colours()
-
- pylab.figure(1).set_size_inches((16, 12))
- pylab.clf()
-
- for i, variant in enumerate(variants):
- heights = []
-
- for alignment in alignments:
- matches = [x for x in records if x.variant==variant and x.src_alignment==alignment[0] and x.dst_alignment==alignment[1]]
-
- if matches:
- vals = [match.bytes*match.loops/match.elapsed/(1024*1024) for
- match in matches]
- mean = sum(vals)/len(vals)
- heights.append(mean)
- else:
- heights.append(0)
-
- pylab.bar(X+i*width, heights, width, color=colours.next(), label=variant)
-
-
- axes = pylab.axes()
- if libplot.alignments_equal(alignments):
- alignment_labels = ["%s" % x[0] for x in alignments]
- else:
- alignment_labels = ["%s:%s" % (x[0], x[1]) for x in alignments]
- axes.set_xticklabels(alignment_labels)
- axes.set_xticks(X + 0.5)
-
- pylab.title('Performance of different variants of %(function)s for %(bytes)d byte blocks' % locals())
- pylab.xlabel('Alignment')
- pylab.ylabel('Rate (MB/s)')
- pylab.legend(loc='lower right', ncol=3)
- pylab.grid()
- pylab.savefig('alignment-%(function)s-%(bytes)d.png' % locals(), dpi=72)
-
-def main():
- records = libplot.parse()
-
- for function in libplot.unique(records, 'function'):
- for bytes in libplot.unique(records, 'bytes'):
- plot(records, bytes, function)
-
- pylab.show()
-
-if __name__ == '__main__':
- main()
diff --git a/contrib/cortex-strings/scripts/plot-sizes.py b/contrib/cortex-strings/scripts/plot-sizes.py
deleted file mode 100644
--- a/contrib/cortex-strings/scripts/plot-sizes.py
+++ /dev/null
@@ -1,120 +0,0 @@
-#!/usr/bin/env python
-
-"""Plot the performance for different block sizes of one function across
-variants.
-"""
-
-import libplot
-
-import pylab
-import pdb
-import math
-
-def pretty_kb(v):
- if v < 1024:
- return '%d' % v
- else:
- if v % 1024 == 0:
- return '%d k' % (v//1024)
- else:
- return '%.1f k' % (v/1024)
-
-def plot(records, function, alignment=None, scale=1):
- variants = libplot.unique(records, 'variant', prefer='this')
- records = [x for x in records if x.function==function]
-
- if alignment != None:
- records = [x for x in records if x.src_alignment==alignment[0] and
- x.dst_alignment==alignment[1]]
-
- alignments = libplot.unique(records, ('src_alignment', 'dst_alignment'))
- if len(alignments) != 1:
- return False
- if libplot.alignments_equal(alignments):
- aalignment = alignments[0][0]
- else:
- aalignment = "%s:%s" % (alignments[0][0], alignments[0][1])
-
- bytes = libplot.unique(records, 'bytes')[0]
-
- colours = libplot.make_colours()
- all_x = []
-
- pylab.figure(1).set_size_inches((6.4*scale, 4.8*scale))
- pylab.clf()
-
- if 'str' in function:
- # The harness fills out to 16k. Anything past that is an
- # early match
- top = 16384
- else:
- top = 2**31
-
- for variant in variants:
- matches = [x for x in records if x.variant==variant and x.bytes <= top]
- matches.sort(key=lambda x: x.bytes)
-
- X = sorted(list(set([x.bytes for x in matches])))
- Y = []
- Yerr = []
- for xbytes in X:
- vals = [x.bytes*x.loops/x.elapsed/(1024*1024) for x in matches if x.bytes == xbytes]
- if len(vals) > 1:
- mean = sum(vals)/len(vals)
- Y.append(mean)
- if len(Yerr) == 0:
- Yerr = [[], []]
- err1 = max(vals) - mean
- assert err1 >= 0
- err2 = min(vals) - mean
- assert err2 <= 0
- Yerr[0].append(abs(err2))
- Yerr[1].append(err1)
- else:
- Y.append(vals[0])
-
- all_x.extend(X)
- colour = colours.next()
-
- if X:
- pylab.plot(X, Y, c=colour)
- if len(Yerr) > 0:
- pylab.errorbar(X, Y, yerr=Yerr, c=colour, label=variant, fmt='o')
- else:
- pylab.scatter(X, Y, c=colour, label=variant, edgecolors='none')
-
- pylab.legend(loc='upper left', ncol=3, prop={'size': 'small'})
- pylab.grid()
- pylab.title('%(function)s of %(aalignment)s byte aligned blocks' % locals())
- pylab.xlabel('Size (B)')
- pylab.ylabel('Rate (MB/s)')
-
- # Figure out how high the range goes
- top = max(all_x)
-
- power = int(round(math.log(max(all_x)) / math.log(2)))
-
- pylab.semilogx()
-
- pylab.axes().set_xticks([2**x for x in range(0, power+1)])
- pylab.axes().set_xticklabels([pretty_kb(2**x) for x in range(0, power+1)])
- pylab.xlim(0, top)
- pylab.ylim(0, pylab.ylim()[1])
- return True
-
-def main():
- records = libplot.parse()
-
- functions = libplot.unique(records, 'function')
- alignments = libplot.unique(records, ('src_alignment', 'dst_alignment'))
-
- for function in functions:
- for alignment in alignments:
- for scale in [1, 2.5]:
- if plot(records, function, alignment, scale):
- pylab.savefig('sizes-%s-%02d-%02d-%.1f.png' % (function, alignment[0], alignment[1], scale), dpi=72)
-
- pylab.show()
-
-if __name__ == '__main__':
- main()
diff --git a/contrib/cortex-strings/scripts/plot-top.py b/contrib/cortex-strings/scripts/plot-top.py
deleted file mode 100644
--- a/contrib/cortex-strings/scripts/plot-top.py
+++ /dev/null
@@ -1,61 +0,0 @@
-#!/usr/bin/env python
-
-"""Plot the performance of different variants of the string routines
-for one size.
-"""
-
-import libplot
-
-import pylab
-
-
-def plot(records, bytes):
- records = [x for x in records if x.bytes==bytes]
-
- variants = libplot.unique(records, 'variant', prefer='this')
- functions = libplot.unique(records, 'function')
-
- X = pylab.arange(len(functions))
- width = 1.0/(len(variants)+1)
-
- colours = libplot.make_colours()
-
- pylab.figure(1).set_size_inches((16, 12))
- pylab.clf()
-
- for i, variant in enumerate(variants):
- heights = []
-
- for function in functions:
- matches = [x for x in records if x.variant==variant and x.function==function and x.src_alignment==8]
-
- if matches:
- vals = [match.bytes*match.loops/match.elapsed/(1024*1024) for
- match in matches]
- mean = sum(vals)/len(vals)
- heights.append(mean)
- else:
- heights.append(0)
-
- pylab.bar(X+i*width, heights, width, color=colours.next(), label=variant)
-
- axes = pylab.axes()
- axes.set_xticklabels(functions)
- axes.set_xticks(X + 0.5)
-
- pylab.title('Performance of different variants for %d byte blocks' % bytes)
- pylab.ylabel('Rate (MB/s)')
- pylab.legend(loc='upper left', ncol=3)
- pylab.grid()
- pylab.savefig('top-%06d.png' % bytes, dpi=72)
-
-def main():
- records = libplot.parse()
-
- for bytes in libplot.unique(records, 'bytes'):
- plot(records, bytes)
-
- pylab.show()
-
-if __name__ == '__main__':
- main()
diff --git a/contrib/cortex-strings/scripts/plot.py b/contrib/cortex-strings/scripts/plot.py
deleted file mode 100644
--- a/contrib/cortex-strings/scripts/plot.py
+++ /dev/null
@@ -1,123 +0,0 @@
-"""Plot the results for each test. Spits out a set of images into the
-current directory.
-"""
-
-import libplot
-
-import fileinput
-import collections
-import pprint
-
-import pylab
-
-Record = collections.namedtuple('Record', 'variant test size loops src_alignment dst_alignment run_id rawtime comment time bytes rate')
-
-def unique(rows, name):
- """Takes a list of values, pulls out the named field, and returns
- a list of the unique values of this field.
- """
- return sorted(set(getattr(x, name) for x in rows))
-
-def to_float(v):
- """Convert a string into a better type.
-
- >>> to_float('foo')
- 'foo'
- >>> to_float('1.23')
- 1.23
- >>> to_float('45')
- 45
- """
- try:
- if '.' in v:
- return float(v)
- else:
- return int(v)
- except:
- return v
-
-def parse():
- # Split the input up
- rows = [x.strip().split(':') for x in fileinput.input()]
- # Automatically turn numbers into the base type
- rows = [[to_float(y) for y in x] for x in rows]
-
- # Scan once to calculate the overhead
- r = [Record(*(x + [0, 0, 0])) for x in rows]
- bounces = pylab.array([(x.loops, x.rawtime) for x in r if x.test == 'bounce'])
- fit = pylab.polyfit(bounces[:,0], bounces[:,1], 1)
-
- records = []
-
- for row in rows:
- # Make a dummy record so we can use the names
- r1 = Record(*(row + [0, 0, 0]))
-
- bytes = r1.size * r1.loops
- # Calculate the bounce time
- delta = pylab.polyval(fit, [r1.loops])
- time = r1.rawtime - delta
- rate = bytes / time
-
- records.append(Record(*(row + [time, bytes, rate])))
-
- return records
-
-def plot(records, field, scale, ylabel):
- variants = unique(records, 'variant')
- tests = unique(records, 'test')
-
- colours = libplot.make_colours()
-
- # A little hack. We want the 'all' record to be drawn last so
- # that it's obvious on the graph. Assume that no tests come
- # before it alphabetically
- variants.reverse()
-
- for test in tests:
- for variant in variants:
- v = [x for x in records if x.test==test and x.variant==variant]
- v.sort(key=lambda x: x.size)
- V = pylab.array([(x.size, getattr(x, field)) for x in v])
-
- # Ensure our results appear
- order = 1 if variant == 'this' else 0
-
- try:
- # A little hack. We want the 'all' to be obvious on
- # the graph
- if variant == 'all':
- pylab.scatter(V[:,0], V[:,1]/scale, label=variant)
- pylab.plot(V[:,0], V[:,1]/scale)
- else:
- pylab.plot(V[:,0], V[:,1]/scale, label=variant,
- zorder=order, c = colours.next())
-
- except Exception, ex:
- # michaelh1 likes to run this script while the test is
- # still running which can lead to bad data
- print ex, 'on %s of %s' % (variant, test)
-
- pylab.legend(loc='lower right', ncol=2, prop={'size': 'small'})
- pylab.xlabel('Block size (B)')
- pylab.ylabel(ylabel)
- pylab.title('%s %s' % (test, field))
- pylab.grid()
-
- pylab.savefig('%s-%s.png' % (test, field), dpi=100)
- pylab.semilogx(basex=2)
- pylab.savefig('%s-%s-semilog.png' % (test, field), dpi=100)
- pylab.clf()
-
-def test():
- import doctest
- doctest.testmod()
-
-def main():
- records = parse()
-
- plot(records, 'rate', 1024**2, 'Rate (MB/s)')
- plot(records, 'time', 1, 'Total time (s)')
-
-if __name__ == '__main__':
- main()
diff --git a/contrib/cortex-strings/scripts/trim.sh b/contrib/cortex-strings/scripts/trim.sh
deleted file mode 100755
--- a/contrib/cortex-strings/scripts/trim.sh
+++ /dev/null
@@ -1,9 +0,0 @@
-#!/bin/bash
-#
-# Trims the whitespace from around any given images
-#
-
-for i in $@; do
- convert $i -bordercolor white -border 1x1 -trim +repage -alpha off +dither -colors 32 PNG8:next-$i
- mv next-$i $i
-done
diff --git a/contrib/cortex-strings/src/aarch64/memchr.S b/contrib/cortex-strings/src/aarch64/memchr.S
deleted file mode 100644
--- a/contrib/cortex-strings/src/aarch64/memchr.S
+++ /dev/null
@@ -1,172 +0,0 @@
-/*
- * memchr - find a character in a memory zone
- *
- * Copyright (c) 2014, ARM Limited
- * All rights Reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of the company nor the names of its contributors
- * may be used to endorse or promote products derived from this
- * software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-/* Assumptions:
- *
- * ARMv8-a, AArch64
- * Neon Available.
- */
-
-/* Arguments and results. */
-#define srcin x0
-#define chrin w1
-#define cntin x2
-
-#define result x0
-
-#define src x3
-#define tmp x4
-#define wtmp2 w5
-#define synd x6
-#define soff x9
-#define cntrem x10
-
-#define vrepchr v0
-#define vdata1 v1
-#define vdata2 v2
-#define vhas_chr1 v3
-#define vhas_chr2 v4
-#define vrepmask v5
-#define vend v6
-
-/*
- * Core algorithm:
- *
- * For each 32-byte chunk we calculate a 64-bit syndrome value, with two bits
- * per byte. For each tuple, bit 0 is set if the relevant byte matched the
- * requested character and bit 1 is not used (faster than using a 32bit
- * syndrome). Since the bits in the syndrome reflect exactly the order in which
- * things occur in the original string, counting trailing zeros allows to
- * identify exactly which byte has matched.
- */
-
- .macro def_fn f p2align=0
- .text
- .p2align \p2align
- .global \f
- .type \f, %function
-\f:
- .endm
-
-def_fn memchr
- /* Do not dereference srcin if no bytes to compare. */
- cbz cntin, .Lzero_length
- /*
- * Magic constant 0x40100401 allows us to identify which lane matches
- * the requested byte.
- */
- mov wtmp2, #0x0401
- movk wtmp2, #0x4010, lsl #16
- dup vrepchr.16b, chrin
- /* Work with aligned 32-byte chunks */
- bic src, srcin, #31
- dup vrepmask.4s, wtmp2
- ands soff, srcin, #31
- and cntrem, cntin, #31
- b.eq .Lloop
-
- /*
- * Input string is not 32-byte aligned. We calculate the syndrome
- * value for the aligned 32 bytes block containing the first bytes
- * and mask the irrelevant part.
- */
-
- ld1 {vdata1.16b, vdata2.16b}, [src], #32
- sub tmp, soff, #32
- adds cntin, cntin, tmp
- cmeq vhas_chr1.16b, vdata1.16b, vrepchr.16b
- cmeq vhas_chr2.16b, vdata2.16b, vrepchr.16b
- and vhas_chr1.16b, vhas_chr1.16b, vrepmask.16b
- and vhas_chr2.16b, vhas_chr2.16b, vrepmask.16b
- addp vend.16b, vhas_chr1.16b, vhas_chr2.16b /* 256->128 */
- addp vend.16b, vend.16b, vend.16b /* 128->64 */
- mov synd, vend.d[0]
- /* Clear the soff*2 lower bits */
- lsl tmp, soff, #1
- lsr synd, synd, tmp
- lsl synd, synd, tmp
- /* The first block can also be the last */
- b.ls .Lmasklast
- /* Have we found something already? */
- cbnz synd, .Ltail
-
-.Lloop:
- ld1 {vdata1.16b, vdata2.16b}, [src], #32
- subs cntin, cntin, #32
- cmeq vhas_chr1.16b, vdata1.16b, vrepchr.16b
- cmeq vhas_chr2.16b, vdata2.16b, vrepchr.16b
- /* If we're out of data we finish regardless of the result */
- b.ls .Lend
- /* Use a fast check for the termination condition */
- orr vend.16b, vhas_chr1.16b, vhas_chr2.16b
- addp vend.2d, vend.2d, vend.2d
- mov synd, vend.d[0]
- /* We're not out of data, loop if we haven't found the character */
- cbz synd, .Lloop
-
-.Lend:
- /* Termination condition found, let's calculate the syndrome value */
- and vhas_chr1.16b, vhas_chr1.16b, vrepmask.16b
- and vhas_chr2.16b, vhas_chr2.16b, vrepmask.16b
- addp vend.16b, vhas_chr1.16b, vhas_chr2.16b /* 256->128 */
- addp vend.16b, vend.16b, vend.16b /* 128->64 */
- mov synd, vend.d[0]
- /* Only do the clear for the last possible block */
- b.hi .Ltail
-
-.Lmasklast:
- /* Clear the (32 - ((cntrem + soff) % 32)) * 2 upper bits */
- add tmp, cntrem, soff
- and tmp, tmp, #31
- sub tmp, tmp, #32
- neg tmp, tmp, lsl #1
- lsl synd, synd, tmp
- lsr synd, synd, tmp
-
-.Ltail:
- /* Count the trailing zeros using bit reversing */
- rbit synd, synd
- /* Compensate the last post-increment */
- sub src, src, #32
- /* Check that we have found a character */
- cmp synd, #0
- /* And count the leading zeros */
- clz synd, synd
- /* Compute the potential result */
- add result, src, synd, lsr #1
- /* Select result or NULL */
- csel result, xzr, result, eq
- ret
-
-.Lzero_length:
- mov result, #0
- ret
-
- .size memchr, . - memchr
diff --git a/contrib/cortex-strings/src/aarch64/memcmp.S b/contrib/cortex-strings/src/aarch64/memcmp.S
deleted file mode 100644
--- a/contrib/cortex-strings/src/aarch64/memcmp.S
+++ /dev/null
@@ -1,162 +0,0 @@
-/* memcmp - compare memory
-
- Copyright (c) 2013, Linaro Limited
- All rights reserved.
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are met:
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in the
- documentation and/or other materials provided with the distribution.
- * Neither the name of the Linaro nor the
- names of its contributors may be used to endorse or promote products
- derived from this software without specific prior written permission.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
-
-/* Assumptions:
- *
- * ARMv8-a, AArch64
- */
-
- .macro def_fn f p2align=0
- .text
- .p2align \p2align
- .global \f
- .type \f, %function
-\f:
- .endm
-
-/* Parameters and result. */
-#define src1 x0
-#define src2 x1
-#define limit x2
-#define result x0
-
-/* Internal variables. */
-#define data1 x3
-#define data1w w3
-#define data2 x4
-#define data2w w4
-#define has_nul x5
-#define diff x6
-#define endloop x7
-#define tmp1 x8
-#define tmp2 x9
-#define tmp3 x10
-#define pos x11
-#define limit_wd x12
-#define mask x13
-
-def_fn memcmp p2align=6
- cbz limit, .Lret0
- eor tmp1, src1, src2
- tst tmp1, #7
- b.ne .Lmisaligned8
- ands tmp1, src1, #7
- b.ne .Lmutual_align
- add limit_wd, limit, #7
- lsr limit_wd, limit_wd, #3
- /* Start of performance-critical section -- one 64B cache line. */
-.Lloop_aligned:
- ldr data1, [src1], #8
- ldr data2, [src2], #8
-.Lstart_realigned:
- subs limit_wd, limit_wd, #1
- eor diff, data1, data2 /* Non-zero if differences found. */
- csinv endloop, diff, xzr, ne /* Last Dword or differences. */
- cbz endloop, .Lloop_aligned
- /* End of performance-critical section -- one 64B cache line. */
-
- /* Not reached the limit, must have found a diff. */
- cbnz limit_wd, .Lnot_limit
-
- /* Limit % 8 == 0 => all bytes significant. */
- ands limit, limit, #7
- b.eq .Lnot_limit
-
- lsl limit, limit, #3 /* Bits -> bytes. */
- mov mask, #~0
-#ifdef __AARCH64EB__
- lsr mask, mask, limit
-#else
- lsl mask, mask, limit
-#endif
- bic data1, data1, mask
- bic data2, data2, mask
-
- orr diff, diff, mask
-.Lnot_limit:
-
-#ifndef __AARCH64EB__
- rev diff, diff
- rev data1, data1
- rev data2, data2
-#endif
- /* The MS-non-zero bit of DIFF marks either the first bit
- that is different, or the end of the significant data.
- Shifting left now will bring the critical information into the
- top bits. */
- clz pos, diff
- lsl data1, data1, pos
- lsl data2, data2, pos
- /* But we need to zero-extend (char is unsigned) the value and then
- perform a signed 32-bit subtraction. */
- lsr data1, data1, #56
- sub result, data1, data2, lsr #56
- ret
-
-.Lmutual_align:
- /* Sources are mutually aligned, but are not currently at an
- alignment boundary. Round down the addresses and then mask off
- the bytes that precede the start point. */
- bic src1, src1, #7
- bic src2, src2, #7
- add limit, limit, tmp1 /* Adjust the limit for the extra. */
- lsl tmp1, tmp1, #3 /* Bytes beyond alignment -> bits. */
- ldr data1, [src1], #8
- neg tmp1, tmp1 /* Bits to alignment -64. */
- ldr data2, [src2], #8
- mov tmp2, #~0
-#ifdef __AARCH64EB__
- /* Big-endian. Early bytes are at MSB. */
- lsl tmp2, tmp2, tmp1 /* Shift (tmp1 & 63). */
-#else
- /* Little-endian. Early bytes are at LSB. */
- lsr tmp2, tmp2, tmp1 /* Shift (tmp1 & 63). */
-#endif
- add limit_wd, limit, #7
- orr data1, data1, tmp2
- orr data2, data2, tmp2
- lsr limit_wd, limit_wd, #3
- b .Lstart_realigned
-
-.Lret0:
- mov result, #0
- ret
-
- .p2align 6
-.Lmisaligned8:
- sub limit, limit, #1
-1:
- /* Perhaps we can do better than this. */
- ldrb data1w, [src1], #1
- ldrb data2w, [src2], #1
- subs limit, limit, #1
- ccmp data1w, data2w, #0, cs /* NZCV = 0b0000. */
- b.eq 1b
- sub result, data1, data2
- ret
- .size memcmp, . - memcmp
diff --git a/contrib/cortex-strings/src/aarch64/memcpy.S b/contrib/cortex-strings/src/aarch64/memcpy.S
deleted file mode 100644
--- a/contrib/cortex-strings/src/aarch64/memcpy.S
+++ /dev/null
@@ -1,225 +0,0 @@
-/* Copyright (c) 2012, Linaro Limited
- All rights reserved.
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are met:
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in the
- documentation and/or other materials provided with the distribution.
- * Neither the name of the Linaro nor the
- names of its contributors may be used to endorse or promote products
- derived from this software without specific prior written permission.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
-
-/*
- * Copyright (c) 2015 ARM Ltd
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. The name of the company may not be used to endorse or promote
- * products derived from this software without specific prior written
- * permission.
- *
- * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
- * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
- * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
- * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-/* Assumptions:
- *
- * ARMv8-a, AArch64, unaligned accesses.
- *
- */
-
-#define dstin x0
-#define src x1
-#define count x2
-#define dst x3
-#define srcend x4
-#define dstend x5
-#define A_l x6
-#define A_lw w6
-#define A_h x7
-#define A_hw w7
-#define B_l x8
-#define B_lw w8
-#define B_h x9
-#define C_l x10
-#define C_h x11
-#define D_l x12
-#define D_h x13
-#define E_l src
-#define E_h count
-#define F_l dst
-#define F_h srcend
-#define tmp1 x9
-
-#define L(l) .L ## l
-
- .macro def_fn f p2align=0
- .text
- .p2align \p2align
- .global \f
- .type \f, %function
-\f:
- .endm
-
-/* Copies are split into 3 main cases: small copies of up to 16 bytes,
- medium copies of 17..96 bytes which are fully unrolled. Large copies
- of more than 96 bytes align the destination and use an unrolled loop
- processing 64 bytes per iteration.
- Small and medium copies read all data before writing, allowing any
- kind of overlap, and memmove tailcalls memcpy for these cases as
- well as non-overlapping copies.
-*/
-
-def_fn memcpy p2align=6
- prfm PLDL1KEEP, [src]
- add srcend, src, count
- add dstend, dstin, count
- cmp count, 16
- b.ls L(copy16)
- cmp count, 96
- b.hi L(copy_long)
-
- /* Medium copies: 17..96 bytes. */
- sub tmp1, count, 1
- ldp A_l, A_h, [src]
- tbnz tmp1, 6, L(copy96)
- ldp D_l, D_h, [srcend, -16]
- tbz tmp1, 5, 1f
- ldp B_l, B_h, [src, 16]
- ldp C_l, C_h, [srcend, -32]
- stp B_l, B_h, [dstin, 16]
- stp C_l, C_h, [dstend, -32]
-1:
- stp A_l, A_h, [dstin]
- stp D_l, D_h, [dstend, -16]
- ret
-
- .p2align 4
- /* Small copies: 0..16 bytes. */
-L(copy16):
- cmp count, 8
- b.lo 1f
- ldr A_l, [src]
- ldr A_h, [srcend, -8]
- str A_l, [dstin]
- str A_h, [dstend, -8]
- ret
- .p2align 4
-1:
- tbz count, 2, 1f
- ldr A_lw, [src]
- ldr A_hw, [srcend, -4]
- str A_lw, [dstin]
- str A_hw, [dstend, -4]
- ret
-
- /* Copy 0..3 bytes. Use a branchless sequence that copies the same
- byte 3 times if count==1, or the 2nd byte twice if count==2. */
-1:
- cbz count, 2f
- lsr tmp1, count, 1
- ldrb A_lw, [src]
- ldrb A_hw, [srcend, -1]
- ldrb B_lw, [src, tmp1]
- strb A_lw, [dstin]
- strb B_lw, [dstin, tmp1]
- strb A_hw, [dstend, -1]
-2: ret
-
- .p2align 4
- /* Copy 64..96 bytes. Copy 64 bytes from the start and
- 32 bytes from the end. */
-L(copy96):
- ldp B_l, B_h, [src, 16]
- ldp C_l, C_h, [src, 32]
- ldp D_l, D_h, [src, 48]
- ldp E_l, E_h, [srcend, -32]
- ldp F_l, F_h, [srcend, -16]
- stp A_l, A_h, [dstin]
- stp B_l, B_h, [dstin, 16]
- stp C_l, C_h, [dstin, 32]
- stp D_l, D_h, [dstin, 48]
- stp E_l, E_h, [dstend, -32]
- stp F_l, F_h, [dstend, -16]
- ret
-
- /* Align DST to 16 byte alignment so that we don't cross cache line
- boundaries on both loads and stores. There are at least 96 bytes
- to copy, so copy 16 bytes unaligned and then align. The loop
- copies 64 bytes per iteration and prefetches one iteration ahead. */
-
- .p2align 4
-L(copy_long):
- and tmp1, dstin, 15
- bic dst, dstin, 15
- ldp D_l, D_h, [src]
- sub src, src, tmp1
- add count, count, tmp1 /* Count is now 16 too large. */
- ldp A_l, A_h, [src, 16]
- stp D_l, D_h, [dstin]
- ldp B_l, B_h, [src, 32]
- ldp C_l, C_h, [src, 48]
- ldp D_l, D_h, [src, 64]!
- subs count, count, 128 + 16 /* Test and readjust count. */
- b.ls 2f
-1:
- stp A_l, A_h, [dst, 16]
- ldp A_l, A_h, [src, 16]
- stp B_l, B_h, [dst, 32]
- ldp B_l, B_h, [src, 32]
- stp C_l, C_h, [dst, 48]
- ldp C_l, C_h, [src, 48]
- stp D_l, D_h, [dst, 64]!
- ldp D_l, D_h, [src, 64]!
- subs count, count, 64
- b.hi 1b
-
- /* Write the last full set of 64 bytes. The remainder is at most 64
- bytes, so it is safe to always copy 64 bytes from the end even if
- there is just 1 byte left. */
-2:
- ldp E_l, E_h, [srcend, -64]
- stp A_l, A_h, [dst, 16]
- ldp A_l, A_h, [srcend, -48]
- stp B_l, B_h, [dst, 32]
- ldp B_l, B_h, [srcend, -32]
- stp C_l, C_h, [dst, 48]
- ldp C_l, C_h, [srcend, -16]
- stp D_l, D_h, [dst, 64]
- stp E_l, E_h, [dstend, -64]
- stp A_l, A_h, [dstend, -48]
- stp B_l, B_h, [dstend, -32]
- stp C_l, C_h, [dstend, -16]
- ret
-
- .size memcpy, . - memcpy
diff --git a/contrib/cortex-strings/src/aarch64/memmove.S b/contrib/cortex-strings/src/aarch64/memmove.S
deleted file mode 100644
--- a/contrib/cortex-strings/src/aarch64/memmove.S
+++ /dev/null
@@ -1,150 +0,0 @@
-/* Copyright (c) 2013, Linaro Limited
- All rights reserved.
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are met:
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in the
- documentation and/or other materials provided with the distribution.
- * Neither the name of the Linaro nor the
- names of its contributors may be used to endorse or promote products
- derived from this software without specific prior written permission.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
-
-/*
- * Copyright (c) 2015 ARM Ltd
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. The name of the company may not be used to endorse or promote
- * products derived from this software without specific prior written
- * permission.
- *
- * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
- * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
- * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
- * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-/* Assumptions:
- *
- * ARMv8-a, AArch64, unaligned accesses
- */
-
- .macro def_fn f p2align=0
- .text
- .p2align \p2align
- .global \f
- .type \f, %function
-\f:
- .endm
-
-/* Parameters and result. */
-#define dstin x0
-#define src x1
-#define count x2
-#define srcend x3
-#define dstend x4
-#define tmp1 x5
-#define A_l x6
-#define A_h x7
-#define B_l x8
-#define B_h x9
-#define C_l x10
-#define C_h x11
-#define D_l x12
-#define D_h x13
-#define E_l count
-#define E_h tmp1
-
-/* All memmoves up to 96 bytes are done by memcpy as it supports overlaps.
- Larger backwards copies are also handled by memcpy. The only remaining
- case is forward large copies. The destination is aligned, and an
- unrolled loop processes 64 bytes per iteration.
-*/
-
-def_fn memmove, 6
- sub tmp1, dstin, src
- cmp count, 96
- ccmp tmp1, count, 2, hi
- b.hs memcpy
-
- cbz tmp1, 3f
- add dstend, dstin, count
- add srcend, src, count
-
- /* Align dstend to 16 byte alignment so that we don't cross cache line
- boundaries on both loads and stores. There are at least 96 bytes
- to copy, so copy 16 bytes unaligned and then align. The loop
- copies 64 bytes per iteration and prefetches one iteration ahead. */
-
- and tmp1, dstend, 15
- ldp D_l, D_h, [srcend, -16]
- sub srcend, srcend, tmp1
- sub count, count, tmp1
- ldp A_l, A_h, [srcend, -16]
- stp D_l, D_h, [dstend, -16]
- ldp B_l, B_h, [srcend, -32]
- ldp C_l, C_h, [srcend, -48]
- ldp D_l, D_h, [srcend, -64]!
- sub dstend, dstend, tmp1
- subs count, count, 128
- b.ls 2f
- nop
-1:
- stp A_l, A_h, [dstend, -16]
- ldp A_l, A_h, [srcend, -16]
- stp B_l, B_h, [dstend, -32]
- ldp B_l, B_h, [srcend, -32]
- stp C_l, C_h, [dstend, -48]
- ldp C_l, C_h, [srcend, -48]
- stp D_l, D_h, [dstend, -64]!
- ldp D_l, D_h, [srcend, -64]!
- subs count, count, 64
- b.hi 1b
-
- /* Write the last full set of 64 bytes. The remainder is at most 64
- bytes, so it is safe to always copy 64 bytes from the start even if
- there is just 1 byte left. */
-2:
- ldp E_l, E_h, [src, 48]
- stp A_l, A_h, [dstend, -16]
- ldp A_l, A_h, [src, 32]
- stp B_l, B_h, [dstend, -32]
- ldp B_l, B_h, [src, 16]
- stp C_l, C_h, [dstend, -48]
- ldp C_l, C_h, [src]
- stp D_l, D_h, [dstend, -64]
- stp E_l, E_h, [dstin, 48]
- stp A_l, A_h, [dstin, 32]
- stp B_l, B_h, [dstin, 16]
- stp C_l, C_h, [dstin]
-3: ret
-
- .size memmove, . - memmove
diff --git a/contrib/cortex-strings/src/aarch64/memset.S b/contrib/cortex-strings/src/aarch64/memset.S
deleted file mode 100644
--- a/contrib/cortex-strings/src/aarch64/memset.S
+++ /dev/null
@@ -1,235 +0,0 @@
-/* Copyright (c) 2012, Linaro Limited
- All rights reserved.
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are met:
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in the
- documentation and/or other materials provided with the distribution.
- * Neither the name of the Linaro nor the
- names of its contributors may be used to endorse or promote products
- derived from this software without specific prior written permission.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
-
-/*
- * Copyright (c) 2015 ARM Ltd
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. The name of the company may not be used to endorse or promote
- * products derived from this software without specific prior written
- * permission.
- *
- * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
- * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
- * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
- * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-/* Assumptions:
- *
- * ARMv8-a, AArch64, unaligned accesses
- *
- */
-
-
-#define dstin x0
-#define val x1
-#define valw w1
-#define count x2
-#define dst x3
-#define dstend x4
-#define tmp1 x5
-#define tmp1w w5
-#define tmp2 x6
-#define tmp2w w6
-#define zva_len x7
-#define zva_lenw w7
-
-#define L(l) .L ## l
-
- .macro def_fn f p2align=0
- .text
- .p2align \p2align
- .global \f
- .type \f, %function
-\f:
- .endm
-
-def_fn memset p2align=6
-
- dup v0.16B, valw
- add dstend, dstin, count
-
- cmp count, 96
- b.hi L(set_long)
- cmp count, 16
- b.hs L(set_medium)
- mov val, v0.D[0]
-
- /* Set 0..15 bytes. */
- tbz count, 3, 1f
- str val, [dstin]
- str val, [dstend, -8]
- ret
- nop
-1: tbz count, 2, 2f
- str valw, [dstin]
- str valw, [dstend, -4]
- ret
-2: cbz count, 3f
- strb valw, [dstin]
- tbz count, 1, 3f
- strh valw, [dstend, -2]
-3: ret
-
- /* Set 17..96 bytes. */
-L(set_medium):
- str q0, [dstin]
- tbnz count, 6, L(set96)
- str q0, [dstend, -16]
- tbz count, 5, 1f
- str q0, [dstin, 16]
- str q0, [dstend, -32]
-1: ret
-
- .p2align 4
- /* Set 64..96 bytes. Write 64 bytes from the start and
- 32 bytes from the end. */
-L(set96):
- str q0, [dstin, 16]
- stp q0, q0, [dstin, 32]
- stp q0, q0, [dstend, -32]
- ret
-
- .p2align 3
- nop
-L(set_long):
- and valw, valw, 255
- bic dst, dstin, 15
- str q0, [dstin]
- cmp count, 256
- ccmp valw, 0, 0, cs
- b.eq L(try_zva)
-L(no_zva):
- sub count, dstend, dst /* Count is 16 too large. */
- add dst, dst, 16
- sub count, count, 64 + 16 /* Adjust count and bias for loop. */
-1: stp q0, q0, [dst], 64
- stp q0, q0, [dst, -32]
-L(tail64):
- subs count, count, 64
- b.hi 1b
-2: stp q0, q0, [dstend, -64]
- stp q0, q0, [dstend, -32]
- ret
-
- .p2align 3
-L(try_zva):
- mrs tmp1, dczid_el0
- tbnz tmp1w, 4, L(no_zva)
- and tmp1w, tmp1w, 15
- cmp tmp1w, 4 /* ZVA size is 64 bytes. */
- b.ne L(zva_128)
-
- /* Write the first and last 64 byte aligned block using stp rather
- than using DC ZVA. This is faster on some cores.
- */
-L(zva_64):
- str q0, [dst, 16]
- stp q0, q0, [dst, 32]
- bic dst, dst, 63
- stp q0, q0, [dst, 64]
- stp q0, q0, [dst, 96]
- sub count, dstend, dst /* Count is now 128 too large. */
- sub count, count, 128+64+64 /* Adjust count and bias for loop. */
- add dst, dst, 128
- nop
-1: dc zva, dst
- add dst, dst, 64
- subs count, count, 64
- b.hi 1b
- stp q0, q0, [dst, 0]
- stp q0, q0, [dst, 32]
- stp q0, q0, [dstend, -64]
- stp q0, q0, [dstend, -32]
- ret
-
- .p2align 3
-L(zva_128):
- cmp tmp1w, 5 /* ZVA size is 128 bytes. */
- b.ne L(zva_other)
-
- str q0, [dst, 16]
- stp q0, q0, [dst, 32]
- stp q0, q0, [dst, 64]
- stp q0, q0, [dst, 96]
- bic dst, dst, 127
- sub count, dstend, dst /* Count is now 128 too large. */
- sub count, count, 128+128 /* Adjust count and bias for loop. */
- add dst, dst, 128
-1: dc zva, dst
- add dst, dst, 128
- subs count, count, 128
- b.hi 1b
- stp q0, q0, [dstend, -128]
- stp q0, q0, [dstend, -96]
- stp q0, q0, [dstend, -64]
- stp q0, q0, [dstend, -32]
- ret
-
-L(zva_other):
- mov tmp2w, 4
- lsl zva_lenw, tmp2w, tmp1w
- add tmp1, zva_len, 64 /* Max alignment bytes written. */
- cmp count, tmp1
- blo L(no_zva)
-
- sub tmp2, zva_len, 1
- add tmp1, dst, zva_len
- add dst, dst, 16
- subs count, tmp1, dst /* Actual alignment bytes to write. */
- bic tmp1, tmp1, tmp2 /* Aligned dc zva start address. */
- beq 2f
-1: stp q0, q0, [dst], 64
- stp q0, q0, [dst, -32]
- subs count, count, 64
- b.hi 1b
-2: mov dst, tmp1
- sub count, dstend, tmp1 /* Remaining bytes to write. */
- subs count, count, zva_len
- b.lo 4f
-3: dc zva, dst
- add dst, dst, zva_len
- subs count, count, zva_len
- b.hs 3b
-4: add count, count, zva_len
- b L(tail64)
-
- .size memset, . - memset
diff --git a/contrib/cortex-strings/src/aarch64/strchr.S b/contrib/cortex-strings/src/aarch64/strchr.S
deleted file mode 100644
--- a/contrib/cortex-strings/src/aarch64/strchr.S
+++ /dev/null
@@ -1,165 +0,0 @@
-/*
- strchr - find a character in a string
-
- Copyright (c) 2014, ARM Limited
- All rights Reserved.
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are met:
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in the
- documentation and/or other materials provided with the distribution.
- * Neither the name of the company nor the names of its contributors
- may be used to endorse or promote products derived from this
- software without specific prior written permission.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
-
-/* Assumptions:
- *
- * ARMv8-a, AArch64
- * Neon Available.
- */
-
-/* Arguments and results. */
-#define srcin x0
-#define chrin w1
-
-#define result x0
-
-#define src x2
-#define tmp1 x3
-#define wtmp2 w4
-#define tmp3 x5
-
-#define vrepchr v0
-#define vdata1 v1
-#define vdata2 v2
-#define vhas_nul1 v3
-#define vhas_nul2 v4
-#define vhas_chr1 v5
-#define vhas_chr2 v6
-#define vrepmask_0 v7
-#define vrepmask_c v16
-#define vend1 v17
-#define vend2 v18
-
-/* Core algorithm.
-
- For each 32-byte hunk we calculate a 64-bit syndrome value, with
- two bits per byte (LSB is always in bits 0 and 1, for both big
- and little-endian systems). For each tuple, bit 0 is set iff
- the relevant byte matched the requested character; bit 1 is set
- iff the relevant byte matched the NUL end of string (we trigger
- off bit0 for the special case of looking for NUL). Since the bits
- in the syndrome reflect exactly the order in which things occur
- in the original string a count_trailing_zeros() operation will
- identify exactly which byte is causing the termination, and why. */
-
-/* Locals and temporaries. */
-
- .macro def_fn f p2align=0
- .text
- .p2align \p2align
- .global \f
- .type \f, %function
-\f:
- .endm
-
- .macro def_alias f a
- .weak \a
- .set \a,\f
- .endm
-
-def_fn strchr
-def_alias strchr index
- /* Magic constant 0x40100401 to allow us to identify which lane
- matches the requested byte. Magic constant 0x80200802 used
- similarly for NUL termination. */
- mov wtmp2, #0x0401
- movk wtmp2, #0x4010, lsl #16
- dup vrepchr.16b, chrin
- bic src, srcin, #31 /* Work with aligned 32-byte hunks. */
- dup vrepmask_c.4s, wtmp2
- ands tmp1, srcin, #31
- add vrepmask_0.4s, vrepmask_c.4s, vrepmask_c.4s /* equiv: lsl #1 */
- b.eq .Lloop
-
- /* Input string is not 32-byte aligned. Rather than forcing
- the padding bytes to a safe value, we calculate the syndrome
- for all the bytes, but then mask off those bits of the
- syndrome that are related to the padding. */
- ld1 {vdata1.16b, vdata2.16b}, [src], #32
- neg tmp1, tmp1
- cmeq vhas_nul1.16b, vdata1.16b, #0
- cmeq vhas_chr1.16b, vdata1.16b, vrepchr.16b
- cmeq vhas_nul2.16b, vdata2.16b, #0
- cmeq vhas_chr2.16b, vdata2.16b, vrepchr.16b
- and vhas_nul1.16b, vhas_nul1.16b, vrepmask_0.16b
- and vhas_nul2.16b, vhas_nul2.16b, vrepmask_0.16b
- and vhas_chr1.16b, vhas_chr1.16b, vrepmask_c.16b
- and vhas_chr2.16b, vhas_chr2.16b, vrepmask_c.16b
- orr vend1.16b, vhas_nul1.16b, vhas_chr1.16b
- orr vend2.16b, vhas_nul2.16b, vhas_chr2.16b
- lsl tmp1, tmp1, #1
- addp vend1.16b, vend1.16b, vend2.16b // 256->128
- mov tmp3, #~0
- addp vend1.16b, vend1.16b, vend2.16b // 128->64
- lsr tmp1, tmp3, tmp1
-
- mov tmp3, vend1.d[0]
- bic tmp1, tmp3, tmp1 // Mask padding bits.
- cbnz tmp1, .Ltail
-
-.Lloop:
- ld1 {vdata1.16b, vdata2.16b}, [src], #32
- cmeq vhas_nul1.16b, vdata1.16b, #0
- cmeq vhas_chr1.16b, vdata1.16b, vrepchr.16b
- cmeq vhas_nul2.16b, vdata2.16b, #0
- cmeq vhas_chr2.16b, vdata2.16b, vrepchr.16b
- /* Use a fast check for the termination condition. */
- orr vend1.16b, vhas_nul1.16b, vhas_chr1.16b
- orr vend2.16b, vhas_nul2.16b, vhas_chr2.16b
- orr vend1.16b, vend1.16b, vend2.16b
- addp vend1.2d, vend1.2d, vend1.2d
- mov tmp1, vend1.d[0]
- cbz tmp1, .Lloop
-
- /* Termination condition found. Now need to establish exactly why
- we terminated. */
- and vhas_nul1.16b, vhas_nul1.16b, vrepmask_0.16b
- and vhas_nul2.16b, vhas_nul2.16b, vrepmask_0.16b
- and vhas_chr1.16b, vhas_chr1.16b, vrepmask_c.16b
- and vhas_chr2.16b, vhas_chr2.16b, vrepmask_c.16b
- orr vend1.16b, vhas_nul1.16b, vhas_chr1.16b
- orr vend2.16b, vhas_nul2.16b, vhas_chr2.16b
- addp vend1.16b, vend1.16b, vend2.16b // 256->128
- addp vend1.16b, vend1.16b, vend2.16b // 128->64
-
- mov tmp1, vend1.d[0]
-.Ltail:
- /* Count the trailing zeros, by bit reversing... */
- rbit tmp1, tmp1
- /* Re-bias source. */
- sub src, src, #32
- clz tmp1, tmp1 /* And counting the leading zeros. */
- /* Tmp1 is even if the target charager was found first. Otherwise
- we've found the end of string and we weren't looking for NUL. */
- tst tmp1, #1
- add result, src, tmp1, lsr #1
- csel result, result, xzr, eq
- ret
-
- .size strchr, . - strchr
diff --git a/contrib/cortex-strings/src/aarch64/strchrnul.S b/contrib/cortex-strings/src/aarch64/strchrnul.S
deleted file mode 100644
--- a/contrib/cortex-strings/src/aarch64/strchrnul.S
+++ /dev/null
@@ -1,144 +0,0 @@
-/*
- strchrnul - find a character or nul in a string
-
- Copyright (c) 2014, ARM Limited
- All rights Reserved.
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are met:
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in the
- documentation and/or other materials provided with the distribution.
- * Neither the name of the company nor the names of its contributors
- may be used to endorse or promote products derived from this
- software without specific prior written permission.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
-
-/* Assumptions:
- *
- * ARMv8-a, AArch64
- * Neon Available.
- */
-
-/* Arguments and results. */
-#define srcin x0
-#define chrin w1
-
-#define result x0
-
-#define src x2
-#define tmp1 x3
-#define wtmp2 w4
-#define tmp3 x5
-
-#define vrepchr v0
-#define vdata1 v1
-#define vdata2 v2
-#define vhas_nul1 v3
-#define vhas_nul2 v4
-#define vhas_chr1 v5
-#define vhas_chr2 v6
-#define vrepmask v7
-#define vend1 v16
-
-/* Core algorithm.
-
- For each 32-byte hunk we calculate a 64-bit syndrome value, with
- two bits per byte (LSB is always in bits 0 and 1, for both big
- and little-endian systems). For each tuple, bit 0 is set iff
- the relevant byte matched the requested character or nul. Since the
- bits in the syndrome reflect exactly the order in which things occur
- in the original string a count_trailing_zeros() operation will
- identify exactly which byte is causing the termination. */
-
-/* Locals and temporaries. */
-
- .macro def_fn f p2align=0
- .text
- .p2align \p2align
- .global \f
- .type \f, %function
-\f:
- .endm
-
-def_fn strchrnul
- /* Magic constant 0x40100401 to allow us to identify which lane
- matches the termination condition. */
- mov wtmp2, #0x0401
- movk wtmp2, #0x4010, lsl #16
- dup vrepchr.16b, chrin
- bic src, srcin, #31 /* Work with aligned 32-byte hunks. */
- dup vrepmask.4s, wtmp2
- ands tmp1, srcin, #31
- b.eq .Lloop
-
- /* Input string is not 32-byte aligned. Rather than forcing
- the padding bytes to a safe value, we calculate the syndrome
- for all the bytes, but then mask off those bits of the
- syndrome that are related to the padding. */
- ld1 {vdata1.16b, vdata2.16b}, [src], #32
- neg tmp1, tmp1
- cmeq vhas_nul1.16b, vdata1.16b, #0
- cmeq vhas_chr1.16b, vdata1.16b, vrepchr.16b
- cmeq vhas_nul2.16b, vdata2.16b, #0
- cmeq vhas_chr2.16b, vdata2.16b, vrepchr.16b
- orr vhas_chr1.16b, vhas_chr1.16b, vhas_nul1.16b
- orr vhas_chr2.16b, vhas_chr2.16b, vhas_nul2.16b
- and vhas_chr1.16b, vhas_chr1.16b, vrepmask.16b
- and vhas_chr2.16b, vhas_chr2.16b, vrepmask.16b
- lsl tmp1, tmp1, #1
- addp vend1.16b, vhas_chr1.16b, vhas_chr2.16b // 256->128
- mov tmp3, #~0
- addp vend1.16b, vend1.16b, vend1.16b // 128->64
- lsr tmp1, tmp3, tmp1
-
- mov tmp3, vend1.d[0]
- bic tmp1, tmp3, tmp1 // Mask padding bits.
- cbnz tmp1, .Ltail
-
-.Lloop:
- ld1 {vdata1.16b, vdata2.16b}, [src], #32
- cmeq vhas_nul1.16b, vdata1.16b, #0
- cmeq vhas_chr1.16b, vdata1.16b, vrepchr.16b
- cmeq vhas_nul2.16b, vdata2.16b, #0
- cmeq vhas_chr2.16b, vdata2.16b, vrepchr.16b
- /* Use a fast check for the termination condition. */
- orr vhas_chr1.16b, vhas_nul1.16b, vhas_chr1.16b
- orr vhas_chr2.16b, vhas_nul2.16b, vhas_chr2.16b
- orr vend1.16b, vhas_chr1.16b, vhas_chr2.16b
- addp vend1.2d, vend1.2d, vend1.2d
- mov tmp1, vend1.d[0]
- cbz tmp1, .Lloop
-
- /* Termination condition found. Now need to establish exactly why
- we terminated. */
- and vhas_chr1.16b, vhas_chr1.16b, vrepmask.16b
- and vhas_chr2.16b, vhas_chr2.16b, vrepmask.16b
- addp vend1.16b, vhas_chr1.16b, vhas_chr2.16b // 256->128
- addp vend1.16b, vend1.16b, vend1.16b // 128->64
-
- mov tmp1, vend1.d[0]
-.Ltail:
- /* Count the trailing zeros, by bit reversing... */
- rbit tmp1, tmp1
- /* Re-bias source. */
- sub src, src, #32
- clz tmp1, tmp1 /* ... and counting the leading zeros. */
- /* tmp1 is twice the offset into the fragment. */
- add result, src, tmp1, lsr #1
- ret
-
- .size strchrnul, . - strchrnul
diff --git a/contrib/cortex-strings/src/aarch64/strcmp.S b/contrib/cortex-strings/src/aarch64/strcmp.S
deleted file mode 100644
--- a/contrib/cortex-strings/src/aarch64/strcmp.S
+++ /dev/null
@@ -1,166 +0,0 @@
-/* Copyright (c) 2012, Linaro Limited
- All rights reserved.
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are met:
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in the
- documentation and/or other materials provided with the distribution.
- * Neither the name of the Linaro nor the
- names of its contributors may be used to endorse or promote products
- derived from this software without specific prior written permission.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
-
-/* Assumptions:
- *
- * ARMv8-a, AArch64
- */
-
- .macro def_fn f p2align=0
- .text
- .p2align \p2align
- .global \f
- .type \f, %function
-\f:
- .endm
-
-#define REP8_01 0x0101010101010101
-#define REP8_7f 0x7f7f7f7f7f7f7f7f
-#define REP8_80 0x8080808080808080
-
-/* Parameters and result. */
-#define src1 x0
-#define src2 x1
-#define result x0
-
-/* Internal variables. */
-#define data1 x2
-#define data1w w2
-#define data2 x3
-#define data2w w3
-#define has_nul x4
-#define diff x5
-#define syndrome x6
-#define tmp1 x7
-#define tmp2 x8
-#define tmp3 x9
-#define zeroones x10
-#define pos x11
-
- /* Start of performance-critical section -- one 64B cache line. */
-def_fn strcmp p2align=6
- eor tmp1, src1, src2
- mov zeroones, #REP8_01
- tst tmp1, #7
- b.ne .Lmisaligned8
- ands tmp1, src1, #7
- b.ne .Lmutual_align
- /* NUL detection works on the principle that (X - 1) & (~X) & 0x80
- (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
- can be done in parallel across the entire word. */
-.Lloop_aligned:
- ldr data1, [src1], #8
- ldr data2, [src2], #8
-.Lstart_realigned:
- sub tmp1, data1, zeroones
- orr tmp2, data1, #REP8_7f
- eor diff, data1, data2 /* Non-zero if differences found. */
- bic has_nul, tmp1, tmp2 /* Non-zero if NUL terminator. */
- orr syndrome, diff, has_nul
- cbz syndrome, .Lloop_aligned
- /* End of performance-critical section -- one 64B cache line. */
-
-#ifndef __AARCH64EB__
- rev syndrome, syndrome
- rev data1, data1
- /* The MS-non-zero bit of the syndrome marks either the first bit
- that is different, or the top bit of the first zero byte.
- Shifting left now will bring the critical information into the
- top bits. */
- clz pos, syndrome
- rev data2, data2
- lsl data1, data1, pos
- lsl data2, data2, pos
- /* But we need to zero-extend (char is unsigned) the value and then
- perform a signed 32-bit subtraction. */
- lsr data1, data1, #56
- sub result, data1, data2, lsr #56
- ret
-#else
- /* For big-endian we cannot use the trick with the syndrome value
- as carry-propagation can corrupt the upper bits if the trailing
- bytes in the string contain 0x01. */
- /* However, if there is no NUL byte in the dword, we can generate
- the result directly. We can't just subtract the bytes as the
- MSB might be significant. */
- cbnz has_nul, 1f
- cmp data1, data2
- cset result, ne
- cneg result, result, lo
- ret
-1:
- /* Re-compute the NUL-byte detection, using a byte-reversed value. */
- rev tmp3, data1
- sub tmp1, tmp3, zeroones
- orr tmp2, tmp3, #REP8_7f
- bic has_nul, tmp1, tmp2
- rev has_nul, has_nul
- orr syndrome, diff, has_nul
- clz pos, syndrome
- /* The MS-non-zero bit of the syndrome marks either the first bit
- that is different, or the top bit of the first zero byte.
- Shifting left now will bring the critical information into the
- top bits. */
- lsl data1, data1, pos
- lsl data2, data2, pos
- /* But we need to zero-extend (char is unsigned) the value and then
- perform a signed 32-bit subtraction. */
- lsr data1, data1, #56
- sub result, data1, data2, lsr #56
- ret
-#endif
-
-.Lmutual_align:
- /* Sources are mutually aligned, but are not currently at an
- alignment boundary. Round down the addresses and then mask off
- the bytes that preceed the start point. */
- bic src1, src1, #7
- bic src2, src2, #7
- lsl tmp1, tmp1, #3 /* Bytes beyond alignment -> bits. */
- ldr data1, [src1], #8
- neg tmp1, tmp1 /* Bits to alignment -64. */
- ldr data2, [src2], #8
- mov tmp2, #~0
-#ifdef __AARCH64EB__
- /* Big-endian. Early bytes are at MSB. */
- lsl tmp2, tmp2, tmp1 /* Shift (tmp1 & 63). */
-#else
- /* Little-endian. Early bytes are at LSB. */
- lsr tmp2, tmp2, tmp1 /* Shift (tmp1 & 63). */
-#endif
- orr data1, data1, tmp2
- orr data2, data2, tmp2
- b .Lstart_realigned
-
-.Lmisaligned8:
- /* We can do better than this. */
- ldrb data1w, [src1], #1
- ldrb data2w, [src2], #1
- cmp data1w, #1
- ccmp data1w, data2w, #0, cs /* NZCV = 0b0000. */
- b.eq .Lmisaligned8
- sub result, data1, data2
- ret
diff --git a/contrib/cortex-strings/src/aarch64/strcpy.S b/contrib/cortex-strings/src/aarch64/strcpy.S
deleted file mode 100644
--- a/contrib/cortex-strings/src/aarch64/strcpy.S
+++ /dev/null
@@ -1,336 +0,0 @@
-/*
- strcpy/stpcpy - copy a string returning pointer to start/end.
-
- Copyright (c) 2013, 2014, 2015 ARM Ltd.
- All Rights Reserved.
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are met:
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in the
- documentation and/or other materials provided with the distribution.
- * Neither the name of the company nor the names of its contributors
- may be used to endorse or promote products derived from this
- software without specific prior written permission.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
-
-/* Assumptions:
- *
- * ARMv8-a, AArch64, unaligned accesses, min page size 4k.
- */
-
-/* To build as stpcpy, define BUILD_STPCPY before compiling this file.
-
- To test the page crossing code path more thoroughly, compile with
- -DSTRCPY_TEST_PAGE_CROSS - this will force all copies through the slower
- entry path. This option is not intended for production use. */
-
-/* Arguments and results. */
-#define dstin x0
-#define srcin x1
-
-/* Locals and temporaries. */
-#define src x2
-#define dst x3
-#define data1 x4
-#define data1w w4
-#define data2 x5
-#define data2w w5
-#define has_nul1 x6
-#define has_nul2 x7
-#define tmp1 x8
-#define tmp2 x9
-#define tmp3 x10
-#define tmp4 x11
-#define zeroones x12
-#define data1a x13
-#define data2a x14
-#define pos x15
-#define len x16
-#define to_align x17
-
-#ifdef BUILD_STPCPY
-#define STRCPY stpcpy
-#else
-#define STRCPY strcpy
-#endif
-
- .macro def_fn f p2align=0
- .text
- .p2align \p2align
- .global \f
- .type \f, %function
-\f:
- .endm
-
- /* NUL detection works on the principle that (X - 1) & (~X) & 0x80
- (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
- can be done in parallel across the entire word. */
-
-#define REP8_01 0x0101010101010101
-#define REP8_7f 0x7f7f7f7f7f7f7f7f
-#define REP8_80 0x8080808080808080
-
- /* AArch64 systems have a minimum page size of 4k. We can do a quick
- page size check for crossing this boundary on entry and if we
- do not, then we can short-circuit much of the entry code. We
- expect early page-crossing strings to be rare (probability of
- 16/MIN_PAGE_SIZE ~= 0.4%), so the branch should be quite
- predictable, even with random strings.
-
- We don't bother checking for larger page sizes, the cost of setting
- up the correct page size is just not worth the extra gain from
- a small reduction in the cases taking the slow path. Note that
- we only care about whether the first fetch, which may be
- misaligned, crosses a page boundary - after that we move to aligned
- fetches for the remainder of the string. */
-
-#ifdef STRCPY_TEST_PAGE_CROSS
- /* Make everything that isn't Qword aligned look like a page cross. */
-#define MIN_PAGE_P2 4
-#else
-#define MIN_PAGE_P2 12
-#endif
-
-#define MIN_PAGE_SIZE (1 << MIN_PAGE_P2)
-
-def_fn STRCPY p2align=6
- /* For moderately short strings, the fastest way to do the copy is to
- calculate the length of the string in the same way as strlen, then
- essentially do a memcpy of the result. This avoids the need for
- multiple byte copies and further means that by the time we
- reach the bulk copy loop we know we can always use DWord
- accesses. We expect strcpy to rarely be called repeatedly
- with the same source string, so branch prediction is likely to
- always be difficult - we mitigate against this by preferring
- conditional select operations over branches whenever this is
- feasible. */
- and tmp2, srcin, #(MIN_PAGE_SIZE - 1)
- mov zeroones, #REP8_01
- and to_align, srcin, #15
- cmp tmp2, #(MIN_PAGE_SIZE - 16)
- neg tmp1, to_align
- /* The first fetch will straddle a (possible) page boundary iff
- srcin + 15 causes bit[MIN_PAGE_P2] to change value. A 16-byte
- aligned string will never fail the page align check, so will
- always take the fast path. */
- b.gt .Lpage_cross
-
-.Lpage_cross_ok:
- ldp data1, data2, [srcin]
-#ifdef __AARCH64EB__
- /* Because we expect the end to be found within 16 characters
- (profiling shows this is the most common case), it's worth
- swapping the bytes now to save having to recalculate the
- termination syndrome later. We preserve data1 and data2
- so that we can re-use the values later on. */
- rev tmp2, data1
- sub tmp1, tmp2, zeroones
- orr tmp2, tmp2, #REP8_7f
- bics has_nul1, tmp1, tmp2
- b.ne .Lfp_le8
- rev tmp4, data2
- sub tmp3, tmp4, zeroones
- orr tmp4, tmp4, #REP8_7f
-#else
- sub tmp1, data1, zeroones
- orr tmp2, data1, #REP8_7f
- bics has_nul1, tmp1, tmp2
- b.ne .Lfp_le8
- sub tmp3, data2, zeroones
- orr tmp4, data2, #REP8_7f
-#endif
- bics has_nul2, tmp3, tmp4
- b.eq .Lbulk_entry
-
- /* The string is short (<=16 bytes). We don't know exactly how
- short though, yet. Work out the exact length so that we can
- quickly select the optimal copy strategy. */
-.Lfp_gt8:
- rev has_nul2, has_nul2
- clz pos, has_nul2
- mov tmp2, #56
- add dst, dstin, pos, lsr #3 /* Bits to bytes. */
- sub pos, tmp2, pos
-#ifdef __AARCH64EB__
- lsr data2, data2, pos
-#else
- lsl data2, data2, pos
-#endif
- str data2, [dst, #1]
- str data1, [dstin]
-#ifdef BUILD_STPCPY
- add dstin, dst, #8
-#endif
- ret
-
-.Lfp_le8:
- rev has_nul1, has_nul1
- clz pos, has_nul1
- add dst, dstin, pos, lsr #3 /* Bits to bytes. */
- subs tmp2, pos, #24 /* Pos in bits. */
- b.lt .Lfp_lt4
-#ifdef __AARCH64EB__
- mov tmp2, #56
- sub pos, tmp2, pos
- lsr data2, data1, pos
- lsr data1, data1, #32
-#else
- lsr data2, data1, tmp2
-#endif
- /* 4->7 bytes to copy. */
- str data2w, [dst, #-3]
- str data1w, [dstin]
-#ifdef BUILD_STPCPY
- mov dstin, dst
-#endif
- ret
-.Lfp_lt4:
- cbz pos, .Lfp_lt2
- /* 2->3 bytes to copy. */
-#ifdef __AARCH64EB__
- lsr data1, data1, #48
-#endif
- strh data1w, [dstin]
- /* Fall-through, one byte (max) to go. */
-.Lfp_lt2:
- /* Null-terminated string. Last character must be zero! */
- strb wzr, [dst]
-#ifdef BUILD_STPCPY
- mov dstin, dst
-#endif
- ret
-
- .p2align 6
- /* Aligning here ensures that the entry code and main loop all lies
- within one 64-byte cache line. */
-.Lbulk_entry:
- sub to_align, to_align, #16
- stp data1, data2, [dstin]
- sub src, srcin, to_align
- sub dst, dstin, to_align
- b .Lentry_no_page_cross
-
- /* The inner loop deals with two Dwords at a time. This has a
- slightly higher start-up cost, but we should win quite quickly,
- especially on cores with a high number of issue slots per
- cycle, as we get much better parallelism out of the operations. */
-.Lmain_loop:
- stp data1, data2, [dst], #16
-.Lentry_no_page_cross:
- ldp data1, data2, [src], #16
- sub tmp1, data1, zeroones
- orr tmp2, data1, #REP8_7f
- sub tmp3, data2, zeroones
- orr tmp4, data2, #REP8_7f
- bic has_nul1, tmp1, tmp2
- bics has_nul2, tmp3, tmp4
- ccmp has_nul1, #0, #0, eq /* NZCV = 0000 */
- b.eq .Lmain_loop
-
- /* Since we know we are copying at least 16 bytes, the fastest way
- to deal with the tail is to determine the location of the
- trailing NUL, then (re)copy the 16 bytes leading up to that. */
- cmp has_nul1, #0
-#ifdef __AARCH64EB__
- /* For big-endian, carry propagation (if the final byte in the
- string is 0x01) means we cannot use has_nul directly. The
- easiest way to get the correct byte is to byte-swap the data
- and calculate the syndrome a second time. */
- csel data1, data1, data2, ne
- rev data1, data1
- sub tmp1, data1, zeroones
- orr tmp2, data1, #REP8_7f
- bic has_nul1, tmp1, tmp2
-#else
- csel has_nul1, has_nul1, has_nul2, ne
-#endif
- rev has_nul1, has_nul1
- clz pos, has_nul1
- add tmp1, pos, #72
- add pos, pos, #8
- csel pos, pos, tmp1, ne
- add src, src, pos, lsr #3
- add dst, dst, pos, lsr #3
- ldp data1, data2, [src, #-32]
- stp data1, data2, [dst, #-16]
-#ifdef BUILD_STPCPY
- sub dstin, dst, #1
-#endif
- ret
-
-.Lpage_cross:
- bic src, srcin, #15
- /* Start by loading two words at [srcin & ~15], then forcing the
- bytes that precede srcin to 0xff. This means they never look
- like termination bytes. */
- ldp data1, data2, [src]
- lsl tmp1, tmp1, #3 /* Bytes beyond alignment -> bits. */
- tst to_align, #7
- csetm tmp2, ne
-#ifdef __AARCH64EB__
- lsl tmp2, tmp2, tmp1 /* Shift (tmp1 & 63). */
-#else
- lsr tmp2, tmp2, tmp1 /* Shift (tmp1 & 63). */
-#endif
- orr data1, data1, tmp2
- orr data2a, data2, tmp2
- cmp to_align, #8
- csinv data1, data1, xzr, lt
- csel data2, data2, data2a, lt
- sub tmp1, data1, zeroones
- orr tmp2, data1, #REP8_7f
- sub tmp3, data2, zeroones
- orr tmp4, data2, #REP8_7f
- bic has_nul1, tmp1, tmp2
- bics has_nul2, tmp3, tmp4
- ccmp has_nul1, #0, #0, eq /* NZCV = 0000 */
- b.eq .Lpage_cross_ok
- /* We now need to make data1 and data2 look like they've been
- loaded directly from srcin. Do a rotate on the 128-bit value. */
- lsl tmp1, to_align, #3 /* Bytes->bits. */
- neg tmp2, to_align, lsl #3
-#ifdef __AARCH64EB__
- lsl data1a, data1, tmp1
- lsr tmp4, data2, tmp2
- lsl data2, data2, tmp1
- orr tmp4, tmp4, data1a
- cmp to_align, #8
- csel data1, tmp4, data2, lt
- rev tmp2, data1
- rev tmp4, data2
- sub tmp1, tmp2, zeroones
- orr tmp2, tmp2, #REP8_7f
- sub tmp3, tmp4, zeroones
- orr tmp4, tmp4, #REP8_7f
-#else
- lsr data1a, data1, tmp1
- lsl tmp4, data2, tmp2
- lsr data2, data2, tmp1
- orr tmp4, tmp4, data1a
- cmp to_align, #8
- csel data1, tmp4, data2, lt
- sub tmp1, data1, zeroones
- orr tmp2, data1, #REP8_7f
- sub tmp3, data2, zeroones
- orr tmp4, data2, #REP8_7f
-#endif
- bic has_nul1, tmp1, tmp2
- cbnz has_nul1, .Lfp_le8
- bic has_nul2, tmp3, tmp4
- b .Lfp_gt8
-
- .size STRCPY, . - STRCPY
diff --git a/contrib/cortex-strings/src/aarch64/strlen.S b/contrib/cortex-strings/src/aarch64/strlen.S
deleted file mode 100644
--- a/contrib/cortex-strings/src/aarch64/strlen.S
+++ /dev/null
@@ -1,233 +0,0 @@
-/* Copyright (c) 2013-2015, Linaro Limited
- All rights reserved.
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are met:
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in the
- documentation and/or other materials provided with the distribution.
- * Neither the name of the Linaro nor the
- names of its contributors may be used to endorse or promote products
- derived from this software without specific prior written permission.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
-
-/* Assumptions:
- *
- * ARMv8-a, AArch64, unaligned accesses, min page size 4k.
- */
-
-/* To test the page crossing code path more thoroughly, compile with
- -DTEST_PAGE_CROSS - this will force all calls through the slower
- entry path. This option is not intended for production use. */
-
-/* Arguments and results. */
-#define srcin x0
-#define len x0
-
-/* Locals and temporaries. */
-#define src x1
-#define data1 x2
-#define data2 x3
-#define has_nul1 x4
-#define has_nul2 x5
-#define tmp1 x4
-#define tmp2 x5
-#define tmp3 x6
-#define tmp4 x7
-#define zeroones x8
-
-#define L(l) .L ## l
-
- .macro def_fn f p2align=0
- .text
- .p2align \p2align
- .global \f
- .type \f, %function
-\f:
- .endm
-
- /* NUL detection works on the principle that (X - 1) & (~X) & 0x80
- (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
- can be done in parallel across the entire word. A faster check
- (X - 1) & 0x80 is zero for non-NUL ASCII characters, but gives
- false hits for characters 129..255. */
-
-#define REP8_01 0x0101010101010101
-#define REP8_7f 0x7f7f7f7f7f7f7f7f
-#define REP8_80 0x8080808080808080
-
-#ifdef TEST_PAGE_CROSS
-# define MIN_PAGE_SIZE 15
-#else
-# define MIN_PAGE_SIZE 4096
-#endif
-
- /* Since strings are short on average, we check the first 16 bytes
- of the string for a NUL character. In order to do an unaligned ldp
- safely we have to do a page cross check first. If there is a NUL
- byte we calculate the length from the 2 8-byte words using
- conditional select to reduce branch mispredictions (it is unlikely
- strlen will be repeatedly called on strings with the same length).
-
- If the string is longer than 16 bytes, we align src so don't need
- further page cross checks, and process 32 bytes per iteration
- using the fast NUL check. If we encounter non-ASCII characters,
- fallback to a second loop using the full NUL check.
-
- If the page cross check fails, we read 16 bytes from an aligned
- address, remove any characters before the string, and continue
- in the main loop using aligned loads. Since strings crossing a
- page in the first 16 bytes are rare (probability of
- 16/MIN_PAGE_SIZE ~= 0.4%), this case does not need to be optimized.
-
- AArch64 systems have a minimum page size of 4k. We don't bother
- checking for larger page sizes - the cost of setting up the correct
- page size is just not worth the extra gain from a small reduction in
- the cases taking the slow path. Note that we only care about
- whether the first fetch, which may be misaligned, crosses a page
- boundary. */
-
-def_fn strlen p2align=6
- and tmp1, srcin, MIN_PAGE_SIZE - 1
- mov zeroones, REP8_01
- cmp tmp1, MIN_PAGE_SIZE - 16
- b.gt L(page_cross)
- ldp data1, data2, [srcin]
-#ifdef __AARCH64EB__
- /* For big-endian, carry propagation (if the final byte in the
- string is 0x01) means we cannot use has_nul1/2 directly.
- Since we expect strings to be small and early-exit,
- byte-swap the data now so has_null1/2 will be correct. */
- rev data1, data1
- rev data2, data2
-#endif
- sub tmp1, data1, zeroones
- orr tmp2, data1, REP8_7f
- sub tmp3, data2, zeroones
- orr tmp4, data2, REP8_7f
- bics has_nul1, tmp1, tmp2
- bic has_nul2, tmp3, tmp4
- ccmp has_nul2, 0, 0, eq
- beq L(main_loop_entry)
-
- /* Enter with C = has_nul1 == 0. */
- csel has_nul1, has_nul1, has_nul2, cc
- mov len, 8
- rev has_nul1, has_nul1
- clz tmp1, has_nul1
- csel len, xzr, len, cc
- add len, len, tmp1, lsr 3
- ret
-
- /* The inner loop processes 32 bytes per iteration and uses the fast
- NUL check. If we encounter non-ASCII characters, use a second
- loop with the accurate NUL check. */
- .p2align 4
-L(main_loop_entry):
- bic src, srcin, 15
- sub src, src, 16
-L(main_loop):
- ldp data1, data2, [src, 32]!
-.Lpage_cross_entry:
- sub tmp1, data1, zeroones
- sub tmp3, data2, zeroones
- orr tmp2, tmp1, tmp3
- tst tmp2, zeroones, lsl 7
- bne 1f
- ldp data1, data2, [src, 16]
- sub tmp1, data1, zeroones
- sub tmp3, data2, zeroones
- orr tmp2, tmp1, tmp3
- tst tmp2, zeroones, lsl 7
- beq L(main_loop)
- add src, src, 16
-1:
- /* The fast check failed, so do the slower, accurate NUL check. */
- orr tmp2, data1, REP8_7f
- orr tmp4, data2, REP8_7f
- bics has_nul1, tmp1, tmp2
- bic has_nul2, tmp3, tmp4
- ccmp has_nul2, 0, 0, eq
- beq L(nonascii_loop)
-
- /* Enter with C = has_nul1 == 0. */
-L(tail):
-#ifdef __AARCH64EB__
- /* For big-endian, carry propagation (if the final byte in the
- string is 0x01) means we cannot use has_nul1/2 directly. The
- easiest way to get the correct byte is to byte-swap the data
- and calculate the syndrome a second time. */
- csel data1, data1, data2, cc
- rev data1, data1
- sub tmp1, data1, zeroones
- orr tmp2, data1, REP8_7f
- bic has_nul1, tmp1, tmp2
-#else
- csel has_nul1, has_nul1, has_nul2, cc
-#endif
- sub len, src, srcin
- rev has_nul1, has_nul1
- add tmp2, len, 8
- clz tmp1, has_nul1
- csel len, len, tmp2, cc
- add len, len, tmp1, lsr 3
- ret
-
-L(nonascii_loop):
- ldp data1, data2, [src, 16]!
- sub tmp1, data1, zeroones
- orr tmp2, data1, REP8_7f
- sub tmp3, data2, zeroones
- orr tmp4, data2, REP8_7f
- bics has_nul1, tmp1, tmp2
- bic has_nul2, tmp3, tmp4
- ccmp has_nul2, 0, 0, eq
- bne L(tail)
- ldp data1, data2, [src, 16]!
- sub tmp1, data1, zeroones
- orr tmp2, data1, REP8_7f
- sub tmp3, data2, zeroones
- orr tmp4, data2, REP8_7f
- bics has_nul1, tmp1, tmp2
- bic has_nul2, tmp3, tmp4
- ccmp has_nul2, 0, 0, eq
- beq L(nonascii_loop)
- b L(tail)
-
- /* Load 16 bytes from [srcin & ~15] and force the bytes that precede
- srcin to 0x7f, so we ignore any NUL bytes before the string.
- Then continue in the aligned loop. */
-L(page_cross):
- bic src, srcin, 15
- ldp data1, data2, [src]
- lsl tmp1, srcin, 3
- mov tmp4, -1
-#ifdef __AARCH64EB__
- /* Big-endian. Early bytes are at MSB. */
- lsr tmp1, tmp4, tmp1 /* Shift (tmp1 & 63). */
-#else
- /* Little-endian. Early bytes are at LSB. */
- lsl tmp1, tmp4, tmp1 /* Shift (tmp1 & 63). */
-#endif
- orr tmp1, tmp1, REP8_80
- orn data1, data1, tmp1
- orn tmp2, data2, tmp1
- tst srcin, 8
- csel data1, data1, tmp4, eq
- csel data2, data2, tmp2, eq
- b L(page_cross_entry)
-
- .size strlen, . - strlen
diff --git a/contrib/cortex-strings/src/aarch64/strncmp.S b/contrib/cortex-strings/src/aarch64/strncmp.S
deleted file mode 100644
--- a/contrib/cortex-strings/src/aarch64/strncmp.S
+++ /dev/null
@@ -1,222 +0,0 @@
-/* Copyright (c) 2013, Linaro Limited
- All rights reserved.
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are met:
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in the
- documentation and/or other materials provided with the distribution.
- * Neither the name of the Linaro nor the
- names of its contributors may be used to endorse or promote products
- derived from this software without specific prior written permission.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
-
-/* Assumptions:
- *
- * ARMv8-a, AArch64
- */
-
- .macro def_fn f p2align=0
- .text
- .p2align \p2align
- .global \f
- .type \f, %function
-\f:
- .endm
-
-#define REP8_01 0x0101010101010101
-#define REP8_7f 0x7f7f7f7f7f7f7f7f
-#define REP8_80 0x8080808080808080
-
-/* Parameters and result. */
-#define src1 x0
-#define src2 x1
-#define limit x2
-#define result x0
-
-/* Internal variables. */
-#define data1 x3
-#define data1w w3
-#define data2 x4
-#define data2w w4
-#define has_nul x5
-#define diff x6
-#define syndrome x7
-#define tmp1 x8
-#define tmp2 x9
-#define tmp3 x10
-#define zeroones x11
-#define pos x12
-#define limit_wd x13
-#define mask x14
-#define endloop x15
-
- .text
- .p2align 6
- .rep 7
- nop /* Pad so that the loop below fits a cache line. */
- .endr
-def_fn strncmp
- cbz limit, .Lret0
- eor tmp1, src1, src2
- mov zeroones, #REP8_01
- tst tmp1, #7
- b.ne .Lmisaligned8
- ands tmp1, src1, #7
- b.ne .Lmutual_align
- /* Calculate the number of full and partial words -1. */
- sub limit_wd, limit, #1 /* limit != 0, so no underflow. */
- lsr limit_wd, limit_wd, #3 /* Convert to Dwords. */
-
- /* NUL detection works on the principle that (X - 1) & (~X) & 0x80
- (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
- can be done in parallel across the entire word. */
- /* Start of performance-critical section -- one 64B cache line. */
-.Lloop_aligned:
- ldr data1, [src1], #8
- ldr data2, [src2], #8
-.Lstart_realigned:
- subs limit_wd, limit_wd, #1
- sub tmp1, data1, zeroones
- orr tmp2, data1, #REP8_7f
- eor diff, data1, data2 /* Non-zero if differences found. */
- csinv endloop, diff, xzr, pl /* Last Dword or differences. */
- bics has_nul, tmp1, tmp2 /* Non-zero if NUL terminator. */
- ccmp endloop, #0, #0, eq
- b.eq .Lloop_aligned
- /* End of performance-critical section -- one 64B cache line. */
-
- /* Not reached the limit, must have found the end or a diff. */
- tbz limit_wd, #63, .Lnot_limit
-
- /* Limit % 8 == 0 => all bytes significant. */
- ands limit, limit, #7
- b.eq .Lnot_limit
-
- lsl limit, limit, #3 /* Bytes -> bits. */
- mov mask, #~0
-#ifdef __AARCH64EB__
- lsr mask, mask, limit
-#else
- lsl mask, mask, limit
-#endif
- bic data1, data1, mask
- bic data2, data2, mask
-
- /* Make sure that the NUL byte is marked in the syndrome. */
- orr has_nul, has_nul, mask
-
-.Lnot_limit:
- orr syndrome, diff, has_nul
-
-#ifndef __AARCH64EB__
- rev syndrome, syndrome
- rev data1, data1
- /* The MS-non-zero bit of the syndrome marks either the first bit
- that is different, or the top bit of the first zero byte.
- Shifting left now will bring the critical information into the
- top bits. */
- clz pos, syndrome
- rev data2, data2
- lsl data1, data1, pos
- lsl data2, data2, pos
- /* But we need to zero-extend (char is unsigned) the value and then
- perform a signed 32-bit subtraction. */
- lsr data1, data1, #56
- sub result, data1, data2, lsr #56
- ret
-#else
- /* For big-endian we cannot use the trick with the syndrome value
- as carry-propagation can corrupt the upper bits if the trailing
- bytes in the string contain 0x01. */
- /* However, if there is no NUL byte in the dword, we can generate
- the result directly. We can't just subtract the bytes as the
- MSB might be significant. */
- cbnz has_nul, 1f
- cmp data1, data2
- cset result, ne
- cneg result, result, lo
- ret
-1:
- /* Re-compute the NUL-byte detection, using a byte-reversed value. */
- rev tmp3, data1
- sub tmp1, tmp3, zeroones
- orr tmp2, tmp3, #REP8_7f
- bic has_nul, tmp1, tmp2
- rev has_nul, has_nul
- orr syndrome, diff, has_nul
- clz pos, syndrome
- /* The MS-non-zero bit of the syndrome marks either the first bit
- that is different, or the top bit of the first zero byte.
- Shifting left now will bring the critical information into the
- top bits. */
- lsl data1, data1, pos
- lsl data2, data2, pos
- /* But we need to zero-extend (char is unsigned) the value and then
- perform a signed 32-bit subtraction. */
- lsr data1, data1, #56
- sub result, data1, data2, lsr #56
- ret
-#endif
-
-.Lmutual_align:
- /* Sources are mutually aligned, but are not currently at an
- alignment boundary. Round down the addresses and then mask off
- the bytes that precede the start point.
- We also need to adjust the limit calculations, but without
- overflowing if the limit is near ULONG_MAX. */
- bic src1, src1, #7
- bic src2, src2, #7
- ldr data1, [src1], #8
- neg tmp3, tmp1, lsl #3 /* 64 - bits(bytes beyond align). */
- ldr data2, [src2], #8
- mov tmp2, #~0
- sub limit_wd, limit, #1 /* limit != 0, so no underflow. */
-#ifdef __AARCH64EB__
- /* Big-endian. Early bytes are at MSB. */
- lsl tmp2, tmp2, tmp3 /* Shift (tmp3 & 63). */
-#else
- /* Little-endian. Early bytes are at LSB. */
- lsr tmp2, tmp2, tmp3 /* Shift (tmp3 & 63). */
-#endif
- and tmp3, limit_wd, #7
- lsr limit_wd, limit_wd, #3
- /* Adjust the limit. Only low 3 bits used, so overflow irrelevant. */
- add limit, limit, tmp1
- add tmp3, tmp3, tmp1
- orr data1, data1, tmp2
- orr data2, data2, tmp2
- add limit_wd, limit_wd, tmp3, lsr #3
- b .Lstart_realigned
-
-.Lret0:
- mov result, #0
- ret
-
- .p2align 6
-.Lmisaligned8:
- sub limit, limit, #1
-1:
- /* Perhaps we can do better than this. */
- ldrb data1w, [src1], #1
- ldrb data2w, [src2], #1
- subs limit, limit, #1
- ccmp data1w, #1, #0, cs /* NZCV = 0b0000. */
- ccmp data1w, data2w, #0, cs /* NZCV = 0b0000. */
- b.eq 1b
- sub result, data1, data2
- ret
- .size strncmp, . - strncmp
diff --git a/contrib/cortex-strings/src/aarch64/strnlen.S b/contrib/cortex-strings/src/aarch64/strnlen.S
deleted file mode 100644
--- a/contrib/cortex-strings/src/aarch64/strnlen.S
+++ /dev/null
@@ -1,181 +0,0 @@
-/* strnlen - calculate the length of a string with limit.
-
- Copyright (c) 2013, Linaro Limited
- All rights reserved.
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are met:
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in the
- documentation and/or other materials provided with the distribution.
- * Neither the name of the Linaro nor the
- names of its contributors may be used to endorse or promote products
- derived from this software without specific prior written permission.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
-
-/* Assumptions:
- *
- * ARMv8-a, AArch64
- */
-
-/* Arguments and results. */
-#define srcin x0
-#define len x0
-#define limit x1
-
-/* Locals and temporaries. */
-#define src x2
-#define data1 x3
-#define data2 x4
-#define data2a x5
-#define has_nul1 x6
-#define has_nul2 x7
-#define tmp1 x8
-#define tmp2 x9
-#define tmp3 x10
-#define tmp4 x11
-#define zeroones x12
-#define pos x13
-#define limit_wd x14
-
- .macro def_fn f p2align=0
- .text
- .p2align \p2align
- .global \f
- .type \f, %function
-\f:
- .endm
-
-#define REP8_01 0x0101010101010101
-#define REP8_7f 0x7f7f7f7f7f7f7f7f
-#define REP8_80 0x8080808080808080
-
- .text
- .p2align 6
-.Lstart:
- /* Pre-pad to ensure critical loop begins an icache line. */
- .rep 7
- nop
- .endr
- /* Put this code here to avoid wasting more space with pre-padding. */
-.Lhit_limit:
- mov len, limit
- ret
-
-def_fn strnlen
- cbz limit, .Lhit_limit
- mov zeroones, #REP8_01
- bic src, srcin, #15
- ands tmp1, srcin, #15
- b.ne .Lmisaligned
- /* Calculate the number of full and partial words -1. */
- sub limit_wd, limit, #1 /* Limit != 0, so no underflow. */
- lsr limit_wd, limit_wd, #4 /* Convert to Qwords. */
-
- /* NUL detection works on the principle that (X - 1) & (~X) & 0x80
- (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
- can be done in parallel across the entire word. */
- /* The inner loop deals with two Dwords at a time. This has a
- slightly higher start-up cost, but we should win quite quickly,
- especially on cores with a high number of issue slots per
- cycle, as we get much better parallelism out of the operations. */
-
- /* Start of critical section -- keep to one 64Byte cache line. */
-.Lloop:
- ldp data1, data2, [src], #16
-.Lrealigned:
- sub tmp1, data1, zeroones
- orr tmp2, data1, #REP8_7f
- sub tmp3, data2, zeroones
- orr tmp4, data2, #REP8_7f
- bic has_nul1, tmp1, tmp2
- bic has_nul2, tmp3, tmp4
- subs limit_wd, limit_wd, #1
- orr tmp1, has_nul1, has_nul2
- ccmp tmp1, #0, #0, pl /* NZCV = 0000 */
- b.eq .Lloop
- /* End of critical section -- keep to one 64Byte cache line. */
-
- orr tmp1, has_nul1, has_nul2
- cbz tmp1, .Lhit_limit /* No null in final Qword. */
-
- /* We know there's a null in the final Qword. The easiest thing
- to do now is work out the length of the string and return
- MIN (len, limit). */
-
- sub len, src, srcin
- cbz has_nul1, .Lnul_in_data2
-#ifdef __AARCH64EB__
- mov data2, data1
-#endif
- sub len, len, #8
- mov has_nul2, has_nul1
-.Lnul_in_data2:
-#ifdef __AARCH64EB__
- /* For big-endian, carry propagation (if the final byte in the
- string is 0x01) means we cannot use has_nul directly. The
- easiest way to get the correct byte is to byte-swap the data
- and calculate the syndrome a second time. */
- rev data2, data2
- sub tmp1, data2, zeroones
- orr tmp2, data2, #REP8_7f
- bic has_nul2, tmp1, tmp2
-#endif
- sub len, len, #8
- rev has_nul2, has_nul2
- clz pos, has_nul2
- add len, len, pos, lsr #3 /* Bits to bytes. */
- cmp len, limit
- csel len, len, limit, ls /* Return the lower value. */
- ret
-
-.Lmisaligned:
- /* Deal with a partial first word.
- We're doing two things in parallel here;
- 1) Calculate the number of words (but avoiding overflow if
- limit is near ULONG_MAX) - to do this we need to work out
- limit + tmp1 - 1 as a 65-bit value before shifting it;
- 2) Load and mask the initial data words - we force the bytes
- before the ones we are interested in to 0xff - this ensures
- early bytes will not hit any zero detection. */
- sub limit_wd, limit, #1
- neg tmp4, tmp1
- cmp tmp1, #8
-
- and tmp3, limit_wd, #15
- lsr limit_wd, limit_wd, #4
- mov tmp2, #~0
-
- ldp data1, data2, [src], #16
- lsl tmp4, tmp4, #3 /* Bytes beyond alignment -> bits. */
- add tmp3, tmp3, tmp1
-
-#ifdef __AARCH64EB__
- /* Big-endian. Early bytes are at MSB. */
- lsl tmp2, tmp2, tmp4 /* Shift (tmp4 & 63). */
-#else
- /* Little-endian. Early bytes are at LSB. */
- lsr tmp2, tmp2, tmp4 /* Shift (tmp4 & 63). */
-#endif
- add limit_wd, limit_wd, tmp3, lsr #4
-
- orr data1, data1, tmp2
- orr data2a, data2, tmp2
-
- csinv data1, data1, xzr, le
- csel data2, data2, data2a, le
- b .Lrealigned
- .size strnlen, . - .Lstart /* Include pre-padding in size. */
diff --git a/contrib/cortex-strings/src/arm/memchr.S b/contrib/cortex-strings/src/arm/memchr.S
deleted file mode 100644
--- a/contrib/cortex-strings/src/arm/memchr.S
+++ /dev/null
@@ -1,155 +0,0 @@
-/* Copyright (c) 2010-2011, Linaro Limited
- All rights reserved.
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions
- are met:
-
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
-
- * Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in the
- documentation and/or other materials provided with the distribution.
-
- * Neither the name of Linaro Limited nor the names of its
- contributors may be used to endorse or promote products derived
- from this software without specific prior written permission.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-/*
- Written by Dave Gilbert <david.gilbert@linaro.org>
-
- This memchr routine is optimised on a Cortex-A9 and should work on
- all ARMv7 processors. It has a fast path for short sizes, and has
- an optimised path for large data sets; the worst case is finding the
- match early in a large data set.
-
- */
-
-@ 2011-02-07 david.gilbert@linaro.org
-@ Extracted from local git a5b438d861
-@ 2011-07-14 david.gilbert@linaro.org
-@ Import endianness fix from local git ea786f1b
-@ 2011-12-07 david.gilbert@linaro.org
-@ Removed unneeded cbz from align loop
-
- .syntax unified
- .arch armv7-a
-
-@ this lets us check a flag in a 00/ff byte easily in either endianness
-#ifdef __ARMEB__
-#define CHARTSTMASK(c) 1<<(31-(c*8))
-#else
-#define CHARTSTMASK(c) 1<<(c*8)
-#endif
- .text
- .thumb
-
-@ ---------------------------------------------------------------------------
- .thumb_func
- .align 2
- .p2align 4,,15
- .global memchr
- .type memchr,%function
-memchr:
- @ r0 = start of memory to scan
- @ r1 = character to look for
- @ r2 = length
- @ returns r0 = pointer to character or NULL if not found
- and r1,r1,#0xff @ Don't think we can trust the caller to actually pass a char
-
- cmp r2,#16 @ If it's short don't bother with anything clever
- blt 20f
-
- tst r0, #7 @ If it's already aligned skip the next bit
- beq 10f
-
- @ Work up to an aligned point
-5:
- ldrb r3, [r0],#1
- subs r2, r2, #1
- cmp r3, r1
- beq 50f @ If it matches exit found
- tst r0, #7
- bne 5b @ If not aligned yet then do next byte
-
-10:
- @ At this point, we are aligned, we know we have at least 8 bytes to work with
- push {r4,r5,r6,r7}
- orr r1, r1, r1, lsl #8 @ expand the match word across to all bytes
- orr r1, r1, r1, lsl #16
- bic r4, r2, #7 @ Number of double words to work with
- mvns r7, #0 @ all F's
- movs r3, #0
-
-15:
- ldmia r0!,{r5,r6}
- subs r4, r4, #8
- eor r5,r5, r1 @ Get it so that r5,r6 have 00's where the bytes match the target
- eor r6,r6, r1
- uadd8 r5, r5, r7 @ Parallel add 0xff - sets the GE bits for anything that wasn't 0
- sel r5, r3, r7 @ bytes are 00 for non-00 bytes, or ff for 00 bytes - NOTE INVERSION
- uadd8 r6, r6, r7 @ Parallel add 0xff - sets the GE bits for anything that wasn't 0
- sel r6, r5, r7 @ chained....bytes are 00 for non-00 bytes, or ff for 00 bytes - NOTE INVERSION
- cbnz r6, 60f
- bne 15b @ (Flags from the subs above) If not run out of bytes then go around again
-
- pop {r4,r5,r6,r7}
- and r1,r1,#0xff @ Get r1 back to a single character from the expansion above
- and r2,r2,#7 @ Leave the count remaining as the number after the double words have been done
-
-20:
- cbz r2, 40f @ 0 length or hit the end already then not found
-
-21: @ Post aligned section, or just a short call
- ldrb r3,[r0],#1
- subs r2,r2,#1
- eor r3,r3,r1 @ r3 = 0 if match - doesn't break flags from sub
- cbz r3, 50f
- bne 21b @ on r2 flags
-
-40:
- movs r0,#0 @ not found
- bx lr
-
-50:
- subs r0,r0,#1 @ found
- bx lr
-
-60: @ We're here because the fast path found a hit - now we have to track down exactly which word it was
- @ r0 points to the start of the double word after the one that was tested
- @ r5 has the 00/ff pattern for the first word, r6 has the chained value
- cmp r5, #0
- itte eq
- moveq r5, r6 @ the end is in the 2nd word
- subeq r0,r0,#3 @ Points to 2nd byte of 2nd word
- subne r0,r0,#7 @ or 2nd byte of 1st word
-
- @ r0 currently points to the 2nd byte of the word containing the hit
- tst r5, # CHARTSTMASK(0) @ 1st character
- bne 61f
- adds r0,r0,#1
- tst r5, # CHARTSTMASK(1) @ 2nd character
- ittt eq
- addeq r0,r0,#1
- tsteq r5, # (3<<15) @ 2nd & 3rd character
- @ If not the 3rd must be the last one
- addeq r0,r0,#1
-
-61:
- pop {r4,r5,r6,r7}
- subs r0,r0,#1
- bx lr
diff --git a/contrib/cortex-strings/src/arm/memcpy.S b/contrib/cortex-strings/src/arm/memcpy.S
deleted file mode 100644
--- a/contrib/cortex-strings/src/arm/memcpy.S
+++ /dev/null
@@ -1,617 +0,0 @@
-/* Copyright (c) 2013, Linaro Limited
- All rights reserved.
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions
- are met:
-
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
-
- * Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in the
- documentation and/or other materials provided with the distribution.
-
- * Neither the name of Linaro Limited nor the names of its
- contributors may be used to endorse or promote products derived
- from this software without specific prior written permission.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-/*
- This memcpy routine is optimised for Cortex-A15 cores and takes advantage
- of VFP or NEON when built with the appropriate flags.
-
- Assumptions:
-
- ARMv6 (ARMv7-a if using Neon)
- ARM state
- Unaligned accesses
-
- */
-
- .syntax unified
- /* This implementation requires ARM state. */
- .arm
-
-#ifdef __ARM_NEON__
-
- .fpu neon
- .arch armv7-a
-# define FRAME_SIZE 4
-# define USE_VFP
-# define USE_NEON
-
-#elif !defined (__SOFTFP__)
-
- .arch armv6
- .fpu vfpv2
-# define FRAME_SIZE 32
-# define USE_VFP
-
-#else
- .arch armv6
-# define FRAME_SIZE 32
-
-#endif
-
-/* Old versions of GAS incorrectly implement the NEON align semantics. */
-#ifdef BROKEN_ASM_NEON_ALIGN
-#define ALIGN(addr, align) addr,:align
-#else
-#define ALIGN(addr, align) addr:align
-#endif
-
-#define PC_OFFSET 8 /* PC pipeline compensation. */
-#define INSN_SIZE 4
-
-/* Call parameters. */
-#define dstin r0
-#define src r1
-#define count r2
-
-/* Locals. */
-#define tmp1 r3
-#define dst ip
-#define tmp2 r10
-
-#ifndef USE_NEON
-/* For bulk copies using GP registers. */
-#define A_l r2 /* Call-clobbered. */
-#define A_h r3 /* Call-clobbered. */
-#define B_l r4
-#define B_h r5
-#define C_l r6
-#define C_h r7
-#define D_l r8
-#define D_h r9
-#endif
-
-/* Number of lines ahead to pre-fetch data. If you change this the code
- below will need adjustment to compensate. */
-
-#define prefetch_lines 5
-
-#ifdef USE_VFP
- .macro cpy_line_vfp vreg, base
- vstr \vreg, [dst, #\base]
- vldr \vreg, [src, #\base]
- vstr d0, [dst, #\base + 8]
- vldr d0, [src, #\base + 8]
- vstr d1, [dst, #\base + 16]
- vldr d1, [src, #\base + 16]
- vstr d2, [dst, #\base + 24]
- vldr d2, [src, #\base + 24]
- vstr \vreg, [dst, #\base + 32]
- vldr \vreg, [src, #\base + prefetch_lines * 64 - 32]
- vstr d0, [dst, #\base + 40]
- vldr d0, [src, #\base + 40]
- vstr d1, [dst, #\base + 48]
- vldr d1, [src, #\base + 48]
- vstr d2, [dst, #\base + 56]
- vldr d2, [src, #\base + 56]
- .endm
-
- .macro cpy_tail_vfp vreg, base
- vstr \vreg, [dst, #\base]
- vldr \vreg, [src, #\base]
- vstr d0, [dst, #\base + 8]
- vldr d0, [src, #\base + 8]
- vstr d1, [dst, #\base + 16]
- vldr d1, [src, #\base + 16]
- vstr d2, [dst, #\base + 24]
- vldr d2, [src, #\base + 24]
- vstr \vreg, [dst, #\base + 32]
- vstr d0, [dst, #\base + 40]
- vldr d0, [src, #\base + 40]
- vstr d1, [dst, #\base + 48]
- vldr d1, [src, #\base + 48]
- vstr d2, [dst, #\base + 56]
- vldr d2, [src, #\base + 56]
- .endm
-#endif
-
- .macro def_fn f p2align=0
- .text
- .p2align \p2align
- .global \f
- .type \f, %function
-\f:
- .endm
-
-def_fn memcpy p2align=6
-
- mov dst, dstin /* Preserve dstin, we need to return it. */
- cmp count, #64
- bge .Lcpy_not_short
- /* Deal with small copies quickly by dropping straight into the
- exit block. */
-
-.Ltail63unaligned:
-#ifdef USE_NEON
- and tmp1, count, #0x38
- rsb tmp1, tmp1, #(56 - PC_OFFSET + INSN_SIZE)
- add pc, pc, tmp1
- vld1.8 {d0}, [src]! /* 14 words to go. */
- vst1.8 {d0}, [dst]!
- vld1.8 {d0}, [src]! /* 12 words to go. */
- vst1.8 {d0}, [dst]!
- vld1.8 {d0}, [src]! /* 10 words to go. */
- vst1.8 {d0}, [dst]!
- vld1.8 {d0}, [src]! /* 8 words to go. */
- vst1.8 {d0}, [dst]!
- vld1.8 {d0}, [src]! /* 6 words to go. */
- vst1.8 {d0}, [dst]!
- vld1.8 {d0}, [src]! /* 4 words to go. */
- vst1.8 {d0}, [dst]!
- vld1.8 {d0}, [src]! /* 2 words to go. */
- vst1.8 {d0}, [dst]!
-
- tst count, #4
- ldrne tmp1, [src], #4
- strne tmp1, [dst], #4
-#else
- /* Copy up to 15 full words of data. May not be aligned. */
- /* Cannot use VFP for unaligned data. */
- and tmp1, count, #0x3c
- add dst, dst, tmp1
- add src, src, tmp1
- rsb tmp1, tmp1, #(60 - PC_OFFSET/2 + INSN_SIZE/2)
- /* Jump directly into the sequence below at the correct offset. */
- add pc, pc, tmp1, lsl #1
-
- ldr tmp1, [src, #-60] /* 15 words to go. */
- str tmp1, [dst, #-60]
-
- ldr tmp1, [src, #-56] /* 14 words to go. */
- str tmp1, [dst, #-56]
- ldr tmp1, [src, #-52]
- str tmp1, [dst, #-52]
-
- ldr tmp1, [src, #-48] /* 12 words to go. */
- str tmp1, [dst, #-48]
- ldr tmp1, [src, #-44]
- str tmp1, [dst, #-44]
-
- ldr tmp1, [src, #-40] /* 10 words to go. */
- str tmp1, [dst, #-40]
- ldr tmp1, [src, #-36]
- str tmp1, [dst, #-36]
-
- ldr tmp1, [src, #-32] /* 8 words to go. */
- str tmp1, [dst, #-32]
- ldr tmp1, [src, #-28]
- str tmp1, [dst, #-28]
-
- ldr tmp1, [src, #-24] /* 6 words to go. */
- str tmp1, [dst, #-24]
- ldr tmp1, [src, #-20]
- str tmp1, [dst, #-20]
-
- ldr tmp1, [src, #-16] /* 4 words to go. */
- str tmp1, [dst, #-16]
- ldr tmp1, [src, #-12]
- str tmp1, [dst, #-12]
-
- ldr tmp1, [src, #-8] /* 2 words to go. */
- str tmp1, [dst, #-8]
- ldr tmp1, [src, #-4]
- str tmp1, [dst, #-4]
-#endif
-
- lsls count, count, #31
- ldrhcs tmp1, [src], #2
- ldrbne src, [src] /* Src is dead, use as a scratch. */
- strhcs tmp1, [dst], #2
- strbne src, [dst]
- bx lr
-
-.Lcpy_not_short:
- /* At least 64 bytes to copy, but don't know the alignment yet. */
- str tmp2, [sp, #-FRAME_SIZE]!
- and tmp2, src, #7
- and tmp1, dst, #7
- cmp tmp1, tmp2
- bne .Lcpy_notaligned
-
-#ifdef USE_VFP
- /* Magic dust alert! Force VFP on Cortex-A9. Experiments show
- that the FP pipeline is much better at streaming loads and
- stores. This is outside the critical loop. */
- vmov.f32 s0, s0
-#endif
-
- /* SRC and DST have the same mutual 64-bit alignment, but we may
- still need to pre-copy some bytes to get to natural alignment.
- We bring SRC and DST into full 64-bit alignment. */
- lsls tmp2, dst, #29
- beq 1f
- rsbs tmp2, tmp2, #0
- sub count, count, tmp2, lsr #29
- ldrmi tmp1, [src], #4
- strmi tmp1, [dst], #4
- lsls tmp2, tmp2, #2
- ldrhcs tmp1, [src], #2
- ldrbne tmp2, [src], #1
- strhcs tmp1, [dst], #2
- strbne tmp2, [dst], #1
-
-1:
- subs tmp2, count, #64 /* Use tmp2 for count. */
- blt .Ltail63aligned
-
- cmp tmp2, #512
- bge .Lcpy_body_long
-
-.Lcpy_body_medium: /* Count in tmp2. */
-#ifdef USE_VFP
-1:
- vldr d0, [src, #0]
- subs tmp2, tmp2, #64
- vldr d1, [src, #8]
- vstr d0, [dst, #0]
- vldr d0, [src, #16]
- vstr d1, [dst, #8]
- vldr d1, [src, #24]
- vstr d0, [dst, #16]
- vldr d0, [src, #32]
- vstr d1, [dst, #24]
- vldr d1, [src, #40]
- vstr d0, [dst, #32]
- vldr d0, [src, #48]
- vstr d1, [dst, #40]
- vldr d1, [src, #56]
- vstr d0, [dst, #48]
- add src, src, #64
- vstr d1, [dst, #56]
- add dst, dst, #64
- bge 1b
- tst tmp2, #0x3f
- beq .Ldone
-
-.Ltail63aligned: /* Count in tmp2. */
- and tmp1, tmp2, #0x38
- add dst, dst, tmp1
- add src, src, tmp1
- rsb tmp1, tmp1, #(56 - PC_OFFSET + INSN_SIZE)
- add pc, pc, tmp1
-
- vldr d0, [src, #-56] /* 14 words to go. */
- vstr d0, [dst, #-56]
- vldr d0, [src, #-48] /* 12 words to go. */
- vstr d0, [dst, #-48]
- vldr d0, [src, #-40] /* 10 words to go. */
- vstr d0, [dst, #-40]
- vldr d0, [src, #-32] /* 8 words to go. */
- vstr d0, [dst, #-32]
- vldr d0, [src, #-24] /* 6 words to go. */
- vstr d0, [dst, #-24]
- vldr d0, [src, #-16] /* 4 words to go. */
- vstr d0, [dst, #-16]
- vldr d0, [src, #-8] /* 2 words to go. */
- vstr d0, [dst, #-8]
-#else
- sub src, src, #8
- sub dst, dst, #8
-1:
- ldrd A_l, A_h, [src, #8]
- strd A_l, A_h, [dst, #8]
- ldrd A_l, A_h, [src, #16]
- strd A_l, A_h, [dst, #16]
- ldrd A_l, A_h, [src, #24]
- strd A_l, A_h, [dst, #24]
- ldrd A_l, A_h, [src, #32]
- strd A_l, A_h, [dst, #32]
- ldrd A_l, A_h, [src, #40]
- strd A_l, A_h, [dst, #40]
- ldrd A_l, A_h, [src, #48]
- strd A_l, A_h, [dst, #48]
- ldrd A_l, A_h, [src, #56]
- strd A_l, A_h, [dst, #56]
- ldrd A_l, A_h, [src, #64]!
- strd A_l, A_h, [dst, #64]!
- subs tmp2, tmp2, #64
- bge 1b
- tst tmp2, #0x3f
- bne 1f
- ldr tmp2,[sp], #FRAME_SIZE
- bx lr
-1:
- add src, src, #8
- add dst, dst, #8
-
-.Ltail63aligned: /* Count in tmp2. */
- /* Copy up to 7 d-words of data. Similar to Ltail63unaligned, but
- we know that the src and dest are 64-bit aligned so we can use
- LDRD/STRD to improve efficiency. */
- /* TMP2 is now negative, but we don't care about that. The bottom
- six bits still tell us how many bytes are left to copy. */
-
- and tmp1, tmp2, #0x38
- add dst, dst, tmp1
- add src, src, tmp1
- rsb tmp1, tmp1, #(56 - PC_OFFSET + INSN_SIZE)
- add pc, pc, tmp1
- ldrd A_l, A_h, [src, #-56] /* 14 words to go. */
- strd A_l, A_h, [dst, #-56]
- ldrd A_l, A_h, [src, #-48] /* 12 words to go. */
- strd A_l, A_h, [dst, #-48]
- ldrd A_l, A_h, [src, #-40] /* 10 words to go. */
- strd A_l, A_h, [dst, #-40]
- ldrd A_l, A_h, [src, #-32] /* 8 words to go. */
- strd A_l, A_h, [dst, #-32]
- ldrd A_l, A_h, [src, #-24] /* 6 words to go. */
- strd A_l, A_h, [dst, #-24]
- ldrd A_l, A_h, [src, #-16] /* 4 words to go. */
- strd A_l, A_h, [dst, #-16]
- ldrd A_l, A_h, [src, #-8] /* 2 words to go. */
- strd A_l, A_h, [dst, #-8]
-
-#endif
- tst tmp2, #4
- ldrne tmp1, [src], #4
- strne tmp1, [dst], #4
- lsls tmp2, tmp2, #31 /* Count (tmp2) now dead. */
- ldrhcs tmp1, [src], #2
- ldrbne tmp2, [src]
- strhcs tmp1, [dst], #2
- strbne tmp2, [dst]
-
-.Ldone:
- ldr tmp2, [sp], #FRAME_SIZE
- bx lr
-
-.Lcpy_body_long: /* Count in tmp2. */
-
- /* Long copy. We know that there's at least (prefetch_lines * 64)
- bytes to go. */
-#ifdef USE_VFP
- /* Don't use PLD. Instead, read some data in advance of the current
- copy position into a register. This should act like a PLD
- operation but we won't have to repeat the transfer. */
-
- vldr d3, [src, #0]
- vldr d4, [src, #64]
- vldr d5, [src, #128]
- vldr d6, [src, #192]
- vldr d7, [src, #256]
-
- vldr d0, [src, #8]
- vldr d1, [src, #16]
- vldr d2, [src, #24]
- add src, src, #32
-
- subs tmp2, tmp2, #prefetch_lines * 64 * 2
- blt 2f
-1:
- cpy_line_vfp d3, 0
- cpy_line_vfp d4, 64
- cpy_line_vfp d5, 128
- add dst, dst, #3 * 64
- add src, src, #3 * 64
- cpy_line_vfp d6, 0
- cpy_line_vfp d7, 64
- add dst, dst, #2 * 64
- add src, src, #2 * 64
- subs tmp2, tmp2, #prefetch_lines * 64
- bge 1b
-
-2:
- cpy_tail_vfp d3, 0
- cpy_tail_vfp d4, 64
- cpy_tail_vfp d5, 128
- add src, src, #3 * 64
- add dst, dst, #3 * 64
- cpy_tail_vfp d6, 0
- vstr d7, [dst, #64]
- vldr d7, [src, #64]
- vstr d0, [dst, #64 + 8]
- vldr d0, [src, #64 + 8]
- vstr d1, [dst, #64 + 16]
- vldr d1, [src, #64 + 16]
- vstr d2, [dst, #64 + 24]
- vldr d2, [src, #64 + 24]
- vstr d7, [dst, #64 + 32]
- add src, src, #96
- vstr d0, [dst, #64 + 40]
- vstr d1, [dst, #64 + 48]
- vstr d2, [dst, #64 + 56]
- add dst, dst, #128
- add tmp2, tmp2, #prefetch_lines * 64
- b .Lcpy_body_medium
-#else
- /* Long copy. Use an SMS style loop to maximize the I/O
- bandwidth of the core. We don't have enough spare registers
- to synthesise prefetching, so use PLD operations. */
- /* Pre-bias src and dst. */
- sub src, src, #8
- sub dst, dst, #8
- pld [src, #8]
- pld [src, #72]
- subs tmp2, tmp2, #64
- pld [src, #136]
- ldrd A_l, A_h, [src, #8]
- strd B_l, B_h, [sp, #8]
- ldrd B_l, B_h, [src, #16]
- strd C_l, C_h, [sp, #16]
- ldrd C_l, C_h, [src, #24]
- strd D_l, D_h, [sp, #24]
- pld [src, #200]
- ldrd D_l, D_h, [src, #32]!
- b 1f
- .p2align 6
-2:
- pld [src, #232]
- strd A_l, A_h, [dst, #40]
- ldrd A_l, A_h, [src, #40]
- strd B_l, B_h, [dst, #48]
- ldrd B_l, B_h, [src, #48]
- strd C_l, C_h, [dst, #56]
- ldrd C_l, C_h, [src, #56]
- strd D_l, D_h, [dst, #64]!
- ldrd D_l, D_h, [src, #64]!
- subs tmp2, tmp2, #64
-1:
- strd A_l, A_h, [dst, #8]
- ldrd A_l, A_h, [src, #8]
- strd B_l, B_h, [dst, #16]
- ldrd B_l, B_h, [src, #16]
- strd C_l, C_h, [dst, #24]
- ldrd C_l, C_h, [src, #24]
- strd D_l, D_h, [dst, #32]
- ldrd D_l, D_h, [src, #32]
- bcs 2b
- /* Save the remaining bytes and restore the callee-saved regs. */
- strd A_l, A_h, [dst, #40]
- add src, src, #40
- strd B_l, B_h, [dst, #48]
- ldrd B_l, B_h, [sp, #8]
- strd C_l, C_h, [dst, #56]
- ldrd C_l, C_h, [sp, #16]
- strd D_l, D_h, [dst, #64]
- ldrd D_l, D_h, [sp, #24]
- add dst, dst, #72
- tst tmp2, #0x3f
- bne .Ltail63aligned
- ldr tmp2, [sp], #FRAME_SIZE
- bx lr
-#endif
-
-.Lcpy_notaligned:
- pld [src]
- pld [src, #64]
- /* There's at least 64 bytes to copy, but there is no mutual
- alignment. */
- /* Bring DST to 64-bit alignment. */
- lsls tmp2, dst, #29
- pld [src, #(2 * 64)]
- beq 1f
- rsbs tmp2, tmp2, #0
- sub count, count, tmp2, lsr #29
- ldrmi tmp1, [src], #4
- strmi tmp1, [dst], #4
- lsls tmp2, tmp2, #2
- ldrbne tmp1, [src], #1
- ldrhcs tmp2, [src], #2
- strbne tmp1, [dst], #1
- strhcs tmp2, [dst], #2
-1:
- pld [src, #(3 * 64)]
- subs count, count, #64
- ldrmi tmp2, [sp], #FRAME_SIZE
- bmi .Ltail63unaligned
- pld [src, #(4 * 64)]
-
-#ifdef USE_NEON
- vld1.8 {d0-d3}, [src]!
- vld1.8 {d4-d7}, [src]!
- subs count, count, #64
- bmi 2f
-1:
- pld [src, #(4 * 64)]
- vst1.8 {d0-d3}, [ALIGN (dst, 64)]!
- vld1.8 {d0-d3}, [src]!
- vst1.8 {d4-d7}, [ALIGN (dst, 64)]!
- vld1.8 {d4-d7}, [src]!
- subs count, count, #64
- bpl 1b
-2:
- vst1.8 {d0-d3}, [ALIGN (dst, 64)]!
- vst1.8 {d4-d7}, [ALIGN (dst, 64)]!
- ands count, count, #0x3f
-#else
- /* Use an SMS style loop to maximize the I/O bandwidth. */
- sub src, src, #4
- sub dst, dst, #8
- subs tmp2, count, #64 /* Use tmp2 for count. */
- ldr A_l, [src, #4]
- ldr A_h, [src, #8]
- strd B_l, B_h, [sp, #8]
- ldr B_l, [src, #12]
- ldr B_h, [src, #16]
- strd C_l, C_h, [sp, #16]
- ldr C_l, [src, #20]
- ldr C_h, [src, #24]
- strd D_l, D_h, [sp, #24]
- ldr D_l, [src, #28]
- ldr D_h, [src, #32]!
- b 1f
- .p2align 6
-2:
- pld [src, #(5 * 64) - (32 - 4)]
- strd A_l, A_h, [dst, #40]
- ldr A_l, [src, #36]
- ldr A_h, [src, #40]
- strd B_l, B_h, [dst, #48]
- ldr B_l, [src, #44]
- ldr B_h, [src, #48]
- strd C_l, C_h, [dst, #56]
- ldr C_l, [src, #52]
- ldr C_h, [src, #56]
- strd D_l, D_h, [dst, #64]!
- ldr D_l, [src, #60]
- ldr D_h, [src, #64]!
- subs tmp2, tmp2, #64
-1:
- strd A_l, A_h, [dst, #8]
- ldr A_l, [src, #4]
- ldr A_h, [src, #8]
- strd B_l, B_h, [dst, #16]
- ldr B_l, [src, #12]
- ldr B_h, [src, #16]
- strd C_l, C_h, [dst, #24]
- ldr C_l, [src, #20]
- ldr C_h, [src, #24]
- strd D_l, D_h, [dst, #32]
- ldr D_l, [src, #28]
- ldr D_h, [src, #32]
- bcs 2b
-
- /* Save the remaining bytes and restore the callee-saved regs. */
- strd A_l, A_h, [dst, #40]
- add src, src, #36
- strd B_l, B_h, [dst, #48]
- ldrd B_l, B_h, [sp, #8]
- strd C_l, C_h, [dst, #56]
- ldrd C_l, C_h, [sp, #16]
- strd D_l, D_h, [dst, #64]
- ldrd D_l, D_h, [sp, #24]
- add dst, dst, #72
- ands count, tmp2, #0x3f
-#endif
- ldr tmp2, [sp], #FRAME_SIZE
- bne .Ltail63unaligned
- bx lr
-
- .size memcpy, . - memcpy
diff --git a/contrib/cortex-strings/src/arm/memset.S b/contrib/cortex-strings/src/arm/memset.S
deleted file mode 100644
--- a/contrib/cortex-strings/src/arm/memset.S
+++ /dev/null
@@ -1,122 +0,0 @@
-/* Copyright (c) 2010-2011, Linaro Limited
- All rights reserved.
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions
- are met:
-
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
-
- * Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in the
- documentation and/or other materials provided with the distribution.
-
- * Neither the name of Linaro Limited nor the names of its
- contributors may be used to endorse or promote products derived
- from this software without specific prior written permission.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-/*
- Written by Dave Gilbert <david.gilbert@linaro.org>
-
- This memset routine is optimised on a Cortex-A9 and should work on
- all ARMv7 processors.
-
- */
-
- .syntax unified
- .arch armv7-a
-
-@ 2011-08-30 david.gilbert@linaro.org
-@ Extracted from local git 2f11b436
-
-@ this lets us check a flag in a 00/ff byte easily in either endianness
-#ifdef __ARMEB__
-#define CHARTSTMASK(c) 1<<(31-(c*8))
-#else
-#define CHARTSTMASK(c) 1<<(c*8)
-#endif
- .text
- .thumb
-
-@ ---------------------------------------------------------------------------
- .thumb_func
- .align 2
- .p2align 4,,15
- .global memset
- .type memset,%function
-memset:
- @ r0 = address (start of the region to fill; never modified below)
- @ r1 = character (only the low byte is ever stored)
- @ r2 = count (number of bytes to fill)
- @ returns original address in r0
-
- mov r3, r0 @ Leave r0 alone; r3 is the moving write pointer
- cbz r2, 10f @ Exit if 0 length
-
- tst r0, #7 @ Is the start address 8-byte aligned?
- beq 2f @ Already aligned
-
- @ Ok, so we're misaligned here: store single bytes until r3 is 8-byte aligned
-1:
- strb r1, [r3], #1 @ Store one byte, post-incrementing r3
- subs r2,r2,#1 @ One fewer byte to go (flags overwritten by the tst below)
- tst r3, #7 @ Z set once r3 is aligned; consumed by the bne two lines down
- cbz r2, 10f @ Exit if we hit the end (cbz does not alter the flags)
- bne 1b @ go round again if still misaligned
-
-2:
- @ OK, so we're aligned
- push {r4,r5,r6,r7} @ r4 = chunk counter, r5-r7 = copies of the fill word
- bics r4, r2, #15 @ r4 = count rounded down to a multiple of 16; if less than 16 bytes then need to finish it off
- beq 5f @ No whole 16-byte chunk: go straight to the byte tail
-
-3:
- @ POSIX says that ch is cast to an unsigned char. A uxtb is one
- @ byte and takes two cycles, where an AND is four bytes but one
- @ cycle.
- and r1, #0xFF
- orr r1, r1, r1, lsl#8 @ Same character into all bytes
- orr r1, r1, r1, lsl#16
- mov r5,r1 @ Replicate the fill word so one stmia writes 16 bytes
- mov r6,r1
- mov r7,r1
-
-4:
- subs r4,r4,#16 @ r4 is a non-zero multiple of 16 on entry, so it reaches exactly 0
- stmia r3!,{r1,r5,r6,r7} @ 16 bytes per iteration (stmia leaves the flags from subs intact)
- bne 4b
- and r2,r2,#15 @ r2 = bytes left over after the 16-byte chunks (0..15)
-
- @ At this point we're still aligned and we have up to 15 bytes left to write
- @ we can avoid some of the byte-at-a-time now by testing for some big chunks
- tst r2,#8 @ At least 8 bytes left?
- itt ne
- subne r2,r2,#8
- stmiane r3!,{r1,r5} @ One 8-byte store
-
-5:
- pop {r4,r5,r6,r7}
- cbz r2, 10f @ Nothing left over
-
- @ Got to do any last < alignment bytes
-6:
- subs r2,r2,#1
- strb r1,[r3],#1 @ strb stores only the low byte, so this is correct whether or not r1 was replicated at 3:
- bne 6b
-
-10:
- bx lr @ goodbye
diff --git a/contrib/cortex-strings/src/arm/strchr.S b/contrib/cortex-strings/src/arm/strchr.S
deleted file mode 100644
--- a/contrib/cortex-strings/src/arm/strchr.S
+++ /dev/null
@@ -1,80 +0,0 @@
-/* Copyright (c) 2010-2011, Linaro Limited
- All rights reserved.
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions
- are met:
-
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
-
- * Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in the
- documentation and/or other materials provided with the distribution.
-
- * Neither the name of Linaro Limited nor the names of its
- contributors may be used to endorse or promote products derived
- from this software without specific prior written permission.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-/*
- Written by Dave Gilbert <david.gilbert@linaro.org>
-
- A very simple strchr routine, from benchmarks on A9 it's a bit faster than
- the current version in eglibc (2.12.1-0ubuntu14 package)
- I don't think doing a word at a time version is worth it since a lot
- of strchr cases are very short anyway.
-
- */
-
-@ 2011-02-07 david.gilbert@linaro.org
-@ Extracted from local git a5b438d861
-
- .syntax unified
- .arch armv7-a
-
- .text
- .thumb
-
-@ ---------------------------------------------------------------------------
-
- .thumb_func
- .align 2
- .p2align 4,,15
- .global strchr
- .type strchr,%function
-strchr:
- @ r0 = start of string
- @ r1 = character to match
- @ returns NULL for no match, or a pointer to the match
- and r1,r1, #255 @ Compare against the low byte of the character only
-
-1:
- ldrb r2,[r0],#1 @ r2 = next byte; r0 now points one past it
- cmp r2,r1 @ Flags consumed by the bne below (cbz leaves them untouched)
- cbz r2,10f @ Hit the terminating NUL
- bne 1b @ Not the wanted character: keep scanning
-
- @ We're here if it matched
-5:
- subs r0,r0,#1 @ Undo the post-increment so r0 points at the matching byte
- bx lr
-
-10:
- @ We're here if we ran off the end
- cmp r1, #0 @ Corner case - you're allowed to search for the nil and get a pointer to it
- beq 5b @ A bit messy, if it's common we should branch at the start to a special loop
- mov r0,#0 @ No match: return NULL
- bx lr
diff --git a/contrib/cortex-strings/src/arm/strcmp.S b/contrib/cortex-strings/src/arm/strcmp.S
deleted file mode 100644
--- a/contrib/cortex-strings/src/arm/strcmp.S
+++ /dev/null
@@ -1,500 +0,0 @@
-/*
- * Copyright (c) 2012-2014 ARM Ltd
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. The name of the company may not be used to endorse or promote
- * products derived from this software without specific prior written
- * permission.
- *
- * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
- * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
- * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
- * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-/* Implementation of strcmp for ARMv7 when DSP instructions are
- available. Use ldrd to support wider loads, provided the data
- is sufficiently aligned. Use saturating arithmetic to optimize
- the compares. */
-
-/* Build Options:
- STRCMP_NO_PRECHECK: Don't run a quick pre-check of the first
- byte in the string. If comparing completely random strings
- the pre-check will save time, since there is a very high
- probability of a mismatch in the first character: we save
- significant overhead if this is the common case. However,
- if strings are likely to be identical (eg because we're
- verifying a hit in a hash table), then this check is largely
- redundant. */
-
-#define STRCMP_NO_PRECHECK 0
-
- /* This version uses Thumb-2 code. */
- .thumb
- .syntax unified
-
-#ifdef __ARM_BIG_ENDIAN
-#define S2LO lsl
-#define S2LOEQ lsleq
-#define S2HI lsr
-#define MSB 0x000000ff
-#define LSB 0xff000000
-#define BYTE0_OFFSET 24
-#define BYTE1_OFFSET 16
-#define BYTE2_OFFSET 8
-#define BYTE3_OFFSET 0
-#else /* not __ARM_BIG_ENDIAN */
-#define S2LO lsr
-#define S2LOEQ lsreq
-#define S2HI lsl
-#define BYTE0_OFFSET 0
-#define BYTE1_OFFSET 8
-#define BYTE2_OFFSET 16
-#define BYTE3_OFFSET 24
-#define MSB 0xff000000
-#define LSB 0x000000ff
-#endif /* not __ARM_BIG_ENDIAN */
-
- .macro def_fn f p2align=0 /* Open global function \f in .text; p2align is the log2 alignment (default 0). */
- .text
- .p2align \p2align
- .global \f
- .type \f, %function
-\f: /* The entry label itself; the matching .size is written by the user of the macro. */
- .endm
-
-/* Parameters and result. */
-#define src1 r0
-#define src2 r1
-#define result r0 /* Overlaps src1. */
-
-/* Internal variables. */
-#define tmp1 r4
-#define tmp2 r5
-#define const_m1 r12
-
-/* Additional internal variables for 64-bit aligned data. */
-#define data1a r2
-#define data1b r3
-#define data2a r6
-#define data2b r7
-#define syndrome_a tmp1
-#define syndrome_b tmp2
-
-/* Additional internal variables for 32-bit aligned data. */
-#define data1 r2
-#define data2 r3
-#define syndrome tmp2
-
-
- /* Macro to compute and return the result value for word-aligned
- cases. Arguments: synd = syndrome word, d1/d2 = the data words
- from src1/src2, restore_r6 = non-zero to reload r6/r7 from the
- frame. Pops the 16-byte frame and returns via bx lr. */
- .macro strcmp_epilogue_aligned synd d1 d2 restore_r6
-#ifdef __ARM_BIG_ENDIAN
- /* If data1 contains a zero byte, then syndrome will contain a 1 in
- bit 7 of that byte. Otherwise, the highest set bit in the
- syndrome will highlight the first different bit. It is therefore
- sufficient to extract the eight bits starting with the syndrome
- bit. */
- clz tmp1, \synd
- lsl r1, \d2, tmp1 /* r1 (src2) is no longer needed as a pointer here. */
- .if \restore_r6
- ldrd r6, r7, [sp, #8]
- .endif
- .cfi_restore 6
- .cfi_restore 7
- lsl \d1, \d1, tmp1
- .cfi_remember_state
- lsr result, \d1, #24 /* Top eight bits of the shifted src1 word. */
- ldrd r4, r5, [sp], #16 /* Reload r4/r5 and release the 16-byte frame. */
- .cfi_restore 4
- .cfi_restore 5
- sub result, result, r1, lsr #24 /* result = src1 bits - src2 bits. */
- bx lr
-#else
- /* To use the big-endian trick we'd have to reverse all three words.
- That's slower than this approach. */
- rev \synd, \synd
- clz tmp1, \synd
- bic tmp1, tmp1, #7 /* Round the bit index down to a whole byte. */
- lsr r1, \d2, tmp1 /* Bring the selected byte of the src2 word to bits 7:0. */
- .cfi_remember_state
- .if \restore_r6
- ldrd r6, r7, [sp, #8]
- .endif
- .cfi_restore 6
- .cfi_restore 7
- lsr \d1, \d1, tmp1 /* Same for the src1 word. */
- and result, \d1, #255
- and r1, r1, #255
- ldrd r4, r5, [sp], #16 /* Reload r4/r5 and release the 16-byte frame. */
- .cfi_restore 4
- .cfi_restore 5
- sub result, result, r1 /* result = src1 byte - src2 byte. */
-
- bx lr
-#endif
- .endm
-
- .text
- .p2align 5
-.Lstrcmp_start_addr: /* Start marker used by the .size directive at the end. */
-#if STRCMP_NO_PRECHECK == 0
-.Lfastpath_exit: /* Reached from the pre-check, before any register has been saved. */
- sub r0, r2, r3 /* Return first byte of src1 minus first byte of src2. */
- bx lr
- nop /* NOTE(review): presumably padding ahead of the entry point - confirm. */
-#endif
-def_fn strcmp
-#if STRCMP_NO_PRECHECK == 0
- ldrb r2, [src1]
- ldrb r3, [src2]
- cmp r2, #1 /* C clear and Z clear when the first byte of src1 is NUL. */
- it cs
- cmpcs r2, r3 /* Otherwise compare the two first bytes. */
- bne .Lfastpath_exit /* First byte is NUL or differs: return immediately. */
-#endif
- .cfi_startproc
- strd r4, r5, [sp, #-16]! /* Open a 16-byte frame: r4/r5 at sp+0, r6/r7 at sp+8. */
- .cfi_def_cfa_offset 16
- .cfi_offset 4, -16
- .cfi_offset 5, -12
- orr tmp1, src1, src2
- strd r6, r7, [sp, #8]
- .cfi_offset 6, -8
- .cfi_offset 7, -4
- mvn const_m1, #0 /* const_m1 = 0xffffffff. */
- lsl r2, tmp1, #29 /* Keep only the low three bits of (src1 | src2). */
- cbz r2, .Lloop_aligned8 /* Both pointers are 8-byte aligned. */
-
-.Lnot_aligned:
- eor tmp1, src1, src2
- tst tmp1, #7
- bne .Lmisaligned8 /* The pointers differ in their low three bits. */
-
- /* Deal with mutual misalignment by aligning downwards and then
- masking off the unwanted loaded data to prevent a difference. */
- and tmp1, src1, #7
- bic src1, src1, #7
- and tmp2, tmp1, #3
- bic src2, src2, #7
- lsl tmp2, tmp2, #3 /* Bytes -> bits. */
- ldrd data1a, data1b, [src1], #16
- tst tmp1, #4 /* Flags consumed by the beq below; nothing in between sets them. */
- ldrd data2a, data2b, [src2], #16
- /* In thumb code we can't use MVN with a register shift, but
- we do have ORN. */
- S2HI tmp1, const_m1, tmp2
- orn data1a, data1a, tmp1 /* Force the bytes before the string start to 0xff in both words. */
- orn data2a, data2a, tmp1
- beq .Lstart_realigned8 /* String starts in the first word of the pair. */
- orn data1b, data1b, tmp1 /* Otherwise mask the second word and neutralise the first. */
- mov data1a, const_m1
- orn data2b, data2b, tmp1
- mov data2a, const_m1
- b .Lstart_realigned8
-
- /* Unwind the inner loop by a factor of 2, giving 16 bytes per
- pass. */
- .p2align 5,,12 /* Don't start in the tail bytes of a cache line. */
- .p2align 2 /* Always word aligned. */
-.Lloop_aligned8:
- ldrd data1a, data1b, [src1], #16
- ldrd data2a, data2b, [src2], #16
-.Lstart_realigned8:
- uadd8 syndrome_b, data1a, const_m1 /* Only want GE bits. */
- eor syndrome_a, data1a, data2a
- sel syndrome_a, syndrome_a, const_m1 /* Per byte: the XOR where data1a is non-zero, else 0xff. */
- cbnz syndrome_a, .Ldiff_in_a
- uadd8 syndrome_b, data1b, const_m1 /* Only want GE bits. */
- eor syndrome_b, data1b, data2b
- sel syndrome_b, syndrome_b, const_m1
- cbnz syndrome_b, .Ldiff_in_b
-
- ldrd data1a, data1b, [src1, #-8] /* Second 8 bytes of this 16-byte pass. */
- ldrd data2a, data2b, [src2, #-8]
- uadd8 syndrome_b, data1a, const_m1 /* Only want GE bits. */
- eor syndrome_a, data1a, data2a
- sel syndrome_a, syndrome_a, const_m1
- uadd8 syndrome_b, data1b, const_m1 /* Only want GE bits. */
- eor syndrome_b, data1b, data2b
- sel syndrome_b, syndrome_b, const_m1
- /* Can't use CBZ for backwards branch. */
- orrs syndrome_b, syndrome_b, syndrome_a /* Only need if s_a == 0 */
- beq .Lloop_aligned8
-
-.Ldiff_found:
- cbnz syndrome_a, .Ldiff_in_a
-
-.Ldiff_in_b:
- strcmp_epilogue_aligned syndrome_b, data1b, data2b 1 /* Last argument 1: reload r6/r7. */
-
-.Ldiff_in_a:
- .cfi_restore_state
- strcmp_epilogue_aligned syndrome_a, data1a, data2a 1 /* Last argument 1: reload r6/r7. */
-
- .cfi_restore_state
-.Lmisaligned8:
- tst tmp1, #3 /* tmp1 still holds src1 ^ src2 here. */
- bne .Lmisaligned4 /* The pointers differ in their low two bits. */
- ands tmp1, src1, #3
- bne .Lmutual_align4 /* Same word offset in both, but not zero. */
-
- /* Unrolled by a factor of 2, to reduce the number of post-increment
- operations. */
-.Lloop_aligned4:
- ldr data1, [src1], #8
- ldr data2, [src2], #8
-.Lstart_realigned4:
- uadd8 syndrome, data1, const_m1 /* Only need GE bits. */
- eor syndrome, data1, data2
- sel syndrome, syndrome, const_m1
- cbnz syndrome, .Laligned4_done
- ldr data1, [src1, #-4] /* Second word of this 8-byte pass. */
- ldr data2, [src2, #-4]
- uadd8 syndrome, data1, const_m1
- eor syndrome, data1, data2
- sel syndrome, syndrome, const_m1
- cmp syndrome, #0
- beq .Lloop_aligned4
-
-.Laligned4_done:
- strcmp_epilogue_aligned syndrome, data1, data2, 0 /* Last argument 0: no r6/r7 reload. */
-
-.Lmutual_align4:
- .cfi_restore_state
- /* Deal with mutual misalignment by aligning downwards and then
- masking off the unwanted loaded data to prevent a difference. */
- lsl tmp1, tmp1, #3 /* Bytes -> bits. */
- bic src1, src1, #3
- ldr data1, [src1], #8
- bic src2, src2, #3
- ldr data2, [src2], #8
-
- /* In thumb code we can't use MVN with a register shift, but
- we do have ORN. */
- S2HI tmp1, const_m1, tmp1
- orn data1, data1, tmp1
- orn data2, data2, tmp1
- b .Lstart_realigned4
-
-.Lmisaligned4:
- ands tmp1, src1, #3 /* tmp1 = byte offset of src1 within its word. */
- beq .Lsrc1_aligned
- sub src2, src2, tmp1 /* Move src2 back by the same amount. */
- bic src1, src1, #3
- lsls tmp1, tmp1, #31 /* Z set => offset 2; C set with Z clear => offset 3; otherwise offset 1. */
- ldr data1, [src1], #4
- beq .Laligned_m2
- bcs .Laligned_m1
-
-#if STRCMP_NO_PRECHECK == 1
- ldrb data2, [src2, #1]
- uxtb tmp1, data1, ror #BYTE1_OFFSET
- subs tmp1, tmp1, data2
- bne .Lmisaligned_exit
- cbz data2, .Lmisaligned_exit
-
-.Laligned_m2:
- ldrb data2, [src2, #2]
- uxtb tmp1, data1, ror #BYTE2_OFFSET
- subs tmp1, tmp1, data2
- bne .Lmisaligned_exit
- cbz data2, .Lmisaligned_exit
-
-.Laligned_m1:
- ldrb data2, [src2, #3]
- uxtb tmp1, data1, ror #BYTE3_OFFSET
- subs tmp1, tmp1, data2
- bne .Lmisaligned_exit
- add src2, src2, #4
- cbnz data2, .Lsrc1_aligned
-#else /* STRCMP_NO_PRECHECK */
- /* If we've done the pre-check, then we don't need to check the
- first byte again here. */
- ldrb data2, [src2, #2]
- uxtb tmp1, data1, ror #BYTE2_OFFSET
- subs tmp1, tmp1, data2 /* tmp1 = byte difference; it becomes the result on exit. */
- bne .Lmisaligned_exit
- cbz data2, .Lmisaligned_exit /* Both bytes are NUL: tmp1 is 0 here. */
-
-.Laligned_m2:
- ldrb data2, [src2, #3]
- uxtb tmp1, data1, ror #BYTE3_OFFSET
- subs tmp1, tmp1, data2
- bne .Lmisaligned_exit
- cbnz data2, .Laligned_m1
-#endif
-
-.Lmisaligned_exit:
- .cfi_remember_state
- mov result, tmp1
- ldr r4, [sp], #16 /* Only r4 is reloaded: the code leading here writes tmp1 (r4) but not r5-r7. */
- .cfi_restore 4
- bx lr
-
-#if STRCMP_NO_PRECHECK == 0
-.Laligned_m1:
- add src2, src2, #4
-#endif
-.Lsrc1_aligned:
- .cfi_restore_state
- /* src1 is word aligned, but src2 has no common alignment
- with it. */
- ldr data1, [src1], #4
- lsls tmp1, src2, #31 /* C=src2[1], Z=!src2[0]. */
-
- bic src2, src2, #3
- ldr data2, [src2], #4
- bhi .Loverlap1 /* C=1, Z=0 => src2[1:0] = 0b11. */
- bcs .Loverlap2 /* C=1, Z=1 => src2[1:0] = 0b10. */
-
- /* (overlap3) C=0, Z=0 => src2[1:0] = 0b01. */
-.Loverlap3:
- bic tmp1, data1, #MSB
- uadd8 syndrome, data1, const_m1
- eors syndrome, tmp1, data2, S2LO #8
- sel syndrome, syndrome, const_m1
- bne 4f /* Flags come from the eors; sel does not change them. */
- cbnz syndrome, 5f
- ldr data2, [src2], #4
- eor tmp1, tmp1, data1
- cmp tmp1, data2, S2HI #24
- bne 6f
- ldr data1, [src1], #4
- b .Loverlap3
-4:
- S2LO data2, data2, #8
- b .Lstrcmp_tail
-
-5:
- bics syndrome, syndrome, #MSB
- bne .Lstrcmp_done_equal
-
- /* We can only get here if the MSB of data1 contains 0, so
- fast-path the exit. */
- ldrb result, [src2]
- .cfi_remember_state
- ldrd r4, r5, [sp], #16
- .cfi_restore 4
- .cfi_restore 5
- /* R6/7 Not used in this sequence. */
- .cfi_restore 6
- .cfi_restore 7
- neg result, result /* Return minus the src2 byte just loaded. */
- bx lr
-
-6:
- .cfi_restore_state
- S2LO data1, data1, #24
- and data2, data2, #LSB
- b .Lstrcmp_tail
-
- .p2align 5,,12 /* Ensure at least 3 instructions in cache line. */
-.Loverlap2:
- and tmp1, data1, const_m1, S2LO #16
- uadd8 syndrome, data1, const_m1
- eors syndrome, tmp1, data2, S2LO #16
- sel syndrome, syndrome, const_m1
- bne 4f
- cbnz syndrome, 5f
- ldr data2, [src2], #4
- eor tmp1, tmp1, data1
- cmp tmp1, data2, S2HI #16
- bne 6f
- ldr data1, [src1], #4
- b .Loverlap2
-4:
- S2LO data2, data2, #16
- b .Lstrcmp_tail
-5:
- ands syndrome, syndrome, const_m1, S2LO #16
- bne .Lstrcmp_done_equal
-
- ldrh data2, [src2]
- S2LO data1, data1, #16
-#ifdef __ARM_BIG_ENDIAN
- lsl data2, data2, #16
-#endif
- b .Lstrcmp_tail
-
-6:
- S2LO data1, data1, #16
- and data2, data2, const_m1, S2LO #16
- b .Lstrcmp_tail
-
- .p2align 5,,12 /* Ensure at least 3 instructions in cache line. */
-.Loverlap1:
- and tmp1, data1, #LSB
- uadd8 syndrome, data1, const_m1
- eors syndrome, tmp1, data2, S2LO #24
- sel syndrome, syndrome, const_m1
- bne 4f
- cbnz syndrome, 5f
- ldr data2, [src2], #4
- eor tmp1, tmp1, data1
- cmp tmp1, data2, S2HI #8
- bne 6f
- ldr data1, [src1], #4
- b .Loverlap1
-4:
- S2LO data2, data2, #24
- b .Lstrcmp_tail
-5:
- tst syndrome, #LSB
- bne .Lstrcmp_done_equal
- ldr data2, [src2]
-6:
- S2LO data1, data1, #8
- bic data2, data2, #MSB
- b .Lstrcmp_tail
-
-.Lstrcmp_done_equal:
- mov result, #0
- .cfi_remember_state
- ldrd r4, r5, [sp], #16
- .cfi_restore 4
- .cfi_restore 5
- /* R6/7 not used in this sequence. */
- .cfi_restore 6
- .cfi_restore 7
- bx lr
-
-.Lstrcmp_tail:
- .cfi_restore_state
-#ifndef __ARM_BIG_ENDIAN
- rev data1, data1
- rev data2, data2
- /* Now everything looks big-endian... */
-#endif
- uadd8 tmp1, data1, const_m1
- eor tmp1, data1, data2
- sel syndrome, tmp1, const_m1
- clz tmp1, syndrome
- lsl data1, data1, tmp1
- lsl data2, data2, tmp1
- lsr result, data1, #24
- ldrd r4, r5, [sp], #16
- .cfi_restore 4
- .cfi_restore 5
- /* R6/7 not used in this sequence. */
- .cfi_restore 6
- .cfi_restore 7
- sub result, result, data2, lsr #24
- bx lr
- .cfi_endproc
- .size strcmp, . - .Lstrcmp_start_addr /* Measured from the start marker, so it also counts the .Lfastpath_exit stub before the entry label. */
diff --git a/contrib/cortex-strings/src/thumb-2/strcpy.c b/contrib/cortex-strings/src/thumb-2/strcpy.c
deleted file mode 100644
--- a/contrib/cortex-strings/src/thumb-2/strcpy.c
+++ /dev/null
@@ -1,173 +0,0 @@
-/*
- * Copyright (c) 2008 ARM Ltd
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. The name of the company may not be used to endorse or promote
- * products derived from this software without specific prior written
- * permission.
- *
- * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
- * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
- * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
- * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-/* For GLIBC:
-#include <string.h>
-#include <memcopy.h>
-
-#undef strcmp
-*/
-
-#ifdef __thumb2__
-#define magic1(REG) "#0x01010101" /* Thumb-2: REG is ignored, the constant is an immediate. */
-#define magic2(REG) "#0x80808080"
-#else
-#define magic1(REG) #REG /* Otherwise: the register name; r5 is loaded with 0x01010101 below. */
-#define magic2(REG) #REG ", lsl #7" /* The same register shifted left by 7, i.e. 0x80808080. */
-#endif
-
-char* __attribute__((naked))
-strcpy (char* dst, const char* src)
-{
- asm (
-#if !(defined(__OPTIMIZE_SIZE__) || defined (PREFER_SIZE_OVER_SPEED) || \
- (defined (__thumb__) && !defined (__thumb2__)))
- "pld [r1, #0]\n\t"
- "eor r2, r0, r1\n\t"
- "mov ip, r0\n\t" /* ip is the write pointer; r0 (dst) is never modified and is the return value. */
- "tst r2, #3\n\t"
- "bne 4f\n\t" /* src and dst differ in their low two bits: byte copy. */
- "tst r1, #3\n\t"
- "bne 3f\n" /* Same word offset, but not zero: align first. */
- "5:\n\t"
-#ifndef __thumb2__
- "str r5, [sp, #-4]!\n\t"
- "mov r5, #0x01\n\t" /* Build 0x01010101 in r5 for magic1/magic2. */
- "orr r5, r5, r5, lsl #8\n\t"
- "orr r5, r5, r5, lsl #16\n\t"
-#endif
-
- "str r4, [sp, #-4]!\n\t"
- "tst r1, #4\n\t" /* Flags consumed by the beq after the load below. */
- "ldr r3, [r1], #4\n\t"
- "beq 2f\n\t"
- "sub r2, r3, "magic1(r5)"\n\t" /* (w - 0x01010101) & ~w & 0x80808080 is non-zero iff the word w has a zero byte. */
- "bics r2, r2, r3\n\t"
- "tst r2, "magic2(r5)"\n\t"
- "itt eq\n\t"
- "streq r3, [ip], #4\n\t"
- "ldreq r3, [r1], #4\n"
- "bne 1f\n\t" /* The word holds the terminator: finish byte by byte. */
- /* Inner loop. We now know that r1 is 64-bit aligned, so we
- can safely fetch up to two words. This allows us to avoid
- load stalls. */
- ".p2align 2\n"
- "2:\n\t"
- "pld [r1, #8]\n\t"
- "ldr r4, [r1], #4\n\t"
- "sub r2, r3, "magic1(r5)"\n\t"
- "bics r2, r2, r3\n\t"
- "tst r2, "magic2(r5)"\n\t"
- "sub r2, r4, "magic1(r5)"\n\t" /* Starts the test of the second word; a plain sub leaves the flags for the bne. */
- "bne 1f\n\t"
- "str r3, [ip], #4\n\t"
- "bics r2, r2, r4\n\t"
- "tst r2, "magic2(r5)"\n\t"
- "itt eq\n\t"
- "ldreq r3, [r1], #4\n\t"
- "streq r4, [ip], #4\n\t"
- "beq 2b\n\t"
- "mov r3, r4\n" /* The terminator is in the second word: let the byte loop handle it. */
- "1:\n\t"
-#ifdef __ARMEB__
- "rors r3, r3, #24\n\t"
-#endif
- "strb r3, [ip], #1\n\t" /* Emit the bytes of r3 one at a time until the NUL has been stored. */
- "tst r3, #0xff\n\t"
-#ifdef __ARMEL__
- "ror r3, r3, #8\n\t"
-#endif
- "bne 1b\n\t"
- "ldr r4, [sp], #4\n\t"
-#ifndef __thumb2__
- "ldr r5, [sp], #4\n\t"
-#endif
- "BX LR\n"
-
- /* Strings have the same offset from word alignment, but it's
- not zero. */
- "3:\n\t"
- "tst r1, #1\n\t"
- "beq 1f\n\t"
- "ldrb r2, [r1], #1\n\t" /* Copy one byte to reach halfword alignment. */
- "strb r2, [ip], #1\n\t"
- "cmp r2, #0\n\t"
- "it eq\n"
- "BXEQ LR\n" /* That byte was the terminator; nothing has been pushed yet. */
- "1:\n\t"
- "tst r1, #2\n\t"
- "beq 5b\n\t"
- "ldrh r2, [r1], #2\n\t" /* Copy one halfword to reach word alignment. */
-#ifdef __ARMEB__
- "tst r2, #0xff00\n\t"
- "iteet ne\n\t"
- "strneh r2, [ip], #2\n\t" /* NOTE(review): pre-UAL mnemonic order (strneh/streqb); confirm the assembler accepts it in this mode. */
- "lsreq r2, r2, #8\n\t"
- "streqb r2, [ip]\n\t"
- "tstne r2, #0xff\n\t"
-#else
- "tst r2, #0xff\n\t"
- "itet ne\n\t"
- "strneh r2, [ip], #2\n\t" /* NOTE(review): pre-UAL mnemonic order (strneh/streqb); confirm the assembler accepts it in this mode. */
- "streqb r2, [ip]\n\t" /* First byte is NUL: store only the terminator. */
- "tstne r2, #0xff00\n\t"
-#endif
- "bne 5b\n\t" /* Neither byte was NUL: continue with the word loop. */
- "BX LR\n"
-
- /* src and dst do not have a common word-alignment. Fall back to
- byte copying. */
- "4:\n\t"
- "ldrb r2, [r1], #1\n\t"
- "strb r2, [ip], #1\n\t"
- "cmp r2, #0\n\t"
- "bne 4b\n\t"
- "BX LR"
-
-#elif !defined (__thumb__) || defined (__thumb2__)
- "mov r3, r0\n\t" /* Size-optimised variant: plain byte loop using post-indexed accesses. */
- "1:\n\t"
- "ldrb r2, [r1], #1\n\t"
- "strb r2, [r3], #1\n\t"
- "cmp r2, #0\n\t"
- "bne 1b\n\t"
- "BX LR"
-#else
- "mov r3, r0\n\t" /* Thumb-1 variant: the same byte loop with explicit pointer increments. */
- "1:\n\t"
- "ldrb r2, [r1]\n\t"
- "add r1, r1, #1\n\t"
- "strb r2, [r3]\n\t"
- "add r3, r3, #1\n\t"
- "cmp r2, #0\n\t"
- "bne 1b\n\t"
- "BX LR"
-#endif
- );
-}
-/* For GLIBC: libc_hidden_builtin_def (strcpy) */
diff --git a/contrib/cortex-strings/src/thumb-2/strlen.S b/contrib/cortex-strings/src/thumb-2/strlen.S
deleted file mode 100644
--- a/contrib/cortex-strings/src/thumb-2/strlen.S
+++ /dev/null
@@ -1,150 +0,0 @@
-/* Copyright (c) 2010-2011,2013 Linaro Limited
- All rights reserved.
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions
- are met:
-
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
-
- * Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in the
- documentation and/or other materials provided with the distribution.
-
- * Neither the name of Linaro Limited nor the names of its
- contributors may be used to endorse or promote products derived
- from this software without specific prior written permission.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-/*
- Assumes:
- ARMv6T2, AArch32
-
- */
-
- .macro def_fn f p2align=0 /* Open global function \f in .text; p2align is the log2 alignment (default 0). */
- .text
- .p2align \p2align
- .global \f
- .type \f, %function
-\f: /* The entry label itself; the matching .size is written by the user of the macro. */
- .endm
-
-#ifdef __ARMEB__
-#define S2LO lsl
-#define S2HI lsr
-#else
-#define S2LO lsr
-#define S2HI lsl
-#endif
-
- /* This code requires Thumb. */
- .thumb
- .syntax unified
-
-/* Parameters and result. */
-#define srcin r0
-#define result r0
-
-/* Internal variables. */
-#define src r1
-#define data1a r2
-#define data1b r3
-#define const_m1 r12
-#define const_0 r4
-#define tmp1 r4 /* Overlaps const_0 */
-#define tmp2 r5
-
-def_fn strlen p2align=6
- pld [srcin, #0]
- strd r4, r5, [sp, #-8]! /* Save r4/r5, used as tmp1/const_0 and tmp2. */
- bic src, srcin, #7 /* src = srcin rounded down to an 8-byte boundary. */
- mvn const_m1, #0 /* const_m1 = 0xffffffff. */
- ands tmp1, srcin, #7 /* tmp1 = srcin & 7, i.e. 8 - (bytes to the next 8-byte boundary). */
- pld [src, #32]
- bne.w .Lmisaligned8
- mov const_0, #0
- mov result, #-8 /* Biased by -8 because the loop adds 8 before its first test. */
-.Lloop_aligned:
- /* Bytes 0-7. */
- ldrd data1a, data1b, [src]
- pld [src, #64]
- add result, result, #8
-.Lstart_realigned:
- uadd8 data1a, data1a, const_m1 /* Saturating GE<0:3> set. */
- sel data1a, const_0, const_m1 /* Select based on GE<0:3>: 0xff marks each zero byte. */
- uadd8 data1b, data1b, const_m1
- sel data1b, data1a, const_m1 /* Only used if d1a == 0. */
- cbnz data1b, .Lnull_found
-
- /* Bytes 8-15. */
- ldrd data1a, data1b, [src, #8]
- uadd8 data1a, data1a, const_m1 /* Saturating GE<0:3> set. */
- add result, result, #8
- sel data1a, const_0, const_m1 /* Select based on GE<0:3>. */
- uadd8 data1b, data1b, const_m1
- sel data1b, data1a, const_m1 /* Only used if d1a == 0. */
- cbnz data1b, .Lnull_found
-
- /* Bytes 16-23. */
- ldrd data1a, data1b, [src, #16]
- uadd8 data1a, data1a, const_m1 /* Saturating GE<0:3> set. */
- add result, result, #8
- sel data1a, const_0, const_m1 /* Select based on GE<0:3>. */
- uadd8 data1b, data1b, const_m1
- sel data1b, data1a, const_m1 /* Only used if d1a == 0. */
- cbnz data1b, .Lnull_found
-
- /* Bytes 24-31. */
- ldrd data1a, data1b, [src, #24]
- add src, src, #32 /* Advance to the next 32-byte group. */
- uadd8 data1a, data1a, const_m1 /* Saturating GE<0:3> set. */
- add result, result, #8
- sel data1a, const_0, const_m1 /* Select based on GE<0:3>. */
- uadd8 data1b, data1b, const_m1
- sel data1b, data1a, const_m1 /* Only used if d1a == 0. */
- cmp data1b, #0
- beq .Lloop_aligned
-
-.Lnull_found:
- cmp data1a, #0
- itt eq
- addeq result, result, #4 /* No zero byte in the first word: skip its four bytes. */
- moveq data1a, data1b
-#ifndef __ARMEB__
- rev data1a, data1a /* Little-endian: byte-reverse so clz finds the lowest-addressed marker. */
-#endif
- clz data1a, data1a
- ldrd r4, r5, [sp], #8 /* Restore r4/r5 and release the 8-byte frame. */
- add result, result, data1a, lsr #3 /* Bits -> Bytes. */
- bx lr
-
-.Lmisaligned8:
- ldrd data1a, data1b, [src]
- and tmp2, tmp1, #3
- rsb result, tmp1, #0 /* result = -tmp1, so the bytes before srcin are not counted. */
- lsl tmp2, tmp2, #3 /* Bytes -> bits. */
- tst tmp1, #4 /* Flags consumed by the itt below; nothing in between sets them. */
- pld [src, #64]
- S2HI tmp2, const_m1, tmp2
- orn data1a, data1a, tmp2 /* Force the bytes before the string start to 0xff (non-zero). */
- itt ne
- ornne data1b, data1b, tmp2 /* Start lies in the second word: mask it and neutralise the first. */
- movne data1a, const_m1
- mov const_0, #0 /* const_0 shares r4 with tmp1, so it is set only after tmp1's last use. */
- b .Lstart_realigned
- .size strlen, . - strlen
-
diff --git a/contrib/cortex-strings/src/thumb/aeabi_idiv.S b/contrib/cortex-strings/src/thumb/aeabi_idiv.S
deleted file mode 100644
--- a/contrib/cortex-strings/src/thumb/aeabi_idiv.S
+++ /dev/null
@@ -1,318 +0,0 @@
-/*
- * Copyright (c) 2014 ARM Ltd
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. The name of the company may not be used to endorse or promote
- * products derived from this software without specific prior written
- * permission.
- *
- * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
- * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
- * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
- * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-/* An executable stack is *not* required for these functions. */
-
-.section .note.GNU-stack,"",%progbits
-.previous
-.eabi_attribute 25, 1
-
-/* ANSI concatenation macros. */
-
-#define CONCAT1(a, b) CONCAT2(a, b)
-#define CONCAT2(a, b) a ## b
-
-/* Use the right prefix for global labels. */
-
-#define SYM(x) CONCAT1 (__USER_LABEL_PREFIX__, x)
-
-#define TYPE(x) .type SYM(x),function
-#define SIZE(x) .size SYM(x), . - SYM(x)
-#define LSYM(x) .x
-
-.macro cfi_start start_label, end_label
- .pushsection .debug_frame
-LSYM(Lstart_frame):
- .4byte LSYM(Lend_cie) - LSYM(Lstart_cie)
-LSYM(Lstart_cie):
- .4byte 0xffffffff
- .byte 0x1
- .ascii "\0"
- .uleb128 0x1
- .sleb128 -4
- .byte 0xe
- .byte 0xc
- .uleb128 0xd
- .uleb128 0x0
-
- .align 2
-LSYM(Lend_cie):
- .4byte LSYM(Lend_fde)-LSYM(Lstart_fde)
-LSYM(Lstart_fde):
- .4byte LSYM(Lstart_frame)
- .4byte \start_label
- .4byte \end_label-\start_label
- .popsection
-.endm
-
-.macro cfi_end end_label
- .pushsection .debug_frame
- .align 2
-LSYM(Lend_fde):
- .popsection
-\end_label:
-.endm
-
-.macro THUMB_LDIV0 name signed
- push {r0, lr}
- movs r0, #0
- bl SYM(__aeabi_idiv0)
- pop {r1, pc}
-.endm
-
-.macro FUNC_END name
- SIZE (__\name)
-.endm
-
-.macro DIV_FUNC_END name signed
- cfi_start __\name, LSYM(Lend_div0)
-LSYM(Ldiv0):
- THUMB_LDIV0 \name \signed
- cfi_end LSYM(Lend_div0)
- FUNC_END \name
-.endm
-
-.macro THUMB_FUNC_START name
- .globl SYM (\name)
- TYPE (\name)
- .thumb_func
-SYM (\name):
-.endm
-
-.macro FUNC_START name
- .text
- .globl SYM (__\name)
- TYPE (__\name)
- .align 0
- .force_thumb
- .thumb_func
- .syntax unified
-SYM (__\name):
-.endm
-
-.macro FUNC_ALIAS new old
- .globl SYM (__\new)
- .thumb_set SYM (__\new), SYM (__\old)
-.endm
-
-/* Register aliases. */
-work .req r4
-dividend .req r0
-divisor .req r1
-overdone .req r2
-result .req r2
-curbit .req r3
-
-/* ------------------------------------------------------------------------ */
-/* Bodies of the division and modulo routines. */
-/* ------------------------------------------------------------------------ */
-.macro BranchToDiv n, label
- lsrs curbit, dividend, \n
- cmp curbit, divisor
- bcc \label
-.endm
-
-.macro DoDiv n
- lsrs curbit, dividend, \n
- cmp curbit, divisor
- bcc 1f
- lsls curbit, divisor, \n
- subs dividend, dividend, curbit
-
-1: adcs result, result
-.endm
-
-.macro THUMB1_Div_Positive
- movs result, #0
- BranchToDiv #1, LSYM(Lthumb1_div1)
- BranchToDiv #4, LSYM(Lthumb1_div4)
- BranchToDiv #8, LSYM(Lthumb1_div8)
- BranchToDiv #12, LSYM(Lthumb1_div12)
- BranchToDiv #16, LSYM(Lthumb1_div16)
-LSYM(Lthumb1_div_large_positive):
- movs result, #0xff
- lsls divisor, divisor, #8
- rev result, result
- lsrs curbit, dividend, #16
- cmp curbit, divisor
- bcc 1f
- asrs result, #8
- lsls divisor, divisor, #8
- beq LSYM(Ldivbyzero_waypoint)
-
-1: lsrs curbit, dividend, #12
- cmp curbit, divisor
- bcc LSYM(Lthumb1_div12)
- b LSYM(Lthumb1_div16)
-LSYM(Lthumb1_div_loop):
- lsrs divisor, divisor, #8
-LSYM(Lthumb1_div16):
- Dodiv #15
- Dodiv #14
- Dodiv #13
- Dodiv #12
-LSYM(Lthumb1_div12):
- Dodiv #11
- Dodiv #10
- Dodiv #9
- Dodiv #8
- bcs LSYM(Lthumb1_div_loop)
-LSYM(Lthumb1_div8):
- Dodiv #7
- Dodiv #6
- Dodiv #5
-LSYM(Lthumb1_div5):
- Dodiv #4
-LSYM(Lthumb1_div4):
- Dodiv #3
-LSYM(Lthumb1_div3):
- Dodiv #2
-LSYM(Lthumb1_div2):
- Dodiv #1
-LSYM(Lthumb1_div1):
- subs divisor, dividend, divisor
- bcs 1f
- mov divisor, dividend
-
-1: adcs result, result
- mov dividend, result
- bx lr
-
-LSYM(Ldivbyzero_waypoint):
- b LSYM(Ldiv0)
-.endm
-
-.macro THUMB1_Div_Negative
- lsrs result, divisor, #31
- beq 1f
- rsbs divisor, divisor, #0
-
-1: asrs curbit, dividend, #32
- bcc 2f
- rsbs dividend, dividend, #0
-
-2: eors curbit, result
- movs result, #0
- mov ip, curbit
- BranchToDiv #4, LSYM(Lthumb1_div_negative4)
- BranchToDiv #8, LSYM(Lthumb1_div_negative8)
-LSYM(Lthumb1_div_large):
- movs result, #0xfc
- lsls divisor, divisor, #6
- rev result, result
- lsrs curbit, dividend, #8
- cmp curbit, divisor
- bcc LSYM(Lthumb1_div_negative8)
-
- lsls divisor, divisor, #6
- asrs result, result, #6
- cmp curbit, divisor
- bcc LSYM(Lthumb1_div_negative8)
-
- lsls divisor, divisor, #6
- asrs result, result, #6
- cmp curbit, divisor
- bcc LSYM(Lthumb1_div_negative8)
-
- lsls divisor, divisor, #6
- beq LSYM(Ldivbyzero_negative)
- asrs result, result, #6
- b LSYM(Lthumb1_div_negative8)
-LSYM(Lthumb1_div_negative_loop):
- lsrs divisor, divisor, #6
-LSYM(Lthumb1_div_negative8):
- DoDiv #7
- DoDiv #6
- DoDiv #5
- DoDiv #4
-LSYM(Lthumb1_div_negative4):
- DoDiv #3
- DoDiv #2
- bcs LSYM(Lthumb1_div_negative_loop)
- DoDiv #1
- subs divisor, dividend, divisor
- bcs 1f
- mov divisor, dividend
-
-1: mov curbit, ip
- adcs result, result
- asrs curbit, curbit, #1
- mov dividend, result
- bcc 2f
- rsbs dividend, dividend, #0
- cmp curbit, #0
-
-2: bpl 3f
- rsbs divisor, divisor, #0
-
-3: bx lr
-
-LSYM(Ldivbyzero_negative):
- mov curbit, ip
- asrs curbit, curbit, #1
- bcc LSYM(Ldiv0)
- rsbs dividend, dividend, #0
-.endm
-
-/* ------------------------------------------------------------------------ */
-/* Start of the Real Functions */
-/* ------------------------------------------------------------------------ */
-
- FUNC_START aeabi_idiv0
- bx lr
- FUNC_END aeabi_idiv0
-
- FUNC_START divsi3
- FUNC_ALIAS aeabi_idiv divsi3
-
-LSYM(divsi3_skip_div0_test):
- mov curbit, dividend
- orrs curbit, divisor
- bmi LSYM(Lthumb1_div_negative)
-
-LSYM(Lthumb1_div_positive):
- THUMB1_Div_Positive
-
-LSYM(Lthumb1_div_negative):
- THUMB1_Div_Negative
-
- DIV_FUNC_END divsi3 signed
-
- FUNC_START aeabi_idivmod
-
- cmp r1, #0
- beq LSYM(Ldiv0)
- push {r0, r1, lr}
- bl LSYM(divsi3_skip_div0_test)
- POP {r1, r2, r3}
- mul r2, r0
- sub r1, r1, r2
- bx r3
-
- FUNC_END aeabi_idivmod
-/* ------------------------------------------------------------------------ */
diff --git a/contrib/cortex-strings/src/thumb/strcmp-armv6m.S b/contrib/cortex-strings/src/thumb/strcmp-armv6m.S
deleted file mode 100644
--- a/contrib/cortex-strings/src/thumb/strcmp-armv6m.S
+++ /dev/null
@@ -1,143 +0,0 @@
-/*
- * Copyright (c) 2014 ARM Ltd
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. The name of the company may not be used to endorse or promote
- * products derived from this software without specific prior written
- * permission.
- *
- * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
- * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
- * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
- * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-/* Implementation of strcmp for ARMv6m. This version is only used in
- ARMv6-M when we want an efficient implementation. Otherwize if the
- code size is preferred, strcmp-armv4t.S will be used. */
-
- .thumb_func
- .syntax unified
- .arch armv6-m
-
- .macro DoSub n, label
- subs r0, r0, r1
-#ifdef __ARM_BIG_ENDIAN
- lsrs r1, r4, \n
-#else
- lsls r1, r4, \n
-#endif
- orrs r1, r0
- bne \label
- .endm
-
- .macro Byte_Test n, label
- lsrs r0, r2, \n
- lsrs r1, r3, \n
- DoSub \n, \label
- .endm
-
- .text
- .p2align 0
- .global strcmp
- .type strcmp, %function
-strcmp:
- .cfi_startproc
- mov r2, r0
- push {r4, r5, r6, lr}
- orrs r2, r1
- lsls r2, r2, #30
- bne 6f
- ldr r5, =0x01010101
- lsls r6, r5, #7
-1:
- ldmia r0!, {r2}
- ldmia r1!, {r3}
- subs r4, r2, r5
- bics r4, r2
- ands r4, r6
- beq 3f
-
-#ifdef __ARM_BIG_ENDIAN
- Byte_Test #24, 4f
- Byte_Test #16, 4f
- Byte_Test #8, 4f
-
- b 7f
-3:
- cmp r2, r3
- beq 1b
- cmp r2, r3
-#else
- uxtb r0, r2
- uxtb r1, r3
- DoSub #24, 2f
-
- uxth r0, r2
- uxth r1, r3
- DoSub #16, 2f
-
- lsls r0, r2, #8
- lsls r1, r3, #8
- lsrs r0, r0, #8
- lsrs r1, r1, #8
- DoSub #8, 2f
-
- lsrs r0, r2, #24
- lsrs r1, r3, #24
- subs r0, r0, r1
-2:
- pop {r4, r5, r6, pc}
-
-3:
- cmp r2, r3
- beq 1b
- rev r0, r2
- rev r1, r3
- cmp r0, r1
-#endif
-
- bls 5f
- movs r0, #1
-4:
- pop {r4, r5, r6, pc}
-5:
- movs r0, #0
- mvns r0, r0
- pop {r4, r5, r6, pc}
-6:
- ldrb r2, [r0, #0]
- ldrb r3, [r1, #0]
- adds r0, #1
- adds r1, #1
- cmp r2, #0
- beq 7f
- cmp r2, r3
- bne 7f
- ldrb r2, [r0, #0]
- ldrb r3, [r1, #0]
- adds r0, #1
- adds r1, #1
- cmp r2, #0
- beq 7f
- cmp r2, r3
- beq 6b
-7:
- subs r0, r2, r3
- pop {r4, r5, r6, pc}
- .cfi_endproc
- .size strcmp, . - strcmp

File Metadata

Mime Type
text/plain
Expires
Thu, Feb 6, 9:43 AM (20 h, 51 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
16491370
Default Alt Text
D48500.diff (237 KB)

Event Timeline